macroblock.c 79.9 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1
/*****************************************************************************
Fiona Glaser's avatar
Fiona Glaser committed
2
 * macroblock.c: macroblock common functions
Laurent Aimar's avatar
Laurent Aimar committed
3
 *****************************************************************************
Sean McGovern's avatar
Sean McGovern committed
4
 * Copyright (C) 2003-2011 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5
 *
6 7
 * Authors: Fiona Glaser <fiona@x264.com>
 *          Laurent Aimar <fenrir@via.ecp.fr>
8
 *          Loren Merritt <lorenm@u.washington.edu>
Henrik Gramner's avatar
Henrik Gramner committed
9
 *          Henrik Gramner <hengar-6@student.ltu.se>
Laurent Aimar's avatar
Laurent Aimar committed
10 11 12 13 14 15 16 17 18 19 20 21 22
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
23
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Fiona Glaser's avatar
Fiona Glaser committed
24 25 26
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
Laurent Aimar's avatar
Laurent Aimar committed
27 28 29
 *****************************************************************************/

#include "common.h"
30
#include "encoder/me.h"
Laurent Aimar's avatar
Laurent Aimar committed
31

Fiona Glaser's avatar
Fiona Glaser committed
32 33 34 35
/* Motion-compensate plane `p` of the current partition from reference list
 * `list` into the fdec buffer using mc_luma.
 * Expects h, x, y, width, height, i_ref, mvx and mvy to be in scope at the
 * point of use. The explicit weight h->sh.weight[i_ref][p] is applied for
 * list 0 only; any other list uses x264_weight_none.
 * The macro expands to a single expression: the caller supplies the
 * terminating semicolon. */
#define MC_LUMA(list,p) \
    h->mc.mc_luma( &h->mb.pic.p_fdec[p][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE, \
                   &h->mb.pic.p_fref[list][i_ref][(p)*4], h->mb.pic.i_stride[p], \
                   mvx, mvy, 4*width, 4*height, \
                   (list) ? x264_weight_none : &h->sh.weight[i_ref][p] )
Fiona Glaser's avatar
Fiona Glaser committed
37

38
/* Motion-compensate a partition predicted from list 0 only, writing the
 * prediction into the fdec buffers.
 * x, y, width and height are given in units of 4x4 blocks inside the
 * macroblock (they are multiplied by 4 before being used as pixel values).
 * Luma weighting is handled by MC_LUMA; chroma weighting is applied in place
 * after mc_chroma through each plane's weightfn table, when one is set. */
static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
{
    const int i8    = x264_scan8[0] + x + 8*y;
    const int i_ref = h->mb.cache.ref[0][i8];
    /* Clamp the cached vector to [mv_min, mv_max], then add the partition's
     * position inside the macroblock. */
    const int mvx = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] ) + 4*4*x;
    int       mvy = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;

    MC_LUMA( 0, 0 );

    if( CHROMA444 )
    {
        /* With CHROMA444, planes 1 and 2 go through the same path as plane 0. */
        for( int plane = 1; plane <= 2; plane++ )
        {
            MC_LUMA( 0, plane );
        }
        return;
    }

    const int v_shift = CHROMA_V_SHIFT;
    // Chroma in 4:2:0 is offset if MCing from a field of opposite parity
    if( v_shift & MB_INTERLACED & i_ref )
        mvy += (h->mb.i_mb_y & 1)*4 - 2;

    const int offset        = (4*FDEC_STRIDE>>v_shift)*y + 2*x;
    const int chroma_height = 4*height >> v_shift;

    /* One call produces both chroma planes. */
    h->mc.mc_chroma( &h->mb.pic.p_fdec[1][offset],
                     &h->mb.pic.p_fdec[2][offset], FDEC_STRIDE,
                     h->mb.pic.p_fref[0][i_ref][4], h->mb.pic.i_stride[1],
                     mvx, 2*mvy>>v_shift, 2*width, chroma_height );

    /* Weight each chroma plane in place if a weight function is installed. */
    for( int plane = 1; plane <= 2; plane++ )
    {
        if( !h->sh.weight[i_ref][plane].weightfn )
            continue;
        pixel *dst = &h->mb.pic.p_fdec[plane][offset];
        h->sh.weight[i_ref][plane].weightfn[width>>1]( dst, FDEC_STRIDE,
                                                       dst, FDEC_STRIDE,
                                                       &h->sh.weight[i_ref][plane], chroma_height );
    }
}
77
/* Motion-compensate a partition predicted from list 1 only, writing the
 * prediction into the fdec buffers.
 * x, y, width and height are given in units of 4x4 blocks inside the
 * macroblock. No explicit weights are applied on this path: MC_LUMA selects
 * x264_weight_none for list 1 and the chroma result is left unweighted. */
static NOINLINE void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int height )
{
    const int i8    = x264_scan8[0] + x + 8*y;
    const int i_ref = h->mb.cache.ref[1][i8];
    /* Clamp the cached vector to [mv_min, mv_max], then add the partition's
     * position inside the macroblock. */
    const int mvx = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] ) + 4*4*x;
    int       mvy = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;

    MC_LUMA( 1, 0 );

    if( CHROMA444 )
    {
        /* With CHROMA444, planes 1 and 2 go through the same path as plane 0. */
        for( int plane = 1; plane <= 2; plane++ )
        {
            MC_LUMA( 1, plane );
        }
        return;
    }

    const int v_shift = CHROMA_V_SHIFT;
    /* Same vertical chroma adjustment as the list-0 routine. */
    if( v_shift & MB_INTERLACED & i_ref )
        mvy += (h->mb.i_mb_y & 1)*4 - 2;

    const int offset        = (4*FDEC_STRIDE>>v_shift)*y + 2*x;
    const int chroma_height = 4*height >> v_shift;

    h->mc.mc_chroma( &h->mb.pic.p_fdec[1][offset],
                     &h->mb.pic.p_fdec[2][offset], FDEC_STRIDE,
                     h->mb.pic.p_fref[1][i_ref][4], h->mb.pic.i_stride[1],
                     mvx, 2*mvy>>v_shift, 2*width, chroma_height );
}

Fiona Glaser's avatar
Fiona Glaser committed
105 106
/* Bi-predictive motion compensation of plane `p`: fetch the list-0 and list-1
 * predictions with get_ref (both with x264_weight_none) and blend them into
 * the fdec buffer with avg[i_mode] using `weight`.
 * Expects h, x, y, width, height, i_ref0/i_ref1, mvx0/mvx1, mvy0/mvy1,
 * tmp0/tmp1, i_stride0/i_stride1, src0/src1, i_mode and weight to be in scope
 * at the point of use.
 * Wrapped in do/while(0) so the three steps behave as one statement; the
 * caller supplies the terminating semicolon. */
#define MC_LUMA_BI(p) \
    do { \
        src0 = h->mc.get_ref( tmp0, &i_stride0, &h->mb.pic.p_fref[0][i_ref0][(p)*4], h->mb.pic.i_stride[p], \
                              mvx0, mvy0, 4*width, 4*height, x264_weight_none ); \
        src1 = h->mc.get_ref( tmp1, &i_stride1, &h->mb.pic.p_fref[1][i_ref1][(p)*4], h->mb.pic.i_stride[p], \
                              mvx1, mvy1, 4*width, 4*height, x264_weight_none ); \
        h->mc.avg[i_mode]( &h->mb.pic.p_fdec[p][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE, \
                           src0, i_stride0, src1, i_stride1, weight ); \
    } while( 0 )

113
/* Motion-compensate a bi-predicted partition: build the list-0 and list-1
 * predictions and blend them into the fdec buffers with the bipred weight
 * looked up for the pair of reference indices.
 * x, y, width and height are given in units of 4x4 blocks inside the
 * macroblock. */
static NOINLINE void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int height )
{
    const int i8     = x264_scan8[0] + x + 8*y;
    const int i_ref0 = h->mb.cache.ref[0][i8];
    const int i_ref1 = h->mb.cache.ref[1][i8];
    const int weight = h->mb.bipred_weight[i_ref0][i_ref1];
    const int i_mode = x264_size2pixel[height][width];
    /* Clamp each cached vector to [mv_min, mv_max], then add the partition's
     * position inside the macroblock. */
    const int mvx0 = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] ) + 4*4*x;
    const int mvx1 = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] ) + 4*4*x;
    int       mvy0 = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
    int       mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
    int i_stride0 = 16, i_stride1 = 16;
    ALIGNED_ARRAY_16( pixel, tmp0,[16*16] );
    ALIGNED_ARRAY_16( pixel, tmp1,[16*16] );
    pixel *src0, *src1;

    MC_LUMA_BI( 0 );

    if( CHROMA444 )
    {
        /* With CHROMA444, planes 1 and 2 go through the same path as plane 0. */
        for( int plane = 1; plane <= 2; plane++ )
        {
            MC_LUMA_BI( plane );
        }
        return;
    }

    const int v_shift = CHROMA_V_SHIFT;
    /* Same vertical chroma adjustment as the single-list routines, applied
     * independently to each reference. */
    if( v_shift & MB_INTERLACED & i_ref0 )
        mvy0 += (h->mb.i_mb_y & 1)*4 - 2;
    if( v_shift & MB_INTERLACED & i_ref1 )
        mvy1 += (h->mb.i_mb_y & 1)*4 - 2;

    const int chroma_width  = 2*width;
    const int chroma_height = 4*height >> v_shift;

    /* Each temporary holds both chroma planes side by side: the first plane at
     * column 0 and the second at column 8, with a row stride of 16. */
    h->mc.mc_chroma( tmp0, tmp0+8, 16, h->mb.pic.p_fref[0][i_ref0][4], h->mb.pic.i_stride[1],
                     mvx0, 2*mvy0>>v_shift, chroma_width, chroma_height );
    h->mc.mc_chroma( tmp1, tmp1+8, 16, h->mb.pic.p_fref[1][i_ref1][4], h->mb.pic.i_stride[1],
                     mvx1, 2*mvy1>>v_shift, chroma_width, chroma_height );

    const int chromapix = h->luma2chroma_pixel[i_mode];
    const int offset    = (4*FDEC_STRIDE>>v_shift)*y + 2*x;
    for( int c = 0; c < 2; c++ )
        h->mc.avg[chromapix]( &h->mb.pic.p_fdec[1+c][offset], FDEC_STRIDE,
                              tmp0 + 8*c, 16, tmp1 + 8*c, 16, weight );
}

Fiona Glaser's avatar
Fiona Glaser committed
156 157 158
/* MC_LUMA and MC_LUMA_BI are only used by the x264_mb_mc_*xywh functions
 * above; drop them so they do not leak into the rest of the file. */
#undef MC_LUMA
#undef MC_LUMA_BI

159 160
/* Motion-compensate one 8x8 quadrant of the current macroblock.
 * i8 selects the quadrant: bit 0 is the column, bit 1 the row.
 * In P slices the quadrant is split according to its sub-partition type and
 * every piece is predicted from list 0. In other slices the whole 8x8 block
 * is predicted from whichever lists have a non-negative reference index. */
void x264_mb_mc_8x8( x264_t *h, int i8 )
{
    /* Quadrant origin in units of 4x4 blocks. */
    const int x = 2*(i8&1);
    const int y = 2*(i8>>1);

    if( h->sh.i_type != SLICE_TYPE_P )
    {
        const int scan8 = x264_scan8[0] + x + 8*y;

        if( h->mb.cache.ref[0][scan8] < 0 )
            x264_mb_mc_1xywh( h, x, y, 2, 2 );
        else if( h->mb.cache.ref[1][scan8] < 0 )
            x264_mb_mc_0xywh( h, x, y, 2, 2 );
        else
            x264_mb_mc_01xywh( h, x, y, 2, 2 );
        return;
    }

    switch( h->mb.i_sub_partition[i8] )
    {
        case D_L0_8x8:
            x264_mb_mc_0xywh( h, x, y, 2, 2 );
            break;
        case D_L0_8x4:
            /* two 8x4 halves, top then bottom */
            for( int dy = 0; dy < 2; dy++ )
                x264_mb_mc_0xywh( h, x, y+dy, 2, 1 );
            break;
        case D_L0_4x8:
            /* two 4x8 halves, left then right */
            for( int dx = 0; dx < 2; dx++ )
                x264_mb_mc_0xywh( h, x+dx, y, 1, 2 );
            break;
        case D_L0_4x4:
            /* four 4x4 blocks in raster order */
            for( int dy = 0; dy < 2; dy++ )
                for( int dx = 0; dx < 2; dx++ )
                    x264_mb_mc_0xywh( h, x+dx, y+dy, 1, 1 );
            break;
        default:
            /* other sub-partition values are left untouched */
            break;
    }
}

Laurent Aimar's avatar
Laurent Aimar committed
201 202
void x264_mb_mc( x264_t *h )
{
203
    if( h->mb.i_partition == D_8x8 )
Laurent Aimar's avatar
Laurent Aimar committed
204
    {
205
        for( int i = 0; i < 4; i++ )
206
            x264_mb_mc_8x8( h, i );
Laurent Aimar's avatar
Laurent Aimar committed
207
    }
208
    else
Laurent Aimar's avatar
Laurent Aimar committed
209
    {
210 211 212 213
        int ref0a = h->mb.cache.ref[0][x264_scan8[ 0]];
        int ref0b = h->mb.cache.ref[0][x264_scan8[12]];
        int ref1a = h->mb.cache.ref[1][x264_scan8[ 0]];
        int ref1b = h->mb.cache.ref[1][x264_scan8[12]];
Laurent Aimar's avatar
Laurent Aimar committed
214 215 216

        if( h->mb.i_partition == D_16x16 )
        {
217 218 219 220
            if( ref0a >= 0 )
                if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 4, 4 );
                else             x264_mb_mc_0xywh ( h, 0, 0, 4, 4 );
            else                 x264_mb_mc_1xywh ( h, 0, 0, 4, 4 );
Laurent Aimar's avatar
Laurent Aimar committed
221 222 223
        }
        else if( h->mb.i_partition == D_16x8 )
        {
224 225 226 227 228 229 230 231 232
            if( ref0a >= 0 )
                if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 4, 2 );
                else             x264_mb_mc_0xywh ( h, 0, 0, 4, 2 );
            else                 x264_mb_mc_1xywh ( h, 0, 0, 4, 2 );

            if( ref0b >= 0 )
                if( ref1b >= 0 ) x264_mb_mc_01xywh( h, 0, 2, 4, 2 );
                else             x264_mb_mc_0xywh ( h, 0, 2, 4, 2 );
            else                 x264_mb_mc_1xywh ( h, 0, 2, 4, 2 );
Laurent Aimar's avatar
Laurent Aimar committed
233 234 235
        }
        else if( h->mb.i_partition == D_8x16 )
        {
236 237 238 239 240 241 242 243 244
            if( ref0a >= 0 )
                if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 2, 4 );
                else             x264_mb_mc_0xywh ( h, 0, 0, 2, 4 );
            else                 x264_mb_mc_1xywh ( h, 0, 0, 2, 4 );

            if( ref0b >= 0 )
                if( ref1b >= 0 ) x264_mb_mc_01xywh( h, 2, 0, 2, 4 );
                else             x264_mb_mc_0xywh ( h, 2, 0, 2, 4 );
            else                 x264_mb_mc_1xywh ( h, 2, 0, 2, 4 );
Laurent Aimar's avatar
Laurent Aimar committed
245 246 247 248
        }
    }
}

249
/* Allocate the per-macroblock arrays stored in h->mb (qp, cbp, transform size,
 * slice table, intra prediction modes, non-zero counts, the CABAC-only arrays,
 * per-reference 16x16 motion vector buffers and the weighted-prediction
 * buffers) and set the macroblock/8x8/4x4 strides.
 * Returns 0 on success and -1 as soon as an allocation fails; nothing
 * allocated before the failure is released here. */
int x264_macroblock_cache_allocate( x264_t *h )
{
    const int i_mb_count    = h->mb.i_mb_count;
    const int weighted_pred = h->param.analyse.i_weighted_pred;

    h->mb.i_mb_stride = h->mb.i_mb_width;
    h->mb.i_b8_stride = h->mb.i_mb_width * 2;
    h->mb.i_b4_stride = h->mb.i_mb_width * 4;

    h->mb.b_interlaced = PARAM_INTERLACED;

    CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
    CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
    CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
    /* every byte set to 0xff, i.e. each uint16_t entry starts as 0xffff */
    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );

    /* 0 -> 3 top(4), 4 -> 6 : left(3) */
    CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );

    /* all coeffs */
    CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );

    if( h->param.b_cabac )
    {
        CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
        CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
        CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
        /* the list-1 mvd array is only allocated when B-frames are enabled */
        if( h->param.i_bframe )
            CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
    }

    for( int list = 0; list < 2; list++ )
    {
        int base_refs = list ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference;
        int i_refs = X264_MIN(X264_REF_MAX, base_refs) << PARAM_INTERLACED;
        if( weighted_pred == X264_WEIGHTP_SMART )
            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit

        /* List 0 starts at index 1: slot [0][0] is not allocated here.
         * Every buffer has one extra leading entry which is zeroed; the stored
         * pointer is then advanced past it. */
        for( int ref = !list; ref < i_refs; ref++ )
        {
            CHECKED_MALLOC( h->mb.mvr[list][ref], 2 * (i_mb_count + 1) * sizeof(int16_t) );
            M32( h->mb.mvr[list][ref][0] ) = 0;
            h->mb.mvr[list][ref]++;
        }
    }

    if( weighted_pred )
    {
        int i_padv = PADV << PARAM_INTERLACED;
        int luma_plane_size = 0;
        int numweightbuf = 0;

        if( weighted_pred != X264_WEIGHTP_FAKE )
        {
            /* Both ref and fenc is stored for 4:2:0 and 4:2:2 which means that 4:2:0 and 4:4:4
             * needs the same amount of space and 4:2:2 needs twice that much */
            luma_plane_size = h->fdec->i_stride[0] * (h->mb.i_mb_height*(16<<(CHROMA_FORMAT==CHROMA_422))+2*i_padv);

            /* smart can weight one ref and one offset -1 in 8-bit;
             * simple only has one weighted ref */
            numweightbuf = weighted_pred == X264_WEIGHTP_SMART ? 1 + (BIT_DEPTH == 8) : 1;
        }
        else if( !h->param.i_sync_lookahead || h == h->thread[h->param.i_threads] )
        {
            /* Fake weighting only needs a buffer for lookahead, works on
             * lowres, and needs a single buffer for analysis. */
            luma_plane_size = h->fdec->i_stride_lowres * (h->mb.i_mb_height*8+2*i_padv);
            numweightbuf = 1;
        }

        for( int buf = 0; buf < numweightbuf; buf++ )
            CHECKED_MALLOC( h->mb.p_weight_buf[buf], luma_plane_size * sizeof(pixel) );
    }

    return 0;
fail:
    return -1;
}
335
/* Release everything allocated by x264_macroblock_cache_allocate. */
void x264_macroblock_cache_free( x264_t *h )
{
    for( int list = 0; list < 2; list++ )
    {
        /* List 0 starts at index 1, mirroring the allocation loop: slot
         * [0][0] is assigned from h->fdec->mv16x16 in slice_init and is not
         * owned by this cache. */
        for( int ref = !list; ref < X264_REF_MAX*2; ref++ )
        {
            if( !h->mb.mvr[list][ref] )
                continue;
            /* the stored pointer was advanced by one entry after allocation */
            x264_free( h->mb.mvr[list][ref]-1 );
        }
    }

    for( int buf = 0; buf < X264_REF_MAX; buf++ )
        x264_free( h->mb.p_weight_buf[buf] );

    if( h->param.b_cabac )
    {
        x264_free( h->mb.skipbp );
        x264_free( h->mb.chroma_pred_mode );
        for( int list = 0; list < 2; list++ )
            x264_free( h->mb.mvd[list] );
    }

    x264_free( h->mb.slice_table );
    x264_free( h->mb.intra4x4_pred_mode );
    x264_free( h->mb.non_zero_count );
    x264_free( h->mb.mb_transform_size );
    x264_free( h->mb.cbp );
    x264_free( h->mb.qp );
}
358 359 360 361

/* Allocate the per-thread buffers: the intra border backup rows and deblock
 * strength arrays (skipped when b_lookahead is set) and the shared scratch
 * buffer, which is sized to the largest of its users.
 * Returns 0 on success and -1 as soon as an allocation fails. */
int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
{
    int scratch_size = 0;

    if( !b_lookahead )
    {
        const int n_border_sets = 4*PARAM_INTERLACED + 1;
        const int n_planes      = CHROMA444 ? 3 : 2;

        for( int set = 0; set < n_border_sets; set++ )
            for( int plane = 0; plane < n_planes; plane++ )
            {
                CHECKED_MALLOC( h->intra_border_backup[set][plane], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
                /* keep 16 pixels of headroom in front of the stored pointer */
                h->intra_border_backup[set][plane] += 16;
                /* without interlacing only set 0 is allocated; set 1 aliases it */
                if( !PARAM_INTERLACED )
                    h->intra_border_backup[1][plane] = h->intra_border_backup[set][plane];
            }

        const int n_strength = PARAM_INTERLACED + 1;
        for( int k = 0; k < n_strength; k++ )
        {
            CHECKED_MALLOC( h->deblock_strength[k], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
            /* slot 1 ends up aliasing slot 0 unless it gets its own buffer */
            h->deblock_strength[1] = h->deblock_strength[k];
        }

        /* Scratch buffer users that only exist outside the lookahead. */
        int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(int16_t);
        int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
        int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
        int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
            ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
        scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
    }

    /* The mb-tree user applies to lookahead and non-lookahead threads alike. */
    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
    scratch_size = X264_MAX( scratch_size, buf_mbtree );

    h->scratch_buffer = NULL;
    if( scratch_size )
        CHECKED_MALLOC( h->scratch_buffer, scratch_size );

    return 0;
fail:
    return -1;
}

/* Release the per-thread buffers allocated by x264_macroblock_thread_allocate.
 * b_lookahead must match the value used at allocation time: the deblock
 * strength and intra border backup buffers only exist when it is zero. */
void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
{
    if( !b_lookahead )
    {
        for( int i = 0; i <= PARAM_INTERLACED; i++ )
            x264_free( h->deblock_strength[i] );
        for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
            for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
            {
                /* The stored pointer sits 16 pixels past the start of the
                 * allocation. Skip slots that were never allocated: without
                 * the check a NULL slot would turn into a bogus non-NULL
                 * pointer once 16 is subtracted. */
                if( h->intra_border_backup[i][j] )
                    x264_free( h->intra_border_backup[i][j] - 16 );
            }
    }
    x264_free( h->scratch_buffer );
}

414 415 416 417
/* Per-slice setup of the macroblock state: point the h->mb arrays at the
 * storage of the frame being encoded, record the reference POCs in fdec,
 * fill the slice-type specific lookup tables, reset the reference cache and
 * initialise the neighbour masks that are fixed for the whole slice. */
void x264_macroblock_slice_init( x264_t *h )
{
    /* Per-macroblock arrays live in the frame being encoded. */
    h->mb.type      = h->fdec->mb_type;
    h->mb.partition = h->fdec->mb_partition;
    h->mb.field     = h->fdec->field;
    h->mb.mvr[0][0] = h->fdec->mv16x16;
    for( int list = 0; list < 2; list++ )
    {
        h->mb.mv[list]       = h->fdec->mv[list];
        h->mb.ref[list]      = h->fdec->ref[list];
        h->fdec->i_ref[list] = h->i_ref[list];
    }

    for( int ref = 0; ref < h->i_ref[0]; ref++ )
        h->fdec->ref_poc[0][ref] = h->fref[0][ref]->i_poc;

    if( h->sh.i_type == SLICE_TYPE_B )
    {
        for( int ref = 0; ref < h->i_ref[1]; ref++ )
            h->fdec->ref_poc[1][ref] = h->fref[1][ref]->i_poc;

        /* Map each list-0 reference of the first list-1 frame onto the index
         * of the current list-0 reference with the same POC, or -2 if there
         * is none. The entries for -1 and -2 map to themselves. */
        map_col_to_list0(-1) = -1;
        map_col_to_list0(-2) = -2;
        for( int col = 0; col < h->fref[1][0]->i_ref[0]; col++ )
        {
            int poc = h->fref[1][0]->ref_poc[0][col];
            int match = -2;
            for( int ref = 0; ref < h->i_ref[0]; ref++ )
            {
                if( h->fref[0][ref]->i_poc == poc )
                {
                    match = ref;
                    break;
                }
            }
            map_col_to_list0(col) = match;
        }
    }
    else if( h->sh.i_type == SLICE_TYPE_P &&
             h->sh.i_disable_deblocking_filter_idc != 1 &&
             h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
    {
        deblock_ref_table(-2) = -2;
        deblock_ref_table(-1) = -1;
        for( int ref = 0; ref < h->i_ref[0] << SLICE_MBAFF; ref++ )
        {
            /* Mask off high bits to avoid frame num collisions with -1/-2.
             * In current x264 frame num values don't cover a range of more
             * than 32, so 6 bits is enough for uniqueness. */
            if( MB_INTERLACED )
                deblock_ref_table(ref) = ((h->fref[0][ref>>1]->i_frame_num&63)<<1) + (ref&1);
            else
                deblock_ref_table(ref) = h->fref[0][ref]->i_frame_num&63;
        }
    }

    /* init with not available (for top right idx=7,15) */
    memset( h->mb.cache.ref, -2, sizeof( h->mb.cache.ref ) );

    if( h->i_ref[0] > 0 )
    {
        for( int field = 0; field <= SLICE_MBAFF; field++ )
        {
            int curpoc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
            int refpoc = h->fref[0][0]->i_poc + h->fref[0][0]->i_delta_poc[field];
            int delta  = curpoc - refpoc;

            /* 256/delta, rounded by adding delta/2 before the division.
             * NOTE(review): assumes delta != 0 - confirm with callers. */
            h->fdec->inv_ref_poc[field] = (256 + delta/2) / delta;
        }
    }

    /* Neighbour availability for the blocks whose mask is set once here. */
    h->mb.i_neighbour4[6] =
    h->mb.i_neighbour4[9] =
    h->mb.i_neighbour4[12] =
    h->mb.i_neighbour4[14] = MB_LEFT|MB_TOP|MB_TOPLEFT|MB_TOPRIGHT;
    h->mb.i_neighbour4[3] =
    h->mb.i_neighbour4[7] =
    h->mb.i_neighbour4[11] =
    h->mb.i_neighbour4[13] =
    h->mb.i_neighbour4[15] =
    h->mb.i_neighbour8[3] = MB_LEFT|MB_TOP|MB_TOPLEFT;
}

/* Per-thread setup of the analysis settings derived from the slice type and
 * of the plane pointers into the fenc/fdec macroblock buffers. */
void x264_macroblock_thread_init( x264_t *h )
{
    const int slice_type = h->sh.i_type;
    const int is_p_slice = slice_type == SLICE_TYPE_P;
    const int is_b_slice = slice_type == SLICE_TYPE_B;

    h->mb.i_me_method     = h->param.analyse.i_me_method;
    h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
    /* B slices step subpel levels 6 and 8 down by one. */
    if( is_b_slice )
    {
        if( h->mb.i_subpel_refine == 6 || h->mb.i_subpel_refine == 8 )
            h->mb.i_subpel_refine--;
    }

    /* Chroma ME needs the user setting plus a minimum subpel level that
     * depends on the slice type: 5 for P slices, 9 for B slices. */
    int chroma_me = 0;
    if( h->param.analyse.b_chroma_me )
    {
        if( is_p_slice )
            chroma_me = h->mb.i_subpel_refine >= 5;
        else if( is_b_slice )
            chroma_me = h->mb.i_subpel_refine >= 9;
    }
    h->mb.b_chroma_me = chroma_me;

    /* DCT decimation is always on in B slices, optional in other non-I slices. */
    h->mb.b_dct_decimate = is_b_slice ||
                          (h->param.analyse.b_dct_decimate && slice_type != SLICE_TYPE_I);
    h->mb.i_mb_prev_xy = -1;

    /*          4:2:0                      4:2:2                      4:4:4
     * fdec            fenc       fdec            fenc       fdec            fenc
     * y y y y y y y   Y Y Y Y    y y y y y y y   Y Y Y Y    y y y y y y y   Y Y Y Y
     * y Y Y Y Y       Y Y Y Y    y Y Y Y Y       Y Y Y Y    y Y Y Y Y       Y Y Y Y
     * y Y Y Y Y       Y Y Y Y    y Y Y Y Y       Y Y Y Y    y Y Y Y Y       Y Y Y Y
     * y Y Y Y Y       Y Y Y Y    y Y Y Y Y       Y Y Y Y    y Y Y Y Y       Y Y Y Y
     * y Y Y Y Y       U U V V    y Y Y Y Y       U U V V    y Y Y Y Y       U U U U
     * u u u   v v v   U U V V    u u u   v v v   U U V V    u u u u u u u   U U U U
     * u U U   v V V              u U U   v V V   U U V V    u U U U U       U U U U
     * u U U   v V V              u U U   v V V   U U V V    u U U U U       U U U U
     *                            u U U   v V V              u U U U U       V V V V
     *                            u U U   v V V              u U U U U       V V V V
     *                                                       v v v v v v v   V V V V
     *                                                       v V V V V       V V V V
     *                                                       v V V V V
     *                                                       v V V V V
     *                                                       v V V V V
     */
    int fenc_off2, fdec_off2;
    if( CHROMA444 )
    {
        /* third plane stacked below the second one */
        fenc_off2 = 32*FENC_STRIDE;
        fdec_off2 = 36*FDEC_STRIDE;
    }
    else
    {
        /* third plane beside the second one, on the same rows */
        fenc_off2 = 16*FENC_STRIDE + 8;
        fdec_off2 = 19*FDEC_STRIDE + 16;
    }

    h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
    h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
    h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + fenc_off2;

    h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
    h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE;
    h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + fdec_off2;
}
Laurent Aimar's avatar
Laurent Aimar committed
538

539 540 541 542
/* Prefetch the source pixels of macroblock (i_mb_x, i_mb_y) of frame fenc by
 * handing the addresses in plane 0 and plane 1 to the prefetch_fenc routine. */
void x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y )
{
    const int luma_stride   = fenc->i_stride[0];
    const int chroma_stride = fenc->i_stride[1];
    const int col_offset    = 16 * i_mb_x;

    /* The row offset into plane 1 is shifted down by CHROMA_V_SHIFT. */
    const int luma_offset   = col_offset + 16 * i_mb_y * luma_stride;
    const int chroma_offset = col_offset + (16 * i_mb_y * chroma_stride >> CHROMA_V_SHIFT);

    h->mc.prefetch_fenc( fenc->plane[0] + luma_offset, luma_stride,
                         fenc->plane[1] + chroma_offset, chroma_stride, i_mb_x );
}
Laurent Aimar's avatar
Laurent Aimar committed
548

549
/* Copy a column of 8 pixels between two buffers that both use FDEC_STRIDE as
 * their row stride. Rows -4 to 3 relative to the given pointers are copied. */
NOINLINE void x264_copy_column8( pixel *dst, pixel *src )
{
    // input pointers are offset by 4 rows because that's faster (smaller instruction size on x86)
    int row = -4;
    do
    {
        dst[row*FDEC_STRIDE] = src[row*FDEC_STRIDE];
    } while( ++row < 4 );
}

Fiona Glaser's avatar
Fiona Glaser committed
556
/* Set up the per-macroblock pixel buffers and pointers for plane `i` of
 * macroblock (mb_x, mb_y).
 *
 * What it does, in order:
 *  - records the effective stride and the address of the macroblock inside
 *    the source frame (h->mb.pic.i_stride[i], h->mb.pic.p_fenc_plane[i]);
 *  - copies the source pixels into the fenc cache (through
 *    h->mc.load_deinterleave_chroma_fenc when b_chroma, through
 *    h->mc.copy[PIXEL_16x16] otherwise);
 *  - copies the saved row from h->intra_border_backup into the row directly
 *    above the fdec cache;
 *  - when b_mbaff, additionally fills the top-left pixel and the column left
 *    of the fdec cache;
 *  - stores the reference pointers for list 0 (and list 1 in B slices) in
 *    h->mb.pic.p_fref, plus h->mb.pic.p_fref_w for plane 0.
 *
 * Parameters:
 *  i        plane index.
 *  b_chroma nonzero when plane i is handled as chroma.  The code then works
 *           on p_fenc/p_fdec entries 1 and 2 and reads two adjacent samples
 *           per position, so the plane is presumably interleaved U/V.
 *  b_mbaff  nonzero enables the field/frame-adaptive handling; MB_INTERLACED
 *           is only consulted when it is set. */
static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
{
    int mb_interlaced = b_mbaff && MB_INTERLACED;
    int height = b_chroma ? 16 >> CHROMA_V_SHIFT : 16;
    int i_stride = h->fdec->i_stride[i];
    /* A field macroblock only touches every second frame row. */
    int i_stride2 = i_stride << mb_interlaced;
    /* For a field macroblock the offset is that of the top MB of the pair,
     * plus one frame row for the bottom field (odd mb_y). */
    int i_pix_offset = mb_interlaced
                     ? 16 * mb_x + height * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
                     : 16 * mb_x + height * mb_y * i_stride;
    pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
    /* Which saved border row to use: 0 without MBAFF; with MBAFF, 3 or 4 for
     * field MBs (even/odd mb_y) and 4 or 2 for frame MBs (even/odd mb_y). */
    int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
    pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
    int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
    /* ref_pix_offset[0] references the current field and [1] the opposite field. */
    if( mb_interlaced )
        ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
    h->mb.pic.i_stride[i] = i_stride2;
    h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
    if( b_chroma )
    {
        h->mc.load_deinterleave_chroma_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2, height );
        /* The saved border row holds 8 pixels for p_fdec[1] followed by 8 for p_fdec[2]. */
        memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
        memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
        if( b_mbaff )
        {
            /* Top-left pixels: taken from the end of the previous MB's saved row. */
            h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
            h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
        }
    }
    else
    {
        h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, 16 );
        /* 24 pixels: the 16 above this MB plus 8 more to the right of them. */
        memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
        if( b_mbaff )
            h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
    }
    if( b_mbaff )
    {
        /* Fill the column left of the fdec cache from the reconstructed frame,
         * stepping with the (possibly doubled) field stride. */
        for( int j = 0; j < height; j++ )
            if( b_chroma )
            {
                h->mb.pic.p_fdec[1][-1+j*FDEC_STRIDE] = plane_fdec[-2+j*i_stride2];
                h->mb.pic.p_fdec[2][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
            }
            else
                h->mb.pic.p_fdec[i][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
    }
    pixel *plane_src, **filtered_src;
    for( int j = 0; j < h->mb.pic.i_fref[0]; j++ )
    {
        // Interpolate between pixels in same field.
        if( mb_interlaced )
        {
            /* Two consecutive reference indices map onto one reference frame. */
            plane_src = h->fref[0][j>>1]->plane_fld[i];
            filtered_src = h->fref[0][j>>1]->filtered_fld[i];
        }
        else
        {
            plane_src = h->fref[0][j]->plane[i];
            filtered_src = h->fref[0][j]->filtered[i];
        }
        h->mb.pic.p_fref[0][j][i*4] = plane_src + ref_pix_offset[j&1];

        if( !b_chroma )
        {
            /* Slots 1..3 hold the additional filtered planes; slot 0 is the plain plane. */
            for( int k = 1; k < 4; k++ )
                h->mb.pic.p_fref[0][j][i*4+k] = filtered_src[k] + ref_pix_offset[j&1];
            if( !i )
            {
                /* Plane 0 only: point p_fref_w at the weighted copy when a
                 * weight function is set, otherwise at the plain reference. */
                if( h->sh.weight[j][0].weightfn )
                    h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> mb_interlaced][ref_pix_offset[j&1]];
                else
                    h->mb.pic.p_fref_w[j] = h->mb.pic.p_fref[0][j][0];
            }
        }
    }
    if( h->sh.i_type == SLICE_TYPE_B )
        for( int j = 0; j < h->mb.pic.i_fref[1]; j++ )
        {
            if( mb_interlaced )
            {
                plane_src = h->fref[1][j>>1]->plane_fld[i];
                filtered_src = h->fref[1][j>>1]->filtered_fld[i];
            }
            else
            {
                plane_src = h->fref[1][j]->plane[i];
                filtered_src = h->fref[1][j]->filtered[i];
            }
            h->mb.pic.p_fref[1][j][i*4] = plane_src + ref_pix_offset[j&1];

            if( !b_chroma )
                for( int k = 1; k < 4; k++ )
                    h->mb.pic.p_fref[1][j][i*4+k] = filtered_src[k] + ref_pix_offset[j&1];
        }
}

/* Index tables used to read data from the left neighbour(s) of a macroblock.
 * x264_macroblock_cache_load_neighbours stores a pointer to one entry in
 * h->mb.left_index_table:
 *   [0] current MB is progressive, left MB is interlaced, even mb_y
 *   [1] current MB is progressive, left MB is interlaced, odd mb_y
 *   [2] current MB is interlaced, left MB is progressive
 *   [3] current and left MB have the same field/frame mode (also the default)
 * Each entry consists of five rows of four indices; what each row indexes is
 * defined by x264_left_table_t, which is declared outside this file section. */
static const x264_left_table_t left_indices[4] =
{
    /* Current is progressive */
    {{ 4, 4, 5, 5}, { 3,  3,  7,  7}, {16+1, 16+1, 32+1, 32+1}, {0, 0, 1, 1}, {0, 0, 0, 0}},
    {{ 6, 6, 3, 3}, {11, 11, 15, 15}, {16+5, 16+5, 32+5, 32+5}, {2, 2, 3, 3}, {1, 1, 1, 1}},
    /* Current is interlaced */
    {{ 4, 6, 4, 6}, { 3, 11,  3, 11}, {16+1, 16+1, 32+1, 32+1}, {0, 2, 0, 2}, {0, 1, 0, 1}},
    /* Both same */
    {{ 4, 5, 6, 3}, { 3,  7, 11, 15}, {16+1, 16+5, 32+1, 32+5}, {0, 1, 2, 3}, {0, 0, 1, 1}}
};

/* Determine the neighbours of macroblock (mb_x, mb_y) and record them in h->mb.
 *
 * All neighbour fields are first reset to "none" (-1, or 0 for the masks).
 * Then, for the left, top, top-left and top-right neighbours, the function
 * stores the macroblock index (and row for the top ones), its type, and sets
 * the matching MB_* bit in up to three masks:
 *   i_neighbour_frame  the neighbour position exists;
 *   i_neighbour        additionally its slice_table entry equals
 *                      h->sh.i_first_mb (same slice as the current MB);
 *   i_neighbour_intra  additionally constrained intra is off or the
 *                      neighbour is an intra macroblock.
 * Top-row neighbours are only considered when the current row (pair) is not
 * the one the threadslice starts at.
 *
 * b_interlaced nonzero enables the field/frame-adaptive (MBAFF) adjustments,
 * which may move the top/left/top-left/top-right positions within the
 * neighbouring macroblock pair and select a different h->mb.left_index_table. */
static void ALWAYS_INLINE x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced )
{
    const int mb_interlaced = b_interlaced && MB_INTERLACED;
    /* A field macroblock looks two rows up, a frame macroblock one. */
    int top_y = mb_y - (1 << mb_interlaced);
    int top = top_y * h->mb.i_mb_stride + mb_x;

    h->mb.i_mb_x = mb_x;
    h->mb.i_mb_y = mb_y;
    h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
    h->mb.i_b8_xy = 2*(mb_y * h->mb.i_b8_stride + mb_x);
    h->mb.i_b4_xy = 4*(mb_y * h->mb.i_b4_stride + mb_x);
    h->mb.left_b8[0] =
    h->mb.left_b8[1] = -1;
    h->mb.left_b4[0] =
    h->mb.left_b4[1] = -1;
    h->mb.i_neighbour = 0;
    h->mb.i_neighbour_intra = 0;
    h->mb.i_neighbour_frame = 0;
    h->mb.i_mb_top_xy = -1;
    h->mb.i_mb_top_y = -1;
    h->mb.i_mb_left_xy[0] = h->mb.i_mb_left_xy[1] = -1;
    h->mb.i_mb_topleft_xy = -1;
    h->mb.i_mb_topright_xy = -1;
    h->mb.i_mb_type_top = -1;
    h->mb.i_mb_type_left[0] = h->mb.i_mb_type_left[1] = -1;
    h->mb.i_mb_type_topleft = -1;
    h->mb.i_mb_type_topright = -1;
    h->mb.left_index_table = &left_indices[3];
    h->mb.topleft_partition = 0;

    int topleft_y = top_y;
    int topright_y = top_y;
    int left[2];

    /* Default: both left entries refer to the macroblock directly to the left. */
    left[0] = left[1] = h->mb.i_mb_xy - 1;
    h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2;
    h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4;

    if( b_interlaced )
    {
        h->mb.i_mb_top_mbpair_xy = h->mb.i_mb_xy - 2*h->mb.i_mb_stride;
        h->mb.i_mb_topleft_y = -1;
        h->mb.i_mb_topright_y = -1;

        if( mb_y&1 )
        {
            /* Bottom macroblock of a pair. */
            if( mb_x && mb_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
            {
                /* Left pair uses the other field/frame mode: start from its top MB. */
                left[0] = left[1] = h->mb.i_mb_xy - 1 - h->mb.i_mb_stride;
                h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2 - 2*h->mb.i_b8_stride;
                h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4 - 4*h->mb.i_b4_stride;

                if( mb_interlaced )
                {
                    /* Second left entry refers to the bottom MB of the left pair. */
                    h->mb.left_index_table = &left_indices[2];
                    left[1] += h->mb.i_mb_stride;
                    h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
                    h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
                }
                else
                {
                    h->mb.left_index_table = &left_indices[1];
                    topleft_y++;
                    h->mb.topleft_partition = 1;
                }
            }
            if( !mb_interlaced )
                topright_y = -1;
        }
        else
        {
            /* Top macroblock of a pair. */
            if( mb_interlaced && top >= 0 )
            {
                /* If a neighbouring pair above is not field-coded, use the MB one row lower. */
                if( !h->mb.field[top] )
                {
                    top += h->mb.i_mb_stride;
                    top_y++;
                }
                if( mb_x )
                    topleft_y += !h->mb.field[h->mb.i_mb_stride*topleft_y + mb_x - 1];
                if( mb_x < h->mb.i_mb_width-1 )
                    topright_y += !h->mb.field[h->mb.i_mb_stride*topright_y + mb_x + 1];
            }
            if( mb_x && mb_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
            {
                if( mb_interlaced )
                {
                    h->mb.left_index_table = &left_indices[2];
                    left[1] += h->mb.i_mb_stride;
                    h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
                    h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
                }
                else
                    h->mb.left_index_table = &left_indices[0];
            }
        }
    }

    if( mb_x > 0 )
    {
        h->mb.i_neighbour_frame |= MB_LEFT;
        h->mb.i_mb_left_xy[0] = left[0];
        h->mb.i_mb_left_xy[1] = left[1];
        h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
        h->mb.i_mb_type_left[1] = h->mb.type[h->mb.i_mb_left_xy[1]];
        if( h->mb.slice_table[left[0]] == h->sh.i_first_mb )
        {
            h->mb.i_neighbour |= MB_LEFT;

            // FIXME: We don't currently support constrained intra + mbaff.
            if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
                h->mb.i_neighbour_intra |= MB_LEFT;
        }
    }

    /* We can't predict from the previous threadslice since it hasn't been encoded yet. */
    if( (h->i_threadslice_start >> mb_interlaced) != (mb_y >> mb_interlaced) )
    {
        if( top >= 0 )
        {
            h->mb.i_neighbour_frame |= MB_TOP;
            h->mb.i_mb_top_xy = top;
            h->mb.i_mb_top_y = top_y;
            h->mb.i_mb_type_top = h->mb.type[h->mb.i_mb_top_xy];
            if( h->mb.slice_table[top] == h->sh.i_first_mb )
            {
                h->mb.i_neighbour |= MB_TOP;

                if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_top ) )
                    h->mb.i_neighbour_intra |= MB_TOP;

                /* We only need to prefetch the top blocks because the left was just written
                 * to as part of the previous cache_save.  Since most target CPUs use write-allocate
                 * caches, left blocks are near-guaranteed to be in L1 cache.  Top--not so much. */
                x264_prefetch( &h->mb.cbp[top] );
                x264_prefetch( h->mb.intra4x4_pred_mode[top] );
                x264_prefetch( &h->mb.non_zero_count[top][12] );
                /* These aren't always allocated, but prefetching an invalid address can't hurt. */
                x264_prefetch( &h->mb.mb_transform_size[top] );
                x264_prefetch( &h->mb.skipbp[top] );
            }
        }

        if( mb_x > 0 && topleft_y >= 0  )
        {
            h->mb.i_neighbour_frame |= MB_TOPLEFT;
            h->mb.i_mb_topleft_xy = h->mb.i_mb_stride*topleft_y + mb_x - 1;
            h->mb.i_mb_topleft_y = topleft_y;
            h->mb.i_mb_type_topleft = h->mb.type[h->mb.i_mb_topleft_xy];
            if( h->mb.slice_table[h->mb.i_mb_topleft_xy] == h->sh.i_first_mb )
            {
                h->mb.i_neighbour |= MB_TOPLEFT;

                if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_topleft ) )
                    h->mb.i_neighbour_intra |= MB_TOPLEFT;
            }
        }

        if( mb_x < h->mb.i_mb_width - 1 && topright_y >= 0 )
        {
            h->mb.i_neighbour_frame |= MB_TOPRIGHT;
            h->mb.i_mb_topright_xy = h->mb.i_mb_stride*topright_y + mb_x + 1;
            h->mb.i_mb_topright_y = topright_y;
            h->mb.i_mb_type_topright = h->mb.type[h->mb.i_mb_topright_xy];
            if( h->mb.slice_table[h->mb.i_mb_topright_xy] == h->sh.i_first_mb )
            {
                h->mb.i_neighbour |= MB_TOPRIGHT;

                if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_topright ) )
                    h->mb.i_neighbour_intra |= MB_TOPRIGHT;
            }
        }
    }
}
/* Indices into the two-entry left-neighbour arrays (e.g. h->mb.i_mb_left_xy).
 * LTOP is always entry 0.  LBOT is entry 1 when HAVE_INTERLACED is nonzero;
 * otherwise it aliases entry 0, so both names refer to the same neighbour. */
#define LTOP 0
#if HAVE_INTERLACED
#   define LBOT 1
#else
#   define LBOT 0
#endif

static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff )
847
{
848
    x264_macroblock_cache_load_neighbours( h, mb_x, mb_y, b_mbaff );
Laurent Aimar's avatar
Laurent Aimar committed
849

Simon Horlick's avatar
Simon Horlick committed
850
    int *left = h->mb.i_mb_left_xy;
851
    int top  = h->mb.i_mb_top_xy;
Simon Horlick's avatar
Simon Horlick committed
852
    int top_y = h->mb.i_mb_top_y;
853 854 855 856 857
    int s8x8 = h->mb.i_b8_stride;
    int s4x4 = h->mb.i_b4_stride;
    int top_8x8 = (2*top_y+1) * s8x8 + 2*mb_x;
    int top_4x4 = (4*top_y+3) * s4x4 + 4*mb_x;
    int lists = (1 << h->sh.i_type) & 3;
858

859 860 861
    /* GCC pessimizes direct loads from heap-allocated arrays due to aliasing. */
    /* By only dereferencing them once, we avoid this issue. */
    int8_t (*i4x4)[8] = h->mb.intra4x4_pred_mode;
Fiona Glaser's avatar
Fiona Glaser committed
862
    uint8_t (*nnz)[48] = h->mb.non_zero_count;
863 864
    int16_t *cbp = h->mb.cbp;

Fiona Glaser's avatar
Fiona Glaser committed
865
    const x264_left_table_t *left_index_table = h->mb.left_index_table;
866

867 868 869
    /* load cache */
    if( h->mb.i_neighbour & MB_TOP )
    {
870
        h->mb.cache.i_cbp_top = cbp[top];
Laurent Aimar's avatar
Laurent Aimar committed
871
        /* load intra4x4 */
872
        CP32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8], &i4x4[top][0] );
Laurent Aimar's avatar
Laurent Aimar committed
873 874

        /* load non_zero_count */
Fiona Glaser's avatar
Fiona Glaser committed
875
        CP32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8], &nnz[top][12] );
876 877
        CP32( &h->mb.cache.non_zero_count[x264_scan8[16] - 8], &nnz[top][16-4 + (16>>CHROMA_V_SHIFT)] );
        CP32( &h->mb.cache.non_zero_count[x264_scan8[32] - 8], &nnz[top][32-4 + (16>>CHROMA_V_SHIFT)] );
878 879 880 881 882 883 884 885 886 887 888

        /* Finish the prefetching */
        for( int l = 0; l < lists; l++ )
        {
            x264_prefetch( &h->mb.mv[l][top_4x4-1] );
            /* Top right being not in the same cacheline as top left will happen
             * once every 4 MBs, so one extra prefetch is worthwhile */
            x264_prefetch( &h->mb.mv[l][top_4x4+4] );
            x264_prefetch( &h->mb.ref[l][top_8x8-1] );
            x264_prefetch( &h->mb.mvd[l][top] );
        }
Laurent Aimar's avatar
Laurent Aimar committed
889 890 891
    }
    else
    {
892
        h->mb.cache.i_cbp_top = -1;
893

Laurent Aimar's avatar
Laurent Aimar committed
894
        /* load intra4x4 */
Fiona Glaser's avatar
Fiona Glaser committed
895
        M32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] ) = 0xFFFFFFFFU;
Laurent Aimar's avatar
Laurent Aimar committed
896 897

        /* load non_zero_count */
Fiona Glaser's avatar
Fiona Glaser committed
898 899 900
        M32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8] ) = 0x80808080U;
        M32( &h->mb.cache.non_zero_count[x264_scan8[16] - 8] ) = 0x80808080U;
        M32( &h->mb.cache.non_zero_count[x264_scan8[32] - 8] ) = 0x80808080U;
Laurent Aimar's avatar
Laurent Aimar committed
901 902
    }

903
    if( h->mb.i_neighbour & MB_LEFT )
Laurent Aimar's avatar
Laurent Aimar committed
904
    {
Fiona Glaser's avatar
Fiona Glaser committed
905 906
        int ltop = left[LTOP];
        int lbot = b_mbaff ? left[LBOT] : ltop;
907
        if( b_mbaff )
908
        {
Fiona Glaser's avatar
Fiona Glaser committed
909 910 911
            const int16_t top_luma = (cbp[ltop] >> (left_index_table->mv[0]&(~1))) & 2;
            const int16_t bot_luma = (cbp[lbot] >> (left_index_table->mv[2]&(~1))) & 2;
            h->mb.cache.i_cbp_left = (cbp[ltop] & 0xfff0) | (bot_luma<<2) | top_luma;
912 913
        }
        else
Fiona Glaser's avatar
Fiona Glaser committed
914 915 916