macroblock.c 55.1 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1
2
3
/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
4
 * Copyright (C) 2003-2008 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5
6
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7
8
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
Laurent Aimar's avatar
Laurent Aimar committed
9
10
11
12
13
14
15
16
17
18
19
20
21
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Laurent Aimar's avatar
Laurent Aimar committed
23
24
25
 *****************************************************************************/

#include "common.h"
26
#include "encoder/me.h"
Laurent Aimar's avatar
Laurent Aimar committed
27

28
/* Predict the motion vector for the partition whose first 4x4 block is idx,
 * using reference list i_list, and store it in mvp.
 *
 * Candidates are read from the 8-entry-wide mb cache: the entry to the left
 * (cur - 1), the entry above (cur - 8) and the diagonal entry
 * (cur - 8 + i_width; i_width is presumably the partition width in 4x4
 * blocks).  The diagonal candidate is replaced by the above-left entry
 * (cur - 8 - 1) when its ref is -2 or when the idx/i_width test rules it out. */
void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] )
{
    const int cur     = x264_scan8[idx];
    const int ref_cur = h->mb.cache.ref[i_list][cur];
    const int ref_l   = h->mb.cache.ref[i_list][cur - 1];
    const int ref_t   = h->mb.cache.ref[i_list][cur - 8];
    int       ref_d   = h->mb.cache.ref[i_list][cur - 8 + i_width];
    int16_t  *mv_l    = h->mb.cache.mv[i_list][cur - 1];
    int16_t  *mv_t    = h->mb.cache.mv[i_list][cur - 8];
    int16_t  *mv_d    = h->mb.cache.mv[i_list][cur - 8 + i_width];
    int       n_match;

    /* fall back to the above-left neighbour for the diagonal candidate */
    if( ref_d == -2 || (idx&3) >= 2 + (i_width&1) )
    {
        ref_d = h->mb.cache.ref[i_list][cur - 8 - 1];
        mv_d  = h->mb.cache.mv[i_list][cur - 8 - 1];
    }

    /* 16x8 and 8x16 partitions first try a single directional candidate */
    if( h->mb.i_partition == D_16x8 )
    {
        /* idx 0 looks above, the other partition looks left */
        int16_t *const dir_mv  = idx == 0 ? mv_t  : mv_l;
        const int      dir_ref = idx == 0 ? ref_t : ref_l;
        if( dir_ref == ref_cur )
        {
            *(uint32_t*)mvp = *(uint32_t*)dir_mv;
            return;
        }
    }
    else if( h->mb.i_partition == D_8x16 )
    {
        /* idx 0 looks left, the other partition looks at the diagonal */
        int16_t *const dir_mv  = idx == 0 ? mv_l  : mv_d;
        const int      dir_ref = idx == 0 ? ref_l : ref_d;
        if( dir_ref == ref_cur )
        {
            *(uint32_t*)mvp = *(uint32_t*)dir_mv;
            return;
        }
    }

    n_match = (ref_l == ref_cur) + (ref_t == ref_cur) + (ref_d == ref_cur);

    if( n_match == 1 )
    {
        /* exactly one neighbour shares our reference: copy its mv */
        if( ref_l == ref_cur )
            *(uint32_t*)mvp = *(uint32_t*)mv_l;
        else if( ref_t == ref_cur )
            *(uint32_t*)mvp = *(uint32_t*)mv_t;
        else
            *(uint32_t*)mvp = *(uint32_t*)mv_d;
    }
    else if( n_match == 0 && ref_t == -2 && ref_d == -2 && ref_l != -2 )
    {
        /* only the left neighbour is not flagged -2 */
        *(uint32_t*)mvp = *(uint32_t*)mv_l;
    }
    else
    {
        /* several matches, or none at all: component-wise median */
        x264_median_mv( mvp, mv_l, mv_t, mv_d );
    }
}

110
/* Predict the motion vector of a whole 16x16 macroblock for reference i_ref
 * of list i_list and store it in mvp.
 *
 * Same candidate scheme as the per-partition predictor, but anchored at
 * X264_SCAN8_0 with a fixed diagonal offset of 4 cache entries; the caller
 * supplies the reference index instead of it being read from the cache. */
void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
{
    const int ref_l = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
    const int ref_t = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
    int       ref_d = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
    int16_t  *mv_l  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
    int16_t  *mv_t  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
    int16_t  *mv_d  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
    int       n_match;

    /* diagonal entry flagged -2: use the above-left neighbour instead */
    if( ref_d == -2 )
    {
        ref_d = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
        mv_d  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
    }

    n_match = (ref_l == i_ref) + (ref_t == i_ref) + (ref_d == i_ref);

    if( n_match == 1 )
    {
        /* exactly one neighbour uses i_ref: copy its mv */
        if( ref_l == i_ref )
            *(uint32_t*)mvp = *(uint32_t*)mv_l;
        else if( ref_t == i_ref )
            *(uint32_t*)mvp = *(uint32_t*)mv_t;
        else
            *(uint32_t*)mvp = *(uint32_t*)mv_d;
    }
    else if( n_match == 0 && ref_t == -2 && ref_d == -2 && ref_l != -2 )
    {
        /* only the left neighbour is not flagged -2 */
        *(uint32_t*)mvp = *(uint32_t*)mv_l;
    }
    else
    {
        /* several matches, or none at all: component-wise median */
        x264_median_mv( mvp, mv_l, mv_t, mv_d );
    }
}


152
/* Compute the motion vector used for a P-skip macroblock and store it in mv.
 *
 * The result is the zero vector when the left or top neighbour is flagged -2,
 * or when either of them has both ref 0 and a zero mv; otherwise it is the
 * regular 16x16 prediction for list 0, ref 0. */
void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
{
    const int ref_l = h->mb.cache.ref[0][X264_SCAN8_0 - 1];
    const int ref_t = h->mb.cache.ref[0][X264_SCAN8_0 - 8];
    int16_t  *mv_l  = h->mb.cache.mv[0][X264_SCAN8_0 - 1];
    int16_t  *mv_t  = h->mb.cache.mv[0][X264_SCAN8_0 - 8];

    /* (ref | packed mv) is non-zero unless ref == 0 and both mv components are 0 */
    if( ref_l != -2 && ref_t != -2 &&
        ( ref_l | *(uint32_t*)mv_l ) &&
        ( ref_t | *(uint32_t*)mv_t ) )
    {
        x264_mb_predict_mv_16x16( h, 0, 0, mv );
    }
    else
    {
        *(uint32_t*)mv = 0;
    }
}

171
172
173
174
/* Temporal direct prediction for the current macroblock.
 *
 * For each 8x8 block, the collocated block of fref1[0] is looked up, its
 * list0 ref is mapped through map_col_to_list0, and its mv is scaled by
 * dist_scale_factor to produce the list0 mv; the list1 mv is the scaled mv
 * minus the collocated mv.  Refs and mvs are written into the mb cache.
 *
 * Returns 1 when the prediction is usable, 0 when it is not (unmapped
 * collocated ref, or a vertical mv beyond mv_max_spel[1] when
 * param.i_threads > 1).  On a 0 return, blocks processed before the failing
 * one have already been written to the cache. */
static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
{
    const int mb_b4_base = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
    const int mb_b8_base =  4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
    const int col_type   = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];
    int blk;

    /* list1 always references index 0 */
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );

    if( IS_INTRA( col_type ) )
    {
        /* intra collocated mb: ref 0 and zero mvs in both lists */
        x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
        x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 0, 0 );
        x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 1, 0 );
        return 1;
    }

    for( blk = 0; blk < 4; blk++ )
    {
        const int bx     = blk & 1;
        const int by     = blk >> 1;
        const int col_b8 = mb_b8_base + bx + by * h->mb.i_b8_stride;
        const int l0_ref = h->mb.map_col_to_list0[ h->fref1[0]->ref[0][ col_b8 ] ];
        const int16_t *col_mv;
        int scale, l0x, l0y;

        if( l0_ref < 0 )
        {
            /* the collocated ref isn't in the current list0 */
            /* FIXME: we might still be able to use direct_8x8 on some partitions */
            /* FIXME: with B-pyramid + extensive ref list reordering
             *   (not currently used), we would also have to check
             *   l1mv1 like in spatial mode */
            return 0;
        }

        scale  = h->mb.dist_scale_factor[l0_ref][0];
        /* offset 3*(bx, by) selects the 4x4 block in the matching corner of the mb */
        col_mv = h->fref1[0]->mv[0][ mb_b4_base + 3*bx + 3*by * h->mb.i_b4_stride ];
        l0x    = ( scale * col_mv[0] + 128 ) >> 8;
        l0y    = ( scale * col_mv[1] + 128 ) >> 8;

        /* with several threads, reject vertical mvs past the allowed range */
        if( h->param.i_threads > 1 && ( l0y > h->mb.mv_max_spel[1] || l0y-col_mv[1] > h->mb.mv_max_spel[1] ) )
            return 0;

        x264_macroblock_cache_ref( h, 2*bx, 2*by, 2, 2, 0, l0_ref );
        x264_macroblock_cache_mv( h, 2*bx, 2*by, 2, 2, 0, pack16to32_mask(l0x, l0y) );
        x264_macroblock_cache_mv( h, 2*bx, 2*by, 2, 2, 1, pack16to32_mask(l0x-col_mv[0], l0y-col_mv[1]) );
    }

    return 1;
}

/* Spatial direct prediction for the current macroblock.
 *
 * Per list, the reference is the smallest non-negative ref among the left,
 * top and diagonal neighbours (-1 if there is none).  If neither list has a
 * reference, both lists get ref 0 and zero mvs.  Otherwise the 16x16 mv
 * prediction is used for each list that has a reference, and 8x8 blocks
 * whose collocated block in fref1[0] has ref 0 with an mv of magnitude <= 1
 * in both components get a zero mv in every list whose chosen ref is 0.
 *
 * Returns 1 when the prediction is usable, 0 when a predicted vertical mv
 * exceeds mv_max_spel[1] while param.i_threads > 1 (the cache has already
 * been written in that case). */
static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
{
    int ref[2];
    ALIGNED_8( int16_t mv[2][2] );
    int l;
    int blk;
    const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
    const int8_t *l1ref1 = &h->fref1[0]->ref[1][ h->mb.i_b8_xy ];
    const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[0][ h->mb.i_b4_xy ];
    const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[1][ h->mb.i_b4_xy ];
    const int col_type = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];

    /* pick the smallest non-negative neighbouring ref for each list */
    for( l = 0; l < 2; l++ )
    {
        const int ref_l = h->mb.cache.ref[l][X264_SCAN8_0 - 1];
        const int ref_t = h->mb.cache.ref[l][X264_SCAN8_0 - 8];
        int       ref_d = h->mb.cache.ref[l][X264_SCAN8_0 - 8 + 4];
        int       best;

        if( ref_d == -2 )
            ref_d = h->mb.cache.ref[l][X264_SCAN8_0 - 8 - 1];

        best = ref_l;
        if( best < 0 || ( ref_t >= 0 && ref_t < best ) )
            best = ref_t;
        if( best < 0 || ( ref_d >= 0 && ref_d < best ) )
            best = ref_d;
        ref[l] = best < 0 ? -1 : best;
    }

    if( ref[0] < 0 && ref[1] < 0 )
    {
        /* no neighbour has a reference in either list: ref 0, zero mv everywhere */
        x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
        x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
        x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, 0 );
        x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 );
        return 1;
    }

    /* predict before touching the cache, list 0 first */
    for( l = 0; l < 2; l++ )
    {
        if( ref[l] >= 0 )
            x264_mb_predict_mv_16x16( h, l, ref[l], mv[l] );
        else
            *(uint32_t*)mv[l] = 0;
    }

    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, mv[0] );
    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, mv[1] );

    /* with several threads, reject vertical mvs past the allowed range */
    if( h->param.i_threads > 1
        && ( mv[0][1] > h->mb.mv_max_spel[1]
          || mv[1][1] > h->mb.mv_max_spel[1] ) )
        return 0;

    /* the zeroing pass below only applies when some list uses ref 0
     * and the collocated mb is not intra */
    if( IS_INTRA( col_type ) || ( ref[0] != 0 && ref[1] != 0 ) )
        return 1;

    /* col_zero_flag */
    for( blk = 0; blk < 4; blk++ )
    {
        const int bx = blk & 1;
        const int by = blk >> 1;
        const int o8 = bx + by * h->mb.i_b8_stride;
        const int o4 = 3*(bx + by * h->mb.i_b4_stride);
        const int16_t *col_mv;

        /* use the collocated list0 mv if its ref is 0, else the list1 mv
         * when list0 is unused and the list1 ref is 0 */
        if( l1ref0[o8] == 0 )
            col_mv = l1mv0[o4];
        else if( l1ref0[o8] < 0 && l1ref1[o8] == 0 )
            col_mv = l1mv1[o4];
        else
            continue;

        if( abs( col_mv[0] ) <= 1 && abs( col_mv[1] ) <= 1 )
        {
            if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*bx, 2*by, 2, 2, 0, 0 );
            if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*bx, 2*by, 2, 2, 1, 0 );
        }
    }

    return 1;
}

Loren Merritt's avatar
Loren Merritt committed
316
/* Run direct mv prediction (spatial or temporal, per the slice header) for
 * the current macroblock and save the resulting refs/mvs in
 * h->mb.cache.direct_ref / direct_mv.
 *
 * b_changed may be NULL.  When non-NULL and the prediction is available, it
 * receives 1 if the new prediction differs from the saved one and 0
 * otherwise; in the unchanged case the saved copy is not rewritten.  It is
 * left untouched when the prediction is unavailable.
 *
 * Returns 0 when direct prediction is disabled or unavailable, otherwise the
 * (non-zero) result of the underlying predictor. */
int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
{
    int b_available;
    int l, i;

    if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
        return 0;

    b_available = h->sh.b_direct_spatial_mv_pred
                ? x264_mb_predict_mv_direct16x16_spatial( h )
                : x264_mb_predict_mv_direct16x16_temporal( h );
    if( !b_available )
        return b_available;

    if( b_changed != NULL )
    {
        const int col_type = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];
        int changed;

        if( IS_INTRA( col_type ) || col_type == P_SKIP )
        {
            /* for these collocated types only the first entry is compared */
            changed = h->mb.cache.direct_ref[0][0] != h->mb.cache.ref[0][X264_SCAN8_0]
                   || h->mb.cache.direct_ref[1][0] != h->mb.cache.ref[1][X264_SCAN8_0]
                   || *(uint32_t*)h->mb.cache.direct_mv[0][X264_SCAN8_0] != *(uint32_t*)h->mb.cache.mv[0][X264_SCAN8_0]
                   || *(uint32_t*)h->mb.cache.direct_mv[1][X264_SCAN8_0] != *(uint32_t*)h->mb.cache.mv[1][X264_SCAN8_0];
        }
        else
        {
            /* compare every 8x8 ref, then the whole mv cache */
            changed = 0;
            for( l = 0; l < 2; l++ )
                for( i = 0; i < 4; i++ )
                    changed |= h->mb.cache.direct_ref[l][i] != h->mb.cache.ref[l][x264_scan8[i*4]];
            changed = changed || memcmp(h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv));
        }

        *b_changed = changed;
        if( !changed )
            return b_available;
    }

    /* cache ref & mv */
    for( l = 0; l < 2; l++ )
        for( i = 0; i < 4; i++ )
            h->mb.cache.direct_ref[l][i] = h->mb.cache.ref[l][x264_scan8[i*4]];
    h->mc.memcpy_aligned(h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv));

    return b_available;
}

/* Restore the saved direct prediction of 8x8 block idx into the mb cache:
 * both lists' refs via x264_macroblock_cache_ref, and both lists' mvs by
 * copying two cache rows of two mvs each (one 64-bit copy per row). */
void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
{
    const int x   = 2*(idx%2);
    const int y   = 2*(idx/2);
    const int top = x264_scan8[idx*4];
    int l, row;

    for( l = 0; l < 2; l++ )
        x264_macroblock_cache_ref( h, x, y, 2, 2, l, h->mb.cache.direct_ref[l][idx] );

    for( l = 0; l < 2; l++ )
        for( row = 0; row < 2; row++ )
        {
            /* the cache is 8 entries wide, so +8 is the next row */
            const int pos = top + 8*row;
            *(uint64_t*)h->mb.cache.mv[l][pos] =
            *(uint64_t*)h->mb.cache.direct_mv[l][pos];
        }
}

/* This just improves encoder performance, it's not part of the spec */
379
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[9][2], int *i_mvc )
380
381
382
{
    int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
    int i = 0;
383

384
#define SET_MVP(mvp) { \
385
        *(uint32_t*)mvc[i] = *(uint32_t*)mvp; \
386
387
388
389
390
391
        i++; \
    }

    /* b_direct */
    if( h->sh.i_type == SLICE_TYPE_B
        && h->mb.cache.ref[i_list][x264_scan8[12]] == i_ref )
392
    {
393
        SET_MVP( h->mb.cache.mv[i_list][x264_scan8[12]] );
394
395
    }

396
397
398
399
400
401
402
    if( i_ref == 0 && h->frames.b_have_lowres )
    {
        int16_t (*lowres_mv)[2] = i_list ? h->fenc->lowres_mvs[1][h->fref1[0]->i_frame-h->fenc->i_frame-1]
                                         : h->fenc->lowres_mvs[0][h->fenc->i_frame-h->fref0[0]->i_frame-1];
        if( lowres_mv[0][0] != 0x7fff ) *(uint32_t*)mvc[i++] = (*(uint32_t*)lowres_mv[h->mb.i_mb_xy]*2)&0xfffeffff;
    }

403
    /* spatial predictors */
Loren Merritt's avatar
Loren Merritt committed
404
    if( h->mb.i_neighbour & MB_LEFT )
405
406
    {
        int i_mb_l = h->mb.i_mb_xy - 1;
407
408
        /* skip MBs didn't go through the whole search process, so mvr is undefined */
        if( !IS_SKIP( h->mb.type[i_mb_l] ) )
409
            SET_MVP( mvr[i_mb_l] );
410
    }
Loren Merritt's avatar
Loren Merritt committed
411
    if( h->mb.i_neighbour & MB_TOP )
412
    {
413
        int i_mb_t = h->mb.i_mb_top_xy;
414
        if( !IS_SKIP( h->mb.type[i_mb_t] ) )
415
            SET_MVP( mvr[i_mb_t] );
416

Loren Merritt's avatar
Loren Merritt committed
417
        if( h->mb.i_neighbour & MB_TOPLEFT && !IS_SKIP( h->mb.type[i_mb_t - 1] ) )
418
            SET_MVP( mvr[i_mb_t-1] );
419
        if( h->mb.i_mb_x < h->mb.i_mb_stride - 1 && !IS_SKIP( h->mb.type[i_mb_t + 1] ) )
420
421
422
423
424
            SET_MVP( mvr[i_mb_t+1] );
    }
#undef SET_MVP

    /* temporal predictors */
425
426
    /* FIXME temporal scaling w/ interlace */
    if( h->fref0[0]->i_ref[0] > 0 && !h->sh.b_mbaff )
427
428
429
430
431
432
    {
        x264_frame_t *l0 = h->fref0[0];

#define SET_TMVP(dx, dy) { \
            int i_b4 = h->mb.i_b4_xy + dx*4 + dy*4*h->mb.i_b4_stride; \
            int i_b8 = h->mb.i_b8_xy + dx*2 + dy*2*h->mb.i_b8_stride; \
433
434
            int ref_col = l0->ref[0][i_b8]; \
            if( ref_col >= 0 ) \
435
            { \
436
                int scale = (h->fdec->i_poc - h->fdec->ref_poc[0][i_ref]) * l0->inv_ref_poc[ref_col];\
437
438
                mvc[i][0] = (l0->mv[0][i_b4][0]*scale + 128) >> 8;\
                mvc[i][1] = (l0->mv[0][i_b4][1]*scale + 128) >> 8;\
439
440
                i++; \
            } \
441
        }
442
443
444
445
446
447
448

        SET_TMVP(0,0);
        if( h->mb.i_mb_x < h->sps->i_mb_width-1 )
            SET_TMVP(1,0);
        if( h->mb.i_mb_y < h->sps->i_mb_height-1 )
            SET_TMVP(0,1);
#undef SET_TMVP
449
    }
450

451
452
453
    *i_mvc = i;
}

454
455
456
457
458
459
460
/* Set up a lookup table for delta pocs to reduce an IDIV to an IMUL.
 * For every list0 reference, stores round(256 / (poc of fdec - poc of ref))
 * in fdec->inv_ref_poc.
 * NOTE(review): divides by the POC difference without checking it; assumes
 * no list0 reference shares the POC of fdec. */
static void setup_inverse_delta_pocs( x264_t *h )
{
    const int cur_poc = h->fdec->i_poc;
    int ref;

    for( ref = 0; ref < h->i_ref0; ref++ )
    {
        const int dist = cur_poc - h->fref0[ref]->i_poc;
        h->fdec->inv_ref_poc[ref] = (256 + dist/2) / dist;
    }
}

Laurent Aimar's avatar
Laurent Aimar committed
465
466
467
468
/* Single-list motion compensation of the block at (x,y), size width x height
 * (all in 4x4-block units), predicting from list i_list into p_fdec.
 * The mv is read from the mb cache and clipped to [mv_min, mv_max] before
 * luma and both chroma planes are compensated. */
static inline void x264_mb_mc_list_xywh( x264_t *h, int i_list, int x, int y, int width, int height )
{
    const int i8    = x264_scan8[0]+x+8*y;
    const int i_ref = h->mb.cache.ref[i_list][i8];
    const int mvx   = x264_clip3( h->mb.cache.mv[i_list][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
    int       mvy   = x264_clip3( h->mb.cache.mv[i_list][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
    int       plane;

    h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
                   h->mb.pic.p_fref[i_list][i_ref], h->mb.pic.i_stride[0],
                   mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );

    // chroma is offset if MCing from a field of opposite parity
    if( h->mb.b_interlaced & i_ref )
        mvy += (h->mb.i_mb_y & 1)*4 - 2;

    /* p_fdec planes 1 and 2 are fed from p_fref planes 4 and 5 */
    for( plane = 1; plane <= 2; plane++ )
        h->mc.mc_chroma( &h->mb.pic.p_fdec[plane][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
                         &h->mb.pic.p_fref[i_list][i_ref][3+plane][2*y*h->mb.pic.i_stride[plane]+2*x], h->mb.pic.i_stride[plane],
                         mvx, mvy, 2*width, 2*height );
}

/* Motion-compensate a block from list 0 (see x264_mb_mc_list_xywh). */
static inline void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
{
    x264_mb_mc_list_xywh( h, 0, x, y, width, height );
}

/* Motion-compensate a block from list 1 (see x264_mb_mc_list_xywh). */
static inline void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int height )
{
    x264_mb_mc_list_xywh( h, 1, x, y, width, height );
}

/* Bi-predictive motion compensation of the block at (x,y), size
 * width x height (all in 4x4-block units): fetch the list0 and list1
 * predictions with clipped mvs and blend them into p_fdec using
 * bipred_weight[ref0][ref1], for luma and both chroma planes. */
static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int height )
{
    const int i8       = x264_scan8[0]+x+8*y;
    const int i_ref0   = h->mb.cache.ref[0][i8];
    const int i_ref1   = h->mb.cache.ref[1][i8];
    const int weight   = h->mb.bipred_weight[i_ref0][i_ref1];
    const int mvx0     = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
    const int mvx1     = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
    int       mvy0     = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
    int       mvy1     = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
    const int i_mode   = x264_size2pixel[height][width];
    int       i_stride0 = 16, i_stride1 = 16;
    ALIGNED_ARRAY_16( uint8_t, tmp0,[16*16] );
    ALIGNED_ARRAY_16( uint8_t, tmp1,[16*16] );
    uint8_t *src0, *src1;
    int plane;

    /* luma: get_ref returns the pixels to read and updates the stride
     * (presumably it may return a pointer other than the tmp buffer) */
    src0 = h->mc.get_ref( tmp0, &i_stride0, h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
                          mvx0 + 4*4*x, mvy0 + 4*4*y, 4*width, 4*height );
    src1 = h->mc.get_ref( tmp1, &i_stride1, h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
                          mvx1 + 4*4*x, mvy1 + 4*4*y, 4*width, 4*height );
    h->mc.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
                       src0, i_stride0, src1, i_stride1, weight );

    /* same vertical chroma adjustment as the single-list paths, per list */
    if( h->mb.b_interlaced & i_ref0 )
        mvy0 += (h->mb.i_mb_y & 1)*4 - 2;
    if( h->mb.b_interlaced & i_ref1 )
        mvy1 += (h->mb.i_mb_y & 1)*4 - 2;

    /* chroma: p_fdec planes 1 and 2 are fed from p_fref planes 4 and 5;
     * avg index i_mode+3 is used for the chroma block size */
    for( plane = 1; plane <= 2; plane++ )
    {
        h->mc.mc_chroma( tmp0, 16, &h->mb.pic.p_fref[0][i_ref0][3+plane][2*y*h->mb.pic.i_stride[plane]+2*x], h->mb.pic.i_stride[plane],
                         mvx0, mvy0, 2*width, 2*height );
        h->mc.mc_chroma( tmp1, 16, &h->mb.pic.p_fref[1][i_ref1][3+plane][2*y*h->mb.pic.i_stride[plane]+2*x], h->mb.pic.i_stride[plane],
                         mvx1, mvy1, 2*width, 2*height );
        h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[plane][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp0, 16, tmp1, 16, weight );
    }
}

551
552
553
554
/* Motion-compensate one 8x8 block at (x,y) (4x4-block units) in direct mode,
 * choosing bi-, list0- or list1-prediction from which cached refs are
 * non-negative.  List1 is used whenever the list0 ref is negative. */
static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
{
    const int i8     = x264_scan8[0] + x + 8*y;
    const int has_l0 = h->mb.cache.ref[0][i8] >= 0;
    const int has_l1 = h->mb.cache.ref[1][i8] >= 0;

    if( has_l0 && has_l1 )
    {
        x264_mb_mc_01xywh( h, x, y, 2, 2 );
    }
    else if( has_l0 )
    {
        x264_mb_mc_0xywh( h, x, y, 2, 2 );
    }
    else
    {
        x264_mb_mc_1xywh( h, x, y, 2, 2 );
    }
}
Laurent Aimar's avatar
Laurent Aimar committed
563

564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
/* Motion-compensate 8x8 block i8 (0..3) according to its sub-partition type.
 * List0 sub-partitions are tiled row by row with blocks of the matching
 * size; list1, bi-predicted and direct blocks are handled as a single 8x8
 * unit.  Any other sub-partition value does nothing. */
void x264_mb_mc_8x8( x264_t *h, int i8 )
{
    const int x = 2*(i8&1);
    const int y = 2*(i8>>1);
    int sub_w, sub_h;   /* list0 sub-block size in 4x4 units */
    int dx, dy;

    switch( h->mb.i_sub_partition[i8] )
    {
        case D_L0_8x8: sub_w = 2; sub_h = 2; break;
        case D_L0_8x4: sub_w = 2; sub_h = 1; break;
        case D_L0_4x8: sub_w = 1; sub_h = 2; break;
        case D_L0_4x4: sub_w = 1; sub_h = 1; break;
        case D_L1_8x8:
            x264_mb_mc_1xywh( h, x, y, 2, 2 );
            return;
        case D_BI_8x8:
            x264_mb_mc_01xywh( h, x, y, 2, 2 );
            return;
        case D_DIRECT_8x8:
            x264_mb_mc_direct8x8( h, x, y );
            return;
        default:
            return;
    }

    /* cover the 2x2 area of 4x4 blocks with list0 sub-blocks */
    for( dy = 0; dy < 2; dy += sub_h )
        for( dx = 0; dx < 2; dx += sub_w )
            x264_mb_mc_0xywh( h, x+dx, y+dy, sub_w, sub_h );
}

Laurent Aimar's avatar
Laurent Aimar committed
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
/* Motion-compensate the whole current macroblock according to its type and
 * partition:
 *   - P_L0: list0 prediction per 16x16 / 16x8 / 8x16 partition
 *   - P_8x8 / B_8x8: each 8x8 block by its own sub-partition
 *   - B_SKIP / B_DIRECT: direct prediction for each 8x8 block
 *   - anything else: treated as a B partition type whose per-partition list
 *     usage comes from x264_mb_type_list_table */
void x264_mb_mc( x264_t *h )
{
    const int i_type      = h->mb.i_type;
    const int i_partition = h->mb.i_partition;

    if( i_type == P_L0 )
    {
        if( i_partition == D_16x16 )
        {
            x264_mb_mc_0xywh( h, 0, 0, 4, 4 );
        }
        else if( i_partition == D_16x8 )
        {
            x264_mb_mc_0xywh( h, 0, 0, 4, 2 );
            x264_mb_mc_0xywh( h, 0, 2, 4, 2 );
        }
        else if( i_partition == D_8x16 )
        {
            x264_mb_mc_0xywh( h, 0, 0, 2, 4 );
            x264_mb_mc_0xywh( h, 2, 0, 2, 4 );
        }
    }
    else if( i_type == P_8x8 || i_type == B_8x8 )
    {
        int blk;
        for( blk = 0; blk < 4; blk++ )
            x264_mb_mc_8x8( h, blk );
    }
    else if( i_type == B_SKIP || i_type == B_DIRECT )
    {
        int blk;
        /* (0,0), (2,0), (0,2), (2,2) in 4x4-block units */
        for( blk = 0; blk < 4; blk++ )
            x264_mb_mc_direct8x8( h, 2*(blk&1), 2*(blk>>1) );
    }
    else    /* B_*x* */
    {
        const uint8_t *b_list0 = x264_mb_type_list_table[i_type][0];
        const uint8_t *b_list1 = x264_mb_type_list_table[i_type][1];

/* compensate partition p from whichever lists it uses; nothing if neither */
#define MC_B_PART( p, px, py, pw, ph ) \
        do { \
            if( b_list0[p] && b_list1[p] ) x264_mb_mc_01xywh( h, px, py, pw, ph ); \
            else if( b_list0[p] )          x264_mb_mc_0xywh ( h, px, py, pw, ph ); \
            else if( b_list1[p] )          x264_mb_mc_1xywh ( h, px, py, pw, ph ); \
        } while( 0 )

        if( i_partition == D_16x16 )
        {
            MC_B_PART( 0, 0, 0, 4, 4 );
        }
        else if( i_partition == D_16x8 )
        {
            MC_B_PART( 0, 0, 0, 4, 2 );
            MC_B_PART( 1, 0, 2, 4, 2 );
        }
        else if( i_partition == D_8x16 )
        {
            MC_B_PART( 0, 0, 0, 2, 4 );
            MC_B_PART( 1, 2, 0, 2, 4 );
        }
#undef MC_B_PART
    }
}

665
/* Allocate the per-macroblock side tables, the per-reference mv tables, the
 * intra border backup rows and the shared scratch buffer, and initialise the
 * fixed parts of the macroblock cache.
 *
 * Returns 0 on success, -1 when control reaches the `fail` label.
 * NOTE(review): `fail` is presumably the jump target of the CHECKED_MALLOC*
 * macros on allocation failure; nothing allocated earlier is released on
 * that path -- confirm the caller's cleanup policy. */
int x264_macroblock_cache_init( x264_t *h )
{
    int list, ref, field, plane;
    int i_mb_count = h->mb.i_mb_count;

    /* strides of the per-MB, per-8x8 and per-4x4 tables, in units of entries */
    h->mb.i_mb_stride = h->sps->i_mb_width;
    h->mb.i_b8_stride = h->sps->i_mb_width * 2;
    h->mb.i_b4_stride = h->sps->i_mb_width * 4;

    h->mb.b_interlaced = h->param.b_interlaced;

    /* tables holding one entry per macroblock */
    CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
    CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
    CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );

    /* intra4x4 prediction modes, 8 per MB: 0 -> 3 top(4), 4 -> 6 : left(3) */
    CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );

    /* non-zero counts for all coeffs, 24 per MB */
    CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 24 * sizeof(uint8_t) );
    CHECKED_MALLOC( h->mb.nnz_backup, h->sps->i_mb_width * 4 * 16 * sizeof(uint8_t) );

    /* tables that are only allocated when CABAC is enabled */
    if( h->param.b_cabac )
    {
        CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
        CHECKED_MALLOC( h->mb.mvd[0], 2*16 * i_mb_count * sizeof(int16_t) );
        CHECKED_MALLOC( h->mb.mvd[1], 2*16 * i_mb_count * sizeof(int16_t) );
    }

    /* one mv table (2 int16_t per MB) for every reference of each list */
    for( list = 0; list < 2; list++ )
    {
        int i_max_refs = list ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference;
        int i_refs = X264_MIN(16, i_max_refs) << h->param.b_interlaced;
        for( ref = 0; ref < i_refs; ref++ )
            CHECKED_MALLOC( h->mb.mvr[list][ref], 2 * i_mb_count * sizeof(int16_t) );
    }

    /* border backup rows: one set when progressive, two when interlaced;
     * planes 1 and 2 get half the size of plane 0 */
    for( field = 0; field <= h->param.b_interlaced; field++ )
    {
        for( plane = 0; plane < 3; plane++ )
        {
            /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
            CHECKED_MALLOCZERO( h->mb.intra_border_backup[field][plane], (h->sps->i_mb_width*16+32)>>!!plane );
            /* keep the pointer 8 bytes into the allocation;
             * x264_macroblock_cache_end() subtracts the 8 again before freeing */
            h->mb.intra_border_backup[field][plane] += 8;
        }
    }

    /* init with not available (for top right idx=7,15) */
    memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
    memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );

    /* fdec:      fenc:
     * yyyyyyy
     * yYYYY      YYYY
     * yYYYY      YYYY
     * yYYYY      YYYY
     * yYYYY      YYYY
     * uuu vvv    UUVV
     * uUU vVV    UUVV
     * uUU vVV
     */
    h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
    h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
    h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
    h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
    h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE;
    h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE + 16;

    /* neighbour masks that are set once here: these blocks get all four
     * neighbour flags... */
    h->mb.i_neighbour4[14] = MB_LEFT|MB_TOP|MB_TOPLEFT|MB_TOPRIGHT;
    h->mb.i_neighbour4[12] = MB_LEFT|MB_TOP|MB_TOPLEFT|MB_TOPRIGHT;
    h->mb.i_neighbour4[9]  = MB_LEFT|MB_TOP|MB_TOPLEFT|MB_TOPRIGHT;
    h->mb.i_neighbour4[6]  = MB_LEFT|MB_TOP|MB_TOPLEFT|MB_TOPRIGHT;
    /* ...and these get everything except top-right */
    h->mb.i_neighbour8[3]  = MB_LEFT|MB_TOP|MB_TOPLEFT;
    h->mb.i_neighbour4[15] = MB_LEFT|MB_TOP|MB_TOPLEFT;
    h->mb.i_neighbour4[13] = MB_LEFT|MB_TOP|MB_TOPLEFT;
    h->mb.i_neighbour4[11] = MB_LEFT|MB_TOP|MB_TOPLEFT;
    h->mb.i_neighbour4[7]  = MB_LEFT|MB_TOP|MB_TOPLEFT;
    h->mb.i_neighbour4[3]  = MB_LEFT|MB_TOP|MB_TOPLEFT;

    /* a single scratch buffer, sized to the largest of the four candidate
     * sizes below; a candidate is 0 when its feature is disabled */
    int buf_hpel = (h->param.i_width+48) * sizeof(int16_t);
    int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
    int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
    int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
        ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
    int buf_mbtree = h->param.rc.b_mb_tree * ((h->sps->i_mb_width+3)&~3) * sizeof(int);
    CHECKED_MALLOC( h->scratch_buffer, X264_MAX4( buf_hpel, buf_ssim, buf_tesa, buf_mbtree ) );

    return 0;
fail: return -1;
}
/* Release every buffer allocated by x264_macroblock_cache_init().
 *
 * The intra border backup pointers are stored 8 bytes past the start of
 * their allocation (init does `+= 8` right after allocating), so 8 is
 * subtracted again before freeing. An entry that was never allocated is
 * skipped: computing `NULL - 8` is undefined pointer arithmetic and would
 * hand x264_free() a bogus non-NULL pointer.
 * NOTE(review): this assumes unallocated entries are NULL, i.e. that `h`
 * was zero-initialised before init ran -- confirm against the allocator of
 * x264_t. */
void x264_macroblock_cache_end( x264_t *h )
{
    int i, j;
    for( i=0; i<=h->param.b_interlaced; i++ )
        for( j=0; j<3; j++ )
        {
            if( h->mb.intra_border_backup[i][j] )
                x264_free( h->mb.intra_border_backup[i][j] - 8 );
        }
    /* All 32 slots of each list are released although init allocates only
     * the first i_refs of them; presumably x264_free() ignores the NULL
     * entries left in the unused slots -- TODO confirm. */
    for( i=0; i<2; i++ )
        for( j=0; j<32; j++ )
            x264_free( h->mb.mvr[i][j] );
    /* these three tables are only allocated when CABAC is enabled */
    if( h->param.b_cabac )
    {
        x264_free( h->mb.chroma_pred_mode );
        x264_free( h->mb.mvd[0] );
        x264_free( h->mb.mvd[1] );
    }
    x264_free( h->mb.intra4x4_pred_mode );
    x264_free( h->mb.non_zero_count );
    x264_free( h->mb.nnz_backup );
    x264_free( h->mb.mb_transform_size );
    x264_free( h->mb.skipbp );
    x264_free( h->mb.cbp );
    x264_free( h->mb.qp );
    x264_free( h->scratch_buffer );
}
777
778
/* Per-slice setup of the macroblock state: point the mv/ref/type tables at
 * the storage of the frame being reconstructed (h->fdec), record the POCs of
 * the active references in that frame, and for B slices build the table that
 * maps the list0 references of h->fref1[0] onto the current list0 indices. */
void x264_macroblock_slice_init( x264_t *h )
{
    int list, ref, col;

    for( list = 0; list < 2; list++ )
    {
        h->mb.mv[list]  = h->fdec->mv[list];
        h->mb.ref[list] = h->fdec->ref[list];
    }
    h->mb.type = h->fdec->mb_type;

    /* remember how many references each list has and the POC of every
     * list0 reference */
    h->fdec->i_ref[0] = h->i_ref0;
    h->fdec->i_ref[1] = h->i_ref1;
    for( ref = 0; ref < h->i_ref0; ref++ )
        h->fdec->ref_poc[0][ref] = h->fref0[ref]->i_poc;

    if( h->sh.i_type == SLICE_TYPE_B )
    {
        for( ref = 0; ref < h->i_ref1; ref++ )
            h->fdec->ref_poc[1][ref] = h->fref1[ref]->i_poc;

        /* entries -1 and -2 map to themselves */
        h->mb.map_col_to_list0[-1] = -1;
        h->mb.map_col_to_list0[-2] = -2;
        /* for each list0 reference recorded in fref1[0], find the current
         * list0 reference with the same POC; -2 when there is none */
        for( col = 0; col < h->fref1[0]->i_ref[0]; col++ )
        {
            const int poc = h->fref1[0]->ref_poc[0][col];
            int mapped = -2;
            for( ref = 0; ref < h->i_ref0; ref++ )
            {
                if( h->fref0[ref]->i_poc == poc )
                {
                    mapped = ref;
                    break;
                }
            }
            h->mb.map_col_to_list0[col] = mapped;
        }
    }

    /* the cached skip flags are cleared at the start of every P slice */
    if( h->sh.i_type == SLICE_TYPE_P )
        memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );

    setup_inverse_delta_pocs( h );
}
Laurent Aimar's avatar
Laurent Aimar committed
815

816
817
818
819
820
821
822
823
824
/* Hand the source pixels of macroblock (i_mb_x, i_mb_y) in `fenc` to the
 * h->mc.prefetch_fenc callback.
 * Plane 0 is passed with a 16-pixel macroblock pitch. Only one of planes 1
 * and 2 is passed per call, with an 8-pixel pitch; the low bit of i_mb_x
 * selects which one. */
void x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y )
{
    const int i_stride_luma   = fenc->i_stride[0];
    const int i_stride_chroma = fenc->i_stride[1];
    const int i_off_luma   = 16 * (i_mb_x + i_mb_y * i_stride_luma);
    const int i_off_chroma = 8 * (i_mb_x + i_mb_y * i_stride_chroma);
    const int i_chroma_plane = 1 + (i_mb_x & 1);

    h->mc.prefetch_fenc( fenc->plane[0] + i_off_luma, i_stride_luma,
                         fenc->plane[i_chroma_plane] + i_off_chroma, i_stride_chroma,
                         i_mb_x );
}
Laurent Aimar's avatar
Laurent Aimar committed
825

826
827
static NOINLINE void copy_column8( uint8_t *dst, uint8_t *src )
{
828
    // input pointers are offset by 4 rows because that's faster (smaller instruction size on x86)
829
    int i;
830
    for( i = -4; i < 4; i++ )
831
832
833
        dst[i*FDEC_STRIDE] = src[i*FDEC_STRIDE];
}

834
835
836
/* Set up the pixel pointers of plane `i` (0 is 16 pixels wide, 1 and 2 are
 * 8 wide) for the macroblock at (i_mb_x, i_mb_y):
 *  - copy the source pixels into the fenc cache,
 *  - load the row above (and, when interlaced or re-encoding, the column
 *    left of) the block into the fdec cache,
 *  - point p_fref[list][ref][...] at the matching position in every
 *    reference frame. */
static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb_x, int i_mb_y, int i)
{
    const int w = i ? 8 : 16;
    const int i_stride = h->fdec->i_stride[!!i];
    /* when interlaced, consecutive rows of one field are two frame rows apart */
    const int i_stride2 = i_stride << h->mb.b_interlaced;
    int i_pix_offset;
    int ref_pix_offset[2];
    const uint8_t *intra_fdec = &h->mb.intra_border_backup[i_mb_y & h->sh.b_mbaff][i][i_mb_x*16>>!!i];
    x264_frame_t **fref[2] = { h->fref0, h->fref1 };
    int list, ref, row, k;

    if( h->mb.b_interlaced )
        /* start of the MB pair, plus one frame row for the odd MB of the pair */
        i_pix_offset = w * (i_mb_x + (i_mb_y&~1) * i_stride) + (i_mb_y&1) * i_stride;
    else
        i_pix_offset = w * (i_mb_x + i_mb_y * i_stride);

    /* even reference indices use the MB's own offset; when interlaced, odd
     * ones are moved by one frame row (down for an even i_mb_y, up for an
     * odd one) */
    ref_pix_offset[0] = i_pix_offset;
    ref_pix_offset[1] = i_pix_offset;
    if( h->mb.b_interlaced )
        ref_pix_offset[1] += (1-2*(i_mb_y&1)) * i_stride;

    h->mb.pic.i_stride[i] = i_stride2;
    h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];

    /* source pixels -> fenc cache */
    h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,
        h->mb.pic.p_fenc_plane[i], i_stride2, w );

    /* w*3/2+1 bytes of the backed-up border row go into the row above the
     * fdec block, starting one pixel to the left of it */
    memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 );

    /* in these two cases the column left of the block is read back from the
     * fdec frame plane */
    if( h->mb.b_interlaced || h->mb.b_reencode_mb )
    {
        const uint8_t *plane_fdec = &h->fdec->plane[i][i_pix_offset];
        for( row = 0; row < w; row++ )
            h->mb.pic.p_fdec[i][-1+row*FDEC_STRIDE] = plane_fdec[-1+row*i_stride2];
    }

    for( list = 0; list < 2; list++ )
    {
        /* list1 pointers are only set up in B slices */
        if( list && h->sh.i_type != SLICE_TYPE_B )
            break;

        for( ref = 0; ref < h->mb.pic.i_fref[list]; ref++ )
        {
            /* when interlaced, two consecutive reference indices share one frame */
            x264_frame_t *frame = fref[list][ref >> h->mb.b_interlaced];
            const int i_offset = ref_pix_offset[ref&1];

            /* plane 0 goes to slot 0, planes 1 and 2 to slots 4 and 5 */
            h->mb.pic.p_fref[list][ref][i==0 ? 0:i+3] = &frame->plane[i][i_offset];
            /* slots 1-3 hold the frame's filtered[1..3] planes, plane 0 only */
            if( i == 0 )
                for( k = 1; k < 4; k++ )
                    h->mb.pic.p_fref[list][ref][k] = &frame->filtered[k][i_offset];
        }
    }
}

Laurent Aimar's avatar
Laurent Aimar committed
876
877
void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
{
Loren Merritt's avatar
Loren Merritt committed
878
    int i_mb_xy = i_mb_y * h->mb.i_mb_stride + i_mb_x;
879
880
881
882
883
884
    int i_mb_4x4 = 4*(i_mb_y * h->mb.i_b4_stride + i_mb_x);
    int i_mb_8x8 = 2*(i_mb_y * h->mb.i_b8_stride + i_mb_x);
    int i_top_y = i_mb_y - (1 << h->mb.b_interlaced);
    int i_top_xy = i_top_y * h->mb.i_mb_stride + i_mb_x;
    int i_top_4x4 = (4*i_top_y+3) * h->mb.i_b4_stride + 4*i_mb_x;
    int i_top_8x8 = (2*i_top_y+1) * h->mb.i_b8_stride + 2*i_mb_x;
Laurent Aimar's avatar
Laurent Aimar committed
885
886
887
888
889
890
891
892
893
    int i_left_xy = -1;
    int i_top_type = -1;    /* gcc warn */
    int i_left_type= -1;

    int i;

    /* init index */
    h->mb.i_mb_x = i_mb_x;
    h->mb.i_mb_y = i_mb_y;
Loren Merritt's avatar
Loren Merritt committed
894
    h->mb.i_mb_xy = i_mb_xy;
895
896
    h->mb.i_b8_xy = i_mb_8x8;
    h->mb.i_b4_xy = i_mb_4x4;
897
    h->mb.i_mb_top_xy = i_top_xy;
Laurent Aimar's avatar
Laurent Aimar committed
898
    h->mb.i_neighbour = 0;
899
    h->mb.i_neighbour_intra = 0;
Laurent Aimar's avatar
Laurent Aimar committed
900
901

    /* load cache */
902
    if( i_top_xy >= h->sh.i_first_mb )
Laurent Aimar's avatar
Laurent Aimar committed
903
    {
Loren Merritt's avatar
Loren Merritt committed
904
        h->mb.i_mb_type_top =
905
        i_top_type = h->mb.type[i_top_xy];
906
        h->mb.cache.i_cbp_top = h->mb.cbp[i_top_xy];
Laurent Aimar's avatar
Laurent Aimar committed
907
908
909

        h->mb.i_neighbour |= MB_TOP;

910
911
912
        if( !h->param.b_constrained_intra || IS_INTRA( i_top_type ) )
            h->mb.i_neighbour_intra |= MB_TOP;

Laurent Aimar's avatar
Laurent Aimar committed
913
        /* load intra4x4 */
914
        *(uint32_t*)&h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] = *(uint32_t*)&h->mb.intra4x4_pred_mode[i_top_xy][0];
Laurent Aimar's avatar
Laurent Aimar committed
915
916

        /* load non_zero_count */
917
918
919
920
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[0] - 8] = *(uint32_t*)&h->mb.non_zero_count[i_top_xy][12];
        /* shift because x264_scan8[16] is misaligned */
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16] - 9] = *(uint16_t*)&h->mb.non_zero_count[i_top_xy][18] << 8;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] = *(uint16_t*)&h->mb.non_zero_count[i_top_xy][22] << 8;
Laurent Aimar's avatar
Laurent Aimar committed
921
922
923
    }
    else
    {
Loren Merritt's avatar
Loren Merritt committed
924
        h->mb.i_mb_type_top = -1;
925
        h->mb.cache.i_cbp_top = -1;
926

Laurent Aimar's avatar
Laurent Aimar committed
927
        /* load intra4x4 */
928
        *(uint32_t*)&h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] = 0xFFFFFFFFU;
Laurent Aimar's avatar
Laurent Aimar committed
929
930

        /* load non_zero_count */
931
932
933
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[0] - 8] =
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+0] - 9] =
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] = 0x80808080U;
Laurent Aimar's avatar
Laurent Aimar committed
934
935
    }

Loren Merritt's avatar
Loren Merritt committed
936
    if( i_mb_x > 0 && i_mb_xy > h->sh.i_first_mb )
Laurent Aimar's avatar
Laurent Aimar committed
937
    {
Loren Merritt's avatar
Loren Merritt committed
938
        i_left_xy = i_mb_xy - 1;
Loren Merritt's avatar
Loren Merritt committed
939
        h->mb.i_mb_type_left =
Loren Merritt's avatar
Loren Merritt committed
940
        i_left_type = h->mb.type[i_left_xy];
941
        h->mb.cache.i_cbp_left = h->mb.cbp[h->mb.i_mb_xy - 1];
Laurent Aimar's avatar
Laurent Aimar committed
942
943
944

        h->mb.i_neighbour |= MB_LEFT;

945
946
947
        if( !h->param.b_constrained_intra || IS_INTRA( i_left_type ) )
            h->mb.i_neighbour_intra |= MB_LEFT;

Laurent Aimar's avatar
Laurent Aimar committed
948
949
950
951
952
953
954
        /* load intra4x4 */
        h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][4];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][5];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][6];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][3];

        /* load non_zero_count */
955
        h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = h->mb.non_zero_count[i_left_xy][3];
Laurent Aimar's avatar
Laurent Aimar committed
956
        h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = h->mb.non_zero_count[i_left_xy][7];
957
        h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = h->mb.non_zero_count[i_left_xy][11];
Laurent Aimar's avatar
Laurent Aimar committed
958
959
960
961
962
963
964
965
966
967
        h->mb.cache.non_zero_count[x264_scan8[10] - 1] = h->mb.non_zero_count[i_left_xy][15];

        h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = h->mb.non_zero_count[i_left_xy][16+1];
        h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = h->mb.non_zero_count[i_left_xy][16+3];

        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = h->mb.non_zero_count[i_left_xy][16+4+1];
        h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = h->mb.non_zero_count[i_left_xy][16+4+3];
    }
    else
    {
Loren Merritt's avatar
Loren Merritt committed
968
        h->mb.i_mb_type_left = -1;
969
        h->mb.cache.i_cbp_left = -1;
Loren Merritt's avatar
Loren Merritt committed
970

Laurent Aimar's avatar