/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: macroblock.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

#include "common.h"

/* Predict the intra 4x4 prediction mode of block idx.
 *
 * Reads the cached modes at offsets -1 and -8 from the block's scan8
 * position (presumably the left and top neighbours), passes both through
 * x264_mb_pred_mode4x4_fix() and takes the smaller result.
 * Returns I_PRED_4x4_DC when that minimum is negative, the minimum otherwise. */
int x264_mb_predict_intra4x4_mode( x264_t *h, int idx )
{
    const int ma = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 1];
    const int mb = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 8];
    const int m  = X264_MIN( x264_mb_pred_mode4x4_fix(ma),
                             x264_mb_pred_mode4x4_fix(mb) );

    if( m < 0 )
        return I_PRED_4x4_DC;

    return m;
}

/* Predict the non-zero coefficient count of block idx from the cached
 * counts at scan8 offsets -1 and -8 (presumably left and top neighbours).
 *
 * When the sum of both counts is below 0x80 the rounded-up average is
 * returned; otherwise the sum itself is used. The result is always masked
 * to its low 7 bits. */
int x264_mb_predict_non_zero_code( x264_t *h, int idx )
{
    const int pos = x264_scan8[idx];
    const int nz_a = h->mb.cache.non_zero_count[pos - 1];
    const int nz_b = h->mb.cache.non_zero_count[pos - 8];
    const int sum  = nz_a + nz_b;

    /* sums of 0x80 and above skip the averaging step */
    const int pred = ( sum < 0x80 ) ? ( sum + 1 ) >> 1 : sum;

    return pred & 0x7f;
}

/* Tell whether the 8x8 transform may be used for the current macroblock.
 *
 * Returns 0 when the PPS disables the 8x8 transform. Otherwise the decision
 * is driven by partition_tab, indexed by h->mb.i_type:
 *   0 -> not allowed, 1 -> allowed, 2 -> depends on further checks
 *   (B_DIRECT, P_8x8 and, by elimination, B_8x8). */
int x264_mb_transform_8x8_allowed( x264_t *h )
{
    // intra and skip are disallowed
    // large partitions are allowed
    // direct and 8x8 are conditional
    static const uint8_t partition_tab[X264_MBTYPE_MAX] = {
        0,0,0,0,1,2,0,2,1,1,1,1,1,1,1,1,1,2,0,
    };
    int p, i;

    if( !h->pps->b_transform_8x8_mode )
        return 0;
    p = partition_tab[h->mb.i_type];
    if( p < 2 )
        return p;
    else if( h->mb.i_type == B_DIRECT )
        return h->sps->b_direct8x8_inference;
    else if( h->mb.i_type == P_8x8 )
    {
        /* without sub-8x8 P analysis every sub-partition is taken to be 8x8 */
        if( !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8) )
            return 1;
        for( i=0; i<4; i++ )
            if( h->mb.i_sub_partition[i] != D_L0_8x8 )
                return 0;
        return 1;
    }
    else // B_8x8
    {
        // x264 currently doesn't use sub-8x8 B partitions, so don't check for them
        if( h->sps->b_direct8x8_inference )
            return 1;
        /* without 8x8 inference a direct sub-block rules the transform out */
        for( i=0; i<4; i++ )
            if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
                return 0;
        return 1;
    }
}

/* Compute the motion vector predictor of a partition into mvp.
 *
 *   i_list   reference list of the partition
 *   idx      index of the partition's first block (looked up in x264_scan8)
 *   i_width  partition width, used as a cache offset to find candidate C
 *   mvp      output, receives the predicted (x,y) vector
 *
 * Candidates A, B and C are the cache entries at offsets -1, -8 and
 * -8+i_width from the block. C is replaced by the entry at -8-1 when
 * idx/i_width select that case or when C's reference is -2 (presumably
 * "not available" -- confirm against the cache setup code).
 *
 * NOTE(review): motion vectors are copied as one 32-bit word through a
 * pointer cast; this relies on the build tolerating that type pun. */
void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] )
{
    const int i8 = x264_scan8[idx];
    const int i_ref= h->mb.cache.ref[i_list][i8];
    int     i_refa = h->mb.cache.ref[i_list][i8 - 1];
    int16_t *mv_a  = h->mb.cache.mv[i_list][i8 - 1];
    int     i_refb = h->mb.cache.ref[i_list][i8 - 8];
    int16_t *mv_b  = h->mb.cache.mv[i_list][i8 - 8];
    int     i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width ];
    int16_t *mv_c  = h->mb.cache.mv[i_list][i8 - 8 + i_width];

    int i_count;

    if( (idx&0x03) == 3 || ( i_width == 2 && (idx&0x3) == 2 )|| i_refc == -2 )
    {
        i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
        mv_c   = h->mb.cache.mv[i_list][i8 - 8 - 1];
    }

    /* 16x8 and 8x16 partitions try one directional candidate first */
    if( h->mb.i_partition == D_16x8 )
    {
        if( idx == 0 && i_refb == i_ref )
        {
            *(uint32_t*)mvp = *(uint32_t*)mv_b;
            return;
        }
        else if( idx != 0 && i_refa == i_ref )
        {
            *(uint32_t*)mvp = *(uint32_t*)mv_a;
            return;
        }
    }
    else if( h->mb.i_partition == D_8x16 )
    {
        if( idx == 0 && i_refa == i_ref )
        {
            *(uint32_t*)mvp = *(uint32_t*)mv_a;
            return;
        }
        else if( idx != 0 && i_refc == i_ref )
        {
            *(uint32_t*)mvp = *(uint32_t*)mv_c;
            return;
        }
    }

    /* count the candidates that use the same reference as this partition */
    i_count = 0;
    if( i_refa == i_ref ) i_count++;
    if( i_refb == i_ref ) i_count++;
    if( i_refc == i_ref ) i_count++;

    if( i_count > 1 )
        x264_median_mv( mvp, mv_a, mv_b, mv_c );
    else if( i_count == 1 )
    {
        /* exactly one match: use that candidate directly */
        if( i_refa == i_ref )
            *(uint32_t*)mvp = *(uint32_t*)mv_a;
        else if( i_refb == i_ref )
            *(uint32_t*)mvp = *(uint32_t*)mv_b;
        else
            *(uint32_t*)mvp = *(uint32_t*)mv_c;
    }
    else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
        *(uint32_t*)mvp = *(uint32_t*)mv_a;
    else
        x264_median_mv( mvp, mv_a, mv_b, mv_c );
}

/* Compute the motion vector predictor of a 16x16 partition into mvp.
 *
 *   i_list  reference list
 *   i_ref   reference index the partition is being predicted for
 *   mvp     output, receives the predicted (x,y) vector
 *
 * Candidates A, B and C are the cache entries at offsets -1, -8 and -8+4
 * from X264_SCAN8_0; C falls back to the entry at -8-1 when its reference
 * is -2 (presumably "not available" -- confirm against the cache setup). */
void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
{
    int     i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
    int16_t *mv_a  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
    int     i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
    int16_t *mv_b  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
    int     i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
    int16_t *mv_c  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];

    int i_count;

    if( i_refc == -2 )
    {
        i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
        mv_c   = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
    }

    /* count the candidates that use the requested reference */
    i_count = 0;
    if( i_refa == i_ref ) i_count++;
    if( i_refb == i_ref ) i_count++;
    if( i_refc == i_ref ) i_count++;

    if( i_count > 1 )
        x264_median_mv( mvp, mv_a, mv_b, mv_c );
    else if( i_count == 1 )
    {
        /* exactly one match: use that candidate directly */
        if( i_refa == i_ref )
            *(uint32_t*)mvp = *(uint32_t*)mv_a;
        else if( i_refb == i_ref )
            *(uint32_t*)mvp = *(uint32_t*)mv_b;
        else
            *(uint32_t*)mvp = *(uint32_t*)mv_c;
    }
    else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
        *(uint32_t*)mvp = *(uint32_t*)mv_a;
    else
        x264_median_mv( mvp, mv_a, mv_b, mv_c );
}


/* Compute the motion vector used by a P-skip macroblock into mv.
 *
 * The vector is zero when either list-0 candidate (cache offsets -1 and -8
 * from X264_SCAN8_0) has reference -2, or when either candidate has
 * reference 0 together with a zero motion vector. Otherwise it is the
 * 16x16 predictor for list 0, reference 0. */
void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
{
    int     i_refa = h->mb.cache.ref[0][X264_SCAN8_0 - 1];
    int     i_refb = h->mb.cache.ref[0][X264_SCAN8_0 - 8];
    int16_t *mv_a  = h->mb.cache.mv[0][X264_SCAN8_0 - 1];
    int16_t *mv_b  = h->mb.cache.mv[0][X264_SCAN8_0 - 8];

    /* both components of a vector are tested at once as one 32-bit word */
    if( i_refa == -2 || i_refb == -2 ||
        ( i_refa == 0 && *(uint32_t*)mv_a == 0 ) ||
        ( i_refb == 0 && *(uint32_t*)mv_b == 0 ) )
    {
        *(uint32_t*)mv = 0;
    }
    else
    {
        x264_mb_predict_mv_16x16( h, 0, 0, mv );
    }
}

/* Fill the ref/mv cache of the current macroblock with temporal direct
 * prediction, derived from the co-located macroblock in h->fref1[0].
 *
 * Returns 1 when the prediction was written, 0 when it is unusable:
 *   - a co-located reference does not map into the current list 0, or
 *   - with more than one thread, a predicted vertical component exceeds
 *     h->mb.mv_max_spel[1].
 * The cache may already be partially written when 0 is returned. */
static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
{
    int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
    int i_mb_8x8 =  4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
    int i8, i4;
    int b8x8;
    const int type_col = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];

    /* list 1 always uses reference 0 */
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );

    if( IS_INTRA( type_col ) )
    {
        /* intra co-located block: reference 0 and zero vectors on both lists */
        x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
        x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 0, 0 );
        x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 1, 0 );
        return 1;
    }
    /* one vector per 8x8 block is enough unless the co-located block may
     * carry sub-8x8 motion and 8x8 inference is off */
    b8x8 = h->sps->b_direct8x8_inference ||
           (type_col != P_8x8 && type_col != B_SKIP && type_col != B_DIRECT && type_col != B_8x8);

    for( i8 = 0; i8 < 4; i8++ )
    {
        const int x8 = i8%2;
        const int y8 = i8/2;
        const int i_part_8x8 = i_mb_8x8 + x8 + y8 * h->mb.i_b8_stride;
        const int i_ref = h->mb.map_col_to_list0[ h->fref1[0]->ref[0][ i_part_8x8 ] ];

        if( i_ref >= 0 )
        {
            const int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];

            x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );

            if( b8x8 )
            {
                /* 3*x8 / 3*y8 selects the 4x4 block at the outer corner of the 8x8 block */
                const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
                const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
                const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) );
                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
            }
            else
            {
                for( i4 = 0; i4 < 4; i4++ )
                {
                    const int x4 = i4%2 + 2*x8;
                    const int y4 = i4/2 + 2*y8;
                    const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + x4 + y4 * h->mb.i_b4_stride ];
                    const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
                    const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
                    x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, pack16to32_mask(l0x, l0y) );
                    x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
                }
            }
        }
        else
        {
            /* the collocated ref isn't in the current list0 */
            /* FIXME: we might still be able to use direct_8x8 on some partitions */
            /* FIXME: with B-pyramid + extensive ref list reordering
             *   (not currently used), we would also have to check
             *   l1mv1 like in spatial mode */
            return 0;
        }
    }

    /* with several threads, reject vectors whose vertical component exceeds
     * mv_max_spel[1] */
    if( h->param.i_threads > 1 )
    {
        int di = b8x8 ? 4 : 1;
        for( i4=0; i4<16; i4+=di )
        {
            if( h->mb.cache.mv[0][x264_scan8[i4]][1] > h->mb.mv_max_spel[1]
             || h->mb.cache.mv[1][x264_scan8[i4]][1] > h->mb.mv_max_spel[1] )
            {
#if 0
                fprintf(stderr, "direct_temporal: (%d,%d) (%d,%d) > %d \n",
                        h->mb.cache.mv[0][x264_scan8[i4]][0],
                        h->mb.cache.mv[0][x264_scan8[i4]][1],
                        h->mb.cache.mv[1][x264_scan8[i4]][0],
                        h->mb.cache.mv[1][x264_scan8[i4]][1],
                        h->mb.mv_max_spel[1]);
#endif
                return 0;
            }
        }
    }

    return 1;
}

/* Fill the ref/mv cache of the current macroblock with spatial direct
 * prediction.
 *
 * For each list the reference is the smallest non-negative reference among
 * neighbour candidates A, B and C (-1 when none is non-negative); when both
 * lists end up negative, both references are set to 0 with zero vectors.
 * Otherwise each used list takes its 16x16 predictor. Afterwards vectors of
 * blocks whose co-located block in h->fref1[0] has reference 0 and a
 * vector within +-1 in both components are zeroed for lists whose chosen
 * reference is 0 (the col_zero_flag loop).
 *
 * Returns 1 on success, 0 when, with more than one thread, a predicted
 * vertical component exceeds h->mb.mv_max_spel[1]. */
static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
{
    int ref[2];
    DECLARE_ALIGNED_8( int16_t mv[2][2] );
    int i_list;
    int i8, i4;
    int b8x8;
    const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
    const int8_t *l1ref1 = &h->fref1[0]->ref[1][ h->mb.i_b8_xy ];
    const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[0][ h->mb.i_b4_xy ];
    const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[1][ h->mb.i_b4_xy ];
    const int type_col = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];

    for( i_list=0; i_list<2; i_list++ )
    {
        int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
        int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
        int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
        if( i_refc == -2 )
            i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];

        /* smallest non-negative candidate, or -1 when there is none */
        ref[i_list] = i_refa;
        if( ref[i_list] < 0 || ( i_refb < ref[i_list] && i_refb >= 0 ))
            ref[i_list] = i_refb;
        if( ref[i_list] < 0 || ( i_refc < ref[i_list] && i_refc >= 0 ))
            ref[i_list] = i_refc;
        if( ref[i_list] < 0 )
            ref[i_list] = -1;
    }

    if( ref[0] < 0 && ref[1] < 0 )
    {
        ref[0] =
        ref[1] = 0;
        /* one 64-bit store clears both vectors of mv[2][2] */
        *(uint64_t*)mv[0] = 0;
    }
    else
    {
        for( i_list=0; i_list<2; i_list++ )
        {
            if( ref[i_list] >= 0 )
                x264_mb_predict_mv_16x16( h, i_list, ref[i_list], mv[i_list] );
            else
                *(uint32_t*)mv[i_list] = 0;
        }
    }

    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, mv[0] );
    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, mv[1] );

    if( IS_INTRA( type_col ) )
        return 1;

    if( h->param.i_threads > 1
        && ( mv[0][1] > h->mb.mv_max_spel[1]
          || mv[1][1] > h->mb.mv_max_spel[1] ) )
    {
#if 0
        fprintf(stderr, "direct_spatial: (%d,%d) (%d,%d) > %d \n",
                mv[0][0], mv[0][1], mv[1][0], mv[1][1],
                h->mb.mv_max_spel[1]);
#endif
        return 0;
    }

    b8x8 = h->sps->b_direct8x8_inference ||
           (type_col != P_8x8 && type_col != B_SKIP && type_col != B_DIRECT && type_col != B_8x8);

    /* col_zero_flag */
    for( i8=0; i8<4; i8++ )
    {
        const int x8 = i8%2;
        const int y8 = i8/2;
        const int o8 = x8 + y8 * h->mb.i_b8_stride;
        if( l1ref0[o8] == 0 || ( l1ref0[o8] < 0 && l1ref1[o8] == 0 ) )
        {
            const int16_t (*l1mv)[2] = (l1ref0[o8] == 0) ? l1mv0 : l1mv1;
            if( b8x8 )
            {
                /* 3*x8 / 3*y8 selects the 4x4 block at the outer corner of the 8x8 block */
                const int16_t *mvcol = l1mv[3*x8 + 3*y8 * h->mb.i_b4_stride];
                if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
                {
                    if( ref[0] == 0 )
                        x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
                    if( ref[1] == 0 )
                        x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
                }
            }
            else
            {
                for( i4=0; i4<4; i4++ )
                {
                    const int x4 = i4%2 + 2*x8;
                    const int y4 = i4/2 + 2*y8;
                    const int16_t *mvcol = l1mv[x4 + y4 * h->mb.i_b4_stride];
                    if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
                    {
                        if( ref[0] == 0 )
                            x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0 );
                        if( ref[1] == 0 )
                            x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0 );
                    }
                }
            }
        }
    }

    return 1;
}

/* Compute the direct prediction of the current macroblock into the ref/mv
 * cache, using the spatial or temporal variant selected by the slice header.
 *
 *   b_changed  optional output (may be NULL). When non-NULL and a prediction
 *              is available, receives whether the new prediction differs
 *              from the copy kept in h->mb.cache.direct_ref/direct_mv.
 *
 * Returns 0 when direct prediction is disabled or unavailable, non-zero
 * otherwise. When a prediction is available and is not known to be
 * unchanged, it is saved into direct_ref/direct_mv. */
int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
{
    int b_available;
    if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
        return 0;
    else if( h->sh.b_direct_spatial_mv_pred )
        b_available = x264_mb_predict_mv_direct16x16_spatial( h );
    else
        b_available = x264_mb_predict_mv_direct16x16_temporal( h );

    if( b_changed != NULL && b_available )
    {
        int type_col = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];
        if( IS_INTRA(type_col) || type_col == P_SKIP )
        {
            /* only the first block is compared for these co-located types */
            *b_changed = h->mb.cache.direct_ref[0][0] != h->mb.cache.ref[0][X264_SCAN8_0]
                      || h->mb.cache.direct_ref[1][0] != h->mb.cache.ref[1][X264_SCAN8_0]
                      || *(uint32_t*)h->mb.cache.direct_mv[0][X264_SCAN8_0] != *(uint32_t*)h->mb.cache.mv[0][X264_SCAN8_0]
                      || *(uint32_t*)h->mb.cache.direct_mv[1][X264_SCAN8_0] != *(uint32_t*)h->mb.cache.mv[1][X264_SCAN8_0];
        }
        else
        {
            /* compare the four per-8x8 references and the whole mv cache */
            int i, l;
            *b_changed = 0;
            for( l = 0; l < 2; l++ )
                for( i = 0; i < 4; i++ )
                    *b_changed |= h->mb.cache.direct_ref[l][i] != h->mb.cache.ref[l][x264_scan8[i*4]];
            *b_changed = *b_changed || memcmp(h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv));
        }
        if( !*b_changed )
            return b_available;
    }

    /* cache ref & mv */
    if( b_available )
    {
        int i, l;
        for( l = 0; l < 2; l++ )
            for( i = 0; i < 4; i++ )
                h->mb.cache.direct_ref[l][i] = h->mb.cache.ref[l][x264_scan8[i*4]];
        h->mc.memcpy_aligned(h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv));
    }

    return b_available;
}

/* Restore the saved direct prediction of 8x8 block idx into the ref/mv
 * cache, for both lists.
 *
 * References come from h->mb.cache.direct_ref[list][idx]. Motion vectors
 * are copied from h->mb.cache.direct_mv as two 64-bit words per list: one
 * at the block's first scan8 position and one 8 cache entries further on
 * (presumably the next cache row -- confirm against the cache layout). */
void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
{
    const int blk_x = 2*(idx%2);
    const int blk_y = 2*(idx/2);
    const int first = x264_scan8[idx*4];
    int list;

    x264_macroblock_cache_ref( h, blk_x, blk_y, 2, 2, 0, h->mb.cache.direct_ref[0][idx] );
    x264_macroblock_cache_ref( h, blk_x, blk_y, 2, 2, 1, h->mb.cache.direct_ref[1][idx] );

    for( list = 0; list < 2; list++ )
    {
        /* each 64-bit copy moves two adjacent (x,y) vectors */
        *(uint64_t*)h->mb.cache.mv[list][first] =
            *(uint64_t*)h->mb.cache.direct_mv[list][first];
        *(uint64_t*)h->mb.cache.mv[list][first+8] =
            *(uint64_t*)h->mb.cache.direct_mv[list][first+8];
    }
}

#define FIXED_SCALE 256

484
/* This just improves encoder performance, it's not part of the spec */
485
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[8][2], int *i_mvc )
486
487
488
{
    int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
    int i = 0;
489

490
#define SET_MVP(mvp) { \
491
        *(uint32_t*)mvc[i] = *(uint32_t*)mvp; \
492
493
494
495
496
497
        i++; \
    }

    /* b_direct */
    if( h->sh.i_type == SLICE_TYPE_B
        && h->mb.cache.ref[i_list][x264_scan8[12]] == i_ref )
498
    {
499
        SET_MVP( h->mb.cache.mv[i_list][x264_scan8[12]] );
500
501
    }

502
    /* spatial predictors */
Loren Merritt's avatar
Loren Merritt committed
503
    if( h->mb.i_neighbour & MB_LEFT )
504
505
    {
        int i_mb_l = h->mb.i_mb_xy - 1;
506
507
        /* skip MBs didn't go through the whole search process, so mvr is undefined */
        if( !IS_SKIP( h->mb.type[i_mb_l] ) )
508
            SET_MVP( mvr[i_mb_l] );
509
    }
Loren Merritt's avatar
Loren Merritt committed
510
    if( h->mb.i_neighbour & MB_TOP )
511
    {
512
        int i_mb_t = h->mb.i_mb_top_xy;
513
        if( !IS_SKIP( h->mb.type[i_mb_t] ) )
514
            SET_MVP( mvr[i_mb_t] );
515

Loren Merritt's avatar
Loren Merritt committed
516
        if( h->mb.i_neighbour & MB_TOPLEFT && !IS_SKIP( h->mb.type[i_mb_t - 1] ) )
517
            SET_MVP( mvr[i_mb_t-1] );
518
        if( h->mb.i_mb_x < h->mb.i_mb_stride - 1 && !IS_SKIP( h->mb.type[i_mb_t + 1] ) )
519
520
521
522
523
            SET_MVP( mvr[i_mb_t+1] );
    }
#undef SET_MVP

    /* temporal predictors */
524
525
    /* FIXME temporal scaling w/ interlace */
    if( h->fref0[0]->i_ref[0] > 0 && !h->sh.b_mbaff )
526
527
528
529
530
531
    {
        x264_frame_t *l0 = h->fref0[0];

#define SET_TMVP(dx, dy) { \
            int i_b4 = h->mb.i_b4_xy + dx*4 + dy*4*h->mb.i_b4_stride; \
            int i_b8 = h->mb.i_b8_xy + dx*2 + dy*2*h->mb.i_b8_stride; \
532
533
            int ref_col = l0->ref[0][i_b8]; \
            if( ref_col >= 0 ) \
534
            { \
535
536
537
                int scale = (h->fdec->i_poc - h->fdec->ref_poc[0][i_ref]) * l0->inv_ref_poc[ref_col];\
                mvc[i][0] = l0->mv[0][i_b4][0] * scale / FIXED_SCALE; \
                mvc[i][1] = l0->mv[0][i_b4][1] * scale / FIXED_SCALE; \
538
539
                i++; \
            } \
540
        }
541
542
543
544
545
546
547

        SET_TMVP(0,0);
        if( h->mb.i_mb_x < h->sps->i_mb_width-1 )
            SET_TMVP(1,0);
        if( h->mb.i_mb_y < h->sps->i_mb_height-1 )
            SET_TMVP(0,1);
#undef SET_TMVP
548
    }
549

Fiona Glaser's avatar
Fiona Glaser committed
550
551
552
    if(i == 0)
        *(uint32_t*)mvc[i] = 0;

553
554
555
    *i_mvc = i;
}

/* Precompute, for every list-0 reference, the rounded fixed-point reciprocal
 * of its POC distance to the frame being coded, so that later scaling can
 * multiply instead of divide. Results go to h->fdec->inv_ref_poc[].
 *
 * NOTE(review): the POC distance is used as a divisor, so it is assumed to
 * be non-zero for every reference -- confirm with the reference list setup. */
static void setup_inverse_delta_pocs( x264_t *h )
{
    int ref;
    for( ref = 0; ref < h->i_ref0; ++ref )
    {
        const int poc_dist = h->fdec->i_poc - h->fref0[ref]->i_poc;
        /* adding half the divisor rounds the quotient */
        const int rounded  = FIXED_SCALE + poc_dist/2;
        h->fdec->inv_ref_poc[ref] = rounded / poc_dist;
    }
}

/* Motion-compensate one partition from list 0 into the decoded-frame
 * buffers (luma plus both chroma planes).
 *
 * x, y, width and height are scaled by 4 for luma and by 2 for chroma
 * below, i.e. they are expressed in 4x4 luma blocks. The cached motion
 * vector is clipped to [h->mb.mv_min, h->mb.mv_max] before use. */
static inline void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
{
    const int i8 = x264_scan8[0]+x+8*y;
    const int i_ref = h->mb.cache.ref[0][i8];
    const int mvx   = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
    int       mvy   = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );

    h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
                   h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
                   mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );

    // chroma is offset if MCing from a field of opposite parity
    if( h->mb.b_interlaced & i_ref )
        mvy += (h->mb.i_mb_y & 1)*4 - 2;

    h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
                     &h->mb.pic.p_fref[0][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
                     mvx, mvy, 2*width, 2*height );

    h->mc.mc_chroma( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
                     &h->mb.pic.p_fref[0][i_ref][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
                     mvx, mvy, 2*width, 2*height );
}
/* Motion-compensate one partition from list 1 into the decoded-frame
 * buffers (luma plus both chroma planes).
 *
 * x, y, width and height are scaled by 4 for luma and by 2 for chroma
 * below, i.e. they are expressed in 4x4 luma blocks. The cached motion
 * vector is clipped to [h->mb.mv_min, h->mb.mv_max] before use. */
static inline void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int height )
{
    const int i8 = x264_scan8[0]+x+8*y;
    const int i_ref = h->mb.cache.ref[1][i8];
    const int mvx   = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
    int       mvy   = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );

    h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
                   h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
                   mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );

    /* adjust the chroma vector for interlaced macroblocks with an odd reference */
    if( h->mb.b_interlaced & i_ref )
        mvy += (h->mb.i_mb_y & 1)*4 - 2;

    h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
                     &h->mb.pic.p_fref[1][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
                     mvx, mvy, 2*width, 2*height );

    h->mc.mc_chroma( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
                     &h->mb.pic.p_fref[1][i_ref][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
                     mvx, mvy, 2*width, 2*height );
}

/* Bi-predictive motion compensation of one partition.
 *
 * The list-0 prediction is written to the decoded-frame buffers first via
 * x264_mb_mc_0xywh(); the list-1 prediction is then rendered into a
 * temporary buffer and blended into the destination, with
 * h->mb.bipred_weight when weighted bi-prediction is enabled and a plain
 * average otherwise. x, y, width and height use the same units as
 * x264_mb_mc_0xywh(). */
static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int height )
{
    const int i8 = x264_scan8[0]+x+8*y;

    const int i_ref1 = h->mb.cache.ref[1][i8];
    const int mvx1   = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
    int       mvy1   = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
    DECLARE_ALIGNED_16( uint8_t tmp[16*16] );
    int i_mode = x264_size2pixel[height][width];

    x264_mb_mc_0xywh( h, x, y, width, height );

    h->mc.mc_luma( tmp, 16, h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
                   mvx1 + 4*4*x, mvy1 + 4*4*y, 4*width, 4*height );

    /* adjust the chroma vector for interlaced macroblocks with an odd reference */
    if( h->mb.b_interlaced & i_ref1 )
        mvy1 += (h->mb.i_mb_y & 1)*4 - 2;

    /* chroma blends use the function table entry at i_mode+3 */
    if( h->param.analyse.b_weighted_bipred )
    {
        const int i_ref0 = h->mb.cache.ref[0][i8];
        const int weight = h->mb.bipred_weight[i_ref0][i_ref1];

        h->mc.avg_weight[i_mode]( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE, tmp, 16, weight );

        h->mc.mc_chroma( tmp, 16, &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
                         mvx1, mvy1, 2*width, 2*height );
        h->mc.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp, 16, weight );

        h->mc.mc_chroma( tmp, 16, &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
                         mvx1, mvy1, 2*width, 2*height );
        h->mc.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp, 16, weight );
    }
    else
    {
        h->mc.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE, tmp, 16 );

        h->mc.mc_chroma( tmp, 16, &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
                         mvx1, mvy1, 2*width, 2*height );
        h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp, 16 );

        h->mc.mc_chroma( tmp, 16, &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
                         mvx1, mvy1, 2*width, 2*height );
        h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp, 16 );
    }
}

/* Motion-compensate a direct-predicted 8x8 block whose top-left 4x4 block
 * is at (x, y).
 *
 * The lists to use are read from the cached references of that first block:
 * list 0 only, list 1 only, or both. With direct 8x8 inference the block is
 * compensated in one 2x2 call; otherwise each of its four 4x4 blocks is
 * compensated separately, in raster order. */
static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
{
    const int i8 = x264_scan8[0] + x + 8*y;
    const int b_l0 = h->mb.cache.ref[0][i8] >= 0;
    const int b_bi = b_l0 && h->mb.cache.ref[1][i8] >= 0;

    /* FIXME: optimize based on current block size, not global settings? */
    /* a step of 2 covers the whole 8x8 block in a single iteration */
    const int step = h->sps->b_direct8x8_inference ? 2 : 1;
    int dx, dy;

    for( dy = 0; dy < 2; dy += step )
    {
        for( dx = 0; dx < 2; dx += step )
        {
            if( b_bi )
                x264_mb_mc_01xywh( h, x+dx, y+dy, step, step );
            else if( b_l0 )
                x264_mb_mc_0xywh( h, x+dx, y+dy, step, step );
            else
                x264_mb_mc_1xywh( h, x+dx, y+dy, step, step );
        }
    }
}
/* Motion-compensate the 8x8 partition number i8 (0..3, raster order) of the
 * current macroblock according to its sub-partition type.
 * Sub-blocks are processed in raster order inside the partition. */
void x264_mb_mc_8x8( x264_t *h, int i8 )
{
    const int x = 2*(i8&1);
    const int y = 2*(i8>>1);
    const int i_sub = h->mb.i_sub_partition[i8];
    int bw, bh;     /* sub-block width/height in 4x4-block units */
    int dx, dy;

    /* First pick the sub-block shape... */
    switch( i_sub )
    {
        case D_L0_8x8:
        case D_L1_8x8:
        case D_BI_8x8:
            bw = 2; bh = 2;
            break;
        case D_L0_8x4:
        case D_L1_8x4:
        case D_BI_8x4:
            bw = 2; bh = 1;
            break;
        case D_L0_4x8:
        case D_L1_4x8:
        case D_BI_4x8:
            bw = 1; bh = 2;
            break;
        case D_L0_4x4:
        case D_L1_4x4:
        case D_BI_4x4:
            bw = 1; bh = 1;
            break;
        case D_DIRECT_8x8:
            x264_mb_mc_direct8x8( h, x, y );
            return;
        default:
            /* any other value: nothing to compensate */
            return;
    }

    /* ...then tile the 8x8 area with it, using the list(s) the type names. */
    for( dy = 0; dy < 2; dy += bh )
        for( dx = 0; dx < 2; dx += bw )
            switch( i_sub )
            {
                case D_L0_8x8:
                case D_L0_8x4:
                case D_L0_4x8:
                case D_L0_4x4:
                    x264_mb_mc_0xywh( h, x+dx, y+dy, bw, bh );
                    break;
                case D_L1_8x8:
                case D_L1_8x4:
                case D_L1_4x8:
                case D_L1_4x4:
                    x264_mb_mc_1xywh( h, x+dx, y+dy, bw, bh );
                    break;
                case D_BI_8x8:
                case D_BI_8x4:
                case D_BI_4x8:
                case D_BI_4x4:
                    x264_mb_mc_01xywh( h, x+dx, y+dy, bw, bh );
                    break;
                default:
                    break;
            }
}

Laurent Aimar's avatar
Laurent Aimar committed
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
/* Motion-compensate the whole current macroblock, dispatching on
 * h->mb.i_type and h->mb.i_partition.
 * Block positions and sizes passed to the helpers are in 4x4-block units
 * (a full macroblock is 4x4 of them). */
void x264_mb_mc( x264_t *h )
{
    if( h->mb.i_type == P_L0 )
    {
        /* list0-only prediction, one or two partitions */
        if( h->mb.i_partition == D_16x16 )
        {
            x264_mb_mc_0xywh( h, 0, 0, 4, 4 );
        }
        else if( h->mb.i_partition == D_16x8 )
        {
            x264_mb_mc_0xywh( h, 0, 0, 4, 2 );
            x264_mb_mc_0xywh( h, 0, 2, 4, 2 );
        }
        else if( h->mb.i_partition == D_8x16 )
        {
            x264_mb_mc_0xywh( h, 0, 0, 2, 4 );
            x264_mb_mc_0xywh( h, 2, 0, 2, 4 );
        }
    }
    else if( h->mb.i_type == P_8x8 || h->mb.i_type == B_8x8 )
    {
        /* four 8x8 partitions, each with its own sub-partition type */
        int i;
        for( i = 0; i < 4; i++ )
            x264_mb_mc_8x8( h, i );
    }
    else if( h->mb.i_type == B_SKIP || h->mb.i_type == B_DIRECT )
    {
        /* direct prediction, handled per 8x8 quadrant */
        x264_mb_mc_direct8x8( h, 0, 0 );
        x264_mb_mc_direct8x8( h, 2, 0 );
        x264_mb_mc_direct8x8( h, 0, 2 );
        x264_mb_mc_direct8x8( h, 2, 2 );
    }
    else    /* B_*x* */
    {
        int b_list0[2];
        int b_list1[2];

        int i;

        /* init ref list utilisations: for each of the (up to) two partitions,
         * look up whether list0 and/or list1 is used by this mb type */
        for( i = 0; i < 2; i++ )
        {
            b_list0[i] = x264_mb_type_list0_table[h->mb.i_type][i];
            b_list1[i] = x264_mb_type_list1_table[h->mb.i_type][i];
        }
        if( h->mb.i_partition == D_16x16 )
        {
            if( b_list0[0] && b_list1[0] ) x264_mb_mc_01xywh( h, 0, 0, 4, 4 );
            else if( b_list0[0] )          x264_mb_mc_0xywh ( h, 0, 0, 4, 4 );
            else if( b_list1[0] )          x264_mb_mc_1xywh ( h, 0, 0, 4, 4 );
        }
        else if( h->mb.i_partition == D_16x8 )
        {
            /* top half */
            if( b_list0[0] && b_list1[0] ) x264_mb_mc_01xywh( h, 0, 0, 4, 2 );
            else if( b_list0[0] )          x264_mb_mc_0xywh ( h, 0, 0, 4, 2 );
            else if( b_list1[0] )          x264_mb_mc_1xywh ( h, 0, 0, 4, 2 );

            /* bottom half */
            if( b_list0[1] && b_list1[1] ) x264_mb_mc_01xywh( h, 0, 2, 4, 2 );
            else if( b_list0[1] )          x264_mb_mc_0xywh ( h, 0, 2, 4, 2 );
            else if( b_list1[1] )          x264_mb_mc_1xywh ( h, 0, 2, 4, 2 );
        }
        else if( h->mb.i_partition == D_8x16 )
        {
            /* left half */
            if( b_list0[0] && b_list1[0] ) x264_mb_mc_01xywh( h, 0, 0, 2, 4 );
            else if( b_list0[0] )          x264_mb_mc_0xywh ( h, 0, 0, 2, 4 );
            else if( b_list1[0] )          x264_mb_mc_1xywh ( h, 0, 0, 2, 4 );

            /* right half */
            if( b_list0[1] && b_list1[1] ) x264_mb_mc_01xywh( h, 2, 0, 2, 4 );
            else if( b_list0[1] )          x264_mb_mc_0xywh ( h, 2, 0, 2, 4 );
            else if( b_list1[1] )          x264_mb_mc_1xywh ( h, 2, 0, 2, 4 );
        }
    }
}

841
/* Set up the per-encoder macroblock strides and allocate the per-macroblock
 * side tables (qp, cbp, prediction modes, non-zero counts, mvd, mvr, intra
 * border backups).
 * Returns 0 on success, -1 if an allocation failed.
 * NOTE(review): CHECKED_MALLOC presumably jumps to the `fail` label on
 * allocation failure -- confirm against its definition. Buffers allocated
 * before the failure are not released here. */
int x264_macroblock_cache_init( x264_t *h )
{
    int i, j;
    int i_mb_count = h->mb.i_mb_count;

    h->mb.i_mb_stride = h->sps->i_mb_width;
    h->mb.i_b8_stride = h->sps->i_mb_width * 2;
    h->mb.i_b4_stride = h->sps->i_mb_width * 4;

    h->mb.b_interlaced = h->param.b_interlaced;

    CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
    CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
    CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );

    /* 0 -> 3 top(4), 4 -> 6 : left(3) */
    CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 7 * sizeof(int8_t) );

    /* all coeffs */
    CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 24 * sizeof(uint8_t) );
    CHECKED_MALLOC( h->mb.nnz_backup, h->sps->i_mb_width * 4 * 16 * sizeof(uint8_t) );

    /* these tables are only allocated when CABAC is enabled */
    if( h->param.b_cabac )
    {
        CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
        CHECKED_MALLOC( h->mb.mvd[0], 2*16 * i_mb_count * sizeof(int16_t) );
        CHECKED_MALLOC( h->mb.mvd[1], 2*16 * i_mb_count * sizeof(int16_t) );
    }

    /* one mvr buffer per reference of each list; the count is capped at 16
     * and doubled when interlaced, i.e. at most 32 per list, which matches
     * the bound used when freeing */
    for( i=0; i<2; i++ )
    {
        int i_refs = X264_MIN(16, (i ? 1 : h->param.i_frame_reference) + h->param.b_bframe_pyramid) << h->param.b_interlaced;
        for( j=0; j < i_refs; j++ )
        {
            CHECKED_MALLOC( h->mb.mvr[i][j], 2 * i_mb_count * sizeof(int16_t) );
        }
    }

    /* one border line per plane (two sets when interlaced); the stored
     * pointer is advanced by 8 bytes, so it must be moved back by 8 before
     * being freed */
    for( i=0; i<=h->param.b_interlaced; i++ )
    {
        for( j=0; j<3; j++ )
        {
            CHECKED_MALLOC( h->mb.intra_border_backup[i][j], h->fdec->i_stride[j] );
            h->mb.intra_border_backup[i][j] += 8;
        }
    }

    /* init with not available (for top right idx=7,15) */
    memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
    memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );

    return 0;
fail: return -1;
}
/* Release the per-macroblock tables referenced from h->mb. */
void x264_macroblock_cache_end( x264_t *h )
{
    int i, j;
    /* the stored border pointers are offset by 8 bytes from the start of
     * their allocation, so step back before freeing */
    for( i=0; i<=h->param.b_interlaced; i++ )
        for( j=0; j<3; j++ )
            x264_free( h->mb.intra_border_backup[i][j] - 8 );
    /* all 32 slots per list are passed to x264_free.
     * NOTE(review): assumes unused slots are NULL and that x264_free accepts
     * NULL -- confirm. */
    for( i=0; i<2; i++ )
        for( j=0; j<32; j++ )
            x264_free( h->mb.mvr[i][j] );
    if( h->param.b_cabac )
    {
        x264_free( h->mb.chroma_pred_mode );
        x264_free( h->mb.mvd[0] );
        x264_free( h->mb.mvd[1] );
    }
    x264_free( h->mb.intra4x4_pred_mode );
    x264_free( h->mb.non_zero_count );
    x264_free( h->mb.nnz_backup );
    x264_free( h->mb.mb_transform_size );
    x264_free( h->mb.skipbp );
    x264_free( h->mb.cbp );
    x264_free( h->mb.qp );
}
915
916
/* Per-slice setup: point the macroblock state at the mv/ref/type arrays of
 * the frame being encoded (h->fdec), record the reference POCs in that frame,
 * and for B slices build the table mapping the first list1 reference's own
 * list0 references onto the current list0. */
void x264_macroblock_slice_init( x264_t *h )
{
    int i, j;

    h->mb.mv[0] = h->fdec->mv[0];
    h->mb.mv[1] = h->fdec->mv[1];
    h->mb.ref[0] = h->fdec->ref[0];
    h->mb.ref[1] = h->fdec->ref[1];
    h->mb.type = h->fdec->mb_type;

    h->fdec->i_ref[0] = h->i_ref0;
    h->fdec->i_ref[1] = h->i_ref1;
    for( i = 0; i < h->i_ref0; i++ )
        h->fdec->ref_poc[0][i] = h->fref0[i]->i_poc;
    if( h->sh.i_type == SLICE_TYPE_B )
    {
        for( i = 0; i < h->i_ref1; i++ )
            h->fdec->ref_poc[1][i] = h->fref1[i]->i_poc;

        /* negative ref values (-1, -2) map to themselves.
         * NOTE(review): map_col_to_list0 is indexed at -1 and -2, so it
         * presumably points at least two entries into its backing storage
         * -- confirm against its declaration. */
        h->mb.map_col_to_list0[-1] = -1;
        h->mb.map_col_to_list0[-2] = -2;
        for( i = 0; i < h->fref1[0]->i_ref[0]; i++ )
        {
            /* find the current list0 entry with the same POC; -2 if none */
            int poc = h->fref1[0]->ref_poc[0][i];
            h->mb.map_col_to_list0[i] = -2;
            for( j = 0; j < h->i_ref0; j++ )
                if( h->fref0[j]->i_poc == poc )
                {
                    h->mb.map_col_to_list0[i] = j;
                    break;
                }
        }
    }
    if( h->sh.i_type == SLICE_TYPE_P )
        memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );

    setup_inverse_delta_pocs( h );
}
Laurent Aimar's avatar
Laurent Aimar committed
953

954
955
956
957
958
959
960
961
962
/* Hand the source pixels of macroblock (i_mb_x, i_mb_y) in fenc to the
 * prefetch_fenc hook: the luma plane, plus plane 1 for even i_mb_x or
 * plane 2 for odd i_mb_x. */
void x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y )
{
    const int luma_stride   = fenc->i_stride[0];
    const int chroma_stride = fenc->i_stride[1];
    const int luma_offset   = 16 * (i_mb_x + i_mb_y * luma_stride);
    const int chroma_offset = 8 * (i_mb_x + i_mb_y * chroma_stride);
    const int chroma_plane  = 1 + (i_mb_x & 1);

    h->mc.prefetch_fenc( &fenc->plane[0][luma_offset], luma_stride,
                         &fenc->plane[chroma_plane][chroma_offset], chroma_stride,
                         i_mb_x );
}
Laurent Aimar's avatar
Laurent Aimar committed
963

964
965
966
967
968
969
970
/* Copy a column of 8 bytes from src to dst; consecutive rows of both
 * buffers are FDEC_STRIDE bytes apart. */
static NOINLINE void copy_column8( uint8_t *dst, uint8_t *src )
{
    int off;
    for( off = 0; off < 8*FDEC_STRIDE; off += FDEC_STRIDE )
        dst[off] = src[off];
}

971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
/* Load plane i (0 for the 16x16 plane, non-zero for the 8x8 planes) of
 * macroblock (i_mb_x, i_mb_y):
 *  - copy the source pixels into the fenc cache,
 *  - restore the saved border row above the fdec cache block,
 *  - when interlaced, also reload the column left of the fdec cache block,
 *  - set up the reference-plane pointers for both lists. */
static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb_x, int i_mb_y, int i)
{
    const int b_chroma = (i != 0);
    const int b_field = h->mb.b_interlaced;
    const int i_size = b_chroma ? 8 : 16;
    const int i_stride = h->fdec->i_stride[i];
    const int i_mb_stride = i_stride << b_field;
    const uint8_t *border = &h->mb.intra_border_backup[i_mb_y & h->sh.b_mbaff][i][(i_mb_x*16) >> b_chroma];
    x264_frame_t **ref_list[2] = { h->fref0, h->fref1 };
    int i_cur_offset;
    int i_ref_offset[2];
    int list, idx, row, filt;

    if( b_field )
    {
        /* rows of the macroblock pair are interleaved: start on the pair's
         * first row plus one line for the odd macroblock; the second
         * reference offset is one line towards the other macroblock's
         * starting row */
        const int parity = i_mb_y & 1;
        i_cur_offset = i_size * (i_mb_x + (i_mb_y&~1) * i_stride) + parity * i_stride;
        i_ref_offset[0] = i_cur_offset;
        i_ref_offset[1] = i_cur_offset + (1-2*parity) * i_stride;
    }
    else
    {
        i_cur_offset = i_size * (i_mb_x + i_mb_y * i_stride);
        i_ref_offset[0] = i_cur_offset;
        i_ref_offset[1] = i_cur_offset;
    }

    h->mb.pic.i_stride[i] = i_mb_stride;

    /* source pixels -> fenc cache */
    h->mc.copy[b_chroma ? PIXEL_8x8 : PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,
        &h->fenc->plane[i][i_cur_offset], i_mb_stride, i_size );

    /* saved border -> row above the fdec cache block, starting one pixel to
     * the left and spanning 1.5 block widths plus one byte */
    memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], border-1, i_size*3/2+1 );

    if( b_field )
    {
        /* reload the left neighbour column from the reconstructed frame */
        const uint8_t *recon = &h->fdec->plane[i][i_cur_offset];
        for( row = 0; row < i_size; row++ )
            h->mb.pic.p_fdec[i][-1+row*FDEC_STRIDE] = recon[-1+row*i_mb_stride];
    }

    for( list = 0; list < 2; list++ )
    {
        for( idx = 0; idx < h->mb.pic.i_fref[list]; idx++ )
        {
            x264_frame_t *ref = ref_list[list][idx >> b_field];
            const int i_offset = i_ref_offset[idx & 1];

            if( b_chroma )
            {
                /* planes 1 and 2 live in pointer slots 4 and 5 */
                h->mb.pic.p_fref[list][idx][i+3] = &ref->plane[i][i_offset];
            }
            else
            {
                /* slot 0: the plane itself; slots 1..3: its filtered planes */
                h->mb.pic.p_fref[list][idx][0] = &ref->plane[0][i_offset];
                for( filt = 1; filt < 4; filt++ )
                    h->mb.pic.p_fref[list][idx][filt] = &ref->filtered[filt][i_offset];
            }
        }
    }
}

Laurent Aimar's avatar
Laurent Aimar committed
1007
1008
void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
{
Loren Merritt's avatar
Loren Merritt committed
1009
    int i_mb_xy = i_mb_y * h->mb.i_mb_stride + i_mb_x;
1010
1011
1012
1013
1014
1015
    int i_mb_4x4 = 4*(i_mb_y * h->mb.i_b4_stride + i_mb_x);
    int i_mb_8x8 = 2*(i_mb_y * h->mb.i_b8_stride + i_mb_x);
    int i_top_y = i_mb_y - (1 << h->mb.b_interlaced);
    int i_top_xy = i_top_y * h->mb.i_mb_stride + i_mb_x;
    int i_top_4x4 = (4*i_top_y+3) * h->mb.i_b4_stride + 4*i_mb_x;
    int i_top_8x8 = (2*i_top_y+1) * h->mb.i_b8_stride + 2*i_mb_x;
Laurent Aimar's avatar
Laurent Aimar committed
1016
1017
1018
1019
1020
1021
    int i_left_xy = -1;
    int i_top_type = -1;    /* gcc warn */
    int i_left_type= -1;

    int i;

1022
1023
1024
    assert( h->mb.i_b8_stride == 2*h->mb.i_mb_stride );
    assert( h->mb.i_b4_stride == 4*h->mb.i_mb_stride );

Laurent Aimar's avatar
Laurent Aimar committed
1025
1026
1027
    /* init index */
    h->mb.i_mb_x = i_mb_x;
    h->mb.i_mb_y = i_mb_y;
Loren Merritt's avatar
Loren Merritt committed
1028
    h->mb.i_mb_xy = i_mb_xy;
1029
1030
    h->mb.i_b8_xy = i_mb_8x8;
    h->mb.i_b4_xy = i_mb_4x4;
1031
    h->mb.i_mb_top_xy = i_top_xy;
Laurent Aimar's avatar
Laurent Aimar committed
1032
1033
1034
    h->mb.i_neighbour = 0;

    /* load cache */
1035
    if( i_top_xy >= h->sh.i_first_mb )
Laurent Aimar's avatar
Laurent Aimar committed
1036
    {
Loren Merritt's avatar
Loren Merritt committed
1037
        h->mb.i_mb_type_top =
Laurent Aimar's avatar
Laurent Aimar committed
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
        i_top_type= h->mb.type[i_top_xy];

        h->mb.i_neighbour |= MB_TOP;

        /* load intra4x4 */
        h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] = h->mb.intra4x4_pred_mode[i_top_xy][0];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[1] - 8] = h->mb.intra4x4_pred_mode[i_top_xy][1];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[4] - 8] = h->mb.intra4x4_pred_mode[i_top_xy][2];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[5] - 8] = h->mb.intra4x4_pred_mode[i_top_xy][3];

        /* load non_zero_count */
        h->mb.cache.non_zero_count[x264_scan8[0] - 8] = h->mb.non_zero_count[i_top_xy][10];
        h->mb.cache.non_zero_count[x264_scan8[1] - 8] = h->mb.non_zero_count[i_top_xy][11];
        h->mb.cache.non_zero_count[x264_scan8[4] - 8] = h->mb.non_zero_count[i_top_xy][14];
        h->mb.cache.non_zero_count[x264_scan8[5] - 8] = h->mb.non_zero_count[i_top_xy][15];

        h->mb.cache.non_zero_count[x264_scan8[16+0] - 8] = h->mb.non_zero_count[i_top_xy][16+2];
        h->mb.cache.non_zero_count[x264_scan8[16+1] - 8] = h->mb.non_zero_count[i_top_xy][16+3];

        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 8] = h->mb.non_zero_count[i_top_xy][16+4+2];
        h->mb.cache.non_zero_count[x264_scan8[16+4+1] - 8] = h->mb.non_zero_count[i_top_xy][16+4+3];
    }
    else
    {
Loren Merritt's avatar
Loren Merritt committed
1062
1063
        h->mb.i_mb_type_top = -1;
        
Laurent Aimar's avatar
Laurent Aimar committed
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
        /* load intra4x4 */
        h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] =
        h->mb.cache.intra4x4_pred_mode[x264_scan8[1] - 8] =
        h->mb.cache.intra4x4_pred_mode[x264_scan8[4] - 8] =
        h->mb.cache.intra4x4_pred_mode[x264_scan8[5] - 8] = -1;

        /* load non_zero_count */
        h->mb.cache.non_zero_count[x264_scan8[0] - 8] =
        h->mb.cache.non_zero_count[x264_scan8[1] - 8] =
        h->mb.cache.non_zero_count[x264_scan8[4] - 8] =
        h->mb.cache.non_zero_count[x264_scan8[5] - 8] =
        h->mb.cache.non_zero_count[x264_scan8[16+0] - 8] =
        h->mb.cache.non_zero_count[x264_scan8[16+1] - 8] =
        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 8] =
        h->mb.cache.non_zero_count[x264_scan8[16+4+1] - 8] = 0x80;

    }

Loren Merritt's avatar
Loren Merritt committed
1082
    if( i_mb_x > 0 && i_mb_xy > h->sh.i_first_mb )
Laurent Aimar's avatar
Laurent Aimar committed
1083
    {
Loren Merritt's avatar
Loren Merritt committed
1084
        i_left_xy = i_mb_xy - 1;
Loren Merritt's avatar
Loren Merritt committed
1085
        h->mb.i_mb_type_left =
Loren Merritt's avatar
Loren Merritt committed
1086
        i_left_type = h->mb.type[i_left_xy];
Laurent Aimar's avatar
Laurent Aimar committed
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109

        h->mb.i_neighbour |= MB_LEFT;

        /* load intra4x4 */
        h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][4];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][5];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][6];
        h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][3];

        /* load non_zero_count */
        h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = h->mb.non_zero_count[i_left_xy][5];
        h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = h->mb.non_zero_count[i_left_xy][7];
        h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = h->mb.non_zero_count[i_left_xy][13];
        h->mb.cache.non_zero_count[x264_scan8[10] - 1] = h->mb.non_zero_count[i_left_xy][15];

        h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = h->mb.non_zero_count[i_left_xy][16+1];
        h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = h->mb.non_zero_count[i_left_xy][16+3];

        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = h->mb.non_zero_count[i_left_xy][16+4+1];
        h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = h->mb.non_zero_count[i_left_xy][16+4+3];
    }
    else
    {
Loren Merritt's avatar
Loren Merritt committed
1110
1111
        h->mb.i_mb_type_left = -1;

Laurent Aimar's avatar
Laurent Aimar committed
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
        h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] =
        h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] =
        h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] =
        h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = -1;

        /* load non_zero_count */
        h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] =
        h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] =
        h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] =
        h->mb.cache.non_zero_count[x264_scan8[10] - 1] =
        h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] =
        h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] =
        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] =
        h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = 0x80;
    }

Loren Merritt's avatar
Loren Merritt committed
1128
1129
    if( i_mb_x < h->sps->i_mb_width - 1 && i_top_xy + 1 >= h->sh.i_first_mb )
    {
Laurent Aimar's avatar
Laurent Aimar committed
1130
        h->mb.i_neighbour |= MB_TOPRIGHT;
Loren Merritt's avatar
Loren Merritt committed
1131
1132
1133
1134
1135
1136
        h->mb.i_mb_type_topright = h->mb.type[ i_top_xy + 1 ];
    }
    else
        h->mb.i_mb_type_topright = -1;
    if( i_mb_x > 0 && i_top_xy - 1 >= h->sh.i_first_mb )
    {
Loren Merritt's avatar
Loren Merritt committed
1137
        h->mb.i_neighbour |= MB_TOPLEFT;
Loren Merritt's avatar
Loren Merritt committed
1138
1139
1140
1141
        h->mb.i_mb_type_topleft = h->mb.type[ i_top_xy - 1 ];
    }
    else
        h->mb.i_mb_type_topleft = -1;
Laurent Aimar's avatar
Laurent Aimar committed
1142

1143
    if( h->pps->b_transform_8x8_mode )
1144
    {
1145
1146
1147
        h->mb.cache.i_neighbour_transform_size =
            ( i_left_type >= 0 && h->mb.mb_transform_size[i_left_xy] )
          + ( i_top_type  >= 0 && h->mb.mb_transform_size[i_top_xy]  );
1148
1149
    }

1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
    if( h->sh.b_mbaff )
    {
        h->mb.pic.i_fref[0] = h->i_ref0 << h->mb.b_interlaced;
        h->mb.pic.i_fref[1] = h->i_ref1 << h->mb.b_interlaced;
        h->mb.cache.i_neighbour_interlaced =
            !!(h->mb.i_neighbour & MB_LEFT)
          + !!(h->mb.i_neighbour & MB_TOP);
    }

    /* fdec:      fenc:
     * yyyyyyy
     * yYYYY      YYYY
     * yYYYY      YYYY
     * yYYYY      YYYY
     * yYYYY      YYYY
     * uuu vvv    UUVV
     * uUU vVV    UUVV
     * uUU vVV
     */
    h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
    h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
    h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
    h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
    h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE;
    h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE + 16;

1176
1177
1178
1179
1180
1181
1182
1183
    if( !h->mb.b_interlaced )
    {
        copy_column8( h->mb.pic.p_fdec[0]-1, h->mb.pic.p_fdec[0]+15 );
        copy_column8( h->mb.pic.p_fdec[0]-1+8*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+8*FDEC_STRIDE );
        copy_column8( h->mb.pic.p_fdec[1]-1, h->mb.pic.p_fdec[1]+7 );
        copy_column8( h->mb.pic.p_fdec[2]-1, h->mb.pic.p_fdec[2]+7 );
    }

1184
    /* load picture pointers */
1185
1186
1187
    x264_macroblock_load_pic_pointers( h, i_mb_x, i_mb_y, 0 );