/*****************************************************************************
 * me.c: h264 encoder library (Motion Estimation)
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: me.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

25
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
26 27
#include "me.h"

28 29 30
/* presets selected from good points on the speed-vs-quality curve of several test videos
 * subpel_iterations[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel }
 * where me_* are the number of EPZS iterations run on all candidate block types,
 * and refine_* are run only on the winner.
 * the subme=7 values are much higher because any amount of satd search makes
 * up its time by reducing the number of rd iterations.
 *
 * Columns 0/1 are read by x264_me_refine_qpel(), columns 2/3 by
 * x264_me_search_ref(); both index the rows with h->mb.i_subpel_refine. */
static const int subpel_iterations[][4] =
   {{1,0,0,0},   /* row 0 */
    {1,1,0,0},   /* row 1 */
    {0,1,1,0},   /* row 2 */
    {0,2,1,0},   /* row 3 */
    {0,2,1,1},   /* row 4 */
    {0,2,1,2},   /* row 5 */
    {0,0,2,2},   /* row 6 */
    {0,0,4,10}}; /* row 7 */

/* mod6m1[x] == (x-1) mod 6 for x in 0..7, as a lookup so no division is needed */
static const int mod6m1[8] = { 5, 0, 1, 2, 3, 4, 5, 0 };

/* Radius-2 hexagon offsets as {dx,dy}.  Entries 6 and 7 repeat entries 0 and 1
 * so that a direction index can run slightly past the end without a mod-6. */
static const int hex2[8][2] =
{
    {-1,-2}, {-2, 0}, {-1, 2},
    { 1, 2}, { 2, 0}, { 1,-2},
    {-1,-2}, {-2, 0}
};

/* The eight neighbours at distance 1: four edge neighbours, then four diagonals */
static const int square1[8][2] =
{
    { 0,-1}, { 0, 1}, {-1, 0}, { 1, 0},
    {-1,-1}, { 1, 1}, {-1, 1}, { 1,-1}
};
49

50
/* forward declaration: half/quarter-pel refinement shared by the search and
 * refine entry points in this file */
static void refine_subpel( x264_t *h, x264_me_t *m,
                           int hpel_iters, int qpel_iters,
                           int *p_halfpel_thresh, int b_refine_qpel );
51

Loren Merritt's avatar
Loren Merritt committed
52 53 54 55 56 57 58 59 60 61 62
/* Bit cost of the full-pel vector (mx,my), looked up in the caller's
 * p_cost_mvx / p_cost_mvy tables, which are indexed in quarter-pel units.
 * The conversion multiplies by 4 instead of shifting left by 2, because
 * left-shifting a negative mv component is undefined behaviour in C. */
#define BITS_MVD( mx, my )\
    (p_cost_mvx[(mx)*4] + p_cost_mvy[(my)*4])

/* Score the full-pel candidate (mx,my): mv bit cost plus the SAD between the
 * source block and the reference at that offset.  The result is handed to
 * COPY3_IF_LT together with bcost/bmx/bmy.  Relies on the locals h, m,
 * i_pixel and p_fref of the expanding function; the arguments are evaluated
 * more than once. */
#define COST_MV( mx, my )\
{\
    int cost = BITS_MVD(mx,my)\
             + h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE,\
                   &p_fref[(mx)+(my)*m->i_stride[0]], m->i_stride[0] );\
    COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my );\
}

Loren Merritt's avatar
Loren Merritt committed
63
/* Score the sub-pel candidate (mx,my), given in the units the cost tables are
 * indexed with: fetch the interpolated block through h->mc.get_ref (using the
 * caller's scratch buffer `pix` with stride 16), take its SAD against the
 * source block, add the mv cost, and offer the result to COPY3_IF_LT against
 * bpred_cost/bpred_mx/bpred_my.  The arguments are evaluated more than once. */
#define COST_MV_HPEL( mx, my ) \
{ \
    int stride = 16; \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, (mx), (my), bw, bh ); \
    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
             + p_cost_mvx[ (mx) ] + p_cost_mvy[ (my) ]; \
    COPY3_IF_LT( bpred_cost, cost, bpred_mx, (mx), bpred_my, (my) ); \
}

72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
/* Score three full-pel candidates at offsets (mNx,mNy) from the current best
 * (bmx,bmy) with one sad_x3 call and add each candidate's mv bit cost.
 * The three totals are left in costs[0..2]; bcost/bmx/bmy are not touched,
 * so the caller picks the winner itself. */
#define COST_MV_X3_DIR( m0x, m0y, m1x, m1y, m2x, m2y, costs )\
{\
    uint8_t *pix_base = &p_fref[bmx + bmy*m->i_stride[0]];\
    h->pixf.sad_x3[i_pixel]( m->p_fenc[0],\
        &pix_base[(m0x) + (m0y)*m->i_stride[0]],\
        &pix_base[(m1x) + (m1y)*m->i_stride[0]],\
        &pix_base[(m2x) + (m2y)*m->i_stride[0]],\
        m->i_stride[0], costs );\
    (costs)[0] += BITS_MVD( bmx+(m0x), bmy+(m0y) );\
    (costs)[1] += BITS_MVD( bmx+(m1x), bmy+(m1y) );\
    (costs)[2] += BITS_MVD( bmx+(m2x), bmy+(m2y) );\
}

/* Score four full-pel candidates at offsets (mNx,mNy) from the search centre
 * (omx,omy) with one sad_x4 call, add each candidate's mv bit cost, and offer
 * every total to COPY3_IF_LT against bcost/bmx/bmy.  Uses the `costs` array
 * of the expanding function as scratch space. */
#define COST_MV_X4( m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y )\
{\
    uint8_t *pix_base = &p_fref[omx + omy*m->i_stride[0]];\
    h->pixf.sad_x4[i_pixel]( m->p_fenc[0],\
        &pix_base[(m0x) + (m0y)*m->i_stride[0]],\
        &pix_base[(m1x) + (m1y)*m->i_stride[0]],\
        &pix_base[(m2x) + (m2y)*m->i_stride[0]],\
        &pix_base[(m3x) + (m3y)*m->i_stride[0]],\
        m->i_stride[0], costs );\
    costs[0] += BITS_MVD( omx+(m0x), omy+(m0y) );\
    costs[1] += BITS_MVD( omx+(m1x), omy+(m1y) );\
    costs[2] += BITS_MVD( omx+(m2x), omy+(m2y) );\
    costs[3] += BITS_MVD( omx+(m3x), omy+(m3y) );\
    COPY3_IF_LT( bcost, costs[0], bmx, omx+(m0x), bmy, omy+(m0y) );\
    COPY3_IF_LT( bcost, costs[1], bmx, omx+(m1x), bmy, omy+(m1y) );\
    COPY3_IF_LT( bcost, costs[2], bmx, omx+(m2x), bmy, omy+(m2y) );\
    COPY3_IF_LT( bcost, costs[3], bmx, omx+(m3x), bmy, omy+(m3y) );\
}

Loren Merritt's avatar
Loren Merritt committed
104 105 106 107 108 109 110 111
/* Like COST_MV_X4, but the four candidates are absolute full-pel positions
 * rather than offsets from (omx,omy).  Only the horizontal mv cost is added:
 * the caller is expected to have accounted for the vertical cost itself.
 * Table indices use *4 rather than <<2 so that negative positions do not
 * trigger an undefined left shift. */
#define COST_MV_X4_ABS( m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y )\
{\
    h->pixf.sad_x4[i_pixel]( m->p_fenc[0],\
        p_fref + (m0x) + (m0y)*m->i_stride[0],\
        p_fref + (m1x) + (m1y)*m->i_stride[0],\
        p_fref + (m2x) + (m2y)*m->i_stride[0],\
        p_fref + (m3x) + (m3y)*m->i_stride[0],\
        m->i_stride[0], costs );\
    costs[0] += p_cost_mvx[(m0x)*4]; /* no cost_mvy */\
    costs[1] += p_cost_mvx[(m1x)*4];\
    costs[2] += p_cost_mvx[(m2x)*4];\
    costs[3] += p_cost_mvx[(m3x)*4];\
    COPY3_IF_LT( bcost, costs[0], bmx, (m0x), bmy, (m0y) );\
    COPY3_IF_LT( bcost, costs[1], bmx, (m1x), bmy, (m1y) );\
    COPY3_IF_LT( bcost, costs[2], bmx, (m2x), bmy, (m2y) );\
    COPY3_IF_LT( bcost, costs[3], bmx, (m3x), bmy, (m3y) );\
}

122 123 124 125 126 127 128 129
/* One radius-1 diamond step: re-centre the search on (mx,my) and test its
 * four edge neighbours.
 *
 *      1
 *     101
 *      1
 */
#define DIA1_ITER( mx, my )\
{\
    omx = mx;\
    omy = my;\
    COST_MV_X4( 0,-1, 0,1, -1,0, 1,0 );\
}
130

Loren Merritt's avatar
Loren Merritt committed
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
/* Cross search around (omx,omy): test points at distance start, start+2, ...
 * (below x_max) to the left and right, then the same pattern (below y_max)
 * above and below.  When the whole arm fits inside the allowed mv range the
 * points are batched four at a time through COST_MV_X4; the remaining points
 * are range-checked and tested one by one.
 * Uses the loop counter `i` of the expanding function.  The parameters are
 * evaluated several times, so pass side-effect-free expressions only. */
#define CROSS( start, x_max, y_max )\
{\
    i = (start);\
    if( (x_max) <= X264_MIN(mv_x_max-omx, omx-mv_x_min) )\
        for( ; i < (x_max)-2; i+=4 )\
            COST_MV_X4( i,0, -i,0, i+2,0, -i-2,0 );\
    for( ; i < (x_max); i+=2 )\
    {\
        if( omx+i <= mv_x_max )\
            COST_MV( omx+i, omy );\
        if( omx-i >= mv_x_min )\
            COST_MV( omx-i, omy );\
    }\
    i = (start);\
    if( (y_max) <= X264_MIN(mv_y_max-omy, omy-mv_y_min) )\
        for( ; i < (y_max)-2; i+=4 )\
            COST_MV_X4( 0,i, 0,-i, 0,i+2, 0,-i-2 );\
    for( ; i < (y_max); i+=2 )\
    {\
        if( omy+i <= mv_y_max )\
            COST_MV( omx, omy+i );\
        if( omy-i >= mv_y_min )\
            COST_MV( omx, omy-i );\
    }\
}
156

157
/* x264_me_search_ref:
 *   Full-pel motion search of block `m` against its reference, followed by
 *   the sub-pel refinement selected by h->mb.i_subpel_refine.
 *
 *   h                encoder context; supplies the search method
 *                    (h->mb.i_me_method), the base range
 *                    (h->param.analyse.i_me_range), the mv limits and the
 *                    pixel/mc function tables.
 *   m                in:  i_pixel, p_fenc, p_fref, i_stride, mvp, p_cost_mv
 *                         (and integral for X264_ME_ESA)
 *                    out: mv (same units as mvp), cost, cost_mv
 *   mvc, i_mvc       i_mvc additional candidate vectors, same units as m->mvp
 *   p_halfpel_thresh forwarded unchanged to refine_subpel(); may be NULL
 *
 *   Throughout, (bmx,bmy)/bcost is the best full-pel vector found so far,
 *   (omx,omy) the centre of the pattern currently being tested and (pmx,pmy)
 *   the predictor rounded to full-pel. */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    int bmx, bmy, bcost;
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );

    int i, j;
    int dir;
    int costs[6];

    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];

/* true when the full-pel vector (mx,my) lies inside the allowed search window */
#define CHECK_MVRANGE(mx,my) ( (mx) >= mv_x_min && (mx) <= mv_x_max && (my) >= mv_y_min && (my) <= mv_y_max )

    /* cost tables re-based so that indexing with an mv yields the cost of (mv - mvp) */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    /* clip the predictor to the search window, then round it to full-pel */
    bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 );
    bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;

    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        /* evaluate the predictors at their sub-pel positions, then start the
         * full-pel search from the best one rounded to full-pel */
        COST_MV_HPEL( bmx, bmy );
        for( i = 0; i < i_mvc; i++ )
        {
             const int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
             const int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
             if( mx != bpred_mx || my != bpred_my )
                 COST_MV_HPEL( mx, my );
        }
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
    else
    {
        /* check the MVP */
        COST_MV( pmx, pmy );
        /* I don't know why this helps */
        bcost -= BITS_MVD(bmx,bmy);

        for( i = 0; i < i_mvc; i++ )
        {
             const int mx = x264_clip3( ( mvc[i][0] + 2 ) >> 2, mv_x_min, mv_x_max );
             const int my = x264_clip3( ( mvc[i][1] + 2 ) >> 2, mv_y_min, mv_y_max );
             if( mx != bmx || my != bmy )
                 COST_MV( mx, my );
        }
    }

    COST_MV( 0, 0 );

    switch( h->mb.i_me_method )
    {
    case X264_ME_DIA:
        /* diamond search, radius 1 */
        for( i = 0; i < i_me_range; i++ )
        {
            DIA1_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
            if( !CHECK_MVRANGE(bmx, bmy) )
                break;
        }
        break;

    case X264_ME_HEX:
me_hex2:
        /* hexagon search, radius 2 */
#if 0
        for( i = 0; i < i_me_range/2; i++ )
        {
            omx = bmx; omy = bmy;
            COST_MV( omx-2, omy   );
            COST_MV( omx-1, omy+2 );
            COST_MV( omx+1, omy+2 );
            COST_MV( omx+2, omy   );
            COST_MV( omx+1, omy-2 );
            COST_MV( omx-1, omy-2 );
            if( bmx == omx && bmy == omy )
                break;
            if( !CHECK_MVRANGE(bmx, bmy) )
                break;
        }
#else
        /* equivalent to the above, but eliminates duplicate candidates */
        dir = -2;

        /* hexagon: costs[k] belongs to direction k, whose offset is hex2[k+1] */
        COST_MV_X3_DIR( -2,0, -1, 2,  1, 2, costs   );
        COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+3 );
        COPY2_IF_LT( bcost, costs[0], dir, 0 );
        COPY2_IF_LT( bcost, costs[1], dir, 1 );
        COPY2_IF_LT( bcost, costs[2], dir, 2 );
        COPY2_IF_LT( bcost, costs[3], dir, 3 );
        COPY2_IF_LT( bcost, costs[4], dir, 4 );
        COPY2_IF_LT( bcost, costs[5], dir, 5 );

        /* dir == -2 means no hexagon point beat the centre */
        if( dir != -2 )
        {
            bmx += hex2[dir+1][0];
            bmy += hex2[dir+1][1];
            /* half hexagon, not overlapping the previous iteration */
            for( i = 1; i < i_me_range/2 && CHECK_MVRANGE(bmx, bmy); i++ )
            {
                /* dir may be -1..6 here; mod6m1[dir+1] folds it back to 0..5 */
                const int odir = mod6m1[dir+1];
                COST_MV_X3_DIR( hex2[odir+0][0], hex2[odir+0][1],
                                hex2[odir+1][0], hex2[odir+1][1],
                                hex2[odir+2][0], hex2[odir+2][1],
                                costs );
                dir = -2;
                COPY2_IF_LT( bcost, costs[0], dir, odir-1 );
                COPY2_IF_LT( bcost, costs[1], dir, odir   );
                COPY2_IF_LT( bcost, costs[2], dir, odir+1 );
                if( dir == -2 )
                    break;
                bmx += hex2[dir+1][0];
                bmy += hex2[dir+1][1];
            }
        }
#endif
        /* square refine */
        omx = bmx; omy = bmy;
        COST_MV_X4(  0,-1,  0,1, -1,0, 1,0 );
        COST_MV_X4( -1,-1, -1,1, 1,-1, 1,1 );
        break;

    case X264_ME_UMH:
        {
            /* Uneven-cross Multi-Hexagon-grid Search
             * as in JM, except with different early termination */

            /* per-partition shift applied to the SAD thresholds below, indexed by i_pixel */
            static const int x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 };

            int ucost1, ucost2;
            int cross_start = 1;

            /* refine predictors */
            ucost1 = bcost;
            DIA1_ITER( pmx, pmy );
            if( pmx || pmy )
                DIA1_ITER( 0, 0 );

            if(i_pixel == PIXEL_4x4)
                goto me_hex2;

            ucost2 = bcost;
            if( (bmx || bmy) && (bmx!=pmx || bmy!=pmy) )
                DIA1_ITER( bmx, bmy );
            if( bcost == ucost2 )
                cross_start = 3;
            omx = bmx; omy = bmy;

            /* early termination */
#define SAD_THRESH(v) ( bcost < ( (v) >> x264_pixel_size_shift[i_pixel] ) )
            if( bcost == ucost2 && SAD_THRESH(2000) )
            {
                COST_MV_X4( 0,-2, -1,-1, 1,-1, -2,0 );
                COST_MV_X4( 2, 0, -1, 1, 1, 1,  0,2 );
                if( bcost == ucost1 && SAD_THRESH(500) )
                    break;
                if( bcost == ucost2 )
                {
                    int range = (i_me_range>>1) | 1;
                    CROSS( 3, range, range );
                    COST_MV_X4( -1,-2, 1,-2, -2,-1, 2,-1 );
                    COST_MV_X4( -2, 1, 2, 1, -1, 2, 1, 2 );
                    if( bcost == ucost2 )
                        break;
                    cross_start = range + 2;
                }
            }

            /* adaptive search range */
            if( i_mvc )
            {
                /* range multipliers based on casual inspection of some statistics of
                 * average distance between current predictor and final mv found by ESA.
                 * these have not been tuned much by actual encoding. */
                static const int range_mul[4][4] =
                {
                    { 3, 3, 4, 4 },
                    { 3, 4, 4, 4 },
                    { 4, 4, 4, 5 },
                    { 4, 4, 5, 6 },
                };
                int mvd;
                int sad_ctx, mvd_ctx;

                if( i_mvc == 1 )
                {
                    if( i_pixel == PIXEL_16x16 )
                        /* mvc is probably the same as mvp, so the difference isn't meaningful.
                         * but prediction usually isn't too bad, so just use medium range */
                        mvd = 25;
                    else
                        mvd = abs( m->mvp[0] - mvc[0][0] )
                            + abs( m->mvp[1] - mvc[0][1] );
                }
                else
                {
                    /* calculate the degree of agreement between predictors. */
                    /* in 16x16, mvc includes all the neighbors used to make mvp,
                     * so don't count mvp separately. */
                    int i_denom = i_mvc - 1;
                    mvd = 0;
                    if( i_pixel != PIXEL_16x16 )
                    {
                        mvd = abs( m->mvp[0] - mvc[0][0] )
                            + abs( m->mvp[1] - mvc[0][1] );
                        i_denom++;
                    }
                    for( i = 0; i < i_mvc-1; i++ )
                        mvd += abs( mvc[i][0] - mvc[i+1][0] )
                             + abs( mvc[i][1] - mvc[i+1][1] );
                    mvd /= i_denom; //FIXME idiv
                }

                sad_ctx = SAD_THRESH(1000) ? 0
                        : SAD_THRESH(2000) ? 1
                        : SAD_THRESH(4000) ? 2 : 3;
                mvd_ctx = mvd < 10 ? 0
                        : mvd < 20 ? 1
                        : mvd < 40 ? 2 : 3;

                /* range_mul is in quarters: scales the range by 3/4 .. 6/4 */
                i_me_range = i_me_range * range_mul[mvd_ctx][sad_ctx] / 4;
            }

            /* FIXME if the above DIA2/OCT2/CROSS found a new mv, it has not updated omx/omy.
             * we are still centered on the same place as the DIA2. is this desirable? */
            CROSS( cross_start, i_me_range, i_me_range/2 );

            /* 5x5 ESA */
            omx = bmx; omy = bmy;
            if( bcost != ucost2 )
                COST_MV_X4(  1, 0,  0, 1, -1, 0,  0,-1 );
            COST_MV_X4(  1, 1, -1, 1, -1,-1,  1,-1 );
            COST_MV_X4(  2,-1,  2, 0,  2, 1,  2, 2 );
            COST_MV_X4(  1, 2,  0, 2, -1, 2, -2, 2 );
            COST_MV_X4( -2, 1, -2, 0, -2,-1, -2,-2 );
            COST_MV_X4( -1,-2,  0,-2,  1,-2,  2,-2 );

            /* hexagon grid */
            omx = bmx; omy = bmy;
            for( i = 1; i <= i_me_range/4; i++ )
            {
                static const int hex4[16][2] = {
                    {-4, 2}, {-4, 1}, {-4, 0}, {-4,-1}, {-4,-2},
                    { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
                    { 2, 3}, { 0, 4}, {-2, 3},
                    {-2,-3}, { 0,-4}, { 2,-3},
                };

                /* ring i may leave the search window: range-check each point */
                if( 4*i > X264_MIN4( mv_x_max-omx, omx-mv_x_min,
                                     mv_y_max-omy, omy-mv_y_min ) )
                {
                    for( j = 0; j < 16; j++ )
                    {
                        int mx = omx + hex4[j][0]*i;
                        int my = omy + hex4[j][1]*i;
                        if( CHECK_MVRANGE(mx, my) )
                            COST_MV( mx, my );
                    }
                }
                else
                {
                    /* whole ring is in range: same 16 points as hex4, four per call */
                    COST_MV_X4( -4*i, 2*i, -4*i, 1*i, -4*i, 0*i, -4*i,-1*i );
                    COST_MV_X4( -4*i,-2*i,  4*i,-2*i,  4*i,-1*i,  4*i, 0*i );
                    COST_MV_X4(  4*i, 1*i,  4*i, 2*i,  2*i, 3*i,  0*i, 4*i );
                    COST_MV_X4( -2*i, 3*i, -2*i,-3*i,  0*i,-4*i,  2*i,-3*i );
                }
            }
            if( bmy <= mv_y_max )
                goto me_hex2;
            break;
        }

    case X264_ME_ESA:
        {
            const int min_x = X264_MAX( bmx - i_me_range, mv_x_min);
            const int min_y = X264_MAX( bmy - i_me_range, mv_y_min);
            const int max_x = X264_MIN( bmx + i_me_range, mv_x_max);
            const int max_y = X264_MIN( bmy + i_me_range, mv_y_max);
            int mx, my;
#if 0
            /* plain old exhaustive search */
            for( my = min_y; my <= max_y; my++ )
                for( mx = min_x; mx <= max_x; mx++ )
                    COST_MV( mx, my );
#else
            /* successive elimination by comparing DC before a full SAD,
             * because sum(abs(diff)) >= abs(diff(sum)). */
            const int stride = m->i_stride[0];
            static uint8_t zero[16*16] = {0,};
            uint16_t *sums_base = m->integral;
            int enc_dc[4];
            int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
            int delta = x264_pixel_size[sad_size].w;
            /* one lower-bound entry per column of the window, plus 7 spare
             * entries (presumably slack for the ads implementation -- confirm) */
            uint16_t *ads = x264_malloc( (max_x-min_x+8) * sizeof(*ads) );

            if( !ads )
            {
                /* no scratch memory: scan the same window with the plain
                 * exhaustive search instead of dereferencing NULL */
                for( my = min_y; my <= max_y; my++ )
                    for( mx = min_x; mx <= max_x; mx++ )
                        COST_MV( mx, my );
                break;
            }

            /* SAD against an all-zero block == sum of the source pixels of each sub-block */
            h->pixf.sad_x4[sad_size]( zero, m->p_fenc[0], m->p_fenc[0]+delta,
                m->p_fenc[0]+delta*FENC_STRIDE, m->p_fenc[0]+delta+delta*FENC_STRIDE,
                FENC_STRIDE, enc_dc );
            if( delta == 4 )
                sums_base += stride * (h->fenc->i_lines[0] + PADV*2);
            if( i_pixel == PIXEL_16x16 || i_pixel == PIXEL_8x16 || i_pixel == PIXEL_4x8 )
                delta *= stride;
            if( i_pixel == PIXEL_8x16 || i_pixel == PIXEL_4x8 )
                enc_dc[1] = enc_dc[2];

            for( my = min_y; my <= max_y; my++ )
            {
                int mvs[3], i_mvs=0;
                /* take this row's vertical mv cost out of bcost so that the
                 * per-candidate tests below only need the horizontal cost;
                 * it is added back after the row.  (*4, not <<2: my may be negative) */
                bcost -= p_cost_mvy[my*4];
                h->pixf.ads[i_pixel]( enc_dc, sums_base + min_x + my * stride, delta,
                                      ads, max_x-min_x+1 );
                for( mx = min_x; mx <= max_x; mx++ )
                {
                    /* only candidates whose lower bound can still beat bcost get a real SAD */
                    if( ads[mx-min_x] < bcost - p_cost_mvx[mx*4] )
                    {
                        /* queue up to three survivors, then test them with the fourth in one call */
                        if( i_mvs == 3 )
                        {
                            COST_MV_X4_ABS( mvs[0],my, mvs[1],my, mvs[2],my, mx,my );
                            i_mvs = 0;
                        }
                        else
                            mvs[i_mvs++] = mx;
                    }
                }
                bcost += p_cost_mvy[my*4];
                /* leftovers that did not fill a group of four */
                for( i=0; i<i_mvs; i++ )
                    COST_MV( mvs[i], my );
            }

            x264_free(ads);
#endif
        }
        break;
    }

    /* -> qpel mv */
    if( bpred_cost < bcost )
    {
        /* a sub-pel predictor beat everything the full-pel search found */
        m->mv[0] = bpred_mx;
        m->mv[1] = bpred_my;
        m->cost = bpred_cost;
    }
    else
    {
        /* *4 rather than <<2: left-shifting a negative value is undefined */
        m->mv[0] = bmx * 4;
        m->mv[1] = bmy * 4;
        m->cost = bcost;
    }

    /* compute the real cost */
    m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
    /* presumably restores the mv cost that was subtracted for the MVP
     * candidate in the i_subpel_refine < 3 branch above */
    if( bmx == pmx && bmy == pmy && h->mb.i_subpel_refine < 3 )
        m->cost += m->cost_mv;

    /* subpel refine */
    if( h->mb.i_subpel_refine >= 2 )
    {
        int hpel = subpel_iterations[h->mb.i_subpel_refine][2];
        int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
        refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
    }
    else if( m->mv[1] > h->mb.mv_max_spel[1] )
        m->mv[1] = h->mb.mv_max_spel[1];
}
#undef COST_MV
Laurent Aimar's avatar
Laurent Aimar committed
540 541

/* x264_me_refine_qpel:
 *   Run the "winner only" sub-pel refinement on an already searched block.
 *   The iteration counts come from columns 0 and 1 of subpel_iterations for
 *   the current h->mb.i_subpel_refine level.  m->mv, m->cost and m->cost_mv
 *   are updated by refine_subpel(). */
void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
{
    int hpel = subpel_iterations[h->mb.i_subpel_refine][0];
    int qpel = subpel_iterations[h->mb.i_subpel_refine][1];

    /* in P slices, take the reference-index cost back out of m->cost for
     * partitions with i_pixel <= PIXEL_8x8 before refining */
    if( m->i_pixel <= PIXEL_8x8 && h->sh.i_type == SLICE_TYPE_P )
        m->cost -= m->i_ref_cost;

    /* no early-termination threshold; b_refine_qpel = 1 */
    refine_subpel( h, m, hpel, qpel, NULL, 1 );
}

Loren Merritt's avatar
Loren Merritt committed
552
/* Score the sub-pel candidate (mx,my) by SAD: fetch the interpolated block
 * through h->mc.get_ref (scratch buffer pix[0], stride 16), add the mv cost
 * and offer the total to COPY3_IF_LT against bcost/bmx/bmy.
 * The arguments are evaluated more than once. */
#define COST_MV_SAD( mx, my ) \
{ \
    int stride = 16; \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, (mx), (my), bw, bh ); \
    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
             + p_cost_mvx[ (mx) ] + p_cost_mvy[ (my) ]; \
    COPY3_IF_LT( bcost, cost, bmx, (mx), bmy, (my) ); \
}

561 562
/* Score the sub-pel candidate (mx,my) with h->pixf.mbcmp and, if it wins,
 * record it in bcost/bmx/bmy/bdir.
 *   dir  direction tag of the candidate.  Unless b_refine_qpel is set, a
 *        candidate whose tag with bit 0 flipped equals odir is skipped.
 * When b_chroma_me is set and luma alone already beats bcost, the chroma
 * planes (m->p_fref[4], m->p_fref[5]) are motion-compensated into pix[0] and
 * their cost is added; the second plane is only evaluated while the running
 * total is still below bcost.
 * Deliberately an `if( ) { }` without a do/while wrapper, matching the other
 * cost macros in this file; the arguments are evaluated more than once. */
#define COST_MV_SATD( mx, my, dir ) \
if( b_refine_qpel || ((dir)^1) != odir ) \
{ \
    int stride = 16; \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, (mx), (my), bw, bh ); \
    int cost = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
             + p_cost_mvx[ (mx) ] + p_cost_mvy[ (my) ]; \
    if( b_chroma_me && cost < bcost ) \
    { \
        h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix[0], 8, (mx), (my), bw/2, bh/2 ); \
        cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], FENC_STRIDE, pix[0], 8 ); \
        if( cost < bcost ) \
        { \
            h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix[0], 8, (mx), (my), bw/2, bh/2 ); \
            cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], FENC_STRIDE, pix[0], 8 ); \
        } \
    } \
    if( cost < bcost ) \
    {                  \
        bcost = cost;  \
        bmx = (mx);    \
        bmy = (my);    \
        bdir = (dir);  \
    } \
}

587
/* refine_subpel:
 *   Sub-pel refinement of m->mv: an optional test of the predicted mv, a
 *   half-pel diamond search scored by SAD, then a quarter-pel diamond search
 *   scored through COST_MV_SATD.
 *
 *   h, m              encoder context and block; m->mv / m->cost are the
 *                     starting point, m->mv, m->cost (and m->cost_mv unless
 *                     terminating early) are written back.
 *   hpel_iters        maximum number of half-pel diamond iterations
 *   qpel_iters        maximum number of quarter-pel diamond iterations
 *   p_halfpel_thresh  if non-NULL, best cost seen so far across calls: the
 *                     function returns right after the half-pel stage when
 *                     7/8 of its cost exceeds *p_halfpel_thresh, otherwise
 *                     lowers the threshold to its own cost if that is smaller.
 *   b_refine_qpel     0: the half-pel winner is re-scored with COST_MV_SATD
 *                        before the quarter-pel stage;
 *                     1: the incoming m->cost is kept as is and
 *                        COST_MV_SATD tests every direction. */
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    /* cost tables re-based so that indexing with an mv yields the cost of (mv - mvp) */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    const int i_pixel = m->i_pixel;
    const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;

    DECLARE_ALIGNED( uint8_t, pix[2][32*18], 16 ); // really 17x17, but round up for alignment
    int omx, omy;
    int i;

    int bmx = m->mv[0];
    int bmy = m->mv[1];
    int bcost = m->cost;
    int odir = -1, bdir;

    /* try the subpel component of the predicted mv */
    if( hpel_iters && h->mb.i_subpel_refine < 3 )
    {
        int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
        int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
        if( mx != bmx || my != bmy )
            COST_MV_SAD( mx, my );
    }

    /* halfpel diamond search */
    for( i = hpel_iters; i > 0; i-- )
    {
        int costs[4];
        int stride = 32; // candidates are either all hpel or all qpel, so one stride is enough
        uint8_t *src0, *src1, *src2, *src3;

        /* uses the function-scope omx/omy rather than shadowing them */
        omx = bmx;
        omy = bmy;

        /* two fetches cover all four neighbours: the block above (one extra
         * row, so the block below is the same data one line down) and the
         * block to the left (the block to the right starts one byte later) */
        src0 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, omx, omy-2, bw, bh+1 );
        src2 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[1], &stride, omx-2, omy, bw+4, bh );
        src1 = src0 + stride;
        src3 = src2 + 1;
        h->pixf.sad_x4[i_pixel]( m->p_fenc[0], src0, src1, src2, src3, stride, costs );
        COPY2_IF_LT( bcost, costs[0] + p_cost_mvx[omx  ] + p_cost_mvy[omy-2], bmy, omy-2 );
        COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx  ] + p_cost_mvy[omy+2], bmy, omy+2 );
        COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy  ], bmx, omx-2, bmy, omy );
        COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy  ], bmx, omx+2, bmy, omy );
        if( bmx == omx && bmy == omy )
            break;
    }

    if( !b_refine_qpel )
    {
        /* check for mvrange */
        if( bmy > h->mb.mv_max_spel[1] )
            bmy = h->mb.mv_max_spel[1];
        /* re-score the half-pel winner with the metric used from here on */
        bcost = COST_MAX;
        COST_MV_SATD( bmx, bmy, -1 );
    }

    /* early termination when examining multiple reference frames */
    if( p_halfpel_thresh )
    {
        if( (bcost*7)>>3 > *p_halfpel_thresh )
        {
            m->cost = bcost;
            m->mv[0] = bmx;
            m->mv[1] = bmy;
            // don't need cost_mv
            return;
        }
        else if( bcost < *p_halfpel_thresh )
            *p_halfpel_thresh = bcost;
    }

    /* quarterpel diamond search */
    bdir = -1;
    for( i = qpel_iters; i > 0; i-- )
    {
        /* odir: direction of the previous step; COST_MV_SATD uses it to skip
         * the candidate lying in the opposite direction */
        odir = bdir;
        omx = bmx;
        omy = bmy;
        COST_MV_SATD( omx, omy - 1, 0 );
        COST_MV_SATD( omx, omy + 1, 1 );
        COST_MV_SATD( omx - 1, omy, 2 );
        COST_MV_SATD( omx + 1, omy, 3 );
        if( bmx == omx && bmy == omy )
            break;
    }

    /* check for mvrange */
    if( bmy > h->mb.mv_max_spel[1] )
    {
        bmy = h->mb.mv_max_spel[1];
        bcost = COST_MAX;
        COST_MV_SATD( bmx, bmy, -1 );
    }

    m->cost = bcost;
    m->mv[0] = bmx;
    m->mv[1] = bmy;
    m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];
}
687

688 689 690 691 692 693 694 695
/* Motion-compensate both references at their current centre plus (dx,dy) and
 * store the results in slot 4 + 3*dx + dy of pix0 / pix1 (slot 4 is the
 * centre; dx,dy in -1..1 give slots 0..8).  The parameters are parenthesised
 * so that an argument such as `a+1` cannot be split by the `3*` factor. */
#define BIME_CACHE( dx, dy ) \
{ \
    int i = 4 + 3*(dx) + (dy); \
    h->mc.mc_luma( m0->p_fref, m0->i_stride[0], pix0[i], bw, om0x+(dx), om0y+(dy), bw, bh ); \
    h->mc.mc_luma( m1->p_fref, m1->i_stride[0], pix1[i], bw, om1x+(dx), om1y+(dy), bw, bh ); \
}

/* Cache the offset (a,b) and its mirror image (-a,-b).  Expands to two brace
 * blocks back to back, so BIME_CACHE must stay a plain `{ }` block. */
#define BIME_CACHE2(a,b) \
    BIME_CACHE(a,b) \
    BIME_CACHE(-(a),-(b))
698 699 700 701 702 703 704 705 706 707 708 709 710

#define COST_BIMV_SATD( m0x, m0y, m1x, m1y ) \
if( pass == 0 || !visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] ) \
{ \
    int cost; \
    int i0 = 4 + 3*(m0x-om0x) + (m0y-om0y); \
    int i1 = 4 + 3*(m1x-om1x) + (m1y-om1y); \
    visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] = 1; \
    memcpy( pix, pix0[i0], bs ); \
    if( i_weight == 32 ) \
        h->mc.avg[i_pixel]( pix, bw, pix1[i1], bw ); \
    else \
        h->mc.avg_weight[i_pixel]( pix, bw, pix1[i1], bw, i_weight ); \
711
    cost = h->pixf.mbcmp[i_pixel]( m0->p_fenc[0], FENC_STRIDE, pix, bw ) \
712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727
         + p_cost_m0x[ m0x ] + p_cost_m0y[ m0y ] \
         + p_cost_m1x[ m1x ] + p_cost_m1y[ m1y ]; \
    if( cost < bcost ) \
    {                  \
        bcost = cost;  \
        bm0x = m0x;    \
        bm0y = m0y;    \
        bm1x = m1x;    \
        bm1y = m1y;    \
    } \
}

#define CHECK_BIDIR(a,b,c,d) \
    COST_BIMV_SATD(om0x+a, om0y+b, om1x+c, om1y+d)

#define CHECK_BIDIR2(a,b,c,d) \
Loren Merritt's avatar
Loren Merritt committed
728 729
    CHECK_BIDIR(a,b,c,d) \
    CHECK_BIDIR(-(a),-(b),-(c),-(d))
730 731 732 733 734 735 736 737 738 739 740 741 742

#define CHECK_BIDIR8(a,b,c,d) \
    CHECK_BIDIR2(a,b,c,d) \
    CHECK_BIDIR2(b,c,d,a) \
    CHECK_BIDIR2(c,d,a,b) \
    CHECK_BIDIR2(d,a,b,c)

/* Jointly refine the two motion vectors of a bidirectionally predicted block.
 *
 * h        encoder context (supplies mc/pixf function tables and mv limits)
 * m0, m1   motion estimation state for the two references; on entry their
 *          mv[] hold the starting vectors, on normal exit they are
 *          overwritten with the best pair found
 * i_weight bi-prediction weight; 32 selects the plain average, any other
 *          value the weighted average
 *
 * Returns the cost (block comparison metric plus the four mv costs) of the
 * best pair. The search runs for at most 8 passes; each pass tests the
 * neighbours of the current best pair and stops as soon as a pass brings no
 * improvement. If either starting vertical component is within 8 units of
 * mv_max_spel[1], only the starting pair is costed and m0/m1 are left
 * untouched. m0->cost and m1->cost are never written here. */
int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight )
{
    const int i_pixel = m0->i_pixel;
    const int bw = x264_pixel_size[i_pixel].w;
    const int bh = x264_pixel_size[i_pixel].h;
    const int bs = bw*bh;
    /* mv cost tables, biased by the (clipped) predicted mv so that they can be
     * indexed directly by an absolute mv component.
     * NOTE(review): the y components are clipped against the [0] (horizontal)
     * limits, not [1] -- confirm whether that is intentional. */
    const int16_t *p_cost_m0x = m0->p_cost_mv - x264_clip3( m0->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    /* pix0/pix1: cached predictions for the 3x3 neighbourhood of each mv
     * (slot 4 is the centre); pix: scratch buffer for the averaged block. */
    DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 );
    DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 );
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    /* b* = best pair so far, o* = centre of the current pass */
    int bm0x = m0->mv[0], om0x = bm0x;
    int bm0y = m0->mv[1], om0y = bm0y;
    int bm1x = m1->mv[0], om1x = bm1x;
    int bm1y = m1->mv[1], om1y = bm1y;
    int bcost = COST_MAX;
    int pass = 0;
    /* candidates already evaluated, keyed by the low 3 bits of each component */
    uint8_t visited[8][8][8][8];
    memset( visited, 0, sizeof(visited) );

    /* cost of the starting pair */
    BIME_CACHE( 0, 0 );
    CHECK_BIDIR( 0, 0, 0, 0 );

    /* too close to the lower vertical limit to search: keep the input mvs */
    if( bm0y > h->mb.mv_max_spel[1] - 8 ||
        bm1y > h->mb.mv_max_spel[1] - 8 )
        return bcost;

    for( pass = 0; pass < 8; pass++ )
    {
        /* check all mv pairs that differ in at most 2 components from the current mvs. */
        /* doesn't do chroma ME. this probably doesn't matter, as the gains
         * from bidir ME are the same with and without chroma ME. */

        /* fill the remaining 8 slots of the 3x3 prediction caches */
        BIME_CACHE2( 1, 0 );
        BIME_CACHE2( 0, 1 );
        BIME_CACHE2( 1, 1 );
        BIME_CACHE2( 1,-1 );

        CHECK_BIDIR8( 0, 0, 0, 1 );
        CHECK_BIDIR8( 0, 0, 1, 1 );
        CHECK_BIDIR2( 0, 1, 0, 1 );
        CHECK_BIDIR2( 1, 0, 1, 0 );
        CHECK_BIDIR8( 0, 0,-1, 1 );
        CHECK_BIDIR2( 0,-1, 0, 1 );
        CHECK_BIDIR2(-1, 0, 1, 0 );

        /* no candidate improved on the centre: converged */
        if( om0x == bm0x && om0y == bm0y && om1x == bm1x && om1y == bm1y )
            break;

        /* recentre on the new best pair and cache its predictions */
        om0x = bm0x;
        om0y = bm0y;
        om1x = bm1x;
        om1y = bm1y;
        BIME_CACHE( 0, 0 );
    }

    m0->mv[0] = bm0x;
    m0->mv[1] = bm0y;
    m1->mv[0] = bm1x;
    m1->mv[1] = bm1y;
    return bcost;
}
801

802 803
/* Redefinition of COST_MV_SATD for the RD refinement below: here the third
 * argument is an lvalue that receives the cost instead of a direction index.
 * Fetches the reference block for mv (mx,my), stores the block comparison
 * metric plus the mv cost in dst, and lowers bsatd to dst when dst is
 * smaller.
 * bw and bh are multiplied by 4 before being passed to get_ref, matching
 * x264_me_refine_qpel_rd where they hold the block size divided by 4.
 * Expects h, m, pix, bw, bh, i_pixel, p_cost_mvx, p_cost_mvy and bsatd to be
 * in scope. */
#undef COST_MV_SATD
#define COST_MV_SATD( mx, my, dst ) \
{ \
    int stride = 16; \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw*4, bh*4 ); \
    dst = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
        + p_cost_mvx[mx] + p_cost_mvy[my]; \
    COPY1_IF_LT( bsatd, dst ); \
}

812
/* Rate-distortion evaluation of candidate mv (mx,my).
 * The candidate is only evaluated when its SATD-based cost `satd` is within
 * SATD_THRESH of the best one seen so far (bsatd). The candidate is written
 * into the two mv cache entries cache_mv / cache_mv2 before
 * x264_rd_cost_part() is called (presumably so that the RD function picks it
 * up from the cache -- confirm against x264_rd_cost_part).
 * On improvement bcost, bmx and bmy are updated; dir is set to mdir when
 * do_dir is non-zero and otherwise keeps its value.
 * Expects h, m, i8, i_lambda2, cache_mv, cache_mv2, bsatd, bcost, bmx, bmy
 * and dir to be in scope. */
#define COST_MV_RD( mx, my, satd, do_dir, mdir ) \
{ \
    if( satd <= bsatd * SATD_THRESH )\
    { \
        int cost; \
        cache_mv[0] = cache_mv2[0] = mx; \
        cache_mv[1] = cache_mv2[1] = my; \
        cost = x264_rd_cost_part( h, i_lambda2, i8, m->i_pixel ); \
        COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \
    } \
}

824 825
/* Tolerance factor of the SATD pre-filter in COST_MV_RD, where it is used as
 * `bsatd * SATD_THRESH`. Must stay unparenthesized: the expansion
 * `bsatd * 17/16` evaluates as (bsatd*17)/16, whereas `(17/16)` would be the
 * integer 1 and remove the tolerance. */
#define SATD_THRESH 17/16

826 827 828 829 830 831
/* Refine the motion vector of one partition using rate-distortion cost.
 *
 * h         encoder context
 * m         motion estimation state of the partition; mv[] is the starting
 *           vector and is overwritten with the result, cost is overwritten
 *           with the best RD cost found
 * i_lambda2 lambda passed through to x264_rd_cost_part()
 * i8        index of the 8x8 block the partition starts at
 *
 * Every candidate is first costed with COST_MV_SATD; only candidates whose
 * cost is within SATD_THRESH of the best one get the RD evaluation
 * (COST_MV_RD). Candidates tried, in order: the starting mv, the predicted
 * mv (if different and inside the mv limits), a hexagon search, and a final
 * 8-point square refinement. The winning mv and its difference to the
 * predicted mv are written to the macroblock mv / mvd caches. */
void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 )
{
    // don't have to fill the whole mv cache rectangle
    static const int pixel_mv_offs[] = { 0, 4, 4*8, 0 };
    int16_t *cache_mv = h->mb.cache.mv[0][x264_scan8[i8*4]];
    int16_t *cache_mv2 = cache_mv + pixel_mv_offs[m->i_pixel];
    const int16_t *p_cost_mvx, *p_cost_mvy;
    /* block size divided by 4 */
    const int bw = x264_pixel_size[m->i_pixel].w>>2;
    const int bh = x264_pixel_size[m->i_pixel].h>>2;
    const int i_pixel = m->i_pixel;

    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    /* for 16x16 the incoming cost is used as the value to beat; for all other
     * sizes the starting mv is always accepted first */
    int bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX;
    int bmx = m->mv[0];
    int bmy = m->mv[1];
    int omx = bmx;
    int omy = bmy;
    int pmx, pmy, i, j;
    unsigned bsatd;
    int satd = 0;
    /* direction of the last improving hexagon step; -2 means "no improvement" */
    int dir = -2;
    int satds[8];

    /* recompute the predicted mv, except for 16x16 and for the first 8x8 block */
    if( m->i_pixel != PIXEL_16x16 && i8 != 0 )
        x264_mb_predict_mv( h, 0, i8*4, bw, m->mvp );
    pmx = m->mvp[0];
    pmy = m->mvp[1];
    /* bias the cost tables so they can be indexed by absolute mv components */
    p_cost_mvx = m->p_cost_mv - pmx;
    p_cost_mvy = m->p_cost_mv - pmy;
    /* starting mv: initialises bsatd, then always gets an RD evaluation (satd 0) */
    COST_MV_SATD( bmx, bmy, bsatd );
    COST_MV_RD( bmx, bmy, 0, 0, 0);

    /* check the predicted mv */
    if( (bmx != pmx || bmy != pmy)
        && pmx >= h->mb.mv_min_spel[0] && pmx <= h->mb.mv_max_spel[0]
        && pmy >= h->mb.mv_min_spel[1] && pmy <= h->mb.mv_max_spel[1] )
    {
        COST_MV_SATD( pmx, pmy, satd );
        COST_MV_RD( pmx, pmy, satd, 0,0 );
    }

    /* subpel hex search, same pattern as ME HEX. */
    dir = -2;
    omx = bmx;
    omy = bmy;
    for( j=0; j<6; j++ ) COST_MV_SATD( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j] );
    for( j=0; j<6; j++ ) COST_MV_RD  ( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j], 1,j );
    if( dir != -2 )
    {
        /* half hexagon, not overlapping the previous iteration */
        for( i = 1; i < 10; i++ )
        {
            const int odir = mod6m1[dir+1];
            /* NOTE(review): the lower bound is tested against
             * mv_min_spel[1] - 2, which lets the search step below the
             * minimum; bmy is clipped at the end of the function, after its
             * cost was computed. Confirm whether "+ 2" was intended. */
            if( bmy > h->mb.mv_max_spel[1] - 2 ||
                bmy < h->mb.mv_min_spel[1] - 2 )
                break;
            dir = -2;
            omx = bmx;
            omy = bmy;
            for( j=0; j<3; j++ ) COST_MV_SATD( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j] );
            for( j=0; j<3; j++ ) COST_MV_RD  ( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j], 1, odir-1+j );
            if( dir == -2 )
                break;
        }
    }

    /* square refine, same pattern as ME HEX. */
    omx = bmx;
    omy = bmy;
    for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i][0], omy  + square1[i][1], satds[i] );
    for( i=0; i<8; i++ ) COST_MV_RD  ( omx + square1[i][0], omy  + square1[i][1], satds[i], 0,0 );

    /* force the vertical component back into range, then publish the result;
     * the mv cache entries were used as scratch by COST_MV_RD, so the final
     * mv is written to the cache explicitly */
    bmy = x264_clip3( bmy, h->mb.mv_min_spel[1],  h->mb.mv_max_spel[1] );
    m->cost = bcost;
    m->mv[0] = bmx;
    m->mv[1] = bmy;
    x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, 0, bmx, bmy );
    x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, 0, bmx - pmx, bmy - pmy );
}