me.c 31.9 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3 4 5 6 7
/*****************************************************************************
 * me.c: h264 encoder library (Motion Estimation)
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: me.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8
 *          Loren Merritt <lorenm@u.washington.edu>
Laurent Aimar's avatar
Laurent Aimar committed
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

#include <stdio.h>
#include <string.h>

28
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
29 30
#include "me.h"

31 32 33 34
/* presets selected from good points on the speed-vs-quality curve of several test videos
 * subpel_iterations[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel }
 * where me_* are the number of EPZS iterations run on all candidate block types,
 * and refine_* are run only on the winner. */
Loren Merritt's avatar
Loren Merritt committed
35
/* Sub-pel iteration presets, indexed by h->mb.i_subpel_refine (rows 0..7).
 * Columns 0/1 are the hpel/qpel counts read by x264_me_refine_qpel(),
 * columns 2/3 are the hpel/qpel counts read by x264_me_search_ref(). */
static const int subpel_iterations[][4] =
   {{1,0,0,0},
    {1,1,0,0},
    {0,1,1,0},
    {0,2,1,0},
    {0,2,1,1},
    {0,2,1,2},
    {0,0,2,2},
    {0,0,2,2}};
44

45
/* Forward declaration: half-pel / quarter-pel refinement of m->mv (defined later in
 * this file); called by x264_me_search_ref() and x264_me_refine_qpel(). */
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel );
46

Loren Merritt's avatar
Loren Merritt committed
47 48 49 50 51 52 53 54 55 56 57
/* Motion-vector cost of (mx,my): each coordinate is shifted left by 2 and used as
 * an index into the caller's p_cost_mvx / p_cost_mvy tables. */
#define BITS_MVD( mx, my )\
    (p_cost_mvx[(mx)<<2] + p_cost_mvy[(my)<<2])

/* Evaluate one candidate (mx,my): SAD of the source block against p_fref at that
 * offset plus BITS_MVD(mx,my); if lower than bcost, record it in bcost/bmx/bmy.
 * Relies on the caller's locals h, m, i_pixel, p_fref, bcost, bmx and bmy. */
#define COST_MV( mx, my )\
{\
    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE,\
                   &p_fref[(my)*m->i_stride[0]+(mx)], m->i_stride[0] )\
             + BITS_MVD(mx,my);\
    COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my );\
}

58 59 60 61 62 63 64 65 66
/* Evaluate one predictor candidate (mx,my) whose coordinates index the cost tables
 * directly (no <<2): the reference block is fetched through h->mc.get_ref() using
 * the scratch buffer pix, and the result is tracked separately from the main search
 * in bpred_cost/bpred_mx/bpred_my. */
#define COST_MV_PRED( mx, my ) \
{ \
    int stride = 16; \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \
    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
             + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
    COPY3_IF_LT( bpred_cost, cost, bpred_mx, mx, bpred_my, my ); \
}

67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
/* Compute the costs of three candidates given as offsets from the current best
 * position (bmx,bmy) with a single sad_x3 call, then add each candidate's BITS_MVD.
 * Results are written to costs[0..2]; bcost/bmx/bmy are NOT updated here, the
 * caller selects the winner. */
#define COST_MV_X3_DIR( m0x, m0y, m1x, m1y, m2x, m2y, costs )\
{\
    uint8_t *pix_base = p_fref + bmx + bmy*m->i_stride[0];\
    h->pixf.sad_x3[i_pixel]( m->p_fenc[0],\
        pix_base + (m0x) + (m0y)*m->i_stride[0],\
        pix_base + (m1x) + (m1y)*m->i_stride[0],\
        pix_base + (m2x) + (m2y)*m->i_stride[0],\
        m->i_stride[0], costs );\
    (costs)[0] += BITS_MVD( bmx+(m0x), bmy+(m0y) );\
    (costs)[1] += BITS_MVD( bmx+(m1x), bmy+(m1y) );\
    (costs)[2] += BITS_MVD( bmx+(m2x), bmy+(m2y) );\
}

/* Evaluate four candidates given as offsets from the search origin (omx,omy) with a
 * single sad_x4 call, add each candidate's BITS_MVD, and update bcost/bmx/bmy with
 * any improvement (candidates are compared in argument order).
 * Uses the caller's local array `costs` as scratch. */
#define COST_MV_X4( m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y )\
{\
    uint8_t *pix_base = p_fref + omx + omy*m->i_stride[0];\
    h->pixf.sad_x4[i_pixel]( m->p_fenc[0],\
        pix_base + (m0x) + (m0y)*m->i_stride[0],\
        pix_base + (m1x) + (m1y)*m->i_stride[0],\
        pix_base + (m2x) + (m2y)*m->i_stride[0],\
        pix_base + (m3x) + (m3y)*m->i_stride[0],\
        m->i_stride[0], costs );\
    costs[0] += BITS_MVD( omx+(m0x), omy+(m0y) );\
    costs[1] += BITS_MVD( omx+(m1x), omy+(m1y) );\
    costs[2] += BITS_MVD( omx+(m2x), omy+(m2y) );\
    costs[3] += BITS_MVD( omx+(m3x), omy+(m3y) );\
    COPY3_IF_LT( bcost, costs[0], bmx, omx+(m0x), bmy, omy+(m0y) );\
    COPY3_IF_LT( bcost, costs[1], bmx, omx+(m1x), bmy, omy+(m1y) );\
    COPY3_IF_LT( bcost, costs[2], bmx, omx+(m2x), bmy, omy+(m2y) );\
    COPY3_IF_LT( bcost, costs[3], bmx, omx+(m3x), bmy, omy+(m3y) );\
}

Loren Merritt's avatar
Loren Merritt committed
99 100 101 102 103 104 105 106
/* Evaluate four candidates given as absolute positions (not offsets from omx/omy)
 * with a single sad_x4 call and update bcost/bmx/bmy with any improvement.
 * Only the x component of the mv cost is added: the ESA loop in
 * x264_me_search_ref() subtracts the row's y cost from bcost before scanning a row
 * and adds it back afterwards.
 * Uses the caller's local array `costs` as scratch. */
#define COST_MV_X4_ABS( m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y )\
{\
    h->pixf.sad_x4[i_pixel]( m->p_fenc[0],\
        p_fref + (m0x) + (m0y)*m->i_stride[0],\
        p_fref + (m1x) + (m1y)*m->i_stride[0],\
        p_fref + (m2x) + (m2y)*m->i_stride[0],\
        p_fref + (m3x) + (m3y)*m->i_stride[0],\
        m->i_stride[0], costs );\
    costs[0] += p_cost_mvx[(m0x)<<2]; /* no cost_mvy */\
    costs[1] += p_cost_mvx[(m1x)<<2];\
    costs[2] += p_cost_mvx[(m2x)<<2];\
    costs[3] += p_cost_mvx[(m3x)<<2];\
    COPY3_IF_LT( bcost, costs[0], bmx, (m0x), bmy, (m0y) );\
    COPY3_IF_LT( bcost, costs[1], bmx, (m1x), bmy, (m1y) );\
    COPY3_IF_LT( bcost, costs[2], bmx, (m2x), bmy, (m2y) );\
    COPY3_IF_LT( bcost, costs[3], bmx, (m3x), bmy, (m3y) );\
}

117 118 119 120 121 122 123 124
/* One iteration of a radius-1 diamond: re-centre the search origin (omx,omy) on
 * (mx,my) and test its four direct neighbours (up, down, left, right). */
/*  1  */
/* 101 */
/*  1  */
#define DIA1_ITER( mx, my )\
{\
    omx = mx; omy = my;\
    COST_MV_X4( 0,-1, 0,1, -1,0, 1,0 );\
}
125

Loren Merritt's avatar
Loren Merritt committed
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
/* Cross-shaped search around (omx,omy): candidates at odd-step distances
 * start, start+2, ... (< x_max) along the horizontal axis, then the same along the
 * vertical axis up to y_max.
 * When the whole arm fits inside the legal mv range the candidates are tested four
 * at a time without range checks; the remaining (or all, if the arm does not fit)
 * candidates are tested one by one with a per-candidate range check.
 * Uses the caller's loop variable `i`. */
#define CROSS( start, x_max, y_max )\
{\
    i = (start);\
    if( (x_max) <= X264_MIN(mv_x_max-omx, omx-mv_x_min) )\
        for( ; i < (x_max)-2; i+=4 )\
            COST_MV_X4( i,0, -i,0, i+2,0, -i-2,0 );\
    for( ; i < (x_max); i+=2 )\
    {\
        if( omx+i <= mv_x_max )\
            COST_MV( omx+i, omy );\
        if( omx-i >= mv_x_min )\
            COST_MV( omx-i, omy );\
    }\
    i = (start);\
    if( (y_max) <= X264_MIN(mv_y_max-omy, omy-mv_y_min) )\
        for( ; i < (y_max)-2; i+=4 )\
            COST_MV_X4( 0,i, 0,-i, 0,i+2, 0,-i-2 );\
    for( ; i < (y_max); i+=2 )\
    {\
        if( omy+i <= mv_y_max )\
            COST_MV( omx, omy+i );\
        if( omy-i >= mv_y_min )\
            COST_MV( omx, omy-i );\
    }\
}
151

152
/* Motion search for one block against one reference frame.
 *
 * h                 encoder context; supplies the search method (h->mb.i_me_method),
 *                   the search range, the legal mv range and the pixel/mc functions.
 * m                 in:  i_pixel, p_fenc, p_fref, i_stride, mvp, p_cost_mv
 *                        (and integral, for ESA).
 *                   out: mv (quarter-pel units), cost, cost_mv.
 * mvc, i_mvc        extra candidate predictors, in the same units as m->mvp
 *                   (they are rounded with (v+2)>>2 before the full-pel search).
 * p_halfpel_thresh  passed through to refine_subpel(); may be NULL.
 *
 * The full-pel search runs first (diamond / hexagon / UMH / exhaustive), the result
 * is converted to quarter-pel, and if h->mb.i_subpel_refine >= 2 the me_hpel/me_qpel
 * iteration counts of subpel_iterations[] are applied via refine_subpel(). */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    int bmx, bmy, bcost;
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );

    int i, j;
    int dir;
    int costs[6];

    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];

#define CHECK_MVRANGE(mx,my) ( (mx) >= mv_x_min && (mx) <= mv_x_max && (my) >= mv_y_min && (my) <= mv_y_max )

    /* cost tables re-based so that they can be indexed by absolute mv components */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 );
    bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;

    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        /* predictors are evaluated at their sub-pel position; the best one is
         * then rounded to seed the full-pel search */
        COST_MV_PRED( bmx, bmy );
        for( i = 0; i < i_mvc; i++ )
        {
             const int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
             const int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
             if( mx != bpred_mx || my != bpred_my )
                 COST_MV_PRED( mx, my );
        }
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
    else
    {
        /* check the MVP */
        COST_MV( pmx, pmy );
        /* I don't know why this helps */
        bcost -= BITS_MVD(bmx,bmy);

        for( i = 0; i < i_mvc; i++ )
        {
             const int mx = x264_clip3( ( mvc[i][0] + 2 ) >> 2, mv_x_min, mv_x_max );
             const int my = x264_clip3( ( mvc[i][1] + 2 ) >> 2, mv_y_min, mv_y_max );
             if( mx != bmx || my != bmy )
                 COST_MV( mx, my );
        }
    }

    COST_MV( 0, 0 );

    switch( h->mb.i_me_method )
    {
    case X264_ME_DIA:
        /* diamond search, radius 1 */
        for( i = 0; i < i_me_range; i++ )
        {
            DIA1_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
            if( !CHECK_MVRANGE(bmx, bmy) )
                break;
        }
        break;

    case X264_ME_HEX:
me_hex2:
        /* hexagon search, radius 2 */
#if 0
        for( i = 0; i < i_me_range/2; i++ )
        {
            omx = bmx; omy = bmy;
            COST_MV( omx-2, omy   );
            COST_MV( omx-1, omy+2 );
            COST_MV( omx+1, omy+2 );
            COST_MV( omx+2, omy   );
            COST_MV( omx+1, omy-2 );
            COST_MV( omx-1, omy-2 );
            if( bmx == omx && bmy == omy )
                break;
            if( !CHECK_MVRANGE(bmx, bmy) )
                break;
        }
#else
        /* equivalent to the above, but eliminates duplicate candidates */
        dir = -2;

        /* hexagon */
        COST_MV_X3_DIR( -2,0, -1, 2,  1, 2, costs   );
        COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+3 );
        COPY2_IF_LT( bcost, costs[0], dir, 0 );
        COPY2_IF_LT( bcost, costs[1], dir, 1 );
        COPY2_IF_LT( bcost, costs[2], dir, 2 );
        COPY2_IF_LT( bcost, costs[3], dir, 3 );
        COPY2_IF_LT( bcost, costs[4], dir, 4 );
        COPY2_IF_LT( bcost, costs[5], dir, 5 );

        if( dir != -2 )
        {
            /* hex2[dir+1] is the offset of hexagon point `dir`; the table is padded
             * by one entry at each end so that dir-1 and dir+1 need no wrap-around */
            static const int hex2[8][2] = {{-1,-2}, {-2,0}, {-1,2}, {1,2}, {2,0}, {1,-2}, {-1,-2}, {-2,0}};
            bmx += hex2[dir+1][0];
            bmy += hex2[dir+1][1];
            /* half hexagon, not overlapping the previous iteration */
            for( i = 1; i < i_me_range/2 && CHECK_MVRANGE(bmx, bmy); i++ )
            {
                static const int mod6[8] = {5,0,1,2,3,4,5,0};
                const int odir = mod6[dir+1];
                COST_MV_X3_DIR( hex2[odir+0][0], hex2[odir+0][1],
                                hex2[odir+1][0], hex2[odir+1][1],
                                hex2[odir+2][0], hex2[odir+2][1],
                                costs );
                dir = -2;
                COPY2_IF_LT( bcost, costs[0], dir, odir-1 );
                COPY2_IF_LT( bcost, costs[1], dir, odir   );
                COPY2_IF_LT( bcost, costs[2], dir, odir+1 );
                if( dir == -2 )
                    break;
                bmx += hex2[dir+1][0];
                bmy += hex2[dir+1][1];
            }
        }
#endif
        /* square refine */
        omx = bmx; omy = bmy;
        COST_MV_X4(  0,-1,  0,1, -1,0, 1,0 );
        COST_MV_X4( -1,-1, -1,1, 1,-1, 1,1 );
        break;

    case X264_ME_UMH:
        {
            /* Uneven-cross Multi-Hexagon-grid Search
             * as in JM, except with different early termination */

            static const int x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 };

            int ucost1, ucost2;
            int cross_start = 1;

            /* refine predictors */
            ucost1 = bcost;
            DIA1_ITER( pmx, pmy );
            if( pmx || pmy )
                DIA1_ITER( 0, 0 );

            if(i_pixel == PIXEL_4x4)
                goto me_hex2;

            ucost2 = bcost;
            if( (bmx || bmy) && (bmx!=pmx || bmy!=pmy) )
                DIA1_ITER( bmx, bmy );
            if( bcost == ucost2 )
                cross_start = 3;
            omx = bmx; omy = bmy;

            /* early termination */
#define SAD_THRESH(v) ( bcost < ( v >> x264_pixel_size_shift[i_pixel] ) )
            if( bcost == ucost2 && SAD_THRESH(2000) )
            {
                COST_MV_X4( 0,-2, -1,-1, 1,-1, -2,0 );
                COST_MV_X4( 2, 0, -1, 1, 1, 1,  0,2 );
                if( bcost == ucost1 && SAD_THRESH(500) )
                    break;
                if( bcost == ucost2 )
                {
                    int range = (i_me_range>>1) | 1;
                    CROSS( 3, range, range );
                    COST_MV_X4( -1,-2, 1,-2, -2,-1, 2,-1 );
                    COST_MV_X4( -2, 1, 2, 1, -1, 2, 1, 2 );
                    if( bcost == ucost2 )
                        break;
                    cross_start = range + 2;
                }
            }

            /* adaptive search range */
            if( i_mvc )
            {
                /* range multipliers based on casual inspection of some statistics of
                 * average distance between current predictor and final mv found by ESA.
                 * these have not been tuned much by actual encoding. */
                static const int range_mul[4][4] =
                {
                    { 3, 3, 4, 4 },
                    { 3, 4, 4, 4 },
                    { 4, 4, 4, 5 },
                    { 4, 4, 5, 6 },
                };
                int mvd;
                int sad_ctx, mvd_ctx;

                if( i_mvc == 1 )
                {
                    if( i_pixel == PIXEL_16x16 )
                        /* mvc is probably the same as mvp, so the difference isn't meaningful.
                         * but prediction usually isn't too bad, so just use medium range */
                        mvd = 25;
                    else
                        mvd = abs( m->mvp[0] - mvc[0][0] )
                            + abs( m->mvp[1] - mvc[0][1] );
                }
                else
                {
                    /* calculate the degree of agreement between predictors. */
                    /* in 16x16, mvc includes all the neighbors used to make mvp,
                     * so don't count mvp separately. */
                    int i_denom = i_mvc - 1;
                    mvd = 0;
                    if( i_pixel != PIXEL_16x16 )
                    {
                        mvd = abs( m->mvp[0] - mvc[0][0] )
                            + abs( m->mvp[1] - mvc[0][1] );
                        i_denom++;
                    }
                    for( i = 0; i < i_mvc-1; i++ )
                        mvd += abs( mvc[i][0] - mvc[i+1][0] )
                             + abs( mvc[i][1] - mvc[i+1][1] );
                    mvd /= i_denom; //FIXME idiv
                }

                sad_ctx = SAD_THRESH(1000) ? 0
                        : SAD_THRESH(2000) ? 1
                        : SAD_THRESH(4000) ? 2 : 3;
                mvd_ctx = mvd < 10 ? 0
                        : mvd < 20 ? 1
                        : mvd < 40 ? 2 : 3;

                i_me_range = i_me_range * range_mul[mvd_ctx][sad_ctx] / 4;
            }

            /* FIXME if the above DIA2/OCT2/CROSS found a new mv, it has not updated omx/omy.
             * we are still centered on the same place as the DIA2. is this desirable? */
            CROSS( cross_start, i_me_range, i_me_range/2 );

            /* 5x5 ESA */
            omx = bmx; omy = bmy;
            if( bcost != ucost2 )
                COST_MV_X4(  1, 0,  0, 1, -1, 0,  0,-1 );
            COST_MV_X4(  1, 1, -1, 1, -1,-1,  1,-1 );
            COST_MV_X4(  2,-1,  2, 0,  2, 1,  2, 2 );
            COST_MV_X4(  1, 2,  0, 2, -1, 2, -2, 2 );
            COST_MV_X4( -2, 1, -2, 0, -2,-1, -2,-2 );
            COST_MV_X4( -1,-2,  0,-2,  1,-2,  2,-2 );

            /* hexagon grid */
            omx = bmx; omy = bmy;
            for( i = 1; i <= i_me_range/4; i++ )
            {
                static const int hex4[16][2] = {
                    {-4, 2}, {-4, 1}, {-4, 0}, {-4,-1}, {-4,-2},
                    { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
                    { 2, 3}, { 0, 4}, {-2, 3},
                    {-2,-3}, { 0,-4}, { 2,-3},
                };

                /* ring i may cross the legal mv range: test each point individually */
                if( 4*i > X264_MIN4( mv_x_max-omx, omx-mv_x_min,
                                     mv_y_max-omy, omy-mv_y_min ) )
                {
                    for( j = 0; j < 16; j++ )
                    {
                        int mx = omx + hex4[j][0]*i;
                        int my = omy + hex4[j][1]*i;
                        if( CHECK_MVRANGE(mx, my) )
                            COST_MV( mx, my );
                    }
                }
                else
                {
                    COST_MV_X4( -4*i, 2*i, -4*i, 1*i, -4*i, 0*i, -4*i,-1*i );
                    COST_MV_X4( -4*i,-2*i,  4*i,-2*i,  4*i,-1*i,  4*i, 0*i );
                    COST_MV_X4(  4*i, 1*i,  4*i, 2*i,  2*i, 3*i,  0*i, 4*i );
                    COST_MV_X4( -2*i, 3*i, -2*i,-3*i,  0*i,-4*i,  2*i,-3*i );
                }
            }
            goto me_hex2;
        }

    case X264_ME_ESA:
        {
            const int min_x = X264_MAX( bmx - i_me_range, mv_x_min);
            const int min_y = X264_MAX( bmy - i_me_range, mv_y_min);
            const int max_x = X264_MIN( bmx + i_me_range, mv_x_max);
            const int max_y = X264_MIN( bmy + i_me_range, mv_y_max);
            int mx, my;
            /* successive elimination by comparing DC before a full SAD,
             * because sum(abs(diff)) >= abs(diff(sum)). */
            const int stride = m->i_stride[0];
            static uint8_t zero[16*16] = {0,};
            uint16_t *sums_base = m->integral;
            int enc_dc[4];
            int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
            int delta = x264_pixel_size[sad_size].w;
            uint16_t *ads = x264_malloc((max_x-min_x+8) * sizeof(uint16_t));

            if( !ads )
            {
                /* no scratch row for the elimination pass (assumes x264_malloc
                 * reports failure by returning NULL): fall back to a plain
                 * exhaustive search over the same window instead of
                 * dereferencing a null pointer */
                for( my = min_y; my <= max_y; my++ )
                    for( mx = min_x; mx <= max_x; mx++ )
                        COST_MV( mx, my );
                break;
            }

            /* SAD against an all-zero block yields the sums of the source sub-blocks */
            h->pixf.sad_x4[sad_size]( zero, m->p_fenc[0], m->p_fenc[0]+delta,
                m->p_fenc[0]+delta*FENC_STRIDE, m->p_fenc[0]+delta+delta*FENC_STRIDE,
                FENC_STRIDE, enc_dc );
            if( delta == 4 )
                sums_base += stride * (h->fenc->i_lines[0] + 64);
            if( i_pixel == PIXEL_16x16 || i_pixel == PIXEL_8x16 || i_pixel == PIXEL_4x8 )
                delta *= stride;
            if( i_pixel == PIXEL_8x16 || i_pixel == PIXEL_4x8 )
                enc_dc[1] = enc_dc[2];

            for( my = min_y; my <= max_y; my++ )
            {
                /* surviving candidates are batched so they can go through sad_x4 */
                int mvs[3], i_mvs=0;
                /* the y cost is constant along the row: remove it from the
                 * threshold here and restore it after the row */
                bcost -= p_cost_mvy[my<<2];
                h->pixf.ads[i_pixel]( enc_dc, sums_base + min_x + my * stride, delta,
                                      ads, max_x-min_x+1 );
                for( mx = min_x; mx <= max_x; mx++ )
                {
                    if( ads[mx-min_x] < bcost - p_cost_mvx[mx<<2] )
                    {
                        if( i_mvs == 3 )
                        {
                            COST_MV_X4_ABS( mvs[0],my, mvs[1],my, mvs[2],my, mx,my );
                            i_mvs = 0;
                        }
                        else
                            mvs[i_mvs++] = mx;
                    }
                }
                bcost += p_cost_mvy[my<<2];
                /* leftovers that did not fill a batch of four */
                for( i=0; i<i_mvs; i++ )
                    COST_MV( mvs[i], my );
            }

            x264_free(ads);
        }
        break;
    }

    /* -> qpel mv */
    if( bpred_cost < bcost )
    {
        m->mv[0] = bpred_mx;
        m->mv[1] = bpred_my;
        m->cost = bpred_cost;
    }
    else
    {
        m->mv[0] = bmx << 2;
        m->mv[1] = bmy << 2;
        m->cost = bcost;
    }

    /* compute the real cost */
    m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
    if( bmx == pmx && bmy == pmy && h->mb.i_subpel_refine < 3 )
        m->cost += m->cost_mv;

    /* subpel refine */
    if( h->mb.i_subpel_refine >= 2 )
    {
        int hpel = subpel_iterations[h->mb.i_subpel_refine][2];
        int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
        refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
    }
    else if( m->mv[1] > h->mb.mv_max_spel[1] )
        m->mv[1] = h->mb.mv_max_spel[1];
}
534
#undef COST_MV
Laurent Aimar's avatar
Laurent Aimar committed
535 536

/* Final sub-pel refinement of an already searched block: runs refine_subpel() with
 * the refine_hpel / refine_qpel counts (columns 0 and 1 of subpel_iterations[]) for
 * the current h->mb.i_subpel_refine level, without a half-pel threshold and with
 * b_refine_qpel set.
 * For partitions with i_pixel <= PIXEL_8x8 in P slices, m->i_ref_cost is removed
 * from m->cost before refining. */
void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
{
    int hpel = subpel_iterations[h->mb.i_subpel_refine][0];
    int qpel = subpel_iterations[h->mb.i_subpel_refine][1];

    if( m->i_pixel <= PIXEL_8x8 && h->sh.i_type == SLICE_TYPE_P )
        m->cost -= m->i_ref_cost;

    refine_subpel( h, m, hpel, qpel, NULL, 1 );
}

Loren Merritt's avatar
Loren Merritt committed
547
/* Evaluate one sub-pel candidate (mx,my) with SAD: the reference block is fetched
 * through h->mc.get_ref() (pix[0] is the scratch buffer), the mv cost is added, and
 * bcost/bmx/bmy are updated on improvement. */
#define COST_MV_SAD( mx, my ) \
{ \
    int stride = 16; \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, (mx), (my), bw, bh ); \
    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
             + p_cost_mvx[ (mx) ] + p_cost_mvy[ (my) ]; \
    COPY3_IF_LT( bcost, cost, bmx, (mx), bmy, (my) ); \
}

556 557
/* Evaluate one sub-pel candidate (mx,my) with the h->pixf.mbcmp metric and record
 * it in bcost/bmx/bmy/bdir on improvement.
 * Unless b_refine_qpel is set, the candidate is skipped when `dir` is the opposite
 * (dir^1) of the previous step's direction odir.
 * With b_chroma_me, the two chroma planes are added to the cost one after the
 * other, each only while the running cost is still below bcost. */
#define COST_MV_SATD( mx, my, dir ) \
if( b_refine_qpel || ((dir)^1) != odir ) \
{ \
    int stride = 16; \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, (mx), (my), bw, bh ); \
    int cost = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
             + p_cost_mvx[ (mx) ] + p_cost_mvy[ (my) ]; \
    if( b_chroma_me && cost < bcost ) \
    { \
        h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix[0], 8, (mx), (my), bw/2, bh/2 ); \
        cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], FENC_STRIDE, pix[0], 8 ); \
        if( cost < bcost ) \
        { \
            h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix[0], 8, (mx), (my), bw/2, bh/2 ); \
            cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], FENC_STRIDE, pix[0], 8 ); \
        } \
    } \
    if( cost < bcost ) \
    {                  \
        bcost = cost;  \
        bmx = (mx);    \
        bmy = (my);    \
        bdir = (dir);  \
    } \
}

582
/* Half-pel then quarter-pel diamond refinement of m->mv.
 *
 * h                 encoder context (mv limits, pixel and mc functions).
 * m                 in: mv, cost, mvp, p_cost_mv, p_fenc, p_fref, i_stride, i_pixel.
 *                   out: mv, cost and (except on the early-termination path) cost_mv.
 * hpel_iters        maximum number of half-pel diamond iterations (SAD based).
 * qpel_iters        maximum number of quarter-pel diamond iterations (mbcmp based).
 * p_halfpel_thresh  optional; after the half-pel stage, if 7/8 of the current cost
 *                   exceeds *p_halfpel_thresh the function stores the result and
 *                   returns without the quarter-pel stage, otherwise the threshold
 *                   is lowered to the current cost when that is smaller.
 * b_refine_qpel     when 0, the half-pel result is clamped to mv_max_spel[1] and
 *                   re-scored with COST_MV_SATD before the quarter-pel stage. */
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    const int i_pixel = m->i_pixel;
    const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;

    DECLARE_ALIGNED( uint8_t, pix[2][32*18], 16 ); // really 17x17, but round up for alignment
    int omx, omy;
    int i;

    int bmx = m->mv[0];
    int bmy = m->mv[1];
    int bcost = m->cost;
    int odir = -1, bdir;

    /* try the subpel component of the predicted mv */
    if( hpel_iters && h->mb.i_subpel_refine < 3 )
    {
        int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
        int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
        if( mx != bmx || my != bmy )
            COST_MV_SAD( mx, my );
    }

    /* halfpel diamond search */
    for( i = hpel_iters; i > 0; i-- )
    {
        int costs[4];
        int stride = 32; // candidates are either all hpel or all qpel, so one stride is enough
        uint8_t *src0, *src1, *src2, *src3;

        omx = bmx;
        omy = bmy;
        /* two fetches cover all four neighbours: the block above (one extra row
         * gives the block below) and the block to the left (one pixel further
         * gives the block to the right) */
        src0 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, omx, omy-2, bw, bh+1 );
        src2 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[1], &stride, omx-2, omy, bw+4, bh );
        src1 = src0 + stride;
        src3 = src2 + 1;
        h->pixf.sad_x4[i_pixel]( m->p_fenc[0], src0, src1, src2, src3, stride, costs );
        COPY2_IF_LT( bcost, costs[0] + p_cost_mvx[omx  ] + p_cost_mvy[omy-2], bmy, omy-2 );
        COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx  ] + p_cost_mvy[omy+2], bmy, omy+2 );
        COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy  ], bmx, omx-2, bmy, omy );
        COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy  ], bmx, omx+2, bmy, omy );
        if( bmx == omx && bmy == omy )
            break;
    }

    if( !b_refine_qpel )
    {
        /* check for mvrange */
        if( bmy > h->mb.mv_max_spel[1] )
            bmy = h->mb.mv_max_spel[1];
        /* re-score the current best with the mbcmp metric used below */
        bcost = COST_MAX;
        COST_MV_SATD( bmx, bmy, -1 );
    }

    /* early termination when examining multiple reference frames */
    if( p_halfpel_thresh )
    {
        if( (bcost*7)>>3 > *p_halfpel_thresh )
        {
            m->cost = bcost;
            m->mv[0] = bmx;
            m->mv[1] = bmy;
            // don't need cost_mv
            return;
        }
        else if( bcost < *p_halfpel_thresh )
            *p_halfpel_thresh = bcost;
    }

    /* quarterpel diamond search */
    bdir = -1;
    for( i = qpel_iters; i > 0; i-- )
    {
        odir = bdir;
        omx = bmx;
        omy = bmy;
        COST_MV_SATD( omx, omy - 1, 0 );
        COST_MV_SATD( omx, omy + 1, 1 );
        COST_MV_SATD( omx - 1, omy, 2 );
        COST_MV_SATD( omx + 1, omy, 3 );
        if( bmx == omx && bmy == omy )
            break;
    }

    /* check for mvrange */
    if( bmy > h->mb.mv_max_spel[1] )
    {
        bmy = h->mb.mv_max_spel[1];
        bcost = COST_MAX;
        COST_MV_SATD( bmx, bmy, -1 );
    }

    m->cost = bcost;
    m->mv[0] = bmx;
    m->mv[1] = bmy;
    m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];
}
682

683 684 685 686 687 688 689 690
/* Interpolate and cache the prediction of both references at offset (dx,dy) from
 * their starting vectors; the slot index is 4 + 3*dx + dy, so dx,dy in {-1,0,1}
 * map to slots 0..8 of pix0[] / pix1[]. */
#define BIME_CACHE( dx, dy ) \
{ \
    int i = 4 + 3*(dx) + (dy); \
    h->mc.mc_luma( m0->p_fref, m0->i_stride[0], pix0[i], bw, om0x+(dx), om0y+(dy), bw, bh ); \
    h->mc.mc_luma( m1->p_fref, m1->i_stride[0], pix1[i], bw, om1x+(dx), om1y+(dy), bw, bh ); \
}

/* Cache the offset (a,b) and its point reflection (-a,-b). */
#define BIME_CACHE2(a,b) \
    BIME_CACHE(a,b) \
    BIME_CACHE(-(a),-(b))
693 694 695 696 697 698 699 700 701 702 703 704 705

/* Score the pair of vectors (m0x,m0y) / (m1x,m1y): average (or weighted-average
 * when i_weight != 32) the two cached predictions, compare against the source with
 * h->pixf.mbcmp, add the four mv costs, and update bcost / bm0x,bm0y,bm1x,bm1y on
 * improvement.
 * After pass 0, a pair already marked in visited[] (indexed by the low 3 bits of
 * each component) is skipped. */
#define COST_BIMV_SATD( m0x, m0y, m1x, m1y ) \
if( pass == 0 || !visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] ) \
{ \
    int cost; \
    int i0 = 4 + 3*((m0x)-om0x) + ((m0y)-om0y); \
    int i1 = 4 + 3*((m1x)-om1x) + ((m1y)-om1y); \
    visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] = 1; \
    memcpy( pix, pix0[i0], bs ); \
    if( i_weight == 32 ) \
        h->mc.avg[i_pixel]( pix, bw, pix1[i1], bw ); \
    else \
        h->mc.avg_weight[i_pixel]( pix, bw, pix1[i1], bw, i_weight ); \
    cost = h->pixf.mbcmp[i_pixel]( m0->p_fenc[0], FENC_STRIDE, pix, bw ) \
         + p_cost_m0x[ (m0x) ] + p_cost_m0y[ (m0y) ] \
         + p_cost_m1x[ (m1x) ] + p_cost_m1y[ (m1y) ]; \
    if( cost < bcost ) \
    {                  \
        bcost = cost;  \
        bm0x = (m0x);  \
        bm0y = (m0y);  \
        bm1x = (m1x);  \
        bm1y = (m1y);  \
    } \
}

/* Cost the candidate displaced by (d0x,d0y) from m0's search centre and by
 * (d1x,d1y) from m1's search centre. */
#define CHECK_BIDIR( d0x, d0y, d1x, d1y ) \
    COST_BIMV_SATD( om0x+d0x, om0y+d0y, om1x+d1x, om1y+d1y )

/* Cost the displacement (a,b,c,d) and its negation (-a,-b,-c,-d). */
#define CHECK_BIDIR2(a,b,c,d) \
    CHECK_BIDIR(a,b,c,d) \
    CHECK_BIDIR(-(a),-(b),-(c),-(d))

/* Cost the displacement (p,q,r,s) and each of its three cyclic rotations;
 * every one of them goes through CHECK_BIDIR2, in this order. */
#define CHECK_BIDIR8( p, q, r, s ) \
    CHECK_BIDIR2( p, q, r, s ) \
    CHECK_BIDIR2( q, r, s, p ) \
    CHECK_BIDIR2( r, s, p, q ) \
    CHECK_BIDIR2( s, p, q, r )

/* Jointly refine the two motion vectors of a bidirectional prediction.
 *
 * h:        encoder context; supplies the mc / pixf function tables and the
 *           mv limits used below
 * m0, m1:   the two motion estimation states. Their mv[] are the starting
 *           point and are overwritten with the best pair found.
 * i_weight: 32 selects h->mc.avg; any other value is handed to
 *           h->mc.avg_weight
 *
 * Returns the best cost found: h->pixf.mbcmp of the combined prediction
 * against m0->p_fenc[0] plus the four mv cost table entries. */
int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight )
{
    const int i_pixel = m0->i_pixel;
    const int bw = x264_pixel_size[i_pixel].w;
    const int bh = x264_pixel_size[i_pixel].h;
    const int bs = bw*bh;
    /* mv cost tables, offset by the clipped predicted mv so that they can be
     * indexed with absolute mv components.
     * NOTE(review): the vertical components (mvp[1]) are clipped against the
     * [0] limits as well; confirm whether mv_min_spel[1] / mv_max_spel[1]
     * was intended. */
    const int16_t *p_cost_m0x = m0->p_cost_mv - x264_clip3( m0->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    /* 3x3 neighbourhood of predictions around each search centre (slot 4 is
     * the centre), filled by BIME_CACHE */
    DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 );
    DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 );
    /* scratch buffer for the combined prediction */
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    int bm0x = m0->mv[0], om0x = bm0x;
    int bm0y = m0->mv[1], om0y = bm0y;
    int bm1x = m1->mv[0], om1x = bm1x;
    int bm1y = m1->mv[1], om1y = bm1y;
    int bcost = COST_MAX;
    int pass = 0;
    /* one flag per mv pair, indexed by the low three bits of each component */
    uint8_t visited[8][8][8][8];
    memset( visited, 0, sizeof(visited) );

    /* cost of the starting pair */
    BIME_CACHE( 0, 0 );
    CHECK_BIDIR( 0, 0, 0, 0 );

    /* no refinement when either vertical mv is within 8 of the upper limit;
     * m0->mv / m1->mv are left as they were */
    if( bm0y > h->mb.mv_max_spel[1] - 8 ||
        bm1y > h->mb.mv_max_spel[1] - 8 )
        return bcost;

    for( pass = 0; pass < 8; pass++ )
    {
        /* check all mv pairs that differ in at most 2 components from the current mvs. */
        /* doesn't do chroma ME. this probably doesn't matter, as the gains
         * from bidir ME are the same with and without chroma ME. */

        /* fill the remaining eight slots around both centres */
        BIME_CACHE2( 1, 0 );
        BIME_CACHE2( 0, 1 );
        BIME_CACHE2( 1, 1 );
        BIME_CACHE2( 1,-1 );

        CHECK_BIDIR8( 0, 0, 0, 1 );
        CHECK_BIDIR8( 0, 0, 1, 1 );
        CHECK_BIDIR2( 0, 1, 0, 1 );
        CHECK_BIDIR2( 1, 0, 1, 0 );
        CHECK_BIDIR8( 0, 0,-1, 1 );
        CHECK_BIDIR2( 0,-1, 0, 1 );
        CHECK_BIDIR2(-1, 0, 1, 0 );

        /* stop once a pass leaves the best pair unchanged */
        if( om0x == bm0x && om0y == bm0y && om1x == bm1x && om1y == bm1y )
            break;

        /* recentre on the new best pair and refresh the centre slot */
        om0x = bm0x;
        om0y = bm0y;
        om1x = bm1x;
        om1y = bm1y;
        BIME_CACHE( 0, 0 );
    }

    m0->mv[0] = bm0x;
    m0->mv[1] = bm0y;
    m1->mv[0] = bm1x;
    m1->mv[1] = bm1y;
    return bcost;
}
/* Redefinition of COST_MV_SATD for the code that follows.
 * Fetches the reference block for candidate (mx,my) with h->mc.get_ref,
 * compares it against m->p_fenc[0] with h->pixf.mbcmp, adds the two mv cost
 * table entries and stores the sum in dst. bsatd is then updated through
 * COPY1_IF_LT (defined elsewhere; presumably lowers bsatd to dst when dst is
 * smaller).
 * dst must be an lvalue. Expects h, m, pix, bw, bh, i_pixel, p_cost_mvx,
 * p_cost_mvy and bsatd to be in scope at the point of use. */
#undef COST_MV_SATD
#define COST_MV_SATD( mx, my, dst ) \
{ \
    int stride = 16; \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw*4, bh*4 ); \
    dst = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
        + p_cost_mvx[mx] + p_cost_mvy[my]; \
    COPY1_IF_LT( bsatd, dst ); \
}

/* Run the RD cost for candidate (mx,my) unless it is filtered out.
 *
 * satd: cost of the candidate as computed by COST_MV_SATD; the candidate is
 *       dropped when it exceeds bsatd * SATD_THRESH
 * dir:  search direction that produced the candidate (0..3), or a negative
 *       value for candidates that are not part of a diamond step
 *
 * A candidate is also dropped when dir^1 equals odir, or (for dir >= 0) when
 * its p_visited[] flag is already set. Negative dir bypasses the p_visited[]
 * lookup and does not set the flag.
 * For a surviving candidate the mv is written to cache_mv / cache_mv2,
 * x264_rd_cost_part is called, and bcost / bmx / bmy are updated through
 * COPY3_IF_LT (defined elsewhere). */
#define COST_MV_RD( mx, my, satd, dir ) \
{ \
    if( (satd) <= bsatd * SATD_THRESH \
        && ((dir)^1) != odir \
        && ((dir)<0 || !p_visited[(mx)+(my)*16]) ) \
    { \
        int cost; \
        cache_mv[0] = cache_mv2[0] = (mx); \
        cache_mv[1] = cache_mv2[1] = (my); \
        cost = x264_rd_cost_part( h, i_lambda2, i8, m->i_pixel ); \
        COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my ); \
        if((dir)>=0) p_visited[(mx)+(my)*16] = 1; \
    } \
}

/* Candidates whose SATD exceeds the best SATD by more than this factor are not
 * RD-evaluated. Must stay unparenthesized: COST_MV_RD uses it as
 * `bsatd * SATD_THRESH`, i.e. (bsatd * 17) / 16 in integer arithmetic, whereas
 * (17/16) would evaluate to 1. */
#define SATD_THRESH 17/16

/* Refine the motion vector of one partition using the RD cost.
 *
 * Candidates are first costed with COST_MV_SATD; only those that pass the
 * filters in COST_MV_RD are costed with x264_rd_cost_part. The starting mv and
 * the predicted mv are tried first, followed by up to two diamond steps of
 * size 2 and up to two of size 1.
 *
 * h:         encoder context
 * m:         motion estimation state; mv[] is the starting point. On return
 *            cost and mv[] hold the result (mvp[] may have been recomputed).
 * i_lambda2: forwarded to x264_rd_cost_part
 * i8:        partition index; selects the mv cache position
 *
 * The final mv and mvd are also written to the macroblock mv caches. */
void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 )
{
    // don't have to fill the whole mv cache rectangle
    static const int pixel_mv_offs[] = { 0, 4, 4*8, 0 };
    int16_t *cache_mv = h->mb.cache.mv[0][x264_scan8[i8*4]];
    int16_t *cache_mv2 = cache_mv + pixel_mv_offs[m->i_pixel];
    const int16_t *p_cost_mvx, *p_cost_mvy;
    // partition size divided by 4
    const int bw = x264_pixel_size[m->i_pixel].w>>2;
    const int bh = x264_pixel_size[m->i_pixel].h>>2;
    const int i_pixel = m->i_pixel;

    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    // for 16x16 the incoming m->cost is taken as the cost of the starting mv
    int bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX;
    int bmx = m->mv[0];
    int bmy = m->mv[1];
    int pmx, pmy, omx, omy, i;
    // NOTE(review): bdir is only ever assigned -1 in this function, so unless
    // COPY3_IF_LT modifies it, odir stays -1 and the direction test in
    // COST_MV_RD never rejects a candidate -- confirm.
    int odir = -1, bdir;
    unsigned bsatd, satds[4];

    int visited[16*13] = {0}; // only need 13x13, but 16 is more convenient
    int *p_visited = &visited[6+6*16];

    if( m->i_pixel != PIXEL_16x16 && i8 != 0 )
        x264_mb_predict_mv( h, 0, i8*4, bw, m->mvp );
    pmx = m->mvp[0];
    pmy = m->mvp[1];
    // offset the cost table so that it can be indexed with absolute mv components
    p_cost_mvx = m->p_cost_mv - pmx;
    p_cost_mvy = m->p_cost_mv - pmy;
    COST_MV_SATD( bmx, bmy, bsatd );
    if( m->i_pixel != PIXEL_16x16 )
        COST_MV_RD( bmx, bmy, 0, -1 );

    /* check the predicted mv */
    if( (bmx != pmx || bmy != pmy)
        && pmx >= h->mb.mv_min_spel[0] && pmx <= h->mb.mv_max_spel[0]
        && pmy >= h->mb.mv_min_spel[1] && pmy <= h->mb.mv_max_spel[1] )
    {
        int satd;
        COST_MV_SATD( pmx, pmy, satd );
        COST_MV_RD( pmx, pmy, satd, -1 );
    }

    /* mark mv and mvp as visited */
    p_visited[0] = 1;
    // rebase so that p_visited can be indexed with absolute mv components,
    // with the current best mv at the centre of the grid
    p_visited -= bmx + bmy*16;
    {
        // whichever of (starting mv, predicted mv) is not the current best
        int mx = bmx ^ m->mv[0] ^ pmx;
        int my = bmy ^ m->mv[1] ^ pmy;
        if( abs(mx-bmx) < 7 && abs(my-bmy) < 7 )
            p_visited[mx + my*16] = 1;
    }

    /* hpel diamond */
    bdir = -1;
    for( i = 0; i < 2; i++ )
    {
        // a step of 2 downwards would exceed the vertical limit
        if( bmy > h->mb.mv_max_spel[1] - 2 )
            break;
        omx = bmx;
        omy = bmy;
        odir = bdir;
        // all four SATDs are computed first, so bsatd already reflects
        // every neighbour when the RD filter is applied
        COST_MV_SATD( omx, omy - 2, satds[0] );
        COST_MV_SATD( omx, omy + 2, satds[1] );
        COST_MV_SATD( omx - 2, omy, satds[2] );
        COST_MV_SATD( omx + 2, omy, satds[3] );
        COST_MV_RD( omx, omy - 2, satds[0], 0 );
        COST_MV_RD( omx, omy + 2, satds[1], 1 );
        COST_MV_RD( omx - 2, omy, satds[2], 2 );
        COST_MV_RD( omx + 2, omy, satds[3], 3 );
        if( bmx == omx && bmy == omy )
            break;
    }

    /* qpel diamond */
    bdir = -1;
    for( i = 0; i < 2; i++ )
    {
        // a step of 1 downwards would exceed the vertical limit
        if( bmy > h->mb.mv_max_spel[1] - 1 )
            break;
        omx = bmx;
        omy = bmy;
        odir = bdir;
        COST_MV_SATD( omx, omy - 1, satds[0] );
        COST_MV_SATD( omx, omy + 1, satds[1] );
        COST_MV_SATD( omx - 1, omy, satds[2] );
        COST_MV_SATD( omx + 1, omy, satds[3] );
        COST_MV_RD( omx, omy - 1, satds[0], 0 );
        COST_MV_RD( omx, omy + 1, satds[1], 1 );
        COST_MV_RD( omx - 1, omy, satds[2], 2 );
        COST_MV_RD( omx + 1, omy, satds[3], 3 );
        if( bmx == omx && bmy == omy )
            break;
    }

    // clamp the vertical component to the upper limit; bcost is not recomputed
    if( bmy > h->mb.mv_max_spel[1] )
        bmy = h->mb.mv_max_spel[1];

    m->cost = bcost;
    m->mv[0] = bmx;
    m->mv[1] = bmy;

    x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, 0, bmx, bmy );
    x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, 0, bmx - pmx, bmy - pmy );
}