/*****************************************************************************
 * mc.c: motion compensation
 *****************************************************************************
 * Copyright (C) 2003-2014 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

#include "common.h"

/* Architecture-specific motion compensation declarations. */
#if HAVE_MMX
#include "x86/mc.h"
#endif
#if ARCH_PPC
#include "ppc/mc.h"
#endif
#if ARCH_ARM
#include "arm/mc.h"
#endif


40 41 42
static inline void pixel_avg( pixel *dst,  intptr_t i_dst_stride,
                              pixel *src1, intptr_t i_src1_stride,
                              pixel *src2, intptr_t i_src2_stride, int i_width, int i_height )
Laurent Aimar's avatar
Laurent Aimar committed
43
{
44
    for( int y = 0; y < i_height; y++ )
Laurent Aimar's avatar
Laurent Aimar committed
45
    {
46
        for( int x = 0; x < i_width; x++ )
Laurent Aimar's avatar
Laurent Aimar committed
47 48 49 50 51 52 53
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        dst  += i_dst_stride;
        src1 += i_src1_stride;
        src2 += i_src2_stride;
    }
}

54 55 56
static inline void pixel_avg_wxh( pixel *dst,  intptr_t i_dst,
                                  pixel *src1, intptr_t i_src1,
                                  pixel *src2, intptr_t i_src2, int width, int height )
57
{
58
    for( int y = 0; y < height; y++ )
59
    {
60
        for( int x = 0; x < width; x++ )
61 62 63
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        src1 += i_src1;
        src2 += i_src2;
64 65 66 67 68 69
        dst += i_dst;
    }
}

/* Implicit weighted bipred only:
 * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
70 71 72
static inline void pixel_avg_weight_wxh( pixel *dst,  intptr_t i_dst,
                                         pixel *src1, intptr_t i_src1,
                                         pixel *src2, intptr_t i_src2, int width, int height, int i_weight1 )
73
{
74
    int i_weight2 = 64 - i_weight1;
75
    for( int y = 0; y<height; y++, dst += i_dst, src1 += i_src1, src2 += i_src2 )
76 77
        for( int x = 0; x<width; x++ )
            dst[x] = x264_clip_pixel( (src1[x]*i_weight1 + src2[x]*i_weight2 + (1<<5)) >> 6 );
78
}
Fiona Glaser's avatar
Fiona Glaser committed
79
#undef op_scale2
80

Fiona Glaser's avatar
Fiona Glaser committed
81
#define PIXEL_AVG_C( name, width, height ) \
82 83 84
static void name( pixel *pix1, intptr_t i_stride_pix1, \
                  pixel *pix2, intptr_t i_stride_pix2, \
                  pixel *pix3, intptr_t i_stride_pix3, int weight ) \
85
{ \
Anton Mitrofanov's avatar
Anton Mitrofanov committed
86
    if( weight == 32 ) \
Fiona Glaser's avatar
Fiona Glaser committed
87
        pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
Anton Mitrofanov's avatar
Anton Mitrofanov committed
88
    else \
Fiona Glaser's avatar
Fiona Glaser committed
89
        pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height, weight ); \
90
}
Fiona Glaser's avatar
Fiona Glaser committed
91 92 93 94 95
PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
PIXEL_AVG_C( pixel_avg_16x8,  16, 8 )
PIXEL_AVG_C( pixel_avg_8x16,  8, 16 )
PIXEL_AVG_C( pixel_avg_8x8,   8, 8 )
PIXEL_AVG_C( pixel_avg_8x4,   8, 4 )
Henrik Gramner's avatar
Henrik Gramner committed
96
PIXEL_AVG_C( pixel_avg_4x16,  4, 16 )
Fiona Glaser's avatar
Fiona Glaser committed
97 98 99
PIXEL_AVG_C( pixel_avg_4x8,   4, 8 )
PIXEL_AVG_C( pixel_avg_4x4,   4, 4 )
PIXEL_AVG_C( pixel_avg_4x2,   4, 2 )
Henrik Gramner's avatar
Henrik Gramner committed
100
PIXEL_AVG_C( pixel_avg_2x8,   2, 8 )
Fiona Glaser's avatar
Fiona Glaser committed
101 102
PIXEL_AVG_C( pixel_avg_2x4,   2, 4 )
PIXEL_AVG_C( pixel_avg_2x2,   2, 2 )
103

Dylan Yudaken's avatar
Dylan Yudaken committed
104 105 106 107
static void x264_weight_cache( x264_t *h, x264_weight_t *w )
{
    w->weightfn = h->mc.weight;
}
108 109
#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
110 111
static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                       const x264_weight_t *weight, int i_width, int i_height )
Dylan Yudaken's avatar
Dylan Yudaken committed
112
{
113 114 115 116
    int offset = weight->i_offset << (BIT_DEPTH-8);
    int scale = weight->i_scale;
    int denom = weight->i_denom;
    if( denom >= 1 )
Dylan Yudaken's avatar
Dylan Yudaken committed
117
    {
118 119
        for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
            for( int x = 0; x < i_width; x++ )
Dylan Yudaken's avatar
Dylan Yudaken committed
120 121 122 123
                opscale( x );
    }
    else
    {
124 125
        for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
            for( int x = 0; x < i_width; x++ )
Dylan Yudaken's avatar
Dylan Yudaken committed
126 127 128 129
                opscale_noden( x );
    }
}

130
#define MC_WEIGHT_C( name, width ) \
131
    static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \
Dylan Yudaken's avatar
Dylan Yudaken committed
132
{ \
133
    mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
Dylan Yudaken's avatar
Dylan Yudaken committed
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
}

MC_WEIGHT_C( mc_weight_w20, 20 )
MC_WEIGHT_C( mc_weight_w16, 16 )
MC_WEIGHT_C( mc_weight_w12, 12 )
MC_WEIGHT_C( mc_weight_w8,   8 )
MC_WEIGHT_C( mc_weight_w4,   4 )
MC_WEIGHT_C( mc_weight_w2,   2 )

static weight_fn_t x264_mc_weight_wtab[6] =
{
    mc_weight_w2,
    mc_weight_w4,
    mc_weight_w8,
    mc_weight_w12,
    mc_weight_w16,
    mc_weight_w20,
};
Anton Mitrofanov's avatar
Anton Mitrofanov committed
152
const x264_weight_t x264_weight_none[3] = { {{0}} };
153
static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height )
Laurent Aimar's avatar
Laurent Aimar committed
154
{
155
    for( int y = 0; y < i_height; y++ )
Laurent Aimar's avatar
Laurent Aimar committed
156
    {
157
        memcpy( dst, src, i_width * sizeof(pixel) );
Laurent Aimar's avatar
Laurent Aimar committed
158 159 160 161 162 163

        src += i_src_stride;
        dst += i_dst_stride;
    }
}

Loren Merritt's avatar
Loren Merritt committed
164
#define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
165
static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
166
                         intptr_t stride, int width, int height, int16_t *buf )
Laurent Aimar's avatar
Laurent Aimar committed
167
{
168
    const int pad = (BIT_DEPTH > 9) ? (-10 * PIXEL_MAX) : 0;
169
    for( int y = 0; y < height; y++ )
Laurent Aimar's avatar
Laurent Aimar committed
170
    {
171
        for( int x = -2; x < width+3; x++ )
Laurent Aimar's avatar
Laurent Aimar committed
172
        {
Loren Merritt's avatar
Loren Merritt committed
173
            int v = TAPFILTER(src,stride);
174
            dstv[x] = x264_clip_pixel( (v + 16) >> 5 );
175 176
            /* transform v for storage in a 16-bit integer */
            buf[x+2] = v + pad;
Laurent Aimar's avatar
Laurent Aimar committed
177
        }
178
        for( int x = 0; x < width; x++ )
179
            dstc[x] = x264_clip_pixel( (TAPFILTER(buf+2,1) - 32*pad + 512) >> 10 );
180
        for( int x = 0; x < width; x++ )
181
            dsth[x] = x264_clip_pixel( (TAPFILTER(src,1) + 16) >> 5 );
Loren Merritt's avatar
Loren Merritt committed
182 183 184 185
        dsth += stride;
        dstv += stride;
        dstc += stride;
        src += stride;
Laurent Aimar's avatar
Laurent Aimar committed
186
    }
187 188
}

/* Plane selectors indexed by qpel_idx = ((mvy&3)<<2) + (mvx&3).
 * mc_luma/get_ref use hpel_ref0[qpel_idx] to pick the first source plane out
 * of src[4], and hpel_ref1[qpel_idx] to pick the second plane that is
 * averaged with it when (qpel_idx & 5) is non-zero. */
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};

192 193
static void mc_luma( pixel *dst,    intptr_t i_dst_stride,
                     pixel *src[4], intptr_t i_src_stride,
Loren Merritt's avatar
Loren Merritt committed
194
                     int mvx, int mvy,
Dylan Yudaken's avatar
Dylan Yudaken committed
195
                     int i_width, int i_height, const x264_weight_t *weight )
196
{
Loren Merritt's avatar
Loren Merritt committed
197 198
    int qpel_idx = ((mvy&3)<<2) + (mvx&3);
    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
199
    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
200

Loren Merritt's avatar
Loren Merritt committed
201
    if( qpel_idx & 5 ) /* qpel interpolation needed */
202
    {
203
        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
204 205
        pixel_avg( dst, i_dst_stride, src1, i_src_stride,
                   src2, i_src_stride, i_width, i_height );
Dylan Yudaken's avatar
Dylan Yudaken committed
206 207
        if( weight->weightfn )
            mc_weight( dst, i_dst_stride, dst, i_dst_stride, weight, i_width, i_height );
208
    }
Dylan Yudaken's avatar
Dylan Yudaken committed
209 210
    else if( weight->weightfn )
        mc_weight( dst, i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
211 212 213 214
    else
        mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
}

215 216
static pixel *get_ref( pixel *dst,   intptr_t *i_dst_stride,
                       pixel *src[4], intptr_t i_src_stride,
217 218
                       int mvx, int mvy,
                       int i_width, int i_height, const x264_weight_t *weight )
219
{
Loren Merritt's avatar
Loren Merritt committed
220 221
    int qpel_idx = ((mvy&3)<<2) + (mvx&3);
    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
222
    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
223

Loren Merritt's avatar
Loren Merritt committed
224
    if( qpel_idx & 5 ) /* qpel interpolation needed */
225
    {
226
        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
227 228
        pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
                   src2, i_src_stride, i_width, i_height );
Dylan Yudaken's avatar
Dylan Yudaken committed
229 230 231 232 233 234 235
        if( weight->weightfn )
            mc_weight( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_width, i_height );
        return dst;
    }
    else if( weight->weightfn )
    {
        mc_weight( dst, *i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
236 237 238 239 240 241 242 243 244
        return dst;
    }
    else
    {
        *i_dst_stride = i_src_stride;
        return src1;
    }
}

Laurent Aimar's avatar
Laurent Aimar committed
245
/* full chroma mc (ie until 1/8 pixel)*/
246 247
static void mc_chroma( pixel *dstu, pixel *dstv, intptr_t i_dst_stride,
                       pixel *src, intptr_t i_src_stride,
248 249
                       int mvx, int mvy,
                       int i_width, int i_height )
Laurent Aimar's avatar
Laurent Aimar committed
250
{
251
    pixel *srcp;
Laurent Aimar's avatar
Laurent Aimar committed
252

253 254 255 256 257 258
    int d8x = mvx&0x07;
    int d8y = mvy&0x07;
    int cA = (8-d8x)*(8-d8y);
    int cB = d8x    *(8-d8y);
    int cC = (8-d8x)*d8y;
    int cD = d8x    *d8y;
Laurent Aimar's avatar
Laurent Aimar committed
259

260
    src += (mvy >> 3) * i_src_stride + (mvx >> 3)*2;
Laurent Aimar's avatar
Laurent Aimar committed
261 262
    srcp = &src[i_src_stride];

263
    for( int y = 0; y < i_height; y++ )
Laurent Aimar's avatar
Laurent Aimar committed
264
    {
265
        for( int x = 0; x < i_width; x++ )
266 267 268 269 270 271 272 273
        {
            dstu[x] = ( cA*src[2*x]  + cB*src[2*x+2] +
                        cC*srcp[2*x] + cD*srcp[2*x+2] + 32 ) >> 6;
            dstv[x] = ( cA*src[2*x+1]  + cB*src[2*x+3] +
                        cC*srcp[2*x+1] + cD*srcp[2*x+3] + 32 ) >> 6;
        }
        dstu += i_dst_stride;
        dstv += i_dst_stride;
Laurent Aimar's avatar
Laurent Aimar committed
274 275 276 277 278
        src   = srcp;
        srcp += i_src_stride;
    }
}

Loren Merritt's avatar
Loren Merritt committed
279
#define MC_COPY(W) \
280
static void mc_copy_w##W( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int i_height ) \
Loren Merritt's avatar
Loren Merritt committed
281 282 283 284 285 286 287
{ \
    mc_copy( src, i_src, dst, i_dst, W, i_height ); \
}
MC_COPY( 16 )
MC_COPY( 8 )
MC_COPY( 4 )

288 289
void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
                        pixel *src, intptr_t i_src, int w, int h )
290 291 292
{
    while( h-- )
    {
293
        memcpy( dst, src, w * sizeof(pixel) );
294 295 296 297 298
        dst += i_dst;
        src += i_src;
    }
}

299 300 301
void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
                                   pixel *srcu, intptr_t i_srcu,
                                   pixel *srcv, intptr_t i_srcv, int w, int h )
302 303 304 305
{
    for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
        for( int x=0; x<w; x++ )
        {
306 307
            dst[2*x]   = srcu[x];
            dst[2*x+1] = srcv[x];
308 309 310
        }
}

311 312 313
static void x264_plane_copy_deinterleave_c( pixel *dstu, intptr_t i_dstu,
                                            pixel *dstv, intptr_t i_dstv,
                                            pixel *src,  intptr_t i_src, int w, int h )
314 315 316 317 318 319 320 321 322
{
    for( int y=0; y<h; y++, dstu+=i_dstu, dstv+=i_dstv, src+=i_src )
        for( int x=0; x<w; x++ )
        {
            dstu[x] = src[2*x];
            dstv[x] = src[2*x+1];
        }
}

323 324 325 326
static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta,
                                                pixel *dstb, intptr_t i_dstb,
                                                pixel *dstc, intptr_t i_dstc,
                                                pixel *src,  intptr_t i_src, int pw, int w, int h )
xvidfan's avatar
xvidfan committed
327 328 329 330 331 332 333 334 335 336 337 338
{
    for( int y=0; y<h; y++, dsta+=i_dsta, dstb+=i_dstb, dstc+=i_dstc, src+=i_src )
    {
        for( int x=0; x<w; x++ )
        {
            dsta[x] = src[x*pw];
            dstb[x] = src[x*pw+1];
            dstc[x] = src[x*pw+2];
        }
    }
}

James Weaver's avatar
James Weaver committed
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
                                          pixel *dstc, intptr_t i_dstc,
                                          uint32_t *src, intptr_t i_src, int w, int h )
{
    for( int l = 0; l < h; l++ )
    {
        pixel *dsty0 = dsty;
        pixel *dstc0 = dstc;
        uint32_t *src0 = src;

        for( int n = 0; n < w; n += 3 )
        {
            *(dstc0++) = *src0 & 0x03FF;
            *(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
            *(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
            src0++;
            *(dsty0++) = *src0 & 0x03FF;
            *(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
            *(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
            src0++;
        }

        dsty += i_dsty;
        dstc += i_dstc;
        src  += i_src;
    }
}

367
static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
368
{
Henrik Gramner's avatar
Henrik Gramner committed
369
    for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
370 371 372 373 374 375 376
        for( int x=0; x<8; x++ )
        {
            dst[2*x]   = srcu[x];
            dst[2*x+1] = srcv[x];
        }
}

377
static void load_deinterleave_chroma_fenc( pixel *dst, pixel *src, intptr_t i_src, int height )
378
{
Henrik Gramner's avatar
Henrik Gramner committed
379
    x264_plane_copy_deinterleave_c( dst, FENC_STRIDE, dst+FENC_STRIDE/2, FENC_STRIDE, src, i_src, 8, height );
380 381
}

382
static void load_deinterleave_chroma_fdec( pixel *dst, pixel *src, intptr_t i_src, int height )
383
{
Henrik Gramner's avatar
Henrik Gramner committed
384
    x264_plane_copy_deinterleave_c( dst, FDEC_STRIDE, dst+FDEC_STRIDE/2, FDEC_STRIDE, src, i_src, 8, height );
385 386
}

387 388
static void prefetch_fenc_null( pixel *pix_y,  intptr_t stride_y,
                                pixel *pix_uv, intptr_t stride_uv, int mb_x )
389 390
{}

391
static void prefetch_ref_null( pixel *pix, intptr_t stride, int parity )
392 393
{}

/* Zeroes n bytes starting at dst. This C version is a plain memset and
 * itself places no alignment requirement on dst. */
static void memzero_aligned( void * dst, size_t n )
{
    memset( dst, 0, n );
}

399
static void integral_init4h( uint16_t *sum, pixel *pix, intptr_t stride )
Loren Merritt's avatar
Loren Merritt committed
400
{
401 402
    int v = pix[0]+pix[1]+pix[2]+pix[3];
    for( int x = 0; x < stride-4; x++ )
Loren Merritt's avatar
Loren Merritt committed
403 404 405 406 407 408
    {
        sum[x] = v + sum[x-stride];
        v += pix[x+4] - pix[x];
    }
}

409
static void integral_init8h( uint16_t *sum, pixel *pix, intptr_t stride )
Loren Merritt's avatar
Loren Merritt committed
410
{
411 412
    int v = pix[0]+pix[1]+pix[2]+pix[3]+pix[4]+pix[5]+pix[6]+pix[7];
    for( int x = 0; x < stride-8; x++ )
Loren Merritt's avatar
Loren Merritt committed
413 414 415 416 417 418
    {
        sum[x] = v + sum[x-stride];
        v += pix[x+8] - pix[x];
    }
}

/* Vertical step for the 4-wide integral rows held in sum8.
 * For x in [0, stride-8):
 *   sum4[x] = sum8[x+4*stride] - sum8[x]
 *   sum8[x] = sum8[x+8*stride] + sum8[x+8*stride+4] - sum8[x] - sum8[x+4]
 * The first loop must finish before the second one overwrites sum8.
 * All arithmetic wraps to 16 bits on store. */
static void integral_init4v( uint16_t *sum8, uint16_t *sum4, intptr_t stride )
{
    for( int x = 0; x < stride-8; x++ )
        sum4[x] = sum8[x+4*stride] - sum8[x];
    for( int x = 0; x < stride-8; x++ )
        sum8[x] = sum8[x+8*stride] + sum8[x+8*stride+4] - sum8[x] - sum8[x+4];
}

/* For x in [0, stride-8): sum8[x] = sum8[x+8*stride] - sum8[x], i.e. the
 * difference between the row eight lines below and the current row, stored
 * in place (wrapping to 16 bits). */
static void integral_init8v( uint16_t *sum8, intptr_t stride )
{
    for( int x = 0; x < stride-8; x++ )
        sum8[x] = sum8[x+8*stride] - sum8[x];
}

Loren Merritt's avatar
Loren Merritt committed
433 434
void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
{
435
    pixel *src = frame->plane[0];
Loren Merritt's avatar
Loren Merritt committed
436 437 438 439 440
    int i_stride = frame->i_stride[0];
    int i_height = frame->i_lines[0];
    int i_width  = frame->i_width[0];

    // duplicate last row and column so that their interpolation doesn't have to be special-cased
441
    for( int y = 0; y < i_height; y++ )
Loren Merritt's avatar
Loren Merritt committed
442
        src[i_width+y*i_stride] = src[i_width-1+y*i_stride];
443
    memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), (i_width+1) * sizeof(pixel) );
Loren Merritt's avatar
Loren Merritt committed
444 445 446 447
    h->mc.frame_init_lowres_core( src, frame->lowres[0], frame->lowres[1], frame->lowres[2], frame->lowres[3],
                                  i_stride, frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres );
    x264_frame_expand_border_lowres( frame );

448 449
    memset( frame->i_cost_est, -1, sizeof(frame->i_cost_est) );

450 451
    for( int y = 0; y < h->param.i_bframe + 2; y++ )
        for( int x = 0; x < h->param.i_bframe + 2; x++ )
452
            frame->i_row_satds[y][x][0] = -1;
453

454 455
    for( int y = 0; y <= !!h->param.i_bframe; y++ )
        for( int x = 0; x <= h->param.i_bframe; x++ )
456
            frame->lowres_mvs[y][x][0][0] = 0x7FFF;
Loren Merritt's avatar
Loren Merritt committed
457 458
}

459
static void frame_init_lowres_core( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
460
                                    intptr_t src_stride, intptr_t dst_stride, int width, int height )
Loren Merritt's avatar
Loren Merritt committed
461
{
462
    for( int y = 0; y < height; y++ )
Loren Merritt's avatar
Loren Merritt committed
463
    {
464 465
        pixel *src1 = src0+src_stride;
        pixel *src2 = src1+src_stride;
466
        for( int x = 0; x<width; x++ )
Loren Merritt's avatar
Loren Merritt committed
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483
        {
            // slower than naive bilinear, but matches asm
#define FILTER(a,b,c,d) ((((a+b+1)>>1)+((c+d+1)>>1)+1)>>1)
            dst0[x] = FILTER(src0[2*x  ], src1[2*x  ], src0[2*x+1], src1[2*x+1]);
            dsth[x] = FILTER(src0[2*x+1], src1[2*x+1], src0[2*x+2], src1[2*x+2]);
            dstv[x] = FILTER(src1[2*x  ], src2[2*x  ], src1[2*x+1], src2[2*x+1]);
            dstc[x] = FILTER(src1[2*x+1], src2[2*x+1], src1[2*x+2], src2[2*x+2]);
#undef FILTER
        }
        src0 += src_stride*2;
        dst0 += dst_stride;
        dsth += dst_stride;
        dstv += dst_stride;
        dstc += dst_stride;
    }
}

484 485
/* Estimate the total amount of influence on future quality that could be had if we
 * were to improve the reference samples used to inter predict any given macroblock. */
486
static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
487
                                   uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
488
{
489
    float fps = *fps_factor;
490
    for( int i = 0; i < len; i++ )
491
    {
492 493 494 495 496 497 498
        int intra_cost = intra_costs[i];
        int inter_cost = X264_MIN(intra_costs[i], inter_costs[i] & LOWRES_COST_MASK);
        float propagate_intra  = intra_cost * inv_qscales[i];
        float propagate_amount = propagate_in[i] + propagate_intra*fps;
        float propagate_num    = intra_cost - inter_cost;
        float propagate_denom  = intra_cost;
        dst[i] = X264_MIN((int)(propagate_amount * propagate_num / propagate_denom + 0.5f), 32767);
499 500 501
    }
}

502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
                                   int16_t *propagate_amount, uint16_t *lowres_costs,
                                   int bipred_weight, int mb_y, int len, int list )
{
    unsigned stride = h->mb.i_mb_stride;
    unsigned width = h->mb.i_mb_width;
    unsigned height = h->mb.i_mb_height;

    for( unsigned i = 0; i < len; i++ )
    {
#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
        int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;

        if( !(lists_used & (1 << list)) )
            continue;

        int listamount = propagate_amount[i];
        /* Apply bipred weighting. */
        if( lists_used == 3 )
            listamount = (listamount * bipred_weight + 32) >> 6;

        /* Early termination for simple case of mv0. */
        if( !M32( mvs[i] ) )
        {
            CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
            continue;
        }

        int x = mvs[i][0];
        int y = mvs[i][1];
        unsigned mbx = (x>>5)+i;
        unsigned mby = (y>>5)+mb_y;
        unsigned idx0 = mbx + mby * stride;
        unsigned idx2 = idx0 + stride;
        x &= 31;
        y &= 31;
        int idx0weight = (32-y)*(32-x);
        int idx1weight = (32-y)*x;
        int idx2weight = y*(32-x);
        int idx3weight = y*x;
        idx0weight = (idx0weight * listamount + 512) >> 10;
        idx1weight = (idx1weight * listamount + 512) >> 10;
        idx2weight = (idx2weight * listamount + 512) >> 10;
        idx3weight = (idx3weight * listamount + 512) >> 10;

        if( mbx < width-1 && mby < height-1 )
        {
            CLIP_ADD( ref_costs[idx0+0], idx0weight );
            CLIP_ADD( ref_costs[idx0+1], idx1weight );
            CLIP_ADD( ref_costs[idx2+0], idx2weight );
            CLIP_ADD( ref_costs[idx2+1], idx3weight );
        }
        else
        {
            /* Note: this takes advantage of unsigned representation to
             * catch negative mbx/mby. */
            if( mby < height )
            {
                if( mbx < width )
                    CLIP_ADD( ref_costs[idx0+0], idx0weight );
                if( mbx+1 < width )
                    CLIP_ADD( ref_costs[idx0+1], idx1weight );
            }
            if( mby+1 < height )
            {
                if( mbx < width )
                    CLIP_ADD( ref_costs[idx2+0], idx2weight );
                if( mbx+1 < width )
                    CLIP_ADD( ref_costs[idx2+1], idx3weight );
            }
        }
    }
#undef CLIP_ADD
}

577
void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
Laurent Aimar's avatar
Laurent Aimar committed
578
{
579 580
    pf->mc_luma   = mc_luma;
    pf->get_ref   = get_ref;
Henrik Gramner's avatar
Henrik Gramner committed
581

582
    pf->mc_chroma = mc_chroma;
Laurent Aimar's avatar
Laurent Aimar committed
583

584 585 586 587 588
    pf->avg[PIXEL_16x16]= pixel_avg_16x16;
    pf->avg[PIXEL_16x8] = pixel_avg_16x8;
    pf->avg[PIXEL_8x16] = pixel_avg_8x16;
    pf->avg[PIXEL_8x8]  = pixel_avg_8x8;
    pf->avg[PIXEL_8x4]  = pixel_avg_8x4;
Henrik Gramner's avatar
Henrik Gramner committed
589
    pf->avg[PIXEL_4x16] = pixel_avg_4x16;
590 591 592
    pf->avg[PIXEL_4x8]  = pixel_avg_4x8;
    pf->avg[PIXEL_4x4]  = pixel_avg_4x4;
    pf->avg[PIXEL_4x2]  = pixel_avg_4x2;
Henrik Gramner's avatar
Henrik Gramner committed
593
    pf->avg[PIXEL_2x8]  = pixel_avg_2x8;
594 595
    pf->avg[PIXEL_2x4]  = pixel_avg_2x4;
    pf->avg[PIXEL_2x2]  = pixel_avg_2x2;
Loren Merritt's avatar
Loren Merritt committed
596

Dylan Yudaken's avatar
Dylan Yudaken committed
597 598 599 600 601
    pf->weight    = x264_mc_weight_wtab;
    pf->offsetadd = x264_mc_weight_wtab;
    pf->offsetsub = x264_mc_weight_wtab;
    pf->weight_cache = x264_weight_cache;

602
    pf->copy_16x16_unaligned = mc_copy_w16;
Loren Merritt's avatar
Loren Merritt committed
603 604 605 606
    pf->copy[PIXEL_16x16] = mc_copy_w16;
    pf->copy[PIXEL_8x8]   = mc_copy_w8;
    pf->copy[PIXEL_4x4]   = mc_copy_w4;

Henrik Gramner's avatar
Henrik Gramner committed
607 608 609
    pf->store_interleave_chroma       = store_interleave_chroma;
    pf->load_deinterleave_chroma_fenc = load_deinterleave_chroma_fenc;
    pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;
610

611
    pf->plane_copy = x264_plane_copy_c;
612 613
    pf->plane_copy_interleave = x264_plane_copy_interleave_c;
    pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
xvidfan's avatar
xvidfan committed
614
    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
James Weaver's avatar
James Weaver committed
615
    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
616

617
    pf->hpel_filter = hpel_filter;
618

619 620
    pf->prefetch_fenc_420 = prefetch_fenc_null;
    pf->prefetch_fenc_422 = prefetch_fenc_null;
621
    pf->prefetch_ref  = prefetch_ref_null;
Fiona Glaser's avatar
Fiona Glaser committed
622
    pf->memcpy_aligned = memcpy;
Fiona Glaser's avatar
Fiona Glaser committed
623
    pf->memzero_aligned = memzero_aligned;
Loren Merritt's avatar
Loren Merritt committed
624
    pf->frame_init_lowres_core = frame_init_lowres_core;
625

Loren Merritt's avatar
Loren Merritt committed
626 627 628 629 630
    pf->integral_init4h = integral_init4h;
    pf->integral_init8h = integral_init8h;
    pf->integral_init4v = integral_init4v;
    pf->integral_init8v = integral_init8v;

631
    pf->mbtree_propagate_cost = mbtree_propagate_cost;
632
    pf->mbtree_propagate_list = mbtree_propagate_list;
633

Steven Walters's avatar
Steven Walters committed
634
#if HAVE_MMX
Loren Merritt's avatar
Loren Merritt committed
635
    x264_mc_init_mmx( cpu, pf );
636
#endif
Steven Walters's avatar
Steven Walters committed
637
#if HAVE_ALTIVEC
Laurent Aimar's avatar
Laurent Aimar committed
638 639 640
    if( cpu&X264_CPU_ALTIVEC )
        x264_mc_altivec_init( pf );
#endif
Steven Walters's avatar
Steven Walters committed
641
#if HAVE_ARMV6
642 643
    x264_mc_init_arm( cpu, pf );
#endif
644 645

    if( cpu_independent )
646
    {
647
        pf->mbtree_propagate_cost = mbtree_propagate_cost;
648 649
        pf->mbtree_propagate_list = mbtree_propagate_list;
    }
Laurent Aimar's avatar
Laurent Aimar committed
650 651
}

652
void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
653
{
654
    const int b_interlaced = PARAM_INTERLACED;
Simon Horlick's avatar
Simon Horlick committed
655
    int start = mb_y*16 - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
656
    int height = (b_end ? frame->i_lines[0] + 16*PARAM_INTERLACED : (mb_y+b_interlaced)*16) + 8;
657

Loren Merritt's avatar
Loren Merritt committed
658 659
    if( mb_y & b_interlaced )
        return;
660

Fiona Glaser's avatar
Fiona Glaser committed
661
    for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
Simon Horlick's avatar
Simon Horlick committed
662
    {
Fiona Glaser's avatar
Fiona Glaser committed
663 664 665 666 667
        int stride = frame->i_stride[p];
        const int width = frame->i_width[p];
        int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd

        if( !b_interlaced || h->mb.b_adaptive_mbaff )
Simon Horlick's avatar
Simon Horlick committed
668
            h->mc.hpel_filter(
Fiona Glaser's avatar
Fiona Glaser committed
669 670 671 672 673
                frame->filtered[p][1] + offs,
                frame->filtered[p][2] + offs,
                frame->filtered[p][3] + offs,
                frame->plane[p] + offs,
                stride, width + 16, height - start,
Simon Horlick's avatar
Simon Horlick committed
674
                h->scratch_buffer );
Fiona Glaser's avatar
Fiona Glaser committed
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692

        if( b_interlaced )
        {
            /* MC must happen between pixels in the same field. */
            stride = frame->i_stride[p] << 1;
            start = (mb_y*16 >> 1) - 8;
            int height_fld = ((b_end ? frame->i_lines[p] : mb_y*16) >> 1) + 8;
            offs = start*stride - 8;
            for( int i = 0; i < 2; i++, offs += frame->i_stride[p] )
            {
                h->mc.hpel_filter(
                    frame->filtered_fld[p][1] + offs,
                    frame->filtered_fld[p][2] + offs,
                    frame->filtered_fld[p][3] + offs,
                    frame->plane_fld[p] + offs,
                    stride, width + 16, height_fld - start,
                    h->scratch_buffer );
            }
Simon Horlick's avatar
Simon Horlick committed
693
        }
694
    }
695 696

    /* generate integral image:
Loren Merritt's avatar
Loren Merritt committed
697 698
     * frame->integral contains 2 planes. in the upper plane, each element is
     * the sum of an 8x8 pixel region with top-left corner on that point.
Loren Merritt's avatar
Loren Merritt committed
699
     * in the lower plane, 4x4 sums (needed only with --partitions p4x4). */
700

Loren Merritt's avatar
Loren Merritt committed
701
    if( frame->integral )
702
    {
Fiona Glaser's avatar
Fiona Glaser committed
703
        int stride = frame->i_stride[0];
Loren Merritt's avatar
Loren Merritt committed
704 705
        if( start < 0 )
        {
706 707
            memset( frame->integral - PADV * stride - PADH, 0, stride * sizeof(uint16_t) );
            start = -PADV;
Loren Merritt's avatar
Loren Merritt committed
708 709
        }
        if( b_end )
710
            height += PADV-9;
711
        for( int y = start; y < height; y++ )
712
        {
713
            pixel    *pix  = frame->plane[0] + y * stride - PADH;
Loren Merritt's avatar
Loren Merritt committed
714 715 716 717 718 719 720 721 722 723 724
            uint16_t *sum8 = frame->integral + (y+1) * stride - PADH;
            uint16_t *sum4;
            if( h->frames.b_have_sub8x8_esa )
            {
                h->mc.integral_init4h( sum8, pix, stride );
                sum8 -= 8*stride;
                sum4 = sum8 + stride * (frame->i_lines[0] + PADV*2);
                if( y >= 8-PADV )
                    h->mc.integral_init4v( sum8, sum4, stride );
            }
            else
Loren Merritt's avatar
Loren Merritt committed
725
            {
Loren Merritt's avatar
Loren Merritt committed
726 727 728
                h->mc.integral_init8h( sum8, pix, stride );
                if( y >= 8-PADV )
                    h->mc.integral_init8v( sum8-8*stride, stride );
Loren Merritt's avatar
Loren Merritt committed
729 730
            }
        }
731
    }
732
}