/*****************************************************************************
 * mc.c: motion compensation
 *****************************************************************************
 * Copyright (C) 2003-2014 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

Loren Merritt's avatar
Loren Merritt committed
27
#include "common.h"

/* Architecture-specific motion compensation declarations. */
#if HAVE_MMX
#include "x86/mc.h"
#endif
#if ARCH_PPC
#include "ppc/mc.h"
#endif
#if ARCH_ARM
#include "arm/mc.h"
#endif
Laurent Aimar's avatar
Laurent Aimar committed
38 39


40 41 42
/* Rounded average of two pixel blocks: dst[x] = (src1[x] + src2[x] + 1) >> 1
 * over an i_width x i_height region. Each buffer advances by its own stride
 * (counted in pixels, not bytes) after every row. */
static inline void pixel_avg( pixel *dst,  intptr_t i_dst_stride,
                              pixel *src1, intptr_t i_src1_stride,
                              pixel *src2, intptr_t i_src2_stride, int i_width, int i_height )
{
    for( int y = 0; y < i_height; y++ )
    {
        for( int x = 0; x < i_width; x++ )
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        dst  += i_dst_stride;
        src1 += i_src1_stride;
        src2 += i_src2_stride;
    }
}

54 55 56
/* Unweighted bi-prediction average of a width x height block:
 * dst[x] = (src1[x] + src2[x] + 1) >> 1. Strides are in pixels. */
static inline void pixel_avg_wxh( pixel *dst,  intptr_t i_dst,
                                  pixel *src1, intptr_t i_src1,
                                  pixel *src2, intptr_t i_src2, int width, int height )
{
    for( int y = 0; y < height; y++ )
    {
        for( int x = 0; x < width; x++ )
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        src1 += i_src1;
        src2 += i_src2;
        dst += i_dst;
    }
}

/* Implicit weighted bipred only:
 * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
70 71 72
static inline void pixel_avg_weight_wxh( pixel *dst,  intptr_t i_dst,
                                         pixel *src1, intptr_t i_src1,
                                         pixel *src2, intptr_t i_src2, int width, int height, int i_weight1 )
73
{
74
    int i_weight2 = 64 - i_weight1;
75
    for( int y = 0; y<height; y++, dst += i_dst, src1 += i_src1, src2 += i_src2 )
76 77
        for( int x = 0; x<width; x++ )
            dst[x] = x264_clip_pixel( (src1[x]*i_weight1 + src2[x]*i_weight2 + (1<<5)) >> 6 );
78
}
Fiona Glaser's avatar
Fiona Glaser committed
79
#undef op_scale2
80

Fiona Glaser's avatar
Fiona Glaser committed
81
#define PIXEL_AVG_C( name, width, height ) \
82 83 84
static void name( pixel *pix1, intptr_t i_stride_pix1, \
                  pixel *pix2, intptr_t i_stride_pix2, \
                  pixel *pix3, intptr_t i_stride_pix3, int weight ) \
85
{ \
Anton Mitrofanov's avatar
Anton Mitrofanov committed
86
    if( weight == 32 ) \
Fiona Glaser's avatar
Fiona Glaser committed
87
        pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
Anton Mitrofanov's avatar
Anton Mitrofanov committed
88
    else \
Fiona Glaser's avatar
Fiona Glaser committed
89
        pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height, weight ); \
90
}
Fiona Glaser's avatar
Fiona Glaser committed
91 92 93 94 95
PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
PIXEL_AVG_C( pixel_avg_16x8,  16, 8 )
PIXEL_AVG_C( pixel_avg_8x16,  8, 16 )
PIXEL_AVG_C( pixel_avg_8x8,   8, 8 )
PIXEL_AVG_C( pixel_avg_8x4,   8, 4 )
Henrik Gramner's avatar
Henrik Gramner committed
96
PIXEL_AVG_C( pixel_avg_4x16,  4, 16 )
Fiona Glaser's avatar
Fiona Glaser committed
97 98 99
PIXEL_AVG_C( pixel_avg_4x8,   4, 8 )
PIXEL_AVG_C( pixel_avg_4x4,   4, 4 )
PIXEL_AVG_C( pixel_avg_4x2,   4, 2 )
Henrik Gramner's avatar
Henrik Gramner committed
100
PIXEL_AVG_C( pixel_avg_2x8,   2, 8 )
Fiona Glaser's avatar
Fiona Glaser committed
101 102
PIXEL_AVG_C( pixel_avg_2x4,   2, 4 )
PIXEL_AVG_C( pixel_avg_2x2,   2, 2 )
103

Dylan Yudaken's avatar
Dylan Yudaken committed
104 105 106 107
/* C version of the weight_cache hook: stores the currently selected weight
 * function table (h->mc.weight) in the weight descriptor. No other fields of
 * `w` are touched here. */
static void x264_weight_cache( x264_t *h, x264_weight_t *w )
{
    w->weightfn = h->mc.weight;
}
108 109
#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
110 111
static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                       const x264_weight_t *weight, int i_width, int i_height )
Dylan Yudaken's avatar
Dylan Yudaken committed
112
{
113 114 115 116
    int offset = weight->i_offset << (BIT_DEPTH-8);
    int scale = weight->i_scale;
    int denom = weight->i_denom;
    if( denom >= 1 )
Dylan Yudaken's avatar
Dylan Yudaken committed
117
    {
118 119
        for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
            for( int x = 0; x < i_width; x++ )
Dylan Yudaken's avatar
Dylan Yudaken committed
120 121 122 123
                opscale( x );
    }
    else
    {
124 125
        for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
            for( int x = 0; x < i_width; x++ )
Dylan Yudaken's avatar
Dylan Yudaken committed
126 127 128 129
                opscale_noden( x );
    }
}

130
#define MC_WEIGHT_C( name, width ) \
131
    static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \
Dylan Yudaken's avatar
Dylan Yudaken committed
132
{ \
133
    mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
Dylan Yudaken's avatar
Dylan Yudaken committed
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
}

MC_WEIGHT_C( mc_weight_w20, 20 )
MC_WEIGHT_C( mc_weight_w16, 16 )
MC_WEIGHT_C( mc_weight_w12, 12 )
MC_WEIGHT_C( mc_weight_w8,   8 )
MC_WEIGHT_C( mc_weight_w4,   4 )
MC_WEIGHT_C( mc_weight_w2,   2 )

static weight_fn_t x264_mc_weight_wtab[6] =
{
    mc_weight_w2,
    mc_weight_w4,
    mc_weight_w8,
    mc_weight_w12,
    mc_weight_w16,
    mc_weight_w20,
};
Anton Mitrofanov's avatar
Anton Mitrofanov committed
152
/* All-zero weight descriptors. Because weightfn is zero here, mc_luma() and
 * get_ref() take their unweighted paths when handed one of these entries.
 * NOTE(review): the three entries are presumably one per plane - confirm. */
const x264_weight_t x264_weight_none[3] = { {{0}} };
153
/* Copies an i_width x i_height block of pixels row by row.
 * Note the argument order: source first, destination second.
 * Strides are in pixels; rows must not overlap (memcpy is used). */
static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height )
{
    for( int y = 0; y < i_height; y++ )
    {
        memcpy( dst, src, i_width * sizeof(pixel) );

        src += i_src_stride;
        dst += i_dst_stride;
    }
}

Loren Merritt's avatar
Loren Merritt committed
164
#define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
165
static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
166
                         intptr_t stride, int width, int height, int16_t *buf )
Laurent Aimar's avatar
Laurent Aimar committed
167
{
168
    const int pad = (BIT_DEPTH > 9) ? (-10 * PIXEL_MAX) : 0;
169
    for( int y = 0; y < height; y++ )
Laurent Aimar's avatar
Laurent Aimar committed
170
    {
171
        for( int x = -2; x < width+3; x++ )
Laurent Aimar's avatar
Laurent Aimar committed
172
        {
Loren Merritt's avatar
Loren Merritt committed
173
            int v = TAPFILTER(src,stride);
174
            dstv[x] = x264_clip_pixel( (v + 16) >> 5 );
175 176
            /* transform v for storage in a 16-bit integer */
            buf[x+2] = v + pad;
Laurent Aimar's avatar
Laurent Aimar committed
177
        }
178
        for( int x = 0; x < width; x++ )
179
            dstc[x] = x264_clip_pixel( (TAPFILTER(buf+2,1) - 32*pad + 512) >> 10 );
180
        for( int x = 0; x < width; x++ )
181
            dsth[x] = x264_clip_pixel( (TAPFILTER(src,1) + 16) >> 5 );
Loren Merritt's avatar
Loren Merritt committed
182 183 184 185
        dsth += stride;
        dstv += stride;
        dstc += stride;
        src += stride;
Laurent Aimar's avatar
Laurent Aimar committed
186
    }
187 188
}

189 190
/* Lookup tables indexed by qpel_idx = ((mvy&3)<<2) + (mvx&3). They select
 * which of the four src[] planes supplies the first (hpel_ref0) and second
 * (hpel_ref1) operand in mc_luma() and get_ref(). hpel_ref1 is only read
 * when qpel_idx & 5 is non-zero.
 * NOTE(review): plane indices presumably mean 0 = full-pel, 1 = h, 2 = v,
 * 3 = centre, matching the filtered[] order in x264_frame_filter - confirm. */
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
Loren Merritt's avatar
Loren Merritt committed
191

192 193
/* Luma motion compensation into dst for a quarter-pel vector (mvx, mvy).
 * The integer part of the vector selects the position inside the chosen
 * src[] plane(s); the fractional part selects the planes via hpel_ref0/1.
 * When either component has an odd quarter-pel fraction (qpel_idx & 5) the
 * result is the rounded average of two planes; otherwise one plane is used.
 * If weight->weightfn is set, mc_weight() is applied to the result. */
static void mc_luma( pixel *dst,    intptr_t i_dst_stride,
                     pixel *src[4], intptr_t i_src_stride,
                     int mvx, int mvy,
                     int i_width, int i_height, const x264_weight_t *weight )
{
    int qpel_idx = ((mvy&3)<<2) + (mvx&3);
    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
    /* A vertical fraction of 3 starts the first operand one row further down. */
    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;

    if( qpel_idx & 5 ) /* qpel interpolation needed */
    {
        /* A horizontal fraction of 3 starts the second operand one pixel right. */
        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
        pixel_avg( dst, i_dst_stride, src1, i_src_stride,
                   src2, i_src_stride, i_width, i_height );
        if( weight->weightfn )
            mc_weight( dst, i_dst_stride, dst, i_dst_stride, weight, i_width, i_height );
    }
    else if( weight->weightfn )
        mc_weight( dst, i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
    else
        mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
}

215 216
/* Like mc_luma(), but avoids the copy when no interpolation or weighting is
 * needed.
 * Returns dst (filled with the predicted block, *i_dst_stride unchanged) when
 * averaging and/or weighting was performed; otherwise returns a pointer
 * directly into the source plane and sets *i_dst_stride to i_src_stride. */
static pixel *get_ref( pixel *dst,   intptr_t *i_dst_stride,
                       pixel *src[4], intptr_t i_src_stride,
                       int mvx, int mvy,
                       int i_width, int i_height, const x264_weight_t *weight )
{
    int qpel_idx = ((mvy&3)<<2) + (mvx&3);
    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;

    if( qpel_idx & 5 ) /* qpel interpolation needed */
    {
        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
        pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
                   src2, i_src_stride, i_width, i_height );
        if( weight->weightfn )
            mc_weight( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_width, i_height );
        return dst;
    }
    else if( weight->weightfn )
    {
        mc_weight( dst, *i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
        return dst;
    }
    else
    {
        *i_dst_stride = i_src_stride;
        return src1;
    }
}

Laurent Aimar's avatar
Laurent Aimar committed
245
/* full chroma mc (ie until 1/8 pixel)*/
246 247
static void mc_chroma( pixel *dstu, pixel *dstv, intptr_t i_dst_stride,
                       pixel *src, intptr_t i_src_stride,
248 249
                       int mvx, int mvy,
                       int i_width, int i_height )
Laurent Aimar's avatar
Laurent Aimar committed
250
{
251
    pixel *srcp;
Laurent Aimar's avatar
Laurent Aimar committed
252

253 254 255 256 257 258
    int d8x = mvx&0x07;
    int d8y = mvy&0x07;
    int cA = (8-d8x)*(8-d8y);
    int cB = d8x    *(8-d8y);
    int cC = (8-d8x)*d8y;
    int cD = d8x    *d8y;
Laurent Aimar's avatar
Laurent Aimar committed
259

260
    src += (mvy >> 3) * i_src_stride + (mvx >> 3)*2;
Laurent Aimar's avatar
Laurent Aimar committed
261 262
    srcp = &src[i_src_stride];

263
    for( int y = 0; y < i_height; y++ )
Laurent Aimar's avatar
Laurent Aimar committed
264
    {
265
        for( int x = 0; x < i_width; x++ )
266 267 268 269 270 271 272 273
        {
            dstu[x] = ( cA*src[2*x]  + cB*src[2*x+2] +
                        cC*srcp[2*x] + cD*srcp[2*x+2] + 32 ) >> 6;
            dstv[x] = ( cA*src[2*x+1]  + cB*src[2*x+3] +
                        cC*srcp[2*x+1] + cD*srcp[2*x+3] + 32 ) >> 6;
        }
        dstu += i_dst_stride;
        dstv += i_dst_stride;
Laurent Aimar's avatar
Laurent Aimar committed
274 275 276 277 278
        src   = srcp;
        srcp += i_src_stride;
    }
}

Loren Merritt's avatar
Loren Merritt committed
279
#define MC_COPY(W) \
280
static void mc_copy_w##W( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int i_height ) \
Loren Merritt's avatar
Loren Merritt committed
281 282 283 284 285 286 287
{ \
    mc_copy( src, i_src, dst, i_dst, W, i_height ); \
}
MC_COPY( 16 )
MC_COPY( 8 )
MC_COPY( 4 )

288 289
/* Copies h rows of w pixels from src to dst. Strides are in pixels. */
void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
                        pixel *src, intptr_t i_src, int w, int h )
{
    while( h-- )
    {
        memcpy( dst, src, w * sizeof(pixel) );
        dst += i_dst;
        src += i_src;
    }
}

299 300 301
/* Interleaves two planes into one: dst[2*x] = srcu[x], dst[2*x+1] = srcv[x]
 * for w samples per row over h rows. Strides are in pixels. */
void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
                                   pixel *srcu, intptr_t i_srcu,
                                   pixel *srcv, intptr_t i_srcv, int w, int h )
{
    for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
        for( int x=0; x<w; x++ )
        {
            dst[2*x]   = srcu[x];
            dst[2*x+1] = srcv[x];
        }
}

311 312 313
/* Splits an interleaved plane into two: dstu[x] = src[2*x],
 * dstv[x] = src[2*x+1] for w sample pairs per row over h rows.
 * Strides are in pixels. */
static void x264_plane_copy_deinterleave_c( pixel *dstu, intptr_t i_dstu,
                                            pixel *dstv, intptr_t i_dstv,
                                            pixel *src,  intptr_t i_src, int w, int h )
{
    for( int y=0; y<h; y++, dstu+=i_dstu, dstv+=i_dstv, src+=i_src )
        for( int x=0; x<w; x++ )
        {
            dstu[x] = src[2*x];
            dstv[x] = src[2*x+1];
        }
}

323 324 325 326
/* Splits packed pixels into three planes. `pw` is the number of samples per
 * packed pixel in src; the first three samples of each packed pixel go to
 * dsta, dstb and dstc respectively, any further samples are skipped.
 * Strides are in pixels. */
static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta,
                                                pixel *dstb, intptr_t i_dstb,
                                                pixel *dstc, intptr_t i_dstc,
                                                pixel *src,  intptr_t i_src, int pw, int w, int h )
{
    for( int y=0; y<h; y++, dsta+=i_dsta, dstb+=i_dstb, dstc+=i_dstc, src+=i_src )
    {
        for( int x=0; x<w; x++ )
        {
            dsta[x] = src[x*pw];
            dstb[x] = src[x*pw+1];
            dstc[x] = src[x*pw+2];
        }
    }
}

James Weaver's avatar
James Weaver committed
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
/* Unpacks 32-bit words carrying three 10-bit samples each (bits 0-9, 10-19,
 * 20-29) into separate luma and chroma rows. Every pair of input words
 * yields three chroma and three luma samples, in the order
 * C Y C | Y C Y. The inner loop advances `n` by 3 per word pair until it
 * reaches w. i_dsty/i_dstc are in pixels, i_src in 32-bit words. */
void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
                                          pixel *dstc, intptr_t i_dstc,
                                          uint32_t *src, intptr_t i_src, int w, int h )
{
    for( int row = 0; row < h; row++, dsty += i_dsty, dstc += i_dstc, src += i_src )
    {
        pixel *luma = dsty;
        pixel *chroma = dstc;
        uint32_t *in = src;

        for( int n = 0; n < w; n += 3, luma += 3, chroma += 3, in += 2 )
        {
            /* first word: chroma, luma, chroma */
            chroma[0] = in[0] & 0x03FF;
            luma[0]   = ( in[0] >> 10 ) & 0x03FF;
            chroma[1] = ( in[0] >> 20 ) & 0x03FF;
            /* second word: luma, chroma, luma */
            luma[1]   = in[1] & 0x03FF;
            chroma[2] = ( in[1] >> 10 ) & 0x03FF;
            luma[2]   = ( in[1] >> 20 ) & 0x03FF;
        }
    }
}

367
/* Interleaves two 8-sample-wide blocks into dst (dst[2*x] = srcu[x],
 * dst[2*x+1] = srcv[x]) for `height` rows. Both sources advance by
 * FDEC_STRIDE per row; dst advances by i_dst. */
static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
{
    for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
        for( int x=0; x<8; x++ )
        {
            dst[2*x]   = srcu[x];
            dst[2*x+1] = srcv[x];
        }
}

377
/* Deinterleaves 8 sample pairs per row from src into a buffer with row
 * stride FENC_STRIDE: even samples go to dst, odd samples to
 * dst + FENC_STRIDE/2 (i.e. side by side in the same row). */
static void load_deinterleave_chroma_fenc( pixel *dst, pixel *src, intptr_t i_src, int height )
{
    x264_plane_copy_deinterleave_c( dst, FENC_STRIDE, dst+FENC_STRIDE/2, FENC_STRIDE, src, i_src, 8, height );
}

382
/* Deinterleaves 8 sample pairs per row from src into a buffer with row
 * stride FDEC_STRIDE: even samples go to dst, odd samples to
 * dst + FDEC_STRIDE/2 (i.e. side by side in the same row). */
static void load_deinterleave_chroma_fdec( pixel *dst, pixel *src, intptr_t i_src, int height )
{
    x264_plane_copy_deinterleave_c( dst, FDEC_STRIDE, dst+FDEC_STRIDE/2, FDEC_STRIDE, src, i_src, 8, height );
}

387 388
/* No-op used as the C default for the prefetch_fenc_420/422 hooks. */
static void prefetch_fenc_null( pixel *pix_y,  intptr_t stride_y,
                                pixel *pix_uv, intptr_t stride_uv, int mb_x )
{}

391
/* No-op used as the C default for the prefetch_ref hook. */
static void prefetch_ref_null( pixel *pix, intptr_t stride, int parity )
{}

394
/* C fallback for the memzero_aligned hook: zeroes n bytes at dst. This
 * version places no alignment requirement on dst or n. */
static void memzero_aligned( void * dst, size_t n )
{
    memset( dst, 0, n );
}

399
/* Horizontal pass of the 4-wide integral image for one row:
 * sum[x] = pix[x] + ... + pix[x+3] + sum[x-stride], for x in [0, stride-4).
 * The row above (sum - stride) must already be valid. Results are stored
 * truncated to 16 bits. */
static void integral_init4h( uint16_t *sum, pixel *pix, intptr_t stride )
{
    int v = pix[0]+pix[1]+pix[2]+pix[3];
    for( int x = 0; x < stride-4; x++ )
    {
        sum[x] = v + sum[x-stride];
        /* slide the 4-pixel window one step to the right */
        v += pix[x+4] - pix[x];
    }
}

409
/* Horizontal pass of the 8-wide integral image for one row:
 * sum[x] = pix[x] + ... + pix[x+7] + sum[x-stride], for x in [0, stride-8).
 * The row above (sum - stride) must already be valid. Results are stored
 * truncated to 16 bits. */
static void integral_init8h( uint16_t *sum, pixel *pix, intptr_t stride )
{
    int v = pix[0]+pix[1]+pix[2]+pix[3]+pix[4]+pix[5]+pix[6]+pix[7];
    for( int x = 0; x < stride-8; x++ )
    {
        sum[x] = v + sum[x-stride];
        /* slide the 8-pixel window one step to the right */
        v += pix[x+8] - pix[x];
    }
}

419
/* Vertical pass producing both block sizes from 4-wide column sums in sum8,
 * for x in [0, stride-8):
 *   sum4[x] = sum8[x + 4*stride] - sum8[x]
 *   sum8[x] = sum8[x + 8*stride] + sum8[x + 8*stride + 4] - sum8[x] - sum8[x+4]
 * sum4 is filled first because the second loop overwrites sum8 in place.
 * Arithmetic wraps modulo 2^16 on store. */
static void integral_init4v( uint16_t *sum8, uint16_t *sum4, intptr_t stride )
{
    for( int x = 0; x < stride-8; x++ )
        sum4[x] = sum8[x+4*stride] - sum8[x];
    for( int x = 0; x < stride-8; x++ )
        sum8[x] = sum8[x+8*stride] + sum8[x+8*stride+4] - sum8[x] - sum8[x+4];
}

427
/* Vertical pass for the 8-row sums, in place:
 * sum8[x] = sum8[x + 8*stride] - sum8[x], for x in [0, stride-8).
 * Arithmetic wraps modulo 2^16 on store. */
static void integral_init8v( uint16_t *sum8, intptr_t stride )
{
    for( int x = 0; x < stride-8; x++ )
        sum8[x] = sum8[x+8*stride] - sum8[x];
}

Loren Merritt's avatar
Loren Merritt committed
433 434
/* Prepares the low-resolution planes of `frame` and resets its lookahead
 * state:
 *  - pads plane 0 with one extra column and row,
 *  - runs the frame_init_lowres_core hook to fill the four lowres planes,
 *  - expands the lowres borders,
 *  - marks cost estimates, row SATDs and lowres MVs as not yet computed. */
void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
{
    pixel *src = frame->plane[0];
    int i_stride = frame->i_stride[0];
    int i_height = frame->i_lines[0];
    int i_width  = frame->i_width[0];

    // duplicate last row and column so that their interpolation doesn't have to be special-cased
    for( int y = 0; y < i_height; y++ )
        src[i_width+y*i_stride] = src[i_width-1+y*i_stride];
    memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), (i_width+1) * sizeof(pixel) );
    h->mc.frame_init_lowres_core( src, frame->lowres[0], frame->lowres[1], frame->lowres[2], frame->lowres[3],
                                  i_stride, frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres );
    x264_frame_expand_border_lowres( frame );

    /* byte-wise fill with 0xFF, i.e. -1 in every integer element */
    memset( frame->i_cost_est, -1, sizeof(frame->i_cost_est) );

    for( int y = 0; y < h->param.i_bframe + 2; y++ )
        for( int x = 0; x < h->param.i_bframe + 2; x++ )
            frame->i_row_satds[y][x][0] = -1;

    /* 0x7FFF in the first MV component serves as the "not computed" marker */
    for( int y = 0; y <= !!h->param.i_bframe; y++ )
        for( int x = 0; x <= h->param.i_bframe; x++ )
            frame->lowres_mvs[y][x][0][0] = 0x7FFF;
}

459
/* Downsamples src0 by two in each direction into four planes of
 * width x height pixels:
 *   dst0 - aligned with the source grid,
 *   dsth - shifted one source pixel right,
 *   dstv - shifted one source pixel down,
 *   dstc - shifted one source pixel right and down.
 * Each output is an average of a 2x2 source neighbourhood built from nested
 * rounded pair averages. Reads reach source column 2*width and two rows
 * below the current source row, so the source needs that padding. */
static void frame_init_lowres_core( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
                                    intptr_t src_stride, intptr_t dst_stride, int width, int height )
{
    for( int y = 0; y < height; y++ )
    {
        pixel *src1 = src0+src_stride;
        pixel *src2 = src1+src_stride;
        for( int x = 0; x<width; x++ )
        {
            // slower than naive bilinear, but matches asm
#define FILTER(a,b,c,d) ((((a+b+1)>>1)+((c+d+1)>>1)+1)>>1)
            dst0[x] = FILTER(src0[2*x  ], src1[2*x  ], src0[2*x+1], src1[2*x+1]);
            dsth[x] = FILTER(src0[2*x+1], src1[2*x+1], src0[2*x+2], src1[2*x+2]);
            dstv[x] = FILTER(src1[2*x  ], src2[2*x  ], src1[2*x+1], src2[2*x+1]);
            dstc[x] = FILTER(src1[2*x+1], src2[2*x+1], src1[2*x+2], src2[2*x+2]);
#undef FILTER
        }
        src0 += src_stride*2;
        dst0 += dst_stride;
        dsth += dst_stride;
        dstv += dst_stride;
        dstc += dst_stride;
    }
}

484 485 486
/* Estimate the total amount of influence on future quality that could be had if we
 * were to improve the reference samples used to inter predict any given macroblock. */
static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
487
                                   uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
488
{
489
    float fps = *fps_factor;
490
    for( int i = 0; i < len; i++ )
491
    {
492 493 494 495 496
        float intra_cost       = intra_costs[i] * inv_qscales[i];
        float propagate_amount = propagate_in[i] + intra_cost*fps;
        float propagate_num    = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
        float propagate_denom  = intra_costs[i];
        dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
497 498 499
    }
}

500
/* Fills the motion-compensation function table `pf`.
 * All entries are first set to the portable C implementations from this
 * file; the architecture-specific init routines compiled in (MMX, AltiVec,
 * ARM) may then override entries according to `cpu`.
 * If `cpu_independent` is non-zero, mbtree_propagate_cost is reset to the C
 * version after the architecture-specific overrides. */
void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
{
    pf->mc_luma   = mc_luma;
    pf->get_ref   = get_ref;

    pf->mc_chroma = mc_chroma;

    pf->avg[PIXEL_16x16]= pixel_avg_16x16;
    pf->avg[PIXEL_16x8] = pixel_avg_16x8;
    pf->avg[PIXEL_8x16] = pixel_avg_8x16;
    pf->avg[PIXEL_8x8]  = pixel_avg_8x8;
    pf->avg[PIXEL_8x4]  = pixel_avg_8x4;
    pf->avg[PIXEL_4x16] = pixel_avg_4x16;
    pf->avg[PIXEL_4x8]  = pixel_avg_4x8;
    pf->avg[PIXEL_4x4]  = pixel_avg_4x4;
    pf->avg[PIXEL_4x2]  = pixel_avg_4x2;
    pf->avg[PIXEL_2x8]  = pixel_avg_2x8;
    pf->avg[PIXEL_2x4]  = pixel_avg_2x4;
    pf->avg[PIXEL_2x2]  = pixel_avg_2x2;

    /* The C build uses one table for weight, offsetadd and offsetsub. */
    pf->weight    = x264_mc_weight_wtab;
    pf->offsetadd = x264_mc_weight_wtab;
    pf->offsetsub = x264_mc_weight_wtab;
    pf->weight_cache = x264_weight_cache;

    pf->copy_16x16_unaligned = mc_copy_w16;
    pf->copy[PIXEL_16x16] = mc_copy_w16;
    pf->copy[PIXEL_8x8]   = mc_copy_w8;
    pf->copy[PIXEL_4x4]   = mc_copy_w4;

    pf->store_interleave_chroma       = store_interleave_chroma;
    pf->load_deinterleave_chroma_fenc = load_deinterleave_chroma_fenc;
    pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;

    pf->plane_copy = x264_plane_copy_c;
    pf->plane_copy_interleave = x264_plane_copy_interleave_c;
    pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;

    pf->hpel_filter = hpel_filter;

    pf->prefetch_fenc_420 = prefetch_fenc_null;
    pf->prefetch_fenc_422 = prefetch_fenc_null;
    pf->prefetch_ref  = prefetch_ref_null;
    pf->memcpy_aligned = memcpy;
    pf->memzero_aligned = memzero_aligned;
    pf->frame_init_lowres_core = frame_init_lowres_core;

    pf->integral_init4h = integral_init4h;
    pf->integral_init8h = integral_init8h;
    pf->integral_init4v = integral_init4v;
    pf->integral_init8v = integral_init8v;

    pf->mbtree_propagate_cost = mbtree_propagate_cost;

#if HAVE_MMX
    x264_mc_init_mmx( cpu, pf );
#endif
#if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )
        x264_mc_altivec_init( pf );
#endif
#if HAVE_ARMV6
    x264_mc_init_arm( cpu, pf );
#endif

    /* Undo any architecture-specific override of this one entry. */
    if( cpu_independent )
        pf->mbtree_propagate_cost = mbtree_propagate_cost;
}

571
/* Runs the half-pel filter, and if frame->integral is set the integral-image
 * update, for the rows of `frame` that become available at macroblock row
 * mb_y. b_end selects the "up to the bottom of the frame" row range.
 * For interlaced content the call returns immediately on odd mb_y; the
 * per-field planes are filtered with a doubled stride, and the frame planes
 * only when adaptive MBAFF is enabled.
 * Uses h->scratch_buffer as the hpel_filter work buffer. */
void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
    const int b_interlaced = PARAM_INTERLACED;
    int start = mb_y*16 - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
    int height = (b_end ? frame->i_lines[0] + 16*PARAM_INTERLACED : (mb_y+b_interlaced)*16) + 8;

    if( mb_y & b_interlaced )
        return;

    /* NOTE(review): in the interlaced branch below `start` is overwritten with
     * the field-row value; that value is then used for later planes and for
     * the integral section - confirm this is intended. */
    for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
    {
        int stride = frame->i_stride[p];
        const int width = frame->i_width[p];
        int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd

        if( !b_interlaced || h->mb.b_adaptive_mbaff )
            h->mc.hpel_filter(
                frame->filtered[p][1] + offs,
                frame->filtered[p][2] + offs,
                frame->filtered[p][3] + offs,
                frame->plane[p] + offs,
                stride, width + 16, height - start,
                h->scratch_buffer );

        if( b_interlaced )
        {
            /* MC must happen between pixels in the same field. */
            stride = frame->i_stride[p] << 1;
            start = (mb_y*16 >> 1) - 8;
            int height_fld = ((b_end ? frame->i_lines[p] : mb_y*16) >> 1) + 8;
            offs = start*stride - 8;
            /* i = 0 and i = 1 start on consecutive frame rows, one per field */
            for( int i = 0; i < 2; i++, offs += frame->i_stride[p] )
            {
                h->mc.hpel_filter(
                    frame->filtered_fld[p][1] + offs,
                    frame->filtered_fld[p][2] + offs,
                    frame->filtered_fld[p][3] + offs,
                    frame->plane_fld[p] + offs,
                    stride, width + 16, height_fld - start,
                    h->scratch_buffer );
            }
        }
    }

    /* generate integral image:
     * frame->integral contains 2 planes. in the upper plane, each element is
     * the sum of an 8x8 pixel region with top-left corner on that point.
     * in the lower plane, 4x4 sums (needed only with --partitions p4x4). */

    if( frame->integral )
    {
        int stride = frame->i_stride[0];
        if( start < 0 )
        {
            /* clear the row above the first one so the horizontal pass has a
             * zero base to accumulate onto */
            memset( frame->integral - PADV * stride - PADH, 0, stride * sizeof(uint16_t) );
            start = -PADV;
        }
        if( b_end )
            height += PADV-9;
        for( int y = start; y < height; y++ )
        {
            pixel    *pix  = frame->plane[0] + y * stride - PADH;
            uint16_t *sum8 = frame->integral + (y+1) * stride - PADH;
            uint16_t *sum4;
            if( h->frames.b_have_sub8x8_esa )
            {
                h->mc.integral_init4h( sum8, pix, stride );
                sum8 -= 8*stride;
                sum4 = sum8 + stride * (frame->i_lines[0] + PADV*2);
                if( y >= 8-PADV )
                    h->mc.integral_init4v( sum8, sum4, stride );
            }
            else
            {
                h->mc.integral_init8h( sum8, pix, stride );
                if( y >= 8-PADV )
                    h->mc.integral_init8v( sum8-8*stride, stride );
            }
        }
    }
}