mc.c 18.3 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1
2
3
/*****************************************************************************
 * mc.c: h264 encoder library (Motion Compensation)
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

Loren Merritt's avatar
Loren Merritt committed
24
#include "common.h"
Laurent Aimar's avatar
Laurent Aimar committed
25

26
#ifdef HAVE_MMX
27
#include "x86/mc.h"
Laurent Aimar's avatar
Laurent Aimar committed
28
#endif
29
#ifdef ARCH_PPC
30
#include "ppc/mc.h"
Laurent Aimar's avatar
Laurent Aimar committed
31
#endif
32
33
34
#ifdef ARCH_ARM
#include "arm/mc.h"
#endif
Laurent Aimar's avatar
Laurent Aimar committed
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54


/* Rounded average of two pixel blocks.
 * Every pixel of the i_width x i_height destination block receives
 * (src1 + src2 + 1) >> 1.  Each of the three buffers is walked with its
 * own stride. */
static inline void pixel_avg( uint8_t *dst,  int i_dst_stride,
                              uint8_t *src1, int i_src1_stride,
                              uint8_t *src2, int i_src2_stride,
                              int i_width, int i_height )
{
    int rows_left, col;

    for( rows_left = i_height; rows_left > 0; rows_left-- )
    {
        for( col = 0; col < i_width; col++ )
        {
            /* operands are promoted to int, so the sum cannot wrap */
            const int sum = src1[col] + src2[col];
            dst[col] = ( sum + 1 ) >> 1;
        }
        src1 += i_src1_stride;
        src2 += i_src2_stride;
        dst  += i_dst_stride;
    }
}

55
/* Unweighted bi-prediction average: dst = (src1 + src2 + 1) >> 1 for a
 * width x height block.
 *   dst,  i_dst  - destination block and its stride
 *   src1, i_src1 - first source block and its stride
 *   src2, i_src2 - second source block and its stride
 * Used by the PIXEL_AVG_C wrappers when the bipred weight is 32. */
static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src1, int i_src1, uint8_t *src2, int i_src2, int width, int height )
{
    int x, y;
    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < width; x++ )
        {
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        }
        src1 += i_src1;
        src2 += i_src2;
        dst += i_dst;
    }
}

/* Implicit weighted bipred only:
 * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
72
73
74
/* One weighted bi-prediction sample: both weights sum to 64, so the result is
 * rounded with 1<<5 and shifted right by 6, then clipped by x264_clip_uint8().
 * Expects dst, src1, src2, i_weight1 and i_weight2 in scope. */
#define op_scale2(x) dst[x] = x264_clip_uint8( (src1[x]*i_weight1 + src2[x]*i_weight2 + (1<<5)) >> 6 )

/* Weighted average of two blocks with weights i_weight1 and 64 - i_weight1.
 * Each row is fully unrolled: columns are produced in runs ending at 2, 4, 8
 * and 16, and the row stops early when width equals 2, 4 or 8.  Any other
 * width therefore writes all 16 columns. */
static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src1, int i_src1, uint8_t *src2, int i_src2, int width, int height, int i_weight1 )
{
    int y;
    const int i_weight2 = 64 - i_weight1;
    /* "continue" still runs the increment expression, advancing all three rows */
    for( y = 0; y<height; y++, dst += i_dst, src1 += i_src1, src2 += i_src2 )
    {
        op_scale2(0);
        op_scale2(1);
        if(width==2) continue;
        op_scale2(2);
        op_scale2(3);
        if(width==4) continue;
        op_scale2(4);
        op_scale2(5);
        op_scale2(6);
        op_scale2(7);
        if(width==8) continue;
        op_scale2(8);
        op_scale2(9);
        op_scale2(10);
        op_scale2(11);
        op_scale2(12);
        op_scale2(13);
        op_scale2(14);
        op_scale2(15);
    }
}
#undef op_scale2
101

Fiona Glaser's avatar
Fiona Glaser committed
102
103
104
105
/* Generates a fixed-size averaging function `name` for a width x height block.
 * pix1 is the destination; pix2 and pix3 are the two sources.  A weight of 32
 * (equal weights out of 64) selects the plain rounded average, any other
 * value the weighted path with `weight` applied to pix2. */
#define PIXEL_AVG_C( name, width, height ) \
static void name( uint8_t *pix1, int i_stride_pix1, \
                  uint8_t *pix2, int i_stride_pix2, \
                  uint8_t *pix3, int i_stride_pix3, int weight ) \
{ \
    if( weight == 32 )\
        pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
    else\
        pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height, weight ); \
}
PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
PIXEL_AVG_C( pixel_avg_16x8,  16, 8 )
PIXEL_AVG_C( pixel_avg_8x16,  8, 16 )
PIXEL_AVG_C( pixel_avg_8x8,   8, 8 )
PIXEL_AVG_C( pixel_avg_8x4,   8, 4 )
PIXEL_AVG_C( pixel_avg_4x8,   4, 8 )
PIXEL_AVG_C( pixel_avg_4x4,   4, 4 )
PIXEL_AVG_C( pixel_avg_4x2,   4, 2 )
PIXEL_AVG_C( pixel_avg_2x4,   2, 4 )
PIXEL_AVG_C( pixel_avg_2x2,   2, 2 )
122

Dylan Yudaken's avatar
Dylan Yudaken committed
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
/* Default weight_cache hook: stores the currently selected weighting
 * function table (h->mc.weight) in the weight's weightfn member. */
static void x264_weight_cache( x264_t *h, x264_weight_t *w )
{
    w->weightfn = h->mc.weight;
}
/* Weighted prediction of one sample: src[x] is multiplied by weight->i_scale,
 * rounded with 1<<(i_denom-1), shifted right by weight->i_denom, offset by
 * weight->i_offset and clipped by x264_clip_uint8().  Expects dst, src and
 * weight in scope.  The callers below only use it when i_denom >= 1, which
 * keeps the rounding shift count non-negative. */
#define opscale(x) dst[x] = x264_clip_uint8( ( ( ( src[x] * weight->i_scale ) + (1<<(weight->i_denom - 1) ) )>> weight->i_denom ) + weight->i_offset )
/* Same without rounding or shifting; the callers below use it when
 * weight->i_denom < 1. */
#define opscale_noden(x) dst[x] = x264_clip_uint8( ( src[x] * weight->i_scale ) + weight->i_offset )
/* Applies the weight to an i_width x i_height block, reading from src and
 * writing to dst (mc_luma()/get_ref() also call it with dst == src).
 * The denominator test is made once per call: with i_denom < 1 the samples
 * are only scaled and offset, otherwise they are also rounded and shifted. */
static inline void mc_weight( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
{
    int col, rows_left;

    if( weight->i_denom < 1 )
    {
        for( rows_left = i_height; rows_left > 0; rows_left-- )
        {
            for( col = 0; col < i_width; col++ )
                opscale_noden( col );
            dst += i_dst_stride;
            src += i_src_stride;
        }
        return;
    }

    for( rows_left = i_height; rows_left > 0; rows_left-- )
    {
        for( col = 0; col < i_width; col++ )
            opscale( col );
        dst += i_dst_stride;
        src += i_src_stride;
    }
}

/* Generates a fixed-width weighting function `name` that processes `lx`
 * columns per row.  The work is delegated to mc_weight(), which performs the
 * same per-sample operations; with a constant width the inline helper can be
 * specialized by the compiler. */
#define MC_WEIGHT_C( name, lx ) \
static void name( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int height ) \
{ \
    mc_weight( dst, i_dst_stride, src, i_src_stride, weight, lx, height ); \
}

MC_WEIGHT_C( mc_weight_w20, 20 )
MC_WEIGHT_C( mc_weight_w16, 16 )
MC_WEIGHT_C( mc_weight_w12, 12 )
MC_WEIGHT_C( mc_weight_w8,   8 )
MC_WEIGHT_C( mc_weight_w4,   4 )
MC_WEIGHT_C( mc_weight_w2,   2 )

/* C weighting functions ordered by increasing block width
 * (2, 4, 8, 12, 16, 20).  x264_mc_init() installs this one table as the
 * weight, offsetadd and offsetsub entries. */
static weight_fn_t x264_mc_weight_wtab[6] =
{
    mc_weight_w2,
    mc_weight_w4,
    mc_weight_w8,
    mc_weight_w12,
    mc_weight_w16,
    mc_weight_w20,
};
/* All-zero weights.  Their weightfn member is zero, which mc_luma() and
 * get_ref() test to skip weighting. */
const x264_weight_t weight_none[3] = { {{0}} };
Laurent Aimar's avatar
Laurent Aimar committed
184
185
186
187
188
189
190
191
192
193
194
195
196
/* Copies an i_width x i_height block row by row.
 * Note the argument order: source first, then destination. */
static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int rows_left;

    for( rows_left = i_height; rows_left > 0; rows_left-- )
    {
        memcpy( dst, src, i_width );
        dst += i_dst_stride;
        src += i_src_stride;
    }
}

Loren Merritt's avatar
Loren Merritt committed
197
198
/* 6-tap filter (1, -5, 20, 20, -5, 1) around position x of `pix`, with taps
 * spaced d elements apart.  Expects x in scope; the result is unnormalized. */
#define TAPFILTER(pix, d) ((pix)[x-2*(d)] + (pix)[x+3*(d)] - 5*((pix)[x-(d)] + (pix)[x+2*(d)]) + 20*((pix)[x] + (pix)[x+(d)]))

/* Builds the three filtered planes for `height` rows of `width` pixels:
 *   dsth - src filtered horizontally
 *   dstv - src filtered vertically
 *   dstc - the horizontal filter applied to the unrounded vertical results
 * All four planes share `stride`.  buf holds one row of unrounded vertical
 * results and must provide at least width+5 entries.  The vertical pass covers
 * columns -2 .. width+2, so dstv is also written two columns to the left and
 * three to the right of the block, and src is read in that range as well as
 * two rows above and three rows below each row. */
static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
                         int stride, int width, int height, int16_t *buf )
{
    int x, y;
    for( y=0; y<height; y++ )
    {
        /* the extra columns feed the horizontal taps of the dstc pass */
        for( x=-2; x<width+3; x++ )
        {
            int v = TAPFILTER(src,stride);
            dstv[x] = x264_clip_uint8((v + 16) >> 5);
            buf[x+2] = v;
        }
        /* two filter passes were applied, hence the rounding for a shift of 10 */
        for( x=0; x<width; x++ )
            dstc[x] = x264_clip_uint8((TAPFILTER(buf+2,1) + 512) >> 10);
        for( x=0; x<width; x++ )
            dsth[x] = x264_clip_uint8((TAPFILTER(src,1) + 16) >> 5);
        dsth += stride;
        dstv += stride;
        dstc += stride;
        src += stride;
    }
}

Loren Merritt's avatar
Loren Merritt committed
221
222
223
/* Plane selectors for quarter-pel interpolation, indexed by
 * qpel_idx = ((mvy&3)<<2) + (mvx&3).  hpel_ref0 picks the first plane out of
 * src[4]; hpel_ref1 picks the plane averaged with it when qpel_idx & 5 is
 * non-zero.
 * NOTE(review): presumably 0 = full-pel, 1 = horizontal, 2 = vertical and
 * 3 = centre half-pel plane, matching frame->filtered[] -- confirm against
 * the callers. */
static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};

224
225
static void mc_luma( uint8_t *dst,    int i_dst_stride,
                     uint8_t *src[4], int i_src_stride,
Loren Merritt's avatar
Loren Merritt committed
226
                     int mvx, int mvy,
Dylan Yudaken's avatar
Dylan Yudaken committed
227
                     int i_width, int i_height, const x264_weight_t *weight )
228
{
Loren Merritt's avatar
Loren Merritt committed
229
230
231
    int qpel_idx = ((mvy&3)<<2) + (mvx&3);
    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
232

Loren Merritt's avatar
Loren Merritt committed
233
    if( qpel_idx & 5 ) /* qpel interpolation needed */
234
    {
Loren Merritt's avatar
Loren Merritt committed
235
        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
236
237
        pixel_avg( dst, i_dst_stride, src1, i_src_stride,
                   src2, i_src_stride, i_width, i_height );
Dylan Yudaken's avatar
Dylan Yudaken committed
238
239
        if( weight->weightfn )
            mc_weight( dst, i_dst_stride, dst, i_dst_stride, weight, i_width, i_height );
240
    }
Dylan Yudaken's avatar
Dylan Yudaken committed
241
242
    else if( weight->weightfn )
        mc_weight( dst, i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
243
244
245
246
    else
        mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
}

247
248
static uint8_t *get_ref( uint8_t *dst,   int *i_dst_stride,
                         uint8_t *src[4], int i_src_stride,
Loren Merritt's avatar
Loren Merritt committed
249
                         int mvx, int mvy,
Dylan Yudaken's avatar
Dylan Yudaken committed
250
                         int i_width, int i_height, const x264_weight_t *weight )
251
{
Loren Merritt's avatar
Loren Merritt committed
252
253
254
    int qpel_idx = ((mvy&3)<<2) + (mvx&3);
    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
255

Loren Merritt's avatar
Loren Merritt committed
256
    if( qpel_idx & 5 ) /* qpel interpolation needed */
257
    {
Loren Merritt's avatar
Loren Merritt committed
258
        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
259
260
        pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
                   src2, i_src_stride, i_width, i_height );
Dylan Yudaken's avatar
Dylan Yudaken committed
261
262
263
264
265
266
267
        if( weight->weightfn )
            mc_weight( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_width, i_height );
        return dst;
    }
    else if( weight->weightfn )
    {
        mc_weight( dst, *i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
268
269
270
271
272
273
274
275
276
        return dst;
    }
    else
    {
        *i_dst_stride = i_src_stride;
        return src1;
    }
}

Laurent Aimar's avatar
Laurent Aimar committed
277
/* full chroma mc (ie until 1/8 pixel)*/
278
279
280
281
/* Chroma motion compensation with bilinear interpolation.
 *   mvx, mvy - motion vector in 1/8-pel units: the low three bits are the
 *              fraction, the remaining bits the integer displacement
 * The four weights cA..cD sum to 64, hence the +32 rounding and shift by 6.
 * The right-hand neighbour and the row below are always read, even when
 * their weight is zero. */
static void mc_chroma( uint8_t *dst, int i_dst_stride,
                       uint8_t *src, int i_src_stride,
                       int mvx, int mvy,
                       int i_width, int i_height )
{
    uint8_t *srcp;
    int x, y;

    const int d8x = mvx&0x07;
    const int d8y = mvy&0x07;

    const int cA = (8-d8x)*(8-d8y);
    const int cB = d8x    *(8-d8y);
    const int cC = (8-d8x)*d8y;
    const int cD = d8x    *d8y;

    src  += (mvy >> 3) * i_src_stride + (mvx >> 3);
    srcp = &src[i_src_stride];

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = ( cA*src[x]  + cB*src[x+1] +
                       cC*srcp[x] + cD*srcp[x+1] + 32 ) >> 6;
        }
        dst  += i_dst_stride;

        /* the lower row of this iteration is the upper row of the next */
        src   = srcp;
        srcp += i_src_stride;
    }
}

Loren Merritt's avatar
Loren Merritt committed
311
312
313
314
315
316
317
318
319
/* Generates mc_copy_w<WIDTH>(), a fixed-width block copy with the
 * destination-first argument order; it forwards to mc_copy(), which takes
 * the source first. */
#define MC_COPY(WIDTH) \
static void mc_copy_w##WIDTH( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int i_height ) \
{ \
    mc_copy( src, i_src, dst, i_dst, WIDTH, i_height ); \
}
MC_COPY( 4 )
MC_COPY( 8 )
MC_COPY( 16 )

320
321
322
323
324
325
326
327
328
329
330
/* Copies h rows of w bytes from src to dst; i_dst and i_src are the
 * respective strides. */
static void plane_copy( uint8_t *dst, int i_dst,
                        uint8_t *src, int i_src, int w, int h)
{
    for( ; h != 0; h-- )
    {
        memcpy( dst, src, w );
        src += i_src;
        dst += i_dst;
    }
}

Fiona Glaser's avatar
Fiona Glaser committed
331
332
/* No-op prefetch_fenc implementation installed by x264_mc_init(). */
static void prefetch_fenc_null( uint8_t *pix_y, int stride_y,
                                uint8_t *pix_uv, int stride_uv, int mb_x )
{}

Fiona Glaser's avatar
Fiona Glaser committed
335
/* No-op prefetch_ref implementation installed by x264_mc_init(). */
static void prefetch_ref_null( uint8_t *pix, int stride, int parity )
{}

Fiona Glaser's avatar
Fiona Glaser committed
338
339
340
341
342
/* C implementation of memzero_aligned: clears n bytes at dst with memset(). */
static void memzero_aligned( void * dst, int n )
{
    (void)memset( dst, 0, (size_t)n );
}

Loren Merritt's avatar
Loren Merritt committed
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
/* Horizontal pass of the 4-wide integral image for one row.
 * For the first stride-4 columns, stores the sum of the 4 pixels starting at
 * that column plus the entry one row above (sum[col-stride]); the caller
 * must therefore make the previous row of sum accessible. */
static void integral_init4h( uint16_t *sum, uint8_t *pix, int stride )
{
    const int cols = stride - 4;
    int window = pix[0] + pix[1] + pix[2] + pix[3];
    int col;

    for( col = 0; col < cols; col++ )
    {
        sum[col] = window + sum[col - stride];
        /* slide the window: add the pixel entering, drop the one leaving */
        window += pix[col + 4] - pix[col];
    }
}

/* Horizontal pass of the 8-wide integral image for one row.
 * For the first stride-8 columns, stores the sum of the 8 pixels starting at
 * that column plus the entry one row above (sum[col-stride]). */
static void integral_init8h( uint16_t *sum, uint8_t *pix, int stride )
{
    const int cols = stride - 8;
    int window = 0;
    int col;

    for( col = 0; col < 8; col++ )
        window += pix[col];

    for( col = 0; col < cols; col++ )
    {
        sum[col] = window + sum[col - stride];
        /* slide the window: add the pixel entering, drop the one leaving */
        window += pix[col + 8] - pix[col];
    }
}

/* Vertical pass for the 4-wide sums, done in two separate sweeps over the
 * first stride-8 columns:
 *   1. sum4[x] = difference between the entry 4 rows below and sum8[x]
 *   2. sum8[x] is replaced by the 8-row difference at x plus the one at x+4 */
static void integral_init4v( uint16_t *sum8, uint16_t *sum4, int stride )
{
    const int cols  = stride - 8;
    const int down4 = 4*stride;
    const int down8 = 8*stride;
    int col;

    for( col = 0; col < cols; col++ )
        sum4[col] = sum8[col + down4] - sum8[col];

    for( col = 0; col < cols; col++ )
        sum8[col] = sum8[col + down8] + sum8[col + down8 + 4] - sum8[col] - sum8[col + 4];
}

/* Vertical pass for the 8-wide sums: each of the first stride-8 entries is
 * replaced by the difference between the entry 8 rows below and itself. */
static void integral_init8v( uint16_t *sum8, int stride )
{
    const int cols  = stride - 8;
    const int down8 = 8*stride;
    int col;

    for( col = 0; col < cols; col++ )
        sum8[col] = sum8[col + down8] - sum8[col];
}

Loren Merritt's avatar
Loren Merritt committed
379
380
381
382
383
384
385
386
387
388
389
/* Builds the four low-resolution planes of a frame from its luma plane and
 * resets the per-frame values filled in below: i_cost_est, the first entry
 * of each i_row_satds row and the first lowres motion vector component. */
void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
{
    uint8_t *src = frame->plane[0];
    int i_stride = frame->i_stride[0];
    int i_height = frame->i_lines[0];
    int i_width  = frame->i_width[0];
    int x, y;

    // duplicate last row and column so that their interpolation doesn't have to be special-cased
    for( y=0; y<i_height; y++ )
        src[i_width+y*i_stride] = src[i_width-1+y*i_stride];
    memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), i_width+1 );
    h->mc.frame_init_lowres_core( src, frame->lowres[0], frame->lowres[1], frame->lowres[2], frame->lowres[3],
                                  i_stride, frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres );
    x264_frame_expand_border_lowres( frame );

    /* every byte becomes 0xFF */
    memset( frame->i_cost_est, -1, sizeof(frame->i_cost_est) );

    for( x = 0; x < h->param.i_bframe + 2; x++ )
        for( y = 0; y < h->param.i_bframe + 2; y++ )
            frame->i_row_satds[y][x][0] = -1;

    /* the second list (y == 1) is only touched when B-frames are enabled */
    for( y = 0; y <= !!h->param.i_bframe; y++ )
        for( x = 0; x <= h->param.i_bframe; x++ )
            frame->lowres_mvs[y][x][0][0] = 0x7FFF;
}

/* Rounded average of four samples, built from two pairwise rounded averages.
 * slower than naive bilinear, but matches asm */
#define LOWRES_FILTER(a,b,c,d) (((((a)+(b)+1)>>1)+(((c)+(d)+1)>>1)+1)>>1)

/* Downsamples src0 by two in both directions into four planes of
 * width x height samples:
 *   dst0 - average of the 2x2 block at (2x, 2y)
 *   dsth - same block shifted one source column right
 *   dstv - same block shifted one source row down
 *   dstc - same block shifted one column right and one row down
 * Each output row consumes two source rows and reads a third one, and each
 * output sample reads up to source column 2x+2. */
static void frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
                                    int src_stride, int dst_stride, int width, int height )
{
    int row, col;

    for( row = 0; row < height; row++ )
    {
        const uint8_t *top = src0;
        const uint8_t *mid = top + src_stride;
        const uint8_t *bot = mid + src_stride;

        for( col = 0; col < width; col++ )
        {
            const int l = 2*col;
            const int m = l + 1;
            const int r = l + 2;

            dst0[col] = LOWRES_FILTER( top[l], mid[l], top[m], mid[m] );
            dsth[col] = LOWRES_FILTER( top[m], mid[m], top[r], mid[r] );
            dstv[col] = LOWRES_FILTER( mid[l], bot[l], mid[m], bot[m] );
            dstc[col] = LOWRES_FILTER( mid[m], bot[m], mid[r], bot[r] );
        }

        src0 += src_stride*2;
        dst0 += dst_stride;
        dsth += dst_stride;
        dstv += dst_stride;
        dstc += dst_stride;
    }
}
#undef LOWRES_FILTER

432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
#if defined(__GNUC__) && (defined(ARCH_X86) || defined(ARCH_X86_64))
// gcc isn't smart enough to use the "idiv" instruction
/* Divides the 64-bit x by the 32-bit y with a single idiv and returns the
 * quotient.  Per the constraints, the low 32 bits of x go in through the "a"
 * register and the high 32 bits through "d"; the quotient comes back in "a"
 * and the remainder in "d" (unused).
 * NOTE(review): idiv is documented to fault when y is zero or the quotient
 * does not fit in 32 bits -- confirm callers guarantee neither can happen. */
static ALWAYS_INLINE int32_t div_64_32(int64_t x, int32_t y) {
    int32_t quotient, remainder;
    asm("idiv %4"
        :"=a"(quotient), "=d"(remainder)
        :"a"((uint32_t)x), "d"((int32_t)(x>>32)), "r"(y)
    );
    return quotient;
}
#else
/* Fallback for other compilers and architectures: plain C division. */
#define div_64_32(x,y) ((x)/(y))
#endif

/* Estimate the total amount of influence on future quality that could be had if we
 * were to improve the reference samples used to inter predict any given macroblock.
 *
 * For each of the len entries:
 *   amount = propagate_in + ((intra_cost * inv_qscale + 128) >> 8)
 *   dst    = amount * (intra_cost - inter_cost) / intra_cost
 * The product is formed in 64 bits before the division.
 * NOTE(review): intra_costs[] is used as the divisor -- presumably never zero;
 * confirm against the caller. */
static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                   uint16_t *inter_costs, uint16_t *inv_qscales, int len )
{
    int mb;
    for( mb = 0; mb < len; mb++ )
    {
        const int intra  = intra_costs[mb];
        const int saved  = intra - inter_costs[mb];
        const int scaled = ( intra * inv_qscales[mb] + 128 ) >> 8;
        int propagate_amount = propagate_in[mb] + scaled;
        dst[mb] = div_64_32( (int64_t)propagate_amount * saved, intra );
    }
}

459
/* Fills the motion-compensation function table with the C implementations
 * defined in this file, then lets the platform-specific initializers that
 * were compiled in (MMX, AltiVec, ARM) adjust it according to `cpu`. */
void x264_mc_init( int cpu, x264_mc_functions_t *pf )
{
    pf->mc_luma   = mc_luma;
    pf->get_ref   = get_ref;
    pf->mc_chroma = mc_chroma;

    pf->avg[PIXEL_16x16]= pixel_avg_16x16;
    pf->avg[PIXEL_16x8] = pixel_avg_16x8;
    pf->avg[PIXEL_8x16] = pixel_avg_8x16;
    pf->avg[PIXEL_8x8]  = pixel_avg_8x8;
    pf->avg[PIXEL_8x4]  = pixel_avg_8x4;
    pf->avg[PIXEL_4x8]  = pixel_avg_4x8;
    pf->avg[PIXEL_4x4]  = pixel_avg_4x4;
    pf->avg[PIXEL_4x2]  = pixel_avg_4x2;
    pf->avg[PIXEL_2x4]  = pixel_avg_2x4;
    pf->avg[PIXEL_2x2]  = pixel_avg_2x2;

    /* the C code uses the same weighting table for all three variants */
    pf->weight    = x264_mc_weight_wtab;
    pf->offsetadd = x264_mc_weight_wtab;
    pf->offsetsub = x264_mc_weight_wtab;
    pf->weight_cache = x264_weight_cache;

    pf->copy_16x16_unaligned = mc_copy_w16;
    pf->copy[PIXEL_16x16] = mc_copy_w16;
    pf->copy[PIXEL_8x8]   = mc_copy_w8;
    pf->copy[PIXEL_4x4]   = mc_copy_w4;

    pf->plane_copy = plane_copy;
    pf->hpel_filter = hpel_filter;

    pf->prefetch_fenc = prefetch_fenc_null;
    pf->prefetch_ref  = prefetch_ref_null;
    pf->memcpy_aligned = memcpy;
    pf->memzero_aligned = memzero_aligned;
    pf->frame_init_lowres_core = frame_init_lowres_core;

    pf->integral_init4h = integral_init4h;
    pf->integral_init8h = integral_init8h;
    pf->integral_init4v = integral_init4v;
    pf->integral_init8v = integral_init8v;

    pf->mbtree_propagate_cost = mbtree_propagate_cost;

#ifdef HAVE_MMX
    x264_mc_init_mmx( cpu, pf );
#endif
#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
        x264_mc_altivec_init( pf );
#endif
#ifdef HAVE_ARMV6
    x264_mc_init_arm( cpu, pf );
#endif
}

514
/* Runs the half-pel filter over the rows of `frame` that lie between 8 rows
 * above macroblock row mb_y and 8 rows past the end of the processed area
 * (mb_y*16, or the whole frame when b_end is set), then extends the integral
 * image over the same rows if the frame has one.
 * With MBAFF (b_interlaced) the two fields are filtered separately using a
 * doubled stride, and odd macroblock rows return immediately. */
void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
    const int b_interlaced = h->sh.b_mbaff;
    const int stride = frame->i_stride[0] << b_interlaced;
    const int width = frame->i_width[0];
    int start = (mb_y*16 >> b_interlaced) - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
    int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8;
    int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd
    int y;

    if( mb_y & b_interlaced )
        return;

    /* one pass per field; the second field starts one frame row further down */
    for( y=0; y<=b_interlaced; y++, offs+=frame->i_stride[0] )
    {
        h->mc.hpel_filter(
            frame->filtered[1] + offs,
            frame->filtered[2] + offs,
            frame->filtered[3] + offs,
            frame->plane[0] + offs,
            stride, width + 16, height - start,
            h->scratch_buffer );
    }

    /* generate integral image:
     * frame->integral contains 2 planes. in the upper plane, each element is
     * the sum of an 8x8 pixel region with top-left corner on that point.
     * in the lower plane, 4x4 sums (needed only with --partitions p4x4). */

    if( frame->integral )
    {
        if( start < 0 )
        {
            /* first call: clear the row above the padded area, which the
             * horizontal pass of the first row adds to its sums */
            memset( frame->integral - PADV * stride - PADH, 0, stride * sizeof(uint16_t) );
            start = -PADV;
        }
        if( b_end )
            height += PADV-9;
        for( y = start; y < height; y++ )
        {
            uint8_t  *pix  = frame->plane[0] + y * stride - PADH;
            uint16_t *sum8 = frame->integral + (y+1) * stride - PADH;
            uint16_t *sum4;
            if( h->frames.b_have_sub8x8_esa )
            {
                h->mc.integral_init4h( sum8, pix, stride );
                sum8 -= 8*stride;
                sum4 = sum8 + stride * (frame->i_lines[0] + PADV*2);
                /* the vertical pass needs 8 completed rows */
                if( y >= 8-PADV )
                    h->mc.integral_init4v( sum8, sum4, stride );
            }
            else
            {
                h->mc.integral_init8h( sum8, pix, stride );
                /* the vertical pass needs 8 completed rows */
                if( y >= 8-PADV )
                    h->mc.integral_init8v( sum8-8*stride, stride );
            }
        }
    }
}