/*****************************************************************************
 * pixel.c: h264 encoder
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Loren Merritt <lorenm@u.washington.edu>
 *          Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

Loren Merritt's avatar
Loren Merritt committed
24
#include "common.h"
Laurent Aimar's avatar
Laurent Aimar committed
25

Steven Walters's avatar
Steven Walters committed
26
#if HAVE_MMX
27
#   include "x86/pixel.h"
Laurent Aimar's avatar
Laurent Aimar committed
28
#endif
Steven Walters's avatar
Steven Walters committed
29
#if ARCH_PPC
Laurent Aimar's avatar
Laurent Aimar committed
30 31
#   include "ppc/pixel.h"
#endif
Steven Walters's avatar
Steven Walters committed
32
#if ARCH_ARM
33 34
#   include "arm/pixel.h"
#endif
Steven Walters's avatar
Steven Walters committed
35
#if ARCH_UltraSparc
36 37
#   include "sparc/pixel.h"
#endif
Laurent Aimar's avatar
Laurent Aimar committed
38 39 40 41 42 43


/****************************************************************************
 * pixel_sad_WxH
 ****************************************************************************/
/* Generates `static int name(pix1, stride1, pix2, stride2)` returning the sum
 * of absolute differences over an lx-wide, ly-tall block. Both pointers are
 * advanced by their own stride after every row. */
#define PIXEL_SAD_C( name, lx, ly ) \
static int name( pixel *pix1, int i_stride_pix1,  \
                 pixel *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_sum = 0;                                  \
    for( int y = 0; y < ly; y++ )                   \
    {                                               \
        for( int x = 0; x < lx; x++ )               \
        {                                           \
            i_sum += abs( pix1[x] - pix2[x] );      \
        }                                           \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_sum;                                   \
}


/* One C reference SAD per supported block size. */
PIXEL_SAD_C( x264_pixel_sad_16x16, 16, 16 )
PIXEL_SAD_C( x264_pixel_sad_16x8,  16,  8 )
PIXEL_SAD_C( x264_pixel_sad_8x16,   8, 16 )
PIXEL_SAD_C( x264_pixel_sad_8x8,    8,  8 )
PIXEL_SAD_C( x264_pixel_sad_8x4,    8,  4 )
PIXEL_SAD_C( x264_pixel_sad_4x8,    4,  8 )
PIXEL_SAD_C( x264_pixel_sad_4x4,    4,  4 )
Laurent Aimar's avatar
Laurent Aimar committed
68

69 70 71 72 73

/****************************************************************************
 * pixel_ssd_WxH
 ****************************************************************************/
/* Generates `static int name(pix1, stride1, pix2, stride2)` returning the sum
 * of squared differences over an lx-wide, ly-tall block. */
#define PIXEL_SSD_C( name, lx, ly ) \
static int name( pixel *pix1, int i_stride_pix1,  \
                 pixel *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_sum = 0;                                  \
    for( int y = 0; y < ly; y++ )                   \
    {                                               \
        for( int x = 0; x < lx; x++ )               \
        {                                           \
            int d = pix1[x] - pix2[x];              \
            i_sum += d*d;                           \
        }                                           \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_sum;                                   \
}

/* One C reference SSD per supported block size. */
PIXEL_SSD_C( x264_pixel_ssd_16x16, 16, 16 )
PIXEL_SSD_C( x264_pixel_ssd_16x8,  16,  8 )
PIXEL_SSD_C( x264_pixel_ssd_8x16,   8, 16 )
PIXEL_SSD_C( x264_pixel_ssd_8x8,    8,  8 )
PIXEL_SSD_C( x264_pixel_ssd_8x4,    8,  4 )
PIXEL_SSD_C( x264_pixel_ssd_4x8,    4,  8 )
PIXEL_SSD_C( x264_pixel_ssd_4x4,    4,  4 )
98

99
/* Sum of squared differences over an arbitrary i_width x i_height region.
 *
 * The region is tiled with the pf->ssd kernels (16x16, 8x16 and 8x8); whatever
 * is left at the right and bottom edges is accumulated pixel by pixel.
 *
 *   pf              table of ssd kernels, indexed by PIXEL_* size
 *   pix1, i_pix1    first plane and its stride
 *   pix2, i_pix2    second plane and its stride
 * Returns the total as a 64-bit value. */
int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height )
{
    int64_t i_ssd = 0;
    int y;
    /* the 16x16 kernel is only used when both pointers and both strides are multiples of 16 */
    int align = !(((intptr_t)pix1 | (intptr_t)pix2 | i_pix1 | i_pix2) & 15);

#define SSD(size) i_ssd += pf->ssd[size]( pix1 + y*i_pix1 + x, i_pix1, \
                                          pix2 + y*i_pix2 + x, i_pix2 );
    /* full 16-row strips */
    for( y = 0; y < i_height-15; y += 16 )
    {
        int x = 0;
        if( align )
            for( ; x < i_width-15; x += 16 )
                SSD(PIXEL_16x16);
        for( ; x < i_width-7; x += 8 )
            SSD(PIXEL_8x16);
    }
    /* at most one remaining 8-row strip; y still holds the first unprocessed row */
    if( y < i_height-7 )
        for( int x = 0; x < i_width-7; x += 8 )
            SSD(PIXEL_8x8);
#undef SSD

#define SSD1 { int d = pix1[y*i_pix1+x] - pix2[y*i_pix2+x]; i_ssd += d*d; }
    /* right edge: columns past the last multiple of 8, for the rows covered by the kernels */
    if( i_width & 7 )
    {
        for( y = 0; y < (i_height & ~7); y++ )
            for( int x = i_width & ~7; x < i_width; x++ )
                SSD1;
    }
    /* bottom edge: rows past the last multiple of 8, full width */
    if( i_height & 7 )
    {
        for( y = i_height & ~7; y < i_height; y++ )
            for( int x = 0; x < i_width; x++ )
                SSD1;
    }
#undef SSD1

    return i_ssd;
}

139

140 141 142
/****************************************************************************
 * pixel_var_wxh
 ****************************************************************************/
Fiona Glaser's avatar
Fiona Glaser committed
143
/* Generates `static uint64_t name(pix, i_stride)` for a w x w block.
 * The result packs the sum of the pixels in the low 32 bits and the sum of
 * their squares in the high 32 bits. */
#define PIXEL_VAR_C( name, w ) \
static uint64_t name( pixel *pix, int i_stride ) \
{                                             \
    uint32_t sum = 0, sqr = 0;                \
    for( int y = 0; y < w; y++ )              \
    {                                         \
        for( int x = 0; x < w; x++ )          \
        {                                     \
            sum += pix[x];                    \
            sqr += pix[x] * pix[x];           \
        }                                     \
        pix += i_stride;                      \
    }                                         \
    return sum + ((uint64_t)sqr << 32);       \
}

PIXEL_VAR_C( x264_pixel_var_16x16, 16 )
PIXEL_VAR_C( x264_pixel_var_8x8,    8 )
161

162 163 164
/****************************************************************************
 * pixel_var2_wxh
 ****************************************************************************/
165
/* Variance-like measure of the difference between two 8x8 blocks.
 *
 * Stores the sum of squared differences in *ssd and returns
 * ssd - (sum_of_differences^2 >> 6), where 6 = log2 of the 64 pixels.
 *
 * The signed differences are accumulated in a signed int (the sum can be
 * negative) and squared in 64 bits, so no unsigned wrap-around or
 * implementation-defined conversion is involved. */
static int pixel_var2_8x8( pixel *pix1, int i_stride1, pixel *pix2, int i_stride2, int *ssd )
{
    int sum = 0, sqr = 0;
    for( int y = 0; y < 8; y++ )
    {
        for( int x = 0; x < 8; x++ )
        {
            int diff = pix1[x] - pix2[x];
            sum += diff;
            sqr += diff * diff;
        }
        pix1 += i_stride1;
        pix2 += i_stride2;
    }
    *ssd = sqr;
    /* the square is non-negative, so taking abs(sum) first is unnecessary */
    return sqr - (int)((int64_t)sum * sum >> 6);
}

185

186
/* 4-point butterfly: from inputs s0..s3 computes
 *   d0 = s0+s1+s2+s3, d1 = s0-s1+s2-s3, d2 = s0+s1-s2-s3, d3 = s0-s1-s2+s3.
 * The temporaries are plain int. Arguments are parenthesised so that
 * expression arguments (e.g. `a-b`) expand correctly; each input is evaluated
 * twice, so do not pass expressions with side effects. */
#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) {\
    int t0 = (s0) + (s1);\
    int t1 = (s0) - (s1);\
    int t2 = (s2) + (s3);\
    int t3 = (s2) - (s3);\
    (d0) = t0 + t2;\
    (d2) = t0 - t2;\
    (d1) = t1 + t3;\
    (d3) = t1 - t3;\
}
196

197 198 199 200 201 202 203 204
// in: a pseudo-simd number packing two 16-bit lanes as x+(y<<16)
// return: the per-lane absolute values, abs(x)+(abs(y)<<16)
static ALWAYS_INLINE uint32_t abs2( uint32_t a )
{
    // bit 0 / bit 16 set when the sign bit (bit 15 / bit 31) of that lane is set
    const uint32_t lane_sign = (a >> 15) & 0x10001;
    // widen each flag to 0xffff over its whole lane
    const uint32_t mask = lane_sign * 0xffff;
    return (a + mask) ^ mask;
}

205 206 207
/****************************************************************************
 * pixel_satd_WxH: sum of 4x4 Hadamard transformed differences
 ****************************************************************************/
Laurent Aimar's avatar
Laurent Aimar committed
208

209
/* SATD of a 4x4 block: transforms the pixel differences with the HADAMARD4
 * butterflies and returns half the sum of the absolute coefficients.
 *
 * Two 16-bit values are packed into each uint32_t (low half / high half), so
 * each 32-bit operation processes two coefficients. */
static NOINLINE int x264_pixel_satd_4x4( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    uint32_t tmp[4][2];
    uint32_t a0, a1, a2, a3, b0, b1;
    int sum = 0;
    /* horizontal pass, one row per iteration */
    for( int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2 )
    {
        a0 = pix1[0] - pix2[0];
        a1 = pix1[1] - pix2[1];
        b0 = (a0+a1) + ((a0-a1)<<16);
        a2 = pix1[2] - pix2[2];
        a3 = pix1[3] - pix2[3];
        b1 = (a2+a3) + ((a2-a3)<<16);
        tmp[i][0] = b0 + b1;
        tmp[i][1] = b0 - b1;
    }
    /* vertical pass on the packed columns, then add the two halves */
    for( int i = 0; i < 2; i++ )
    {
        HADAMARD4( a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i] );
        a0 = abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
        sum += ((uint16_t)a0) + (a0>>16);
    }
    return sum >> 1;
}

234
/* SATD of an 8-wide, 4-tall block, computed as two side-by-side 4x4
 * transforms: columns 0-3 are kept in the low 16 bits of each uint32_t and
 * columns 4-7 in the high 16 bits. Returns half the sum of the absolute
 * coefficients of both halves. */
static NOINLINE int x264_pixel_satd_8x4( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    uint32_t tmp[4][4];
    uint32_t a0, a1, a2, a3;
    int sum = 0;
    /* horizontal pass, one row per iteration */
    for( int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2 )
    {
        a0 = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16);
        a1 = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16);
        a2 = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16);
        a3 = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16);
        HADAMARD4( tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0,a1,a2,a3 );
    }
    /* vertical pass */
    for( int i = 0; i < 4; i++ )
    {
        HADAMARD4( a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i] );
        sum += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
    }
    /* fold the two packed 16-bit totals together */
    return (((uint16_t)sum) + ((uint32_t)sum>>16)) >> 1;
}
Laurent Aimar's avatar
Laurent Aimar committed
254

255
#define PIXEL_SATD_C( w, h, sub )\
256
static int x264_pixel_satd_##w##x##h( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )\
257 258 259 260 261 262 263 264 265 266 267 268 269
{\
    int sum = sub( pix1, i_pix1, pix2, i_pix2 )\
            + sub( pix1+4*i_pix1, i_pix1, pix2+4*i_pix2, i_pix2 );\
    if( w==16 )\
        sum+= sub( pix1+8, i_pix1, pix2+8, i_pix2 )\
            + sub( pix1+8+4*i_pix1, i_pix1, pix2+8+4*i_pix2, i_pix2 );\
    if( h==16 )\
        sum+= sub( pix1+8*i_pix1, i_pix1, pix2+8*i_pix2, i_pix2 )\
            + sub( pix1+12*i_pix1, i_pix1, pix2+12*i_pix2, i_pix2 );\
    if( w==16 && h==16 )\
        sum+= sub( pix1+8+8*i_pix1, i_pix1, pix2+8+8*i_pix2, i_pix2 )\
            + sub( pix1+8+12*i_pix1, i_pix1, pix2+8+12*i_pix2, i_pix2 );\
    return sum;\
270
}
271 272 273 274 275
PIXEL_SATD_C( 16, 16, x264_pixel_satd_8x4 )
PIXEL_SATD_C( 16, 8,  x264_pixel_satd_8x4 )
PIXEL_SATD_C( 8,  16, x264_pixel_satd_8x4 )
PIXEL_SATD_C( 8,  8,  x264_pixel_satd_8x4 )
PIXEL_SATD_C( 4,  8,  x264_pixel_satd_4x4 )
276 277


278
/* Unnormalised sum of absolute transformed differences of an 8x8 block,
 * built from HADAMARD4 butterflies plus a final +/- stage.
 * Pairs of 16-bit values are packed into each uint32_t. The callers in this
 * file apply the final (sum+2)>>2 rounding. */
static NOINLINE int sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    uint32_t tmp[8][4];
    uint32_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
    int sum = 0;
    /* horizontal pass, one row per iteration */
    for( int i = 0; i < 8; i++, pix1 += i_pix1, pix2 += i_pix2 )
    {
        a0 = pix1[0] - pix2[0];
        a1 = pix1[1] - pix2[1];
        b0 = (a0+a1) + ((a0-a1)<<16);
        a2 = pix1[2] - pix2[2];
        a3 = pix1[3] - pix2[3];
        b1 = (a2+a3) + ((a2-a3)<<16);
        a4 = pix1[4] - pix2[4];
        a5 = pix1[5] - pix2[5];
        b2 = (a4+a5) + ((a4-a5)<<16);
        a6 = pix1[6] - pix2[6];
        a7 = pix1[7] - pix2[7];
        b3 = (a6+a7) + ((a6-a7)<<16);
        HADAMARD4( tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], b0,b1,b2,b3 );
    }
    /* vertical pass: rows 0-3 and 4-7 separately, then combine the two halves */
    for( int i = 0; i < 4; i++ )
    {
        HADAMARD4( a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i] );
        HADAMARD4( a4, a5, a6, a7, tmp[4][i], tmp[5][i], tmp[6][i], tmp[7][i] );
        b0  = abs2(a0+a4) + abs2(a0-a4);
        b0 += abs2(a1+a5) + abs2(a1-a5);
        b0 += abs2(a2+a6) + abs2(a2-a6);
        b0 += abs2(a3+a7) + abs2(a3-a7);
        sum += (uint16_t)b0 + (b0>>16);
    }
    return sum;
}
311

312
/* 8x8 SA8D: the raw sa8d_8x8() sum divided by 4 with rounding. */
static int x264_pixel_sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    int sum = sa8d_8x8( pix1, i_pix1, pix2, i_pix2 );
    return (sum+2)>>2;
}

318
/* 16x16 SA8D: adds the raw sa8d_8x8() sums of the four 8x8 quadrants and
 * rounds once at the end, (sum+2)>>2. */
static int x264_pixel_sa8d_16x16( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    int sum = sa8d_8x8( pix1, i_pix1, pix2, i_pix2 )
            + sa8d_8x8( pix1+8, i_pix1, pix2+8, i_pix2 )
            + sa8d_8x8( pix1+8*i_pix1, i_pix1, pix2+8*i_pix2, i_pix2 )
            + sa8d_8x8( pix1+8+8*i_pix1, i_pix1, pix2+8+8*i_pix2, i_pix2 );
    return (sum+2)>>2;
}

Loren Merritt's avatar
Loren Merritt committed
327

328
/* Transform-domain energy of one 8x8 block of source pixels (no reference
 * block is subtracted).
 *
 * Returns two sums of absolute coefficients packed into 64 bits:
 *   low 32 bits  (sum4): taken after the second butterfly stage
 *   high 32 bits (sum8): taken after the third butterfly stage
 * The DC term is subtracted from both. Presumably these correspond to the
 * 4x4 and 8x8 Hadamard AC energies -- confirm against the callers. */
static NOINLINE uint64_t pixel_hadamard_ac( pixel *pix, int stride )
{
    uint32_t tmp[32];
    uint32_t a0, a1, a2, a3, dc;
    int sum4 = 0, sum8 = 0;
    /* stage 1: horizontal butterflies; two 16-bit values packed per uint32_t */
    for( int i = 0; i < 8; i++, pix+=stride )
    {
        /* rows 0-3 go to tmp[0..3] (+4,+8,+12); rows 4-7 to tmp[16..19] (+4,+8,+12) */
        uint32_t *t = tmp + (i&3) + (i&4)*4;
        a0 = (pix[0]+pix[1]) + ((pix[0]-pix[1])<<16);
        a1 = (pix[2]+pix[3]) + ((pix[2]-pix[3])<<16);
        t[0] = a0 + a1;
        t[4] = a0 - a1;
        a2 = (pix[4]+pix[5]) + ((pix[4]-pix[5])<<16);
        a3 = (pix[6]+pix[7]) + ((pix[6]-pix[7])<<16);
        t[8] = a2 + a3;
        t[12] = a2 - a3;
    }
    /* stage 2: butterflies within each group of four; results kept for stage 3 */
    for( int i = 0; i < 8; i++ )
    {
        HADAMARD4( a0, a1, a2, a3, tmp[i*4+0], tmp[i*4+1], tmp[i*4+2], tmp[i*4+3] );
        tmp[i*4+0] = a0;
        tmp[i*4+1] = a1;
        tmp[i*4+2] = a2;
        tmp[i*4+3] = a3;
        sum4 += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
    }
    /* stage 3: butterflies across the four groups */
    for( int i = 0; i < 8; i++ )
    {
        HADAMARD4( a0,a1,a2,a3, tmp[i], tmp[8+i], tmp[16+i], tmp[24+i] );
        sum8 += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
    }
    dc = (uint16_t)(tmp[0] + tmp[8] + tmp[16] + tmp[24]);
    /* fold the packed 16-bit halves together and remove the DC contribution */
    sum4 = (uint16_t)sum4 + ((uint32_t)sum4>>16) - dc;
    sum8 = (uint16_t)sum8 + ((uint32_t)sum8>>16) - dc;
    return ((uint64_t)sum8<<32) + sum4;
}

#define HADAMARD_AC(w,h) \
366
static uint64_t x264_pixel_hadamard_ac_##w##x##h( pixel *pix, int stride )\
Loren Merritt's avatar
Loren Merritt committed
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
{\
    uint64_t sum = pixel_hadamard_ac( pix, stride );\
    if( w==16 )\
        sum += pixel_hadamard_ac( pix+8, stride );\
    if( h==16 )\
        sum += pixel_hadamard_ac( pix+8*stride, stride );\
    if( w==16 && h==16 )\
        sum += pixel_hadamard_ac( pix+8*stride+8, stride );\
    return ((sum>>34)<<32) + ((uint32_t)sum>>1);\
}
HADAMARD_AC( 16, 16 )
HADAMARD_AC( 16, 8 )
HADAMARD_AC( 8, 16 )
HADAMARD_AC( 8, 8 )


Loren Merritt's avatar
Loren Merritt committed
383 384 385
/****************************************************************************
 * pixel_sad_x4
 ****************************************************************************/
386
#define SAD_X( size ) \
387
static void x264_pixel_sad_x3_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, int i_stride, int scores[3] )\
388 389 390 391 392
{\
    scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
    scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
    scores[2] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix2, i_stride );\
}\
393
static void x264_pixel_sad_x4_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3, int i_stride, int scores[4] )\
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
{\
    scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
    scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
    scores[2] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix2, i_stride );\
    scores[3] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix3, i_stride );\
}

SAD_X( 16x16 )
SAD_X( 16x8 )
SAD_X( 8x16 )
SAD_X( 8x8 )
SAD_X( 8x4 )
SAD_X( 4x8 )
SAD_X( 4x4 )

Steven Walters's avatar
Steven Walters committed
409
#if ARCH_UltraSparc
410 411 412 413 414
SAD_X( 16x16_vis )
SAD_X( 16x8_vis )
SAD_X( 8x16_vis )
SAD_X( 8x8_vis )
#endif
415

416 417 418 419 420 421
/****************************************************************************
 * pixel_satd_x4
 * no faster than single satd, but needed for satd to be a drop-in replacement for sad
 ****************************************************************************/

#define SATD_X( size, cpu ) \
422
static void x264_pixel_satd_x3_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, int i_stride, int scores[3] )\
423 424 425 426 427
{\
    scores[0] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix0, i_stride );\
    scores[1] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix1, i_stride );\
    scores[2] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix2, i_stride );\
}\
428
static void x264_pixel_satd_x4_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3, int i_stride, int scores[4] )\
429 430 431 432 433 434
{\
    scores[0] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix0, i_stride );\
    scores[1] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix1, i_stride );\
    scores[2] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix2, i_stride );\
    scores[3] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix3, i_stride );\
}
435
#define SATD_X_DECL6( cpu )\
436 437 438 439
SATD_X( 16x16, cpu )\
SATD_X( 16x8, cpu )\
SATD_X( 8x16, cpu )\
SATD_X( 8x8, cpu )\
440 441
SATD_X( 8x4, cpu )\
SATD_X( 4x8, cpu )
442
#define SATD_X_DECL7( cpu )\
443
SATD_X_DECL6( cpu )\
444 445 446
SATD_X( 4x4, cpu )

SATD_X_DECL7()
Steven Walters's avatar
Steven Walters committed
447
#if HAVE_MMX
448
SATD_X_DECL7( _mmxext )
449
SATD_X_DECL6( _sse2 )
Fiona Glaser's avatar
Fiona Glaser committed
450
SATD_X_DECL7( _ssse3 )
451
SATD_X_DECL7( _sse4 )
452 453
#endif

Steven Walters's avatar
Steven Walters committed
454
#if HAVE_ARMV6
455 456 457
SATD_X_DECL7( _neon )
#endif

458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
/* Generates x264_intra_<mbcmp>_x3_8x8( fenc, edge, res ).
 * The V, H and DC 8x8 predictions are built in turn from `edge` into a local
 * scratch buffer (stride FDEC_STRIDE), and the <mbcmp> cost of each against
 * fenc is stored in res[0], res[1] and res[2] respectively. */
#define INTRA_MBCMP_8x8( mbcmp )\
void x264_intra_##mbcmp##_x3_8x8( pixel *fenc, pixel edge[33], int res[3] )\
{\
    pixel pix[8*FDEC_STRIDE];\
    x264_predict_8x8_v_c( pix, edge );\
    res[0] = x264_pixel_##mbcmp##_8x8( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
    x264_predict_8x8_h_c( pix, edge );\
    res[1] = x264_pixel_##mbcmp##_8x8( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
    x264_predict_8x8_dc_c( pix, edge );\
    res[2] = x264_pixel_##mbcmp##_8x8( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
}

INTRA_MBCMP_8x8(sad)
INTRA_MBCMP_8x8(sa8d)

/* Generates x264_intra_<mbcmp>_x3_<size>x<size><chroma>( fenc, fdec, res ).
 * Predictions pred1, pred2 and pred3 are written into fdec one after another
 * (each overwrites the previous one), and the <mbcmp> cost of each against
 * fenc is stored in res[0..2] in that order. Note that the 8x8 chroma
 * instantiations pass the modes as dc, h, v rather than v, h, dc. */
#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma )\
void x264_intra_##mbcmp##_x3_##size##x##size##chroma( pixel *fenc, pixel *fdec, int res[3] )\
{\
    x264_predict_##size##x##size##chroma##_##pred1##_c( fdec );\
    res[0] = x264_pixel_##mbcmp##_##size##x##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
    x264_predict_##size##x##size##chroma##_##pred2##_c( fdec );\
    res[1] = x264_pixel_##mbcmp##_##size##x##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
    x264_predict_##size##x##size##chroma##_##pred3##_c( fdec );\
    res[2] = x264_pixel_##mbcmp##_##size##x##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
}

INTRA_MBCMP(sad, 4, v, h, dc, )
INTRA_MBCMP(satd, 4, v, h, dc, )
INTRA_MBCMP(sad, 8, dc, h, v, c )
INTRA_MBCMP(satd, 8, dc, h, v, c )
INTRA_MBCMP(sad, 16, v, h, dc, )
INTRA_MBCMP(satd, 16, v, h, dc, )

Loren Merritt's avatar
Loren Merritt committed
491 492 493
/****************************************************************************
 * structural similarity metric
 ****************************************************************************/
494 495
/* Gathers the SSIM statistics of two horizontally adjacent 4x4 blocks.
 * For block z (0 = left, 1 = right) it stores:
 *   sums[z][0] = sum of pix1
 *   sums[z][1] = sum of pix2
 *   sums[z][2] = sum of pix1^2 + pix2^2
 *   sums[z][3] = sum of pix1*pix2 */
static void ssim_4x4x2_core( const pixel *pix1, int stride1,
                             const pixel *pix2, int stride2,
                             int sums[2][4])
{
    for( int z = 0; z < 2; z++ )
    {
        uint32_t s1 = 0, s2 = 0, ss = 0, s12 = 0;
        for( int y = 0; y < 4; y++ )
            for( int x = 0; x < 4; x++ )
            {
                int a = pix1[x+y*stride1];
                int b = pix2[x+y*stride2];
                s1  += a;
                s2  += b;
                ss  += a*a;
                ss  += b*b;
                s12 += a*b;
            }
        sums[z][0] = s1;
        sums[z][1] = s2;
        sums[z][2] = ss;
        sums[z][3] = s12;
        /* step to the next 4x4 block on the right */
        pix1 += 4;
        pix2 += 4;
    }
}

/* SSIM value of one window from its accumulated statistics.
 *   s1, s2  sums of the two signals
 *   ss      sum of squares of both signals combined
 *   s12     sum of products
 * The factors of 64 (and 63 in ssim_c2) scale the expression for a window of
 * 64 samples, i.e. four 4x4 blocks as combined by ssim_end4. The constants
 * are derived from a peak value of 255. All integer work is done in plain
 * int. */
static float ssim_end1( int s1, int s2, int ss, int s12 )
{
    static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
    static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
    int vars = ss*64 - s1*s1 - s2*s2;
    int covar = s12*64 - s1*s2;
    return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)
         / ((float)(s1*s1 + s2*s2 + ssim_c1) * (float)(vars + ssim_c2));
}

/* Sums the SSIM of `width` horizontally consecutive windows.
 * Window i combines the statistics of entries i and i+1 from both rows
 * (sum0 and sum1), so entries 0..width are read from each row. */
static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
{
    float ssim = 0.0;
    for( int i = 0; i < width; i++ )
        ssim += ssim_end1( sum0[i][0] + sum0[i+1][0] + sum1[i][0] + sum1[i+1][0],
                           sum0[i][1] + sum0[i+1][1] + sum1[i][1] + sum1[i+1][1],
                           sum0[i][2] + sum0[i+1][2] + sum1[i][2] + sum1[i+1][2],
                           sum0[i][3] + sum0[i+1][3] + sum1[i][3] + sum1[i+1][3] );
    return ssim;
}

/* Accumulated (not averaged) SSIM over a width x height region.
 *
 * The region is processed in units of 4x4 blocks. Two rows of per-block
 * statistics are kept in `buf` and swapped as the scan moves down; each
 * window combines two adjacent block rows.
 *
 *   pf    supplies the ssim_4x4x2_core and ssim_end4 kernels
 *   buf   scratch space, split into two rows of (width/4+3) int[4] entries;
 *         presumably the caller provides at least 2*(width/4+3) entries
 *         -- confirm at the call site
 * Returns the sum of the per-window SSIM values. */
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
                           pixel *pix1, int stride1,
                           pixel *pix2, int stride2,
                           int width, int height, void *buf )
{
    int z = 0;
    float ssim = 0.0;
    int (*sum0)[4] = buf;
    int (*sum1)[4] = sum0 + width/4+3;
    /* from here on width and height count 4x4 blocks */
    width >>= 2;
    height >>= 2;
    for( int y = 1; y < height; y++ )
    {
        /* fill statistics rows up to and including block row y (two rows on the first pass) */
        for( ; z <= y; z++ )
        {
            XCHG( void*, sum0, sum1 );
            for( int x = 0; x < width; x+=2 )
                pf->ssim_4x4x2_core( &pix1[4*(x+z*stride1)], stride1, &pix2[4*(x+z*stride2)], stride2, &sum0[x] );
        }
        /* evaluate up to four windows per call along this pair of rows */
        for( int x = 0; x < width-1; x += 4 )
            ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) );
    }
    return ssim;
}


Loren Merritt's avatar
Loren Merritt committed
568 569 570
/****************************************************************************
 * successive elimination
 ****************************************************************************/
571 572
/* Successive elimination with four DC terms.
 * For each candidate i in [0, width) the cost is
 *   |enc_dc[0]-sums[i]| + |enc_dc[1]-sums[i+8]|
 *   + |enc_dc[2]-sums[i+delta]| + |enc_dc[3]-sums[i+delta+8]| + cost_mvx[i].
 * Indices whose cost is below thresh are appended to mvs[].
 * Returns the number of indices written. */
static int x264_pixel_ads4( int enc_dc[4], uint16_t *sums, int delta,
                            uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
{
    int nmv = 0;
    for( int i = 0; i < width; i++, sums++ )
    {
        int ads = abs( enc_dc[0] - sums[0] )
                + abs( enc_dc[1] - sums[8] )
                + abs( enc_dc[2] - sums[delta] )
                + abs( enc_dc[3] - sums[delta+8] )
                + cost_mvx[i];
        if( ads < thresh )
            mvs[nmv++] = i;
    }
    return nmv;
}

588 589
/* Successive elimination with two DC terms.
 * For each candidate i in [0, width) the cost is
 *   |enc_dc[0]-sums[i]| + |enc_dc[1]-sums[i+delta]| + cost_mvx[i].
 * Indices whose cost is below thresh are appended to mvs[].
 * Returns the number of indices written. */
static int x264_pixel_ads2( int enc_dc[2], uint16_t *sums, int delta,
                            uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
{
    int nmv = 0;
    for( int i = 0; i < width; i++, sums++ )
    {
        int ads = abs( enc_dc[0] - sums[0] )
                + abs( enc_dc[1] - sums[delta] )
                + cost_mvx[i];
        if( ads < thresh )
            mvs[nmv++] = i;
    }
    return nmv;
}

603 604
/* Successive elimination with a single DC term.
 * For each candidate i in [0, width) the cost is
 *   |enc_dc[0]-sums[i]| + cost_mvx[i].
 * Indices whose cost is below thresh are appended to mvs[].
 * `delta` is unused; it keeps the signature identical to the ads2/ads4
 * variants. Returns the number of indices written. */
static int x264_pixel_ads1( int enc_dc[1], uint16_t *sums, int delta,
                            uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
{
    int nmv = 0;
    for( int i = 0; i<width; i++, sums++ )
    {
        int ads = abs( enc_dc[0] - sums[0] )
                + cost_mvx[i];
        if( ads < thresh )
            mvs[nmv++] = i;
    }
    return nmv;
}


Laurent Aimar's avatar
Laurent Aimar committed
618 619 620 621 622
/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
623 624
    memset( pixf, 0, sizeof(*pixf) );

625 626 627 628 629 630 631 632 633 634
#define INIT2_NAME( name1, name2, cpu ) \
    pixf->name1[PIXEL_16x16] = x264_pixel_##name2##_16x16##cpu;\
    pixf->name1[PIXEL_16x8]  = x264_pixel_##name2##_16x8##cpu;
#define INIT4_NAME( name1, name2, cpu ) \
    INIT2_NAME( name1, name2, cpu ) \
    pixf->name1[PIXEL_8x16]  = x264_pixel_##name2##_8x16##cpu;\
    pixf->name1[PIXEL_8x8]   = x264_pixel_##name2##_8x8##cpu;
#define INIT5_NAME( name1, name2, cpu ) \
    INIT4_NAME( name1, name2, cpu ) \
    pixf->name1[PIXEL_8x4]   = x264_pixel_##name2##_8x4##cpu;
635
#define INIT6_NAME( name1, name2, cpu ) \
636
    INIT5_NAME( name1, name2, cpu ) \
637 638 639
    pixf->name1[PIXEL_4x8]   = x264_pixel_##name2##_4x8##cpu;
#define INIT7_NAME( name1, name2, cpu ) \
    INIT6_NAME( name1, name2, cpu ) \
640 641 642 643
    pixf->name1[PIXEL_4x4]   = x264_pixel_##name2##_4x4##cpu;
#define INIT2( name, cpu ) INIT2_NAME( name, name, cpu )
#define INIT4( name, cpu ) INIT4_NAME( name, name, cpu )
#define INIT5( name, cpu ) INIT5_NAME( name, name, cpu )
644
#define INIT6( name, cpu ) INIT6_NAME( name, name, cpu )
645
#define INIT7( name, cpu ) INIT7_NAME( name, name, cpu )
646

647 648 649 650 651
#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8] = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8] = x264_pixel_ads1##cpu;

652
    INIT7( sad, );
653
    INIT7_NAME( sad_aligned, sad, );
654 655 656 657
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
658 659
    INIT7( satd_x3, );
    INIT7( satd_x4, );
Loren Merritt's avatar
Loren Merritt committed
660
    INIT4( hadamard_ac, );
661
    INIT_ADS( );
662

663 664
    pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16;
    pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8;
665 666 667
    pixf->var[PIXEL_16x16] = x264_pixel_var_16x16;
    pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8;

668 669
    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;
670
    pixf->var2_8x8 = pixel_var2_8x8;
671

672 673 674 675 676 677 678 679 680
    pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4;
    pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4;
    pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8;
    pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8;
    pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c;
    pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c;
    pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16;
    pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;

Steven Walters's avatar
Steven Walters committed
681
#if HAVE_MMX
682 683
    if( cpu&X264_CPU_MMX )
    {
684
        INIT7( ssd, _mmx );
685 686
    }

Laurent Aimar's avatar
Laurent Aimar committed
687 688
    if( cpu&X264_CPU_MMXEXT )
    {
689
        INIT7( sad, _mmxext );
690
        INIT7_NAME( sad_aligned, sad, _mmxext );
691 692 693
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
694 695
        INIT7( satd_x3, _mmxext );
        INIT7( satd_x4, _mmxext );
Loren Merritt's avatar
Loren Merritt committed
696
        INIT4( hadamard_ac, _mmxext );
697
        INIT_ADS( _mmxext );
698 699
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmxext;
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_mmxext;
Steven Walters's avatar
Steven Walters committed
700
#if ARCH_X86
701 702
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
703
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
704
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_mmxext;
705
        pixf->var2_8x8 = x264_pixel_var2_8x8_mmxext;
706

707
        if( cpu&X264_CPU_CACHELINE_32 )
708
        {
709 710 711 712 713 714 715 716 717
            INIT5( sad, _cache32_mmxext );
            INIT4( sad_x3, _cache32_mmxext );
            INIT4( sad_x4, _cache32_mmxext );
        }
        else if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT5( sad, _cache64_mmxext );
            INIT4( sad_x3, _cache64_mmxext );
            INIT4( sad_x4, _cache64_mmxext );
718 719
        }
#else
720
        if( cpu&X264_CPU_CACHELINE_64 )
721 722 723 724 725 726 727 728 729
        {
            pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]  = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]  = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
730
#endif
731
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
Fiona Glaser's avatar
Fiona Glaser committed
732
        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmxext;
733
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
Fiona Glaser's avatar
Fiona Glaser committed
734
        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmxext;
Fiona Glaser's avatar
Fiona Glaser committed
735
        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmxext;
736
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
Fiona Glaser's avatar
Fiona Glaser committed
737
        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_mmxext;
Laurent Aimar's avatar
Laurent Aimar committed
738
    }
739

740 741 742 743 744 745 746 747 748
    if( cpu&X264_CPU_SSE2 )
    {
        INIT5( ssd, _sse2slow );
        INIT2_NAME( sad_aligned, sad, _sse2_aligned );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_sse2;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
Steven Walters's avatar
Steven Walters committed
749
#if ARCH_X86_64
750 751
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
752
        pixf->var2_8x8 = x264_pixel_var2_8x8_sse2;
753 754
    }

755
    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
756
    {
757 758 759
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
760 761 762
        INIT6( satd, _sse2 );
        INIT6( satd_x3, _sse2 );
        INIT6( satd_x4, _sse2 );
763 764 765 766
        if( !(cpu&X264_CPU_STACK_MOD4) )
        {
            INIT4( hadamard_ac, _sse2 );
        }
767
        INIT_ADS( _sse2 );
768
        pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2;
769
        pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_sse2;
770
        if( cpu&X264_CPU_CACHELINE_64 )
771
        {
772
            INIT2( ssd, _sse2); /* faster for width 16 on p4 */
Steven Walters's avatar
Steven Walters committed
773
#if ARCH_X86
774 775 776 777
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
#endif
778 779 780 781 782 783 784
           if( cpu&X264_CPU_SSE2_IS_FAST )
           {
               pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_sse2;
               pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
           }
        }

Fiona Glaser's avatar
Fiona Glaser committed
785 786 787 788 789
        if( cpu&X264_CPU_SSE_MISALIGN )
        {
            INIT2( sad_x3, _sse2_misalign );
            INIT2( sad_x4, _sse2_misalign );
        }
790
    }
791

792 793 794 795 796 797 798 799 800 801 802 803
    if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
    {
        pixf->sad_aligned[PIXEL_8x16] = x264_pixel_sad_8x16_sse2;
        pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_sse2;
        pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_sse2;
        pixf->sad_x3[PIXEL_8x8] = x264_pixel_sad_x3_8x8_sse2;
        pixf->sad_x3[PIXEL_8x4] = x264_pixel_sad_x3_8x4_sse2;
        pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_sse2;
        pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_sse2;
        pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_sse2;
    }

804
    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_64) )
805 806 807 808 809 810
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

811 812
    if( cpu&X264_CPU_SSSE3 )
    {
813 814 815 816
        if( !(cpu&X264_CPU_STACK_MOD4) )
        {
            INIT4( hadamard_ac, _ssse3 );
        }
817
        INIT_ADS( _ssse3 );
818 819 820 821 822 823 824 825 826
        if( !(cpu&X264_CPU_SLOW_ATOM) )
        {
            INIT7( ssd, _ssse3 );
            pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
            pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
            INIT7( satd, _ssse3 );
            INIT7( satd_x3, _ssse3 );
            INIT7( satd_x4, _ssse3 );
        }
827
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_ssse3;
828
        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
829
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_ssse3;
Fiona Glaser's avatar
Fiona Glaser committed
830
        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
831
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_ssse3;
Steven Walters's avatar
Steven Walters committed
832
#if ARCH_X86_64
833
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_ssse3;
Loren Merritt's avatar
Loren Merritt committed
834
#endif
835
        pixf->var2_8x8 = x264_pixel_var2_8x8_ssse3;
836
        if( cpu&X264_CPU_CACHELINE_64 )
837 838 839 840 841
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
842
        if( cpu&X264_CPU_SLOW_ATOM || !(cpu&X264_CPU_SHUFFLE_IS_FAST) )
843
        {
844
            INIT5( ssd, _sse2 ); /* on conroe, sse2 is faster for width8/16 */
845
        }
846
    }
847 848 849

    if( cpu&X264_CPU_SSE4 )
    {
850 851 852 853 854 855 856 857 858
        INIT7( satd, _sse4 );
        INIT7( satd_x3, _sse4 );
        INIT7( satd_x4, _sse4 );
        if( !(cpu&X264_CPU_STACK_MOD4) )
        {
            INIT4( hadamard_ac, _sse4 );
        }
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_sse4;
859
    }
860
#endif //HAVE_MMX
861

Steven Walters's avatar
Steven Walters committed
862
#if HAVE_ARMV6
863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902
    if( cpu&X264_CPU_ARMV6 )
    {
        pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
        pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
        pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
        pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
    }
    if( cpu&X264_CPU_NEON )
    {
        INIT5( sad, _neon );
        INIT5( sad_aligned, _neon );
        INIT7( sad_x3, _neon );
        INIT7( sad_x4, _neon );
        INIT7( ssd, _neon );
        INIT7( satd, _neon );
        INIT7( satd_x3, _neon );
        INIT7( satd_x4, _neon );
        INIT4( hadamard_ac, _neon );
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_neon;
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
        pixf->var[PIXEL_8x8]    = x264_pixel_var_8x8_neon;
        pixf->var[PIXEL_16x16]  = x264_pixel_var_16x16_neon;
        pixf->var2_8x8          = x264_pixel_var2_8x8_neon;

        pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
        pixf->ssim_end4         = x264_pixel_ssim_end4_neon;

        if( cpu&X264_CPU_FAST_NEON_MRC )
        {
            pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_neon;
            pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_neon;
            pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_aligned_4x8_neon;
            pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_aligned_4x4_neon;
        }
        else    // really just scheduled for dual issue / A8
        {
            INIT5( sad_aligned, _neon_dual );
        }
    }
#endif
Steven Walters's avatar
Steven Walters committed
903
#if HAVE_ALTIVEC
Laurent Aimar's avatar
Laurent Aimar committed
904 905 906 907 908
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
Steven Walters's avatar
Steven Walters committed
909
#if ARCH_UltraSparc
910 911 912
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
913
#endif
Loren Merritt's avatar
Loren Merritt committed
914 915 916 917 918

    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
Laurent Aimar's avatar
Laurent Aimar committed
919 920
}