/*****************************************************************************
 * pixel.c: h264 encoder
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Loren Merritt <lorenm@u.washington.edu>
 *          Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

Loren Merritt's avatar
Loren Merritt committed
24
#include "common.h"
Laurent Aimar's avatar
Laurent Aimar committed
25

Steven Walters's avatar
Steven Walters committed
26
#if HAVE_MMX
27
#   include "x86/pixel.h"
Laurent Aimar's avatar
Laurent Aimar committed
28
#endif
Steven Walters's avatar
Steven Walters committed
29
#if ARCH_PPC
Laurent Aimar's avatar
Laurent Aimar committed
30 31
#   include "ppc/pixel.h"
#endif
Steven Walters's avatar
Steven Walters committed
32
#if ARCH_ARM
33 34
#   include "arm/pixel.h"
#endif
Steven Walters's avatar
Steven Walters committed
35
#if ARCH_UltraSparc
36 37
#   include "sparc/pixel.h"
#endif
Laurent Aimar's avatar
Laurent Aimar committed
38 39 40 41 42 43


/****************************************************************************
 * pixel_sad_WxH
 ****************************************************************************/
/* Generates a static C function `name` returning the sum of absolute
 * differences between two lx-by-ly blocks.
 *   pix1/pix2:                     first pixel of each block
 *   i_stride_pix1/i_stride_pix2:   distance, in pixels, between consecutive rows
 * Every line of the macro body must end in a continuation backslash. */
#define PIXEL_SAD_C( name, lx, ly ) \
static int name( pixel *pix1, int i_stride_pix1,  \
                 pixel *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_sum = 0;                                  \
    for( int y = 0; y < ly; y++ )                   \
    {                                               \
        for( int x = 0; x < lx; x++ )               \
        {                                           \
            i_sum += abs( pix1[x] - pix2[x] );      \
        }                                           \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_sum;                                   \
}


PIXEL_SAD_C( x264_pixel_sad_16x16, 16, 16 )
PIXEL_SAD_C( x264_pixel_sad_16x8,  16,  8 )
PIXEL_SAD_C( x264_pixel_sad_8x16,   8, 16 )
PIXEL_SAD_C( x264_pixel_sad_8x8,    8,  8 )
PIXEL_SAD_C( x264_pixel_sad_8x4,    8,  4 )
PIXEL_SAD_C( x264_pixel_sad_4x8,    4,  8 )
PIXEL_SAD_C( x264_pixel_sad_4x4,    4,  4 )
Laurent Aimar's avatar
Laurent Aimar committed
68

69 70 71 72 73

/****************************************************************************
 * pixel_ssd_WxH
 ****************************************************************************/
/* Generates a static C function `name` returning the sum of squared
 * differences between two lx-by-ly blocks.
 *   pix1/pix2:                     first pixel of each block
 *   i_stride_pix1/i_stride_pix2:   distance, in pixels, between consecutive rows */
#define PIXEL_SSD_C( name, lx, ly ) \
static int name( pixel *pix1, int i_stride_pix1,  \
                 pixel *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_sum = 0;                                  \
    for( int y = 0; y < ly; y++ )                   \
    {                                               \
        for( int x = 0; x < lx; x++ )               \
        {                                           \
            int d = pix1[x] - pix2[x];              \
            i_sum += d*d;                           \
        }                                           \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_sum;                                   \
}

PIXEL_SSD_C( x264_pixel_ssd_16x16, 16, 16 )
PIXEL_SSD_C( x264_pixel_ssd_16x8,  16,  8 )
PIXEL_SSD_C( x264_pixel_ssd_8x16,   8, 16 )
PIXEL_SSD_C( x264_pixel_ssd_8x8,    8,  8 )
PIXEL_SSD_C( x264_pixel_ssd_8x4,    8,  4 )
PIXEL_SSD_C( x264_pixel_ssd_4x8,    4,  8 )
PIXEL_SSD_C( x264_pixel_ssd_4x4,    4,  4 )
98

99
/* Sum of squared differences over an i_width x i_height region.
 *   pf:              function table supplying the fixed-size ssd kernels
 *   pix1, i_pix1:    first region and its row stride (in pixels)
 *   pix2, i_pix2:    second region and its row stride (in pixels)
 * Whole 16x16, 8x16 and 8x8 tiles are delegated to pf->ssd[]; whatever those
 * tiles do not cover is accumulated one pixel at a time.
 * Returns the total as a 64-bit value. */
int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height )
{
    int64_t i_ssd = 0;
    int y;
    /* nonzero only when both pointers and both strides are multiples of 16;
     * the 16x16 kernel is used only in that case */
    int align = !(((intptr_t)pix1 | (intptr_t)pix2 | i_pix1 | i_pix2) & 15);

#define SSD(size) i_ssd += pf->ssd[size]( pix1 + y*i_pix1 + x, i_pix1, \
                                          pix2 + y*i_pix2 + x, i_pix2 );
    /* bands of 16 rows: 16-wide tiles first (if aligned), then 8-wide tiles */
    for( y = 0; y < i_height-15; y += 16 )
    {
        int x = 0;
        if( align )
            for( ; x < i_width-15; x += 16 )
                SSD(PIXEL_16x16);
        for( ; x < i_width-7; x += 8 )
            SSD(PIXEL_8x16);
    }
    /* at most one further band of 8 rows remains after the loop above */
    if( y < i_height-7 )
        for( int x = 0; x < i_width-7; x += 8 )
            SSD(PIXEL_8x8);
#undef SSD

#define SSD1 { int d = pix1[y*i_pix1+x] - pix2[y*i_pix2+x]; i_ssd += d*d; }
    /* right-hand columns beyond the last multiple of 8, for the tiled rows */
    if( i_width & 7 )
    {
        for( y = 0; y < (i_height & ~7); y++ )
            for( int x = i_width & ~7; x < i_width; x++ )
                SSD1;
    }
    /* bottom rows beyond the last multiple of 8, across the full width */
    if( i_height & 7 )
    {
        for( y = i_height & ~7; y < i_height; y++ )
            for( int x = 0; x < i_width; x++ )
                SSD1;
    }
#undef SSD1

    return i_ssd;
}

139

140 141 142
/****************************************************************************
 * pixel_var_wxh
 ****************************************************************************/
Fiona Glaser's avatar
Fiona Glaser committed
143
/* Generates a static C function `name` that scans a w-by-w block and returns
 * both accumulators packed into one 64-bit value: the sum of the pixels in
 * the low 32 bits and the sum of the squared pixels in the high 32 bits.
 *   pix:       first pixel of the block
 *   i_stride:  distance, in pixels, between consecutive rows */
#define PIXEL_VAR_C( name, w ) \
static uint64_t name( pixel *pix, int i_stride ) \
{                                             \
    uint32_t sum = 0, sqr = 0;                \
    for( int y = 0; y < w; y++ )              \
    {                                         \
        for( int x = 0; x < w; x++ )          \
        {                                     \
            sum += pix[x];                    \
            sqr += pix[x] * pix[x];           \
        }                                     \
        pix += i_stride;                      \
    }                                         \
    return sum + ((uint64_t)sqr << 32);       \
}

PIXEL_VAR_C( x264_pixel_var_16x16, 16 )
PIXEL_VAR_C( x264_pixel_var_8x8,    8 )
161

162 163 164
/****************************************************************************
 * pixel_var2_wxh
 ****************************************************************************/
165
/* Statistics of the difference between two 8x8 blocks.
 *   pix1, i_stride1:  first block and its row stride (in pixels)
 *   pix2, i_stride2:  second block and its row stride (in pixels)
 *   ssd:              receives the sum of squared differences
 * Returns ssd - (sum of differences)^2 / 64, i.e. the squared-difference
 * total with the mean-difference contribution removed. */
static int pixel_var2_8x8( pixel *pix1, int i_stride1, pixel *pix2, int i_stride2, int *ssd )
{
    /* The running sum of differences can be negative, so it is kept signed;
     * storing it in an unsigned variable and handing that to abs() relies on
     * an implementation-defined unsigned-to-int conversion. */
    int sum = 0;
    uint32_t sqr = 0;
    for( int y = 0; y < 8; y++ )
    {
        for( int x = 0; x < 8; x++ )
        {
            int diff = pix1[x] - pix2[x];
            sum += diff;
            sqr += diff * diff;
        }
        pix1 += i_stride1;
        pix2 += i_stride2;
    }
    sum = abs(sum);
    /* >> 6 divides by the 64 samples of the block */
    uint32_t var = sqr - ((uint64_t)sum * sum >> 6);
    *ssd = sqr;
    return var;
}

185

186
/* 4-point Hadamard butterfly:
 *   d0 = s0+s1+s2+s3    d1 = s0-s1+s2-s3
 *   d2 = s0+s1-s2-s3    d3 = s0-s1-s2+s3
 * The callers in this file feed it uint32_t values holding two packed 16-bit
 * lanes, so the temporaries are unsigned: the arithmetic then wraps modulo
 * 2^32 instead of overflowing a signed int. Inputs are parenthesized so that
 * expression arguments expand safely. */
#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) {\
    uint32_t t0 = (s0) + (s1);\
    uint32_t t1 = (s0) - (s1);\
    uint32_t t2 = (s2) + (s3);\
    uint32_t t3 = (s2) - (s3);\
    d0 = t0 + t2;\
    d2 = t0 - t2;\
    d1 = t1 + t3;\
    d3 = t1 - t3;\
}
196

197 198 199 200 201 202 203 204
// Two-lane absolute value.
// in:     a packed value of the form x+(y<<16)
// return: abs(x)+(abs(y)<<16)
static ALWAYS_INLINE uint32_t abs2( uint32_t a )
{
    // bits 15 and 31 of the input, moved down to bits 0 and 16
    const uint32_t sign_bits = (a>>15)&0x10001;
    // 0xffff in every 16-bit lane whose top bit was set, 0 elsewhere
    const uint32_t lane_mask = sign_bits*0xffff;
    return (a+lane_mask)^lane_mask;
}

205 206 207
/****************************************************************************
 * pixel_satd_WxH: sum of 4x4 Hadamard transformed differences
 ****************************************************************************/
Laurent Aimar's avatar
Laurent Aimar committed
208

209
/* SATD of one 4x4 block: the pixel differences are Hadamard-transformed
 * horizontally and vertically, and the absolute values of the results are
 * summed. Two 16-bit values are carried per uint32_t so that each operation
 * handles two coefficients at once.
 *   pix1, i_pix1:  first block and its row stride (in pixels)
 *   pix2, i_pix2:  second block and its row stride (in pixels)
 * Returns the summed absolute transformed differences, halved. */
static NOINLINE int x264_pixel_satd_4x4( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    uint32_t tmp[4][2];
    uint32_t a0, a1, a2, a3, b0, b1;
    int sum = 0;
    /* horizontal pass: one row of four differences per iteration */
    for( int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2 )
    {
        a0 = pix1[0] - pix2[0];
        a1 = pix1[1] - pix2[1];
        /* low lane holds the sum, high lane the difference */
        b0 = (a0+a1) + ((a0-a1)<<16);
        a2 = pix1[2] - pix2[2];
        a3 = pix1[3] - pix2[3];
        b1 = (a2+a3) + ((a2-a3)<<16);
        tmp[i][0] = b0 + b1;
        tmp[i][1] = b0 - b1;
    }
    /* vertical pass over the two packed columns, then fold both lanes */
    for( int i = 0; i < 2; i++ )
    {
        HADAMARD4( a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i] );
        a0 = abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
        sum += ((uint16_t)a0) + (a0>>16);
    }
    return sum >> 1;
}

234
/* SATD of an 8-wide, 4-high block, computed as two side-by-side 4x4
 * transforms: the low 16-bit lane of every packed value carries columns 0-3
 * and the high lane carries columns 4-7.
 *   pix1, i_pix1:  first block and its row stride (in pixels)
 *   pix2, i_pix2:  second block and its row stride (in pixels)
 * Returns the summed absolute transformed differences of both halves, halved.
 * NOTE(review): the 16-bit lanes presumably assume 8-bit samples - confirm
 * before relying on this with deeper pixels. */
static NOINLINE int x264_pixel_satd_8x4( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    uint32_t tmp[4][4];
    uint32_t a0, a1, a2, a3;
    /* packed two-lane accumulator, so it is unsigned like the values added to it */
    uint32_t sum = 0;
    for( int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2 )
    {
        /* The difference may be negative; convert to unsigned before shifting,
         * since left-shifting a negative int is undefined behaviour. */
        a0 = (pix1[0] - pix2[0]) + ((uint32_t)(pix1[4] - pix2[4]) << 16);
        a1 = (pix1[1] - pix2[1]) + ((uint32_t)(pix1[5] - pix2[5]) << 16);
        a2 = (pix1[2] - pix2[2]) + ((uint32_t)(pix1[6] - pix2[6]) << 16);
        a3 = (pix1[3] - pix2[3]) + ((uint32_t)(pix1[7] - pix2[7]) << 16);
        HADAMARD4( tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0,a1,a2,a3 );
    }
    for( int i = 0; i < 4; i++ )
    {
        HADAMARD4( a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i] );
        sum += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
    }
    /* add the two lanes together, then halve */
    return (((uint16_t)sum) + (sum>>16)) >> 1;
}
Laurent Aimar's avatar
Laurent Aimar committed
254

255
#define PIXEL_SATD_C( w, h, sub )\
256
static int x264_pixel_satd_##w##x##h( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )\
257 258 259 260 261 262 263 264 265 266 267 268 269
{\
    int sum = sub( pix1, i_pix1, pix2, i_pix2 )\
            + sub( pix1+4*i_pix1, i_pix1, pix2+4*i_pix2, i_pix2 );\
    if( w==16 )\
        sum+= sub( pix1+8, i_pix1, pix2+8, i_pix2 )\
            + sub( pix1+8+4*i_pix1, i_pix1, pix2+8+4*i_pix2, i_pix2 );\
    if( h==16 )\
        sum+= sub( pix1+8*i_pix1, i_pix1, pix2+8*i_pix2, i_pix2 )\
            + sub( pix1+12*i_pix1, i_pix1, pix2+12*i_pix2, i_pix2 );\
    if( w==16 && h==16 )\
        sum+= sub( pix1+8+8*i_pix1, i_pix1, pix2+8+8*i_pix2, i_pix2 )\
            + sub( pix1+8+12*i_pix1, i_pix1, pix2+8+12*i_pix2, i_pix2 );\
    return sum;\
270
}
271 272 273 274 275
PIXEL_SATD_C( 16, 16, x264_pixel_satd_8x4 )
PIXEL_SATD_C( 16, 8,  x264_pixel_satd_8x4 )
PIXEL_SATD_C( 8,  16, x264_pixel_satd_8x4 )
PIXEL_SATD_C( 8,  8,  x264_pixel_satd_8x4 )
PIXEL_SATD_C( 4,  8,  x264_pixel_satd_4x4 )
276 277


278
/* Unnormalized sum of absolute 8x8 Hadamard-transformed differences.
 * Two 16-bit values are carried per uint32_t throughout.
 *   pix1, i_pix1:  first block and its row stride (in pixels)
 *   pix2, i_pix2:  second block and its row stride (in pixels)
 * Returns the raw sum; the public wrappers apply the final rounding. */
static NOINLINE int sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    uint32_t tmp[8][4];
    uint32_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
    int sum = 0;
    /* horizontal pass: eight differences per row, packed pairwise as
     * (sum in the low lane, difference in the high lane) */
    for( int i = 0; i < 8; i++, pix1 += i_pix1, pix2 += i_pix2 )
    {
        a0 = pix1[0] - pix2[0];
        a1 = pix1[1] - pix2[1];
        b0 = (a0+a1) + ((a0-a1)<<16);
        a2 = pix1[2] - pix2[2];
        a3 = pix1[3] - pix2[3];
        b1 = (a2+a3) + ((a2-a3)<<16);
        a4 = pix1[4] - pix2[4];
        a5 = pix1[5] - pix2[5];
        b2 = (a4+a5) + ((a4-a5)<<16);
        a6 = pix1[6] - pix2[6];
        a7 = pix1[7] - pix2[7];
        b3 = (a6+a7) + ((a6-a7)<<16);
        HADAMARD4( tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], b0,b1,b2,b3 );
    }
    /* vertical pass: rows 0-3 and rows 4-7 are transformed separately, then
     * combined with one more add/subtract stage */
    for( int i = 0; i < 4; i++ )
    {
        HADAMARD4( a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i] );
        HADAMARD4( a4, a5, a6, a7, tmp[4][i], tmp[5][i], tmp[6][i], tmp[7][i] );
        b0  = abs2(a0+a4) + abs2(a0-a4);
        b0 += abs2(a1+a5) + abs2(a1-a5);
        b0 += abs2(a2+a6) + abs2(a2-a6);
        b0 += abs2(a3+a7) + abs2(a3-a7);
        sum += (uint16_t)b0 + (b0>>16);
    }
    return sum;
}
311

312
/* 8x8 SA8D: the raw sa8d_8x8() sum divided by 4 with rounding. */
static int x264_pixel_sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    int sum = sa8d_8x8( pix1, i_pix1, pix2, i_pix2 );
    return (sum+2)>>2;
}

318
/* 16x16 SA8D: the raw sa8d_8x8() sums of the four 8x8 quadrants are added
 * first, and the total is then divided by 4 with rounding. */
static int x264_pixel_sa8d_16x16( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    int sum = sa8d_8x8( pix1, i_pix1, pix2, i_pix2 )
            + sa8d_8x8( pix1+8, i_pix1, pix2+8, i_pix2 )
            + sa8d_8x8( pix1+8*i_pix1, i_pix1, pix2+8*i_pix2, i_pix2 )
            + sa8d_8x8( pix1+8+8*i_pix1, i_pix1, pix2+8+8*i_pix2, i_pix2 );
    return (sum+2)>>2;
}

Loren Merritt's avatar
Loren Merritt committed
327

328
/* Hadamard energy of one 8x8 block of source pixels with the DC term removed.
 *   pix:     first pixel of the block
 *   stride:  distance, in pixels, between consecutive rows
 * Returns two unnormalized sums packed into 64 bits: the sum from the four
 * 4x4 transforms in the low 32 bits and the sum from the 8x8 transform in the
 * high 32 bits. Two 16-bit values are carried per uint32_t throughout. */
static NOINLINE uint64_t pixel_hadamard_ac( pixel *pix, int stride )
{
    uint32_t tmp[32];
    uint32_t a0, a1, a2, a3, dc;
    /* packed two-lane accumulators, so they are unsigned like the values
     * added to them */
    uint32_t sum4 = 0, sum8 = 0;
    for( int i = 0; i < 8; i++, pix+=stride )
    {
        /* rows 0-3 fill the first half of tmp, rows 4-7 the second half */
        uint32_t *t = tmp + (i&3) + (i&4)*4;
        /* The pixel difference may be negative; convert to unsigned before
         * shifting, since left-shifting a negative int is undefined. */
        a0 = (pix[0]+pix[1]) + ((uint32_t)(pix[0]-pix[1])<<16);
        a1 = (pix[2]+pix[3]) + ((uint32_t)(pix[2]-pix[3])<<16);
        t[0] = a0 + a1;
        t[4] = a0 - a1;
        a2 = (pix[4]+pix[5]) + ((uint32_t)(pix[4]-pix[5])<<16);
        a3 = (pix[6]+pix[7]) + ((uint32_t)(pix[6]-pix[7])<<16);
        t[8] = a2 + a3;
        t[12] = a2 - a3;
    }
    /* second stage over each group of four; results are stored back for the
     * third stage and accumulated into the 4x4 sum */
    for( int i = 0; i < 8; i++ )
    {
        HADAMARD4( a0, a1, a2, a3, tmp[i*4+0], tmp[i*4+1], tmp[i*4+2], tmp[i*4+3] );
        tmp[i*4+0] = a0;
        tmp[i*4+1] = a1;
        tmp[i*4+2] = a2;
        tmp[i*4+3] = a3;
        sum4 += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
    }
    /* third stage combines the four quarters into the 8x8 sum */
    for( int i = 0; i < 8; i++ )
    {
        HADAMARD4( a0,a1,a2,a3, tmp[i], tmp[8+i], tmp[16+i], tmp[24+i] );
        sum8 += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
    }
    /* fold the two lanes of each sum together and subtract the DC term */
    dc = (uint16_t)(tmp[0] + tmp[8] + tmp[16] + tmp[24]);
    sum4 = (uint16_t)sum4 + (sum4>>16) - dc;
    sum8 = (uint16_t)sum8 + (sum8>>16) - dc;
    return ((uint64_t)sum8<<32) + sum4;
}

#define HADAMARD_AC(w,h) \
366
static uint64_t x264_pixel_hadamard_ac_##w##x##h( pixel *pix, int stride )\
Loren Merritt's avatar
Loren Merritt committed
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
{\
    uint64_t sum = pixel_hadamard_ac( pix, stride );\
    if( w==16 )\
        sum += pixel_hadamard_ac( pix+8, stride );\
    if( h==16 )\
        sum += pixel_hadamard_ac( pix+8*stride, stride );\
    if( w==16 && h==16 )\
        sum += pixel_hadamard_ac( pix+8*stride+8, stride );\
    return ((sum>>34)<<32) + ((uint32_t)sum>>1);\
}
HADAMARD_AC( 16, 16 )
HADAMARD_AC( 16, 8 )
HADAMARD_AC( 8, 16 )
HADAMARD_AC( 8, 8 )


Loren Merritt's avatar
Loren Merritt committed
383 384 385
/****************************************************************************
 * pixel_sad_x4
 ****************************************************************************/
386
#define SAD_X( size ) \
387
static void x264_pixel_sad_x3_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, int i_stride, int scores[3] )\
388 389 390 391 392
{\
    scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
    scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
    scores[2] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix2, i_stride );\
}\
393
static void x264_pixel_sad_x4_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3, int i_stride, int scores[4] )\
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
{\
    scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
    scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
    scores[2] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix2, i_stride );\
    scores[3] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix3, i_stride );\
}

SAD_X( 16x16 )
SAD_X( 16x8 )
SAD_X( 8x16 )
SAD_X( 8x8 )
SAD_X( 8x4 )
SAD_X( 4x8 )
SAD_X( 4x4 )

409
#if !X264_HIGH_BIT_DEPTH
Steven Walters's avatar
Steven Walters committed
410
#if ARCH_UltraSparc
411 412 413 414 415
SAD_X( 16x16_vis )
SAD_X( 16x8_vis )
SAD_X( 8x16_vis )
SAD_X( 8x8_vis )
#endif
416
#endif // !X264_HIGH_BIT_DEPTH
417

418 419 420 421 422 423
/****************************************************************************
 * pixel_satd_x4
 * no faster than single satd, but needed for satd to be a drop-in replacement for sad
 ****************************************************************************/

#define SATD_X( size, cpu ) \
424
static void x264_pixel_satd_x3_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, int i_stride, int scores[3] )\
425 426 427 428 429
{\
    scores[0] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix0, i_stride );\
    scores[1] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix1, i_stride );\
    scores[2] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix2, i_stride );\
}\
430
static void x264_pixel_satd_x4_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3, int i_stride, int scores[4] )\
431 432 433 434 435 436
{\
    scores[0] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix0, i_stride );\
    scores[1] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix1, i_stride );\
    scores[2] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix2, i_stride );\
    scores[3] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix3, i_stride );\
}
437
#define SATD_X_DECL6( cpu )\
438 439 440 441
SATD_X( 16x16, cpu )\
SATD_X( 16x8, cpu )\
SATD_X( 8x16, cpu )\
SATD_X( 8x8, cpu )\
442 443
SATD_X( 8x4, cpu )\
SATD_X( 4x8, cpu )
444
#define SATD_X_DECL7( cpu )\
445
SATD_X_DECL6( cpu )\
446 447 448
SATD_X( 4x4, cpu )

SATD_X_DECL7()
449
#if !X264_HIGH_BIT_DEPTH
Steven Walters's avatar
Steven Walters committed
450
#if HAVE_MMX
451
SATD_X_DECL7( _mmxext )
452
SATD_X_DECL6( _sse2 )
Fiona Glaser's avatar
Fiona Glaser committed
453
SATD_X_DECL7( _ssse3 )
454
SATD_X_DECL7( _sse4 )
455 456
#endif

Steven Walters's avatar
Steven Walters committed
457
#if HAVE_ARMV6
458 459
SATD_X_DECL7( _neon )
#endif
460
#endif // !X264_HIGH_BIT_DEPTH
461

462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
/* Generates x264_intra_<metric>_x3_8x8: builds the V, H and DC 8x8
 * predictions from `edge` in a local buffer, one after another, and scores
 * each against `fenc` with x264_pixel_<metric>_8x8. The scores are written
 * to res[0], res[1] and res[2] in that order. */
#define INTRA_MBCMP_8x8( metric )\
void x264_intra_##metric##_x3_8x8( pixel *fenc, pixel edge[33], int res[3] )\
{\
    pixel pred[8*FDEC_STRIDE];\
    \
    x264_predict_8x8_v_c( pred, edge );\
    res[0] = x264_pixel_##metric##_8x8( pred, FDEC_STRIDE, fenc, FENC_STRIDE );\
    \
    x264_predict_8x8_h_c( pred, edge );\
    res[1] = x264_pixel_##metric##_8x8( pred, FDEC_STRIDE, fenc, FENC_STRIDE );\
    \
    x264_predict_8x8_dc_c( pred, edge );\
    res[2] = x264_pixel_##metric##_8x8( pred, FDEC_STRIDE, fenc, FENC_STRIDE );\
}

INTRA_MBCMP_8x8(sad)
INTRA_MBCMP_8x8(sa8d)

/* Generates x264_intra_<metric>_x3_<dim>x<dim><suffix>: writes three
 * predictions (mode0, mode1, mode2) into `fdec` one after another and scores
 * each against `fenc` with x264_pixel_<metric>_<dim>x<dim>. The scores are
 * written to res[0], res[1] and res[2] in that order. */
#define INTRA_MBCMP( metric, dim, mode0, mode1, mode2, suffix )\
void x264_intra_##metric##_x3_##dim##x##dim##suffix( pixel *fenc, pixel *fdec, int res[3] )\
{\
    x264_predict_##dim##x##dim##suffix##_##mode0##_c( fdec );\
    res[0] = x264_pixel_##metric##_##dim##x##dim( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
    \
    x264_predict_##dim##x##dim##suffix##_##mode1##_c( fdec );\
    res[1] = x264_pixel_##metric##_##dim##x##dim( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
    \
    x264_predict_##dim##x##dim##suffix##_##mode2##_c( fdec );\
    res[2] = x264_pixel_##metric##_##dim##x##dim( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
}

INTRA_MBCMP(sad, 4, v, h, dc, )
INTRA_MBCMP(satd, 4, v, h, dc, )
INTRA_MBCMP(sad, 8, dc, h, v, c )
INTRA_MBCMP(satd, 8, dc, h, v, c )
INTRA_MBCMP(sad, 16, v, h, dc, )
INTRA_MBCMP(satd, 16, v, h, dc, )

Loren Merritt's avatar
Loren Merritt committed
495 496 497
/****************************************************************************
 * structural similarity metric
 ****************************************************************************/
498 499
/* Gathers the SSIM partial sums of two horizontally adjacent 4x4 blocks.
 *   pix1, stride1:  first image and its row stride (in pixels)
 *   pix2, stride2:  second image and its row stride (in pixels)
 *   sums:           for block z, sums[z] receives
 *                     [0] sum of pix1 samples
 *                     [1] sum of pix2 samples
 *                     [2] sum of squares of both
 *                     [3] sum of pix1*pix2 products */
static void ssim_4x4x2_core( const pixel *pix1, int stride1,
                             const pixel *pix2, int stride2,
                             int sums[2][4])
{
    for( int z = 0; z < 2; z++ )
    {
        uint32_t s1 = 0, s2 = 0, ss = 0, s12 = 0;
        for( int y = 0; y < 4; y++ )
            for( int x = 0; x < 4; x++ )
            {
                int a = pix1[x+y*stride1];
                int b = pix2[x+y*stride2];
                s1  += a;
                s2  += b;
                ss  += a*a;
                ss  += b*b;
                s12 += a*b;
            }
        sums[z][0] = s1;
        sums[z][1] = s2;
        sums[z][2] = ss;
        sums[z][3] = s12;
        /* step to the next 4x4 block on the right */
        pix1 += 4;
        pix2 += 4;
    }
}

/* Evaluates the SSIM expression for one window from its partial sums.
 *   s1, s2:  sums of the samples of the first and second image
 *   ss:      sum of the squared samples of both images
 *   s12:     sum of the sample products
 * The factors of 64 correspond to the 64 samples that ssim_end4() combines
 * into one window (four 4x4 blocks).
 * Returns the SSIM value of the window as a float. */
static float ssim_end1( int s1, int s2, int ss, int s12 )
{
    /* stabilizing constants: (0.01*PIXEL_MAX)^2 and (0.03*PIXEL_MAX)^2,
     * pre-scaled by 64 and 64*63 and rounded to the nearest integer */
    static const int ssim_c1 = (int)(.01*.01*PIXEL_MAX*PIXEL_MAX*64 + .5);
    static const int ssim_c2 = (int)(.03*.03*PIXEL_MAX*PIXEL_MAX*64*63 + .5);
    /* Widen before multiplying. With 64 samples per window s1 can reach
     * 64*PIXEL_MAX, so s1*s1 and ss*64 no longer fit in a 32-bit int once
     * PIXEL_MAX is about 1023; 64-bit intermediates avoid signed overflow and
     * give the same values wherever the int arithmetic was in range. */
    int64_t w1 = s1;
    int64_t w2 = s2;
    int64_t vars = (int64_t)ss*64 - w1*w1 - w2*w2;
    int64_t covar = (int64_t)s12*64 - w1*w2;
    return (float)(2*w1*w2 + ssim_c1) * (float)(2*covar + ssim_c2)
         / ((float)(w1*w1 + w2*w2 + ssim_c1) * (float)(vars + ssim_c2));
}

/* Adds up the SSIM of `width` horizontally overlapping windows.
 * Window i combines entries i and i+1 of both sum rows, so entries
 * 0..width of sum0 and sum1 are read.
 *   sum0, sum1:  two rows of 4x4 partial sums as laid out by ssim_4x4x2_core()
 *   width:       number of windows to evaluate
 * Returns the sum (not the mean) of the per-window SSIM values. */
static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
{
    float ssim = 0.0;
    for( int i = 0; i < width; i++ )
        ssim += ssim_end1( sum0[i][0] + sum0[i+1][0] + sum1[i][0] + sum1[i+1][0],
                           sum0[i][1] + sum0[i+1][1] + sum1[i][1] + sum1[i+1][1],
                           sum0[i][2] + sum0[i+1][2] + sum1[i][2] + sum1[i+1][2],
                           sum0[i][3] + sum0[i+1][3] + sum1[i][3] + sum1[i+1][3] );
    return ssim;
}

/* Accumulates SSIM over a width x height region.
 *   pf:             function table supplying ssim_4x4x2_core and ssim_end4
 *   pix1, stride1:  first image and its row stride (in pixels)
 *   pix2, stride2:  second image and its row stride (in pixels)
 *   buf:            scratch memory holding two rows of int[4] partial sums;
 *                   the second row starts (width>>2)+3 entries after the first
 * Returns the sum of the per-window SSIM values; no averaging is done here. */
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
                           pixel *pix1, int stride1,
                           pixel *pix2, int stride2,
                           int width, int height, void *buf )
{
    int z = 0;
    float ssim = 0.0;
    int (*sum0)[4] = buf;
    int (*sum1)[4] = sum0 + (width >> 2) + 3;
    /* from here on width and height count 4x4 blocks */
    width >>= 2;
    height >>= 2;
    for( int y = 1; y < height; y++ )
    {
        /* compute block rows up to and including y; the swap makes sum0 the
         * newest row and sum1 the row before it */
        for( ; z <= y; z++ )
        {
            XCHG( void*, sum0, sum1 );
            for( int x = 0; x < width; x+=2 )
                pf->ssim_4x4x2_core( &pix1[4*(x+z*stride1)], stride1, &pix2[4*(x+z*stride2)], stride2, &sum0[x] );
        }
        /* evaluate the windows of this row pair, at most four per call */
        for( int x = 0; x < width-1; x += 4 )
            ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) );
    }
    return ssim;
}


Loren Merritt's avatar
Loren Merritt committed
572 573 574
/****************************************************************************
 * successive elimination
 ****************************************************************************/
575 576
/* Successive-elimination filter using four sums per candidate.
 * For each i in [0,width) the score is
 *   |enc_dc[0]-sums[i]| + |enc_dc[1]-sums[i+8]|
 *   + |enc_dc[2]-sums[i+delta]| + |enc_dc[3]-sums[i+delta+8]| + cost_mvx[i]
 * and i is appended to mvs when the score is below thresh.
 * Returns the number of indices written to mvs. */
static int x264_pixel_ads4( int enc_dc[4], uint16_t *sums, int delta,
                            uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
{
    int nmv = 0;
    for( int i = 0; i < width; i++, sums++ )
    {
        int ads = abs( enc_dc[0] - sums[0] )
                + abs( enc_dc[1] - sums[8] )
                + abs( enc_dc[2] - sums[delta] )
                + abs( enc_dc[3] - sums[delta+8] )
                + cost_mvx[i];
        if( ads < thresh )
            mvs[nmv++] = i;
    }
    return nmv;
}

592 593
/* Successive-elimination filter using two sums per candidate.
 * For each i in [0,width) the score is
 *   |enc_dc[0]-sums[i]| + |enc_dc[1]-sums[i+delta]| + cost_mvx[i]
 * and i is appended to mvs when the score is below thresh.
 * Returns the number of indices written to mvs. */
static int x264_pixel_ads2( int enc_dc[2], uint16_t *sums, int delta,
                            uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
{
    int nmv = 0;
    for( int i = 0; i < width; i++, sums++ )
    {
        int ads = abs( enc_dc[0] - sums[0] )
                + abs( enc_dc[1] - sums[delta] )
                + cost_mvx[i];
        if( ads < thresh )
            mvs[nmv++] = i;
    }
    return nmv;
}

607 608
/* Successive-elimination filter using one sum per candidate.
 * For each i in [0,width) the score is |enc_dc[0]-sums[i]| + cost_mvx[i]
 * and i is appended to mvs when the score is below thresh.
 * `delta` is not read; it is part of the signature shared with the ads2 and
 * ads4 variants.
 * Returns the number of indices written to mvs. */
static int x264_pixel_ads1( int enc_dc[1], uint16_t *sums, int delta,
                            uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
{
    int nmv = 0;
    for( int i = 0; i<width; i++, sums++ )
    {
        int ads = abs( enc_dc[0] - sums[0] )
                + cost_mvx[i];
        if( ads < thresh )
            mvs[nmv++] = i;
    }
    return nmv;
}


Laurent Aimar's avatar
Laurent Aimar committed
622 623 624 625 626
/****************************************************************************
 * x264_pixel_init:
 ****************************************************************************/
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
627 628
    memset( pixf, 0, sizeof(*pixf) );

629 630 631 632 633 634 635 636 637 638
#define INIT2_NAME( name1, name2, cpu ) \
    pixf->name1[PIXEL_16x16] = x264_pixel_##name2##_16x16##cpu;\
    pixf->name1[PIXEL_16x8]  = x264_pixel_##name2##_16x8##cpu;
#define INIT4_NAME( name1, name2, cpu ) \
    INIT2_NAME( name1, name2, cpu ) \
    pixf->name1[PIXEL_8x16]  = x264_pixel_##name2##_8x16##cpu;\
    pixf->name1[PIXEL_8x8]   = x264_pixel_##name2##_8x8##cpu;
#define INIT5_NAME( name1, name2, cpu ) \
    INIT4_NAME( name1, name2, cpu ) \
    pixf->name1[PIXEL_8x4]   = x264_pixel_##name2##_8x4##cpu;
639
#define INIT6_NAME( name1, name2, cpu ) \
640
    INIT5_NAME( name1, name2, cpu ) \
641 642 643
    pixf->name1[PIXEL_4x8]   = x264_pixel_##name2##_4x8##cpu;
#define INIT7_NAME( name1, name2, cpu ) \
    INIT6_NAME( name1, name2, cpu ) \
644 645 646 647
    pixf->name1[PIXEL_4x4]   = x264_pixel_##name2##_4x4##cpu;
#define INIT2( name, cpu ) INIT2_NAME( name, name, cpu )
#define INIT4( name, cpu ) INIT4_NAME( name, name, cpu )
#define INIT5( name, cpu ) INIT5_NAME( name, name, cpu )
648
#define INIT6( name, cpu ) INIT6_NAME( name, name, cpu )
649
#define INIT7( name, cpu ) INIT7_NAME( name, name, cpu )
650

651 652 653 654 655
#define INIT_ADS( cpu ) \
    pixf->ads[PIXEL_16x16] = x264_pixel_ads4##cpu;\
    pixf->ads[PIXEL_16x8] = x264_pixel_ads2##cpu;\
    pixf->ads[PIXEL_8x8] = x264_pixel_ads1##cpu;

656
    INIT7( sad, );
657
    INIT7_NAME( sad_aligned, sad, );
658 659 660 661
    INIT7( sad_x3, );
    INIT7( sad_x4, );
    INIT7( ssd, );
    INIT7( satd, );
662 663
    INIT7( satd_x3, );
    INIT7( satd_x4, );
Loren Merritt's avatar
Loren Merritt committed
664
    INIT4( hadamard_ac, );
665
    INIT_ADS( );
666

667 668
    pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16;
    pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8;
669 670 671
    pixf->var[PIXEL_16x16] = x264_pixel_var_16x16;
    pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8;

672 673
    pixf->ssim_4x4x2_core = ssim_4x4x2_core;
    pixf->ssim_end4 = ssim_end4;
674
    pixf->var2_8x8 = pixel_var2_8x8;
675

676 677 678 679 680 681 682 683 684
    pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4;
    pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4;
    pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8;
    pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8;
    pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c;
    pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c;
    pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16;
    pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;

685
#if !X264_HIGH_BIT_DEPTH
Steven Walters's avatar
Steven Walters committed
686
#if HAVE_MMX
687 688
    if( cpu&X264_CPU_MMX )
    {
689
        INIT7( ssd, _mmx );
690 691
    }

Laurent Aimar's avatar
Laurent Aimar committed
692 693
    if( cpu&X264_CPU_MMXEXT )
    {
694
        INIT7( sad, _mmxext );
695
        INIT7_NAME( sad_aligned, sad, _mmxext );
696 697 698
        INIT7( sad_x3, _mmxext );
        INIT7( sad_x4, _mmxext );
        INIT7( satd, _mmxext );
699 700
        INIT7( satd_x3, _mmxext );
        INIT7( satd_x4, _mmxext );
Loren Merritt's avatar
Loren Merritt committed
701
        INIT4( hadamard_ac, _mmxext );
702
        INIT_ADS( _mmxext );
703 704
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmxext;
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_mmxext;
Steven Walters's avatar
Steven Walters committed
705
#if ARCH_X86
706 707
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
708
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
709
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_mmxext;
710
        pixf->var2_8x8 = x264_pixel_var2_8x8_mmxext;
711

712
        if( cpu&X264_CPU_CACHELINE_32 )
713
        {
714 715 716 717 718 719 720 721 722
            INIT5( sad, _cache32_mmxext );
            INIT4( sad_x3, _cache32_mmxext );
            INIT4( sad_x4, _cache32_mmxext );
        }
        else if( cpu&X264_CPU_CACHELINE_64 )
        {
            INIT5( sad, _cache64_mmxext );
            INIT4( sad_x3, _cache64_mmxext );
            INIT4( sad_x4, _cache64_mmxext );
723 724
        }
#else
725
        if( cpu&X264_CPU_CACHELINE_64 )
726 727 728 729 730 731 732 733 734
        {
            pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_cache64_mmxext;
            pixf->sad[PIXEL_8x8]  = x264_pixel_sad_8x8_cache64_mmxext;
            pixf->sad[PIXEL_8x4]  = x264_pixel_sad_8x4_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
            pixf->sad_x3[PIXEL_8x8]  = x264_pixel_sad_x3_8x8_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
            pixf->sad_x4[PIXEL_8x8]  = x264_pixel_sad_x4_8x8_cache64_mmxext;
        }
735
#endif
736
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
Fiona Glaser's avatar
Fiona Glaser committed
737
        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmxext;
738
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
Fiona Glaser's avatar
Fiona Glaser committed
739
        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmxext;
Fiona Glaser's avatar
Fiona Glaser committed
740
        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmxext;
741
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
Fiona Glaser's avatar
Fiona Glaser committed
742
        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_mmxext;
Laurent Aimar's avatar
Laurent Aimar committed
743
    }
744

745 746 747 748 749 750 751 752 753
    if( cpu&X264_CPU_SSE2 )
    {
        INIT5( ssd, _sse2slow );
        INIT2_NAME( sad_aligned, sad, _sse2_aligned );
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_sse2;
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
Steven Walters's avatar
Steven Walters committed
754
#if ARCH_X86_64
755 756
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
757
        pixf->var2_8x8 = x264_pixel_var2_8x8_sse2;
758 759
    }

760
    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
761
    {
762 763 764
        INIT2( sad, _sse2 );
        INIT2( sad_x3, _sse2 );
        INIT2( sad_x4, _sse2 );
765 766 767
        INIT6( satd, _sse2 );
        INIT6( satd_x3, _sse2 );
        INIT6( satd_x4, _sse2 );
768 769 770 771
        if( !(cpu&X264_CPU_STACK_MOD4) )
        {
            INIT4( hadamard_ac, _sse2 );
        }
772
        INIT_ADS( _sse2 );
773
        pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2;
774
        pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_sse2;
775
        if( cpu&X264_CPU_CACHELINE_64 )
776
        {
777
            INIT2( ssd, _sse2); /* faster for width 16 on p4 */
Steven Walters's avatar
Steven Walters committed
778
#if ARCH_X86
779 780 781 782
            INIT2( sad, _cache64_sse2 );
            INIT2( sad_x3, _cache64_sse2 );
            INIT2( sad_x4, _cache64_sse2 );
#endif
783 784 785 786 787 788 789
           if( cpu&X264_CPU_SSE2_IS_FAST )
           {
               pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_sse2;
               pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
           }
        }

Fiona Glaser's avatar
Fiona Glaser committed
790 791 792 793 794
        if( cpu&X264_CPU_SSE_MISALIGN )
        {
            INIT2( sad_x3, _sse2_misalign );
            INIT2( sad_x4, _sse2_misalign );
        }
795
    }
796

797 798 799 800 801 802 803 804 805 806 807 808
    if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
    {
        pixf->sad_aligned[PIXEL_8x16] = x264_pixel_sad_8x16_sse2;
        pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_sse2;
        pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_sse2;
        pixf->sad_x3[PIXEL_8x8] = x264_pixel_sad_x3_8x8_sse2;
        pixf->sad_x3[PIXEL_8x4] = x264_pixel_sad_x3_8x4_sse2;
        pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_sse2;
        pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_sse2;
        pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_sse2;
    }

809
    if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_64) )
810 811 812 813 814 815
    {
        INIT2( sad, _sse3 );
        INIT2( sad_x3, _sse3 );
        INIT2( sad_x4, _sse3 );
    }

816 817
    if( cpu&X264_CPU_SSSE3 )
    {
818 819 820 821
        if( !(cpu&X264_CPU_STACK_MOD4) )
        {
            INIT4( hadamard_ac, _ssse3 );
        }
822
        INIT_ADS( _ssse3 );
823 824 825 826 827 828 829 830 831
        if( !(cpu&X264_CPU_SLOW_ATOM) )
        {
            INIT7( ssd, _ssse3 );
            pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
            pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
            INIT7( satd, _ssse3 );
            INIT7( satd_x3, _ssse3 );
            INIT7( satd_x4, _ssse3 );
        }
832
        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_ssse3;
833
        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
834
        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_ssse3;
Fiona Glaser's avatar
Fiona Glaser committed
835
        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
836
        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_ssse3;
Steven Walters's avatar
Steven Walters committed
837
#if ARCH_X86_64
838
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_ssse3;
Loren Merritt's avatar
Loren Merritt committed
839
#endif
840
        pixf->var2_8x8 = x264_pixel_var2_8x8_ssse3;
841
        if( cpu&X264_CPU_CACHELINE_64 )
842 843 844 845 846
        {
            INIT2( sad, _cache64_ssse3 );
            INIT2( sad_x3, _cache64_ssse3 );
            INIT2( sad_x4, _cache64_ssse3 );
        }
847
        if( cpu&X264_CPU_SLOW_ATOM || !(cpu&X264_CPU_SHUFFLE_IS_FAST) )
848
        {
849
            INIT5( ssd, _sse2 ); /* on conroe, sse2 is faster for width8/16 */
850
        }
851
    }
852 853 854

    if( cpu&X264_CPU_SSE4 )
    {
855 856 857 858 859 860 861 862 863
        INIT7( satd, _sse4 );
        INIT7( satd_x3, _sse4 );
        INIT7( satd_x4, _sse4 );
        if( !(cpu&X264_CPU_STACK_MOD4) )
        {
            INIT4( hadamard_ac, _sse4 );
        }
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_sse4;
864 865 866
        pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse4;
        /* Slower on Conroe, so only enable under SSE4 */
        pixf->intra_sad_x3_8x8  = x264_intra_sad_x3_8x8_ssse3;
867
    }
868
#endif //HAVE_MMX
869

Steven Walters's avatar
Steven Walters committed
870
#if HAVE_ARMV6
871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910
    if( cpu&X264_CPU_ARMV6 )
    {
        pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
        pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
        pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
        pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
    }
    if( cpu&X264_CPU_NEON )
    {
        INIT5( sad, _neon );
        INIT5( sad_aligned, _neon );
        INIT7( sad_x3, _neon );
        INIT7( sad_x4, _neon );
        INIT7( ssd, _neon );
        INIT7( satd, _neon );
        INIT7( satd_x3, _neon );
        INIT7( satd_x4, _neon );
        INIT4( hadamard_ac, _neon );
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_neon;
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
        pixf->var[PIXEL_8x8]    = x264_pixel_var_8x8_neon;
        pixf->var[PIXEL_16x16]  = x264_pixel_var_16x16_neon;
        pixf->var2_8x8          = x264_pixel_var2_8x8_neon;

        pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
        pixf->ssim_end4         = x264_pixel_ssim_end4_neon;

        if( cpu&X264_CPU_FAST_NEON_MRC )
        {
            pixf->sad[PIXEL_4x8] = x264_pixel_sad_4x8_neon;
            pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_neon;
            pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_aligned_4x8_neon;
            pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_aligned_4x4_neon;
        }
        else    // really just scheduled for dual issue / A8
        {
            INIT5( sad_aligned, _neon_dual );
        }
    }
#endif
911
#endif // !X264_HIGH_BIT_DEPTH
Steven Walters's avatar
Steven Walters committed
912
#if HAVE_ALTIVEC
Laurent Aimar's avatar
Laurent Aimar committed
913 914 915 916 917
    if( cpu&X264_CPU_ALTIVEC )
    {
        x264_pixel_altivec_init( pixf );
    }
#endif
918
#if !X264_HIGH_BIT_DEPTH
Steven Walters's avatar
Steven Walters committed
919
#if ARCH_UltraSparc
920 921 922
    INIT4( sad, _vis );
    INIT4( sad_x3, _vis );
    INIT4( sad_x4, _vis );
923
#endif
924
#endif // !X264_HIGH_BIT_DEPTH
Loren Merritt's avatar
Loren Merritt committed
925 926 927 928 929

    pixf->ads[PIXEL_8x16] =
    pixf->ads[PIXEL_8x4] =
    pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
    pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
Laurent Aimar's avatar
Laurent Aimar committed
930 931
}