/*****************************************************************************
 * deblock.c: deblocking
 *****************************************************************************
 * Copyright (C) 2003-2014 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *          Henrik Gramner <henrik@gramner.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

#include "common.h"

/* Deblocking filter */
/* Alpha thresholds, read through alpha_table().
 * Layout: 24 leading padding entries (all 0), one entry per index 0..51,
 * then 12 trailing entries repeating the last value (255). */
static const uint8_t i_alpha_table[52+12*3] =
{
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
     7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
    80, 90,101,113,127,144,162,182,203,226,
   255,255,
   255,255,255,255,255,255,255,255,255,255,255,255,
};
/* Beta thresholds, read through beta_table().
 * Layout: 24 leading padding entries (all 0), one entry per index 0..51,
 * then 12 trailing entries repeating the last value (18). */
static const uint8_t i_beta_table[52+12*3] =
{
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
     3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
    18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
/* Clipping limits, read through tc0_table().
 * One 4-entry row per index: 24 leading padding rows, rows for index 0..51,
 * then 12 trailing rows repeating the last one.
 * deblock_edge() selects the column with bS[i]; column 0 is always -1. */
static const int8_t i_tc0_table[52+12*3][4] =
{
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};
/* Table accessors: the argument is biased by 24 before indexing, so values
 * from -24 upwards land inside the padded arrays. tc0_table(x) yields a
 * 4-entry row that the caller indexes again. */
#define alpha_table(x) i_alpha_table[(x)+24]
#define beta_table(x)  i_beta_table[(x)+24]
#define tc0_table(x)   i_tc0_table[(x)+24]
77 78

/* From ffmpeg */
79
static ALWAYS_INLINE void deblock_edge_luma_c( pixel *pix, intptr_t xstride, int alpha, int beta, int8_t tc0 )
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
{
    int p2 = pix[-3*xstride];
    int p1 = pix[-2*xstride];
    int p0 = pix[-1*xstride];
    int q0 = pix[ 0*xstride];
    int q1 = pix[ 1*xstride];
    int q2 = pix[ 2*xstride];

    if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
    {
        int tc = tc0;
        int delta;
        if( abs( p2 - p0 ) < beta )
        {
            if( tc0 )
                pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0, tc0 );
            tc++;
        }
        if( abs( q2 - q0 ) < beta )
        {
            if( tc0 )
                pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0, tc0 );
            tc++;
        }

        delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
        pix[-1*xstride] = x264_clip_pixel( p0 + delta );    /* p0' */
        pix[ 0*xstride] = x264_clip_pixel( q0 - delta );    /* q0' */
    }
}
110
static inline void deblock_luma_c( pixel *pix, intptr_t xstride, intptr_t ystride, int alpha, int beta, int8_t *tc0 )
111 112 113 114 115 116 117 118
{
    for( int i = 0; i < 4; i++ )
    {
        if( tc0[i] < 0 )
        {
            pix += 4*ystride;
            continue;
        }
119 120
        for( int d = 0; d < 4; d++, pix += ystride )
            deblock_edge_luma_c( pix, xstride, alpha, beta, tc0[i] );
121 122
    }
}
123
static void deblock_h_luma_mbaff_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
124 125 126 127
{
    for( int d = 0; d < 8; d++, pix += stride )
        deblock_edge_luma_c( pix, 1, alpha, beta, tc0[d>>1] );
}
128
static void deblock_v_luma_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
129 130 131
{
    deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
}
132
static void deblock_h_luma_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
133 134 135 136
{
    deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
}

137
static ALWAYS_INLINE void deblock_edge_chroma_c( pixel *pix, intptr_t xstride, int alpha, int beta, int8_t tc )
138 139 140 141 142 143 144 145 146 147 148 149 150
{
    int p1 = pix[-2*xstride];
    int p0 = pix[-1*xstride];
    int q0 = pix[ 0*xstride];
    int q1 = pix[ 1*xstride];

    if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
    {
        int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
        pix[-1*xstride] = x264_clip_pixel( p0 + delta );    /* p0' */
        pix[ 0*xstride] = x264_clip_pixel( q0 - delta );    /* q0' */
    }
}
151
static ALWAYS_INLINE void deblock_chroma_c( pixel *pix, int height, intptr_t xstride, intptr_t ystride, int alpha, int beta, int8_t *tc0 )
152 153 154 155 156 157
{
    for( int i = 0; i < 4; i++ )
    {
        int tc = tc0[i];
        if( tc <= 0 )
        {
Henrik Gramner's avatar
Henrik Gramner committed
158
            pix += height*ystride;
159 160
            continue;
        }
Henrik Gramner's avatar
Henrik Gramner committed
161 162 163
        for( int d = 0; d < height; d++, pix += ystride-2 )
            for( int e = 0; e < 2; e++, pix++ )
                deblock_edge_chroma_c( pix, xstride, alpha, beta, tc0[i] );
164 165
    }
}
166
static void deblock_h_chroma_mbaff_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
167
{
168
    deblock_chroma_c( pix, 1, 2, stride, alpha, beta, tc0 );
169
}
170
static void deblock_v_chroma_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
171
{
Henrik Gramner's avatar
Henrik Gramner committed
172
    deblock_chroma_c( pix, 2, stride, 2, alpha, beta, tc0 );
173
}
174
static void deblock_h_chroma_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
175
{
Henrik Gramner's avatar
Henrik Gramner committed
176 177
    deblock_chroma_c( pix, 2, 2, stride, alpha, beta, tc0 );
}
178
static void deblock_h_chroma_422_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
Henrik Gramner's avatar
Henrik Gramner committed
179 180
{
    deblock_chroma_c( pix, 4, 2, stride, alpha, beta, tc0 );
181 182
}

183
static ALWAYS_INLINE void deblock_edge_luma_intra_c( pixel *pix, intptr_t xstride, int alpha, int beta )
184
{
185 186 187 188 189 190
    int p2 = pix[-3*xstride];
    int p1 = pix[-2*xstride];
    int p0 = pix[-1*xstride];
    int q0 = pix[ 0*xstride];
    int q1 = pix[ 1*xstride];
    int q2 = pix[ 2*xstride];
191

192 193 194
    if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
    {
        if( abs( p0 - q0 ) < ((alpha >> 2) + 2) )
195
        {
196
            if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
197
            {
198 199 200 201
                const int p3 = pix[-4*xstride];
                pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
202
            }
203
            else /* p0' */
204
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
205 206 207 208 209 210
            if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
            {
                const int q3 = pix[3*xstride];
                pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
211
            }
212 213 214 215 216 217 218
            else /* q0' */
                pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
        }
        else /* p0', q0' */
        {
            pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
            pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
219 220 221
        }
    }
}
222
static inline void deblock_luma_intra_c( pixel *pix, intptr_t xstride, intptr_t ystride, int alpha, int beta )
223 224 225 226
{
    for( int d = 0; d < 16; d++, pix += ystride )
        deblock_edge_luma_intra_c( pix, xstride, alpha, beta );
}
227
static void deblock_h_luma_intra_mbaff_c( pixel *pix, intptr_t ystride, int alpha, int beta )
228 229 230 231
{
    for( int d = 0; d < 8; d++, pix += ystride )
        deblock_edge_luma_intra_c( pix, 1, alpha, beta );
}
232
static void deblock_v_luma_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
233 234 235
{
    deblock_luma_intra_c( pix, stride, 1, alpha, beta );
}
236
static void deblock_h_luma_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
237 238 239 240
{
    deblock_luma_intra_c( pix, 1, stride, alpha, beta );
}

241
static ALWAYS_INLINE void deblock_edge_chroma_intra_c( pixel *pix, intptr_t xstride, int alpha, int beta )
242 243 244 245 246 247 248 249 250 251 252 253
{
    int p1 = pix[-2*xstride];
    int p0 = pix[-1*xstride];
    int q0 = pix[ 0*xstride];
    int q1 = pix[ 1*xstride];

    if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
    {
        pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
        pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
    }
}
254
static ALWAYS_INLINE void deblock_chroma_intra_c( pixel *pix, int width, int height, intptr_t xstride, intptr_t ystride, int alpha, int beta )
255
{
Henrik Gramner's avatar
Henrik Gramner committed
256 257 258
    for( int d = 0; d < height; d++, pix += ystride-2 )
        for( int e = 0; e < width; e++, pix++ )
            deblock_edge_chroma_intra_c( pix, xstride, alpha, beta );
259
}
260
static void deblock_h_chroma_intra_mbaff_c( pixel *pix, intptr_t stride, int alpha, int beta )
261
{
262
    deblock_chroma_intra_c( pix, 2, 4, 2, stride, alpha, beta );
263
}
264
static void deblock_v_chroma_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
265
{
Henrik Gramner's avatar
Henrik Gramner committed
266
    deblock_chroma_intra_c( pix, 1, 16, stride, 2, alpha, beta );
267
}
268
static void deblock_h_chroma_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
269
{
Henrik Gramner's avatar
Henrik Gramner committed
270 271
    deblock_chroma_intra_c( pix, 2, 8, 2, stride, alpha, beta );
}
272
static void deblock_h_chroma_422_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
Henrik Gramner's avatar
Henrik Gramner committed
273 274
{
    deblock_chroma_intra_c( pix, 2, 16, 2, stride, alpha, beta );
275 276
}

Fiona Glaser's avatar
Fiona Glaser committed
277
static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
278
                                int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
279
                                int bframe )
280 281 282 283 284
{
    for( int dir = 0; dir < 2; dir++ )
    {
        int s1 = dir ? 1 : 8;
        int s2 = dir ? 8 : 1;
Fiona Glaser's avatar
Fiona Glaser committed
285
        for( int edge = 0; edge < 4; edge++ )
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
            for( int i = 0, loc = X264_SCAN8_0+edge*s2; i < 4; i++, loc += s1 )
            {
                int locn = loc - s2;
                if( nnz[loc] || nnz[locn] )
                    bs[dir][edge][i] = 2;
                else if( ref[0][loc] != ref[0][locn] ||
                         abs( mv[0][loc][0] - mv[0][locn][0] ) >= 4 ||
                         abs( mv[0][loc][1] - mv[0][locn][1] ) >= mvy_limit ||
                        (bframe && (ref[1][loc] != ref[1][locn] ||
                         abs( mv[1][loc][0] - mv[1][locn][0] ) >= 4 ||
                         abs( mv[1][loc][1] - mv[1][locn][1] ) >= mvy_limit )))
                {
                    bs[dir][edge][i] = 1;
                }
                else
                    bs[dir][edge][i] = 0;
            }
    }
}
305

306 307
static ALWAYS_INLINE void deblock_edge( x264_t *h, pixel *pix, intptr_t i_stride, uint8_t bS[4], int i_qp,
                                        int a, int b, int b_chroma, x264_deblock_inter_t pf_inter )
308
{
309 310
    int index_a = i_qp + a;
    int index_b = i_qp + b;
311 312
    int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
    int beta  = beta_table(index_b) << (BIT_DEPTH-8);
313 314
    int8_t tc[4];

315
    if( !M32(bS) || !alpha || !beta )
316 317
        return;

318 319 320 321
    tc[0] = (tc0_table(index_a)[bS[0]] << (BIT_DEPTH-8)) + b_chroma;
    tc[1] = (tc0_table(index_a)[bS[1]] << (BIT_DEPTH-8)) + b_chroma;
    tc[2] = (tc0_table(index_a)[bS[2]] << (BIT_DEPTH-8)) + b_chroma;
    tc[3] = (tc0_table(index_a)[bS[3]] << (BIT_DEPTH-8)) + b_chroma;
322

323
    pf_inter( pix, i_stride, alpha, beta, tc );
324 325
}

326 327
static ALWAYS_INLINE void deblock_edge_intra( x264_t *h, pixel *pix, intptr_t i_stride, uint8_t bS[4], int i_qp,
                                              int a, int b, int b_chroma, x264_deblock_intra_t pf_intra )
328
{
329 330
    int index_a = i_qp + a;
    int index_b = i_qp + b;
331 332
    int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
    int beta  = beta_table(index_b) << (BIT_DEPTH-8);
333 334 335 336

    if( !alpha || !beta )
        return;

337
    pf_intra( pix, i_stride, alpha, beta );
338 339
}

340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
static ALWAYS_INLINE void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y )
{
    int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2;

    h->mb.i_neighbour = 0;
    h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
    h->mb.b_interlaced = PARAM_INTERLACED && h->mb.field[h->mb.i_mb_xy];
    h->mb.i_mb_top_y = mb_y - (1 << MB_INTERLACED);
    h->mb.i_mb_top_xy = mb_x + h->mb.i_mb_stride*h->mb.i_mb_top_y;
    h->mb.i_mb_left_xy[1] =
    h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
    if( SLICE_MBAFF )
    {
        if( mb_y&1 )
        {
            if( mb_x && h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
                h->mb.i_mb_left_xy[0] -= h->mb.i_mb_stride;
        }
        else
        {
            if( h->mb.i_mb_top_xy >= 0 && MB_INTERLACED && !h->mb.field[h->mb.i_mb_top_xy] )
            {
                h->mb.i_mb_top_xy += h->mb.i_mb_stride;
                h->mb.i_mb_top_y++;
            }
            if( mb_x && h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
                h->mb.i_mb_left_xy[1] += h->mb.i_mb_stride;
        }
    }

    if( mb_x > 0 && (deblock_on_slice_edges ||
        h->mb.slice_table[h->mb.i_mb_left_xy[0]] == h->mb.slice_table[h->mb.i_mb_xy]) )
        h->mb.i_neighbour |= MB_LEFT;
    if( mb_y > MB_INTERLACED && (deblock_on_slice_edges
        || h->mb.slice_table[h->mb.i_mb_top_xy] == h->mb.slice_table[h->mb.i_mb_xy]) )
        h->mb.i_neighbour |= MB_TOP;
}

378 379
void x264_frame_deblock_row( x264_t *h, int mb_y )
{
380
    int b_interlaced = SLICE_MBAFF;
381 382 383
    int a = h->sh.i_alpha_c0_offset - QP_BD_OFFSET;
    int b = h->sh.i_beta_offset - QP_BD_OFFSET;
    int qp_thresh = 15 - X264_MIN( a, b ) - X264_MAX( 0, h->pps->i_chroma_qp_index_offset );
384 385
    int stridey   = h->fdec->i_stride[0];
    int strideuv  = h->fdec->i_stride[1];
386
    int chroma444 = CHROMA444;
387
    int chroma_height = 16 >> CHROMA_V_SHIFT;
388
    intptr_t uvdiff = chroma444 ? h->fdec->plane[2] - h->fdec->plane[1] : 1;
389

390
    for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
391
    {
392
        x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
Fiona Glaser's avatar
Fiona Glaser committed
393
        x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
394 395

        int mb_xy = h->mb.i_mb_xy;
396
        int transform_8x8 = h->mb.mb_transform_size[mb_xy];
397
        int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
398
        uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][h->param.b_sliced_threads?mb_xy:mb_x];
399

400
        pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
Henrik Gramner's avatar
Henrik Gramner committed
401
        pixel *pixuv = h->fdec->plane[1] + chroma_height*mb_y*strideuv + 16*mb_x;
402

403
        if( mb_y & MB_INTERLACED )
404 405
        {
            pixy -= 15*stridey;
Henrik Gramner's avatar
Henrik Gramner committed
406
            pixuv -= (chroma_height-1)*strideuv;
407 408
        }

409 410
        int stride2y  = stridey << MB_INTERLACED;
        int stride2uv = strideuv << MB_INTERLACED;
411 412
        int qp = h->mb.qp[mb_xy];
        int qpc = h->chroma_qp_table[qp];
413
        int first_edge_only = (h->mb.partition[mb_xy] == D_16x16 && !h->mb.cbp[mb_xy] && !intra_cur) || qp <= qp_thresh;
414

415 416
        #define FILTER( intra, dir, edge, qp, chroma_qp )\
        do\
417
        {\
Henrik Gramner's avatar
Henrik Gramner committed
418
            if( !(edge & 1) || !transform_8x8 )\
Fiona Glaser's avatar
Fiona Glaser committed
419
            {\
Henrik Gramner's avatar
Henrik Gramner committed
420 421
                deblock_edge##intra( h, pixy + 4*edge*(dir?stride2y:1),\
                                     stride2y, bs[dir][edge], qp, a, b, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
422
                                     h->loopf.deblock_luma##intra[dir] );\
Henrik Gramner's avatar
Henrik Gramner committed
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
                if( CHROMA_FORMAT == CHROMA_444 )\
                {\
                    deblock_edge##intra( h, pixuv          + 4*edge*(dir?stride2uv:1),\
                                         stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
                                         h->loopf.deblock_luma##intra[dir] );\
                    deblock_edge##intra( h, pixuv + uvdiff + 4*edge*(dir?stride2uv:1),\
                                         stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
                                         h->loopf.deblock_luma##intra[dir] );\
                }\
                else if( CHROMA_FORMAT == CHROMA_420 && !(edge & 1) )\
                {\
                    deblock_edge##intra( h, pixuv + edge*(dir?2*stride2uv:4),\
                                         stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
                                         h->loopf.deblock_chroma##intra[dir] );\
                }\
Fiona Glaser's avatar
Fiona Glaser committed
438
            }\
Henrik Gramner's avatar
Henrik Gramner committed
439 440 441
            if( CHROMA_FORMAT == CHROMA_422 && (dir || !(edge & 1)) )\
            {\
                deblock_edge##intra( h, pixuv + edge*(dir?4*stride2uv:4),\
442
                                     stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
443
                                     h->loopf.deblock_chroma##intra[dir] );\
Henrik Gramner's avatar
Henrik Gramner committed
444
            }\
445 446 447 448
        } while(0)

        if( h->mb.i_neighbour & MB_LEFT )
        {
449
            if( b_interlaced && h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
450 451 452 453
            {
                int luma_qp[2];
                int chroma_qp[2];
                int left_qp[2];
454
                x264_deblock_inter_t luma_deblock = h->loopf.deblock_luma_mbaff;
Henrik Gramner's avatar
Henrik Gramner committed
455
                x264_deblock_inter_t chroma_deblock = h->loopf.deblock_chroma_mbaff;
456
                x264_deblock_intra_t luma_intra_deblock = h->loopf.deblock_luma_intra_mbaff;
Henrik Gramner's avatar
Henrik Gramner committed
457
                x264_deblock_intra_t chroma_intra_deblock = h->loopf.deblock_chroma_intra_mbaff;
458
                int c = chroma444 ? 0 : 1;
Fiona Glaser's avatar
Fiona Glaser committed
459

460
                left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]];
461 462
                luma_qp[0] = (qp + left_qp[0] + 1) >> 1;
                chroma_qp[0] = (qpc + h->chroma_qp_table[left_qp[0]] + 1) >> 1;
463
                if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] ) )
464
                {
465 466
                    deblock_edge_intra( h, pixy,           2*stridey,  bs[0][0], luma_qp[0],   a, b, 0, luma_intra_deblock );
                    deblock_edge_intra( h, pixuv,          2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
467 468
                    if( chroma444 )
                        deblock_edge_intra( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
469 470 471
                }
                else
                {
472 473
                    deblock_edge( h, pixy,           2*stridey,  bs[0][0], luma_qp[0],   a, b, 0, luma_deblock );
                    deblock_edge( h, pixuv,          2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
474 475
                    if( chroma444 )
                        deblock_edge( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
476 477
                }

478
                int offy = MB_INTERLACED ? 4 : 0;
479
                int offuv = MB_INTERLACED ? 4-CHROMA_V_SHIFT : 0;
480
                left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
481 482
                luma_qp[1] = (qp + left_qp[1] + 1) >> 1;
                chroma_qp[1] = (qpc + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
483
                if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[1]] ) )
484
                {
485 486
                    deblock_edge_intra( h, pixy           + (stridey<<offy),   2*stridey,  bs[0][4], luma_qp[1],   a, b, 0, luma_intra_deblock );
                    deblock_edge_intra( h, pixuv          + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
487 488
                    if( chroma444 )
                        deblock_edge_intra( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
489 490 491
                }
                else
                {
492 493
                    deblock_edge( h, pixy           + (stridey<<offy),   2*stridey,  bs[0][4], luma_qp[1],   a, b, 0, luma_deblock );
                    deblock_edge( h, pixuv          + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
494 495
                    if( chroma444 )
                        deblock_edge( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
496 497
                }
            }
498
            else
499 500 501
            {
                int qpl = h->mb.qp[h->mb.i_mb_xy-1];
                int qp_left = (qp + qpl + 1) >> 1;
502
                int qpc_left = (qpc + h->chroma_qp_table[qpl] + 1) >> 1;
503
                int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_xy-1] );
504
                int intra_deblock = intra_cur || intra_left;
505

506 507 508
                /* Any MB that was coded, or that analysis decided to skip, has quality commensurate with its QP.
                 * But if deblocking affects neighboring MBs that were force-skipped, blur might accumulate there.
                 * So reset their effective QP to max, to indicate that lack of guarantee. */
509
                if( h->fdec->mb_info && M32( bs[0][0] ) )
510
                {
511
#define RESET_EFFECTIVE_QP(xy) h->fdec->effective_qp[xy] |= 0xff * !!(h->fdec->mb_info[xy] & X264_MBINFO_CONSTANT);
512 513 514 515 516
                    RESET_EFFECTIVE_QP(mb_xy);
                    RESET_EFFECTIVE_QP(h->mb.i_mb_left_xy[0]);
                }

                if( intra_deblock )
517 518 519 520 521
                    FILTER( _intra, 0, 0, qp_left, qpc_left );
                else
                    FILTER(       , 0, 0, qp_left, qpc_left );
            }
        }
522 523
        if( !first_edge_only )
        {
Henrik Gramner's avatar
Henrik Gramner committed
524 525 526
            FILTER( , 0, 1, qp, qpc );
            FILTER( , 0, 2, qp, qpc );
            FILTER( , 0, 3, qp, qpc );
527 528
        }

529 530
        if( h->mb.i_neighbour & MB_TOP )
        {
531
            if( b_interlaced && !(mb_y&1) && !MB_INTERLACED && h->mb.field[h->mb.i_mb_top_xy] )
532 533 534
            {
                int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;

535
                for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
536 537 538
                {
                    int qpt = h->mb.qp[mbn_xy];
                    int qp_top = (qp + qpt + 1) >> 1;
539
                    int qpc_top = (qpc + h->chroma_qp_table[qpt] + 1) >> 1;
540 541 542
                    int intra_top = IS_INTRA( h->mb.type[mbn_xy] );
                    if( intra_cur || intra_top )
                        M32( bs[1][4*j] ) = 0x03030303;
543 544

                    // deblock the first horizontal edge of the even rows, then the first horizontal edge of the odd rows
545
                    deblock_edge( h, pixy      + j*stridey,  2* stridey, bs[1][4*j], qp_top, a, b, 0, h->loopf.deblock_luma[1] );
546
                    if( chroma444 )
Fiona Glaser's avatar
Fiona Glaser committed
547
                    {
548 549
                        deblock_edge( h, pixuv          + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 0, h->loopf.deblock_luma[1] );
                        deblock_edge( h, pixuv + uvdiff + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 0, h->loopf.deblock_luma[1] );
Fiona Glaser's avatar
Fiona Glaser committed
550 551
                    }
                    else
552
                        deblock_edge( h, pixuv          + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 1, h->loopf.deblock_chroma[1] );
553 554
                }
            }
555 556
            else
            {
557 558
                int qpt = h->mb.qp[h->mb.i_mb_top_xy];
                int qp_top = (qp + qpt + 1) >> 1;
559
                int qpc_top = (qpc + h->chroma_qp_table[qpt] + 1) >> 1;
560
                int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
561
                int intra_deblock = intra_cur || intra_top;
562

563
                /* This edge has been modified, reset effective qp to max. */
564
                if( h->fdec->mb_info && M32( bs[1][0] ) )
565 566 567 568 569 570
                {
                    RESET_EFFECTIVE_QP(mb_xy);
                    RESET_EFFECTIVE_QP(h->mb.i_mb_top_xy);
                }

                if( (!b_interlaced || (!MB_INTERLACED && !h->mb.field[h->mb.i_mb_top_xy])) && intra_deblock )
571 572 573 574 575
                {
                    FILTER( _intra, 1, 0, qp_top, qpc_top );
                }
                else
                {
576
                    if( intra_deblock )
577 578 579
                        M32( bs[1][0] ) = 0x03030303;
                    FILTER(       , 1, 0, qp_top, qpc_top );
                }
580
            }
581 582
        }

583 584
        if( !first_edge_only )
        {
Henrik Gramner's avatar
Henrik Gramner committed
585 586 587
            FILTER( , 1, 1, qp, qpc );
            FILTER( , 1, 2, qp, qpc );
            FILTER( , 1, 3, qp, qpc );
588
        }
Fiona Glaser's avatar
Fiona Glaser committed
589 590

        #undef FILTER
591 592 593
    }
}

Fiona Glaser's avatar
Fiona Glaser committed
594 595 596 597
/* For deblock-aware RD.
 * TODO:
 *  deblock macroblock edges
 *  support analysis partitions smaller than 16x16
Henrik Gramner's avatar
Henrik Gramner committed
598
 *  deblock chroma for 4:2:0/4:2:2
Fiona Glaser's avatar
Fiona Glaser committed
599 600 601 602
 *  handle duplicate refs correctly
 */
void x264_macroblock_deblock( x264_t *h )
{
603 604 605 606
    int a = h->sh.i_alpha_c0_offset - QP_BD_OFFSET;
    int b = h->sh.i_beta_offset - QP_BD_OFFSET;
    int qp_thresh = 15 - X264_MIN( a, b ) - X264_MAX( 0, h->pps->i_chroma_qp_index_offset );
    int intra_cur = IS_INTRA( h->mb.i_type );
Fiona Glaser's avatar
Fiona Glaser committed
607
    int qp = h->mb.i_qp;
Fiona Glaser's avatar
Fiona Glaser committed
608
    int qpc = h->mb.i_chroma_qp;
609
    if( (h->mb.i_partition == D_16x16 && !h->mb.i_cbp_luma && !intra_cur) || qp <= qp_thresh )
Fiona Glaser's avatar
Fiona Glaser committed
610 611
        return;

612
    uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength;
613 614 615 616 617
    if( intra_cur )
    {
        memset( &bs[0][1], 3, 3*4*sizeof(uint8_t) );
        memset( &bs[1][1], 3, 3*4*sizeof(uint8_t) );
    }
Fiona Glaser's avatar
Fiona Glaser committed
618 619
    else
        h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
620
                                   bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B );
Fiona Glaser's avatar
Fiona Glaser committed
621 622 623 624 625 626

    int transform_8x8 = h->mb.b_transform_8x8;

    #define FILTER( dir, edge )\
    do\
    {\
Fiona Glaser's avatar
Fiona Glaser committed
627
        deblock_edge( h, h->mb.pic.p_fdec[0] + 4*edge*(dir?FDEC_STRIDE:1),\
628
                      FDEC_STRIDE, bs[dir][edge], qp, a, b, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
629
                      h->loopf.deblock_luma[dir] );\
Fiona Glaser's avatar
Fiona Glaser committed
630 631 632
        if( CHROMA444 )\
        {\
            deblock_edge( h, h->mb.pic.p_fdec[1] + 4*edge*(dir?FDEC_STRIDE:1),\
633
                          FDEC_STRIDE, bs[dir][edge], qpc, a, b, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
634 635
                          h->loopf.deblock_luma[dir] );\
            deblock_edge( h, h->mb.pic.p_fdec[2] + 4*edge*(dir?FDEC_STRIDE:1),\
636
                          FDEC_STRIDE, bs[dir][edge], qpc, a, b, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
637 638
                          h->loopf.deblock_luma[dir] );\
        }\
Fiona Glaser's avatar
Fiona Glaser committed
639 640 641 642 643 644 645 646 647 648 649 650 651
    } while(0)

    if( !transform_8x8 ) FILTER( 0, 1 );
                         FILTER( 0, 2 );
    if( !transform_8x8 ) FILTER( 0, 3 );

    if( !transform_8x8 ) FILTER( 1, 1 );
                         FILTER( 1, 2 );
    if( !transform_8x8 ) FILTER( 1, 3 );

    #undef FILTER
}

Steven Walters's avatar
Steven Walters committed
652
#if HAVE_MMX
/* Prototypes for the x86 SIMD deblocking routines. Only declarations appear
 * here; the definitions are not in this part of the file. */

/* Luma/chroma edge filters taking a tc0 array (suffix = instruction set). */
void x264_deblock_v_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_422_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_422_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_422_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );

/* Intra edge filters: same arguments minus tc0. */
void x264_deblock_v_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_v_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_v_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_v_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_422_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_422_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_422_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );

/* Boundary-strength routines: read nnz/ref/mv and take the bs[2][8][4] array
 * (presumably the output -- confirm against deblock_strength_c). */
void x264_deblock_strength_mmx2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                  int mvy_limit, int bframe );
void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                  int mvy_limit, int bframe );
void x264_deblock_strength_ssse3( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                  int mvy_limit, int bframe );
void x264_deblock_strength_avx  ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                  int mvy_limit, int bframe );
void x264_deblock_strength_avx2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                  int mvy_limit, int bframe );

/* MBAFF variants of the horizontal chroma intra filter. */
void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
Steven Walters's avatar
Steven Walters committed
696
#if ARCH_X86
/* MMX2 routines declared only when ARCH_X86 is set. Declarations only; the
 * definitions are not in this part of the file. */
void x264_deblock_h_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_v_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );

#if HIGH_BIT_DEPTH
/* With HIGH_BIT_DEPTH the full-width vertical luma filters are external
 * symbols; otherwise they are the static wrappers in the #else branch. */
void x264_deblock_v_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
#else
712
/* Non-HIGH_BIT_DEPTH vertical luma filter built from the v8 MMX2 routine:
 * calls x264_deblock_v8_luma_mmx2 once at pix with tc0 and once at pix+8 with
 * tc0+2, forwarding stride, alpha and beta unchanged to both calls. */
// FIXME this wrapper has a significant cpu cost
static void x264_deblock_v_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
{
    x264_deblock_v8_luma_mmx2( pix,   stride, alpha, beta, tc0   );
    x264_deblock_v8_luma_mmx2( pix+8, stride, alpha, beta, tc0+2 );
}
718
/* Non-HIGH_BIT_DEPTH vertical luma intra filter built from the v8 MMX2
 * routine: calls x264_deblock_v8_luma_intra_mmx2 at pix and again at pix+8,
 * forwarding stride, alpha and beta unchanged to both calls. */
static void x264_deblock_v_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta )
{
    x264_deblock_v8_luma_intra_mmx2( pix,   stride, alpha, beta );
    x264_deblock_v8_luma_intra_mmx2( pix+8, stride, alpha, beta );
}
723
#endif // HIGH_BIT_DEPTH
#endif
#endif

Steven Walters's avatar
Steven Walters committed
727
#if ARCH_PPC
/* AltiVec luma edge filters. Declarations only; the definitions are not in
 * this part of the file. */
void x264_deblock_v_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
#endif // ARCH_PPC

732
#if HAVE_ARMV6 || ARCH_AARCH64
/* NEON deblocking routines. Declarations only; the definitions are not in
 * this part of the file. */
void x264_deblock_v_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                 int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                 int mvy_limit, int bframe );
#if ARCH_AARCH64
/* Additional NEON routines declared only when ARCH_AARCH64 is set. */
void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_422_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
#endif
#endif

750
/* Populate the deblocking function table *pf.
 *
 * Every entry is first set to its C implementation. Entries are then
 * overwritten with SIMD versions, gated at compile time by HAVE_MMX,
 * ARCH_X86, HIGH_BIT_DEPTH, HAVE_ALTIVEC, HAVE_ARMV6 and ARCH_AARCH64, and
 * at run time by the X264_CPU_* bits in `cpu`. Assignments run in order, so
 * the last branch that matches decides the final pointer for each entry.
 *
 *   cpu     - bitmask tested against X264_CPU_* flags.
 *   pf      - function table to fill; every field assigned below is written.
 *   b_mbaff - not referenced anywhere in this function body.
 */
void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
{
    /* C defaults. In the two-element arrays, index 1 receives the v_* routine
     * and index 0 the h_* routine. */
    pf->deblock_luma[1] = deblock_v_luma_c;
    pf->deblock_luma[0] = deblock_h_luma_c;
    pf->deblock_chroma[1] = deblock_v_chroma_c;
    pf->deblock_h_chroma_420 = deblock_h_chroma_c;
    pf->deblock_h_chroma_422 = deblock_h_chroma_422_c;
    pf->deblock_luma_intra[1] = deblock_v_luma_intra_c;
    pf->deblock_luma_intra[0] = deblock_h_luma_intra_c;
    pf->deblock_chroma_intra[1] = deblock_v_chroma_intra_c;
    pf->deblock_h_chroma_420_intra = deblock_h_chroma_intra_c;
    pf->deblock_h_chroma_422_intra = deblock_h_chroma_422_intra_c;
    pf->deblock_luma_mbaff = deblock_h_luma_mbaff_c;
    pf->deblock_chroma_420_mbaff = deblock_h_chroma_mbaff_c;
    pf->deblock_luma_intra_mbaff = deblock_h_luma_intra_mbaff_c;
    pf->deblock_chroma_420_intra_mbaff = deblock_h_chroma_intra_mbaff_c;
    pf->deblock_strength = deblock_strength_c;

#if HAVE_MMX
    if( cpu&X264_CPU_MMX2 )
    {
#if ARCH_X86
        /* MMX2 filters installed only when ARCH_X86 is set. */
        pf->deblock_luma[1] = x264_deblock_v_luma_mmx2;
        pf->deblock_luma[0] = x264_deblock_h_luma_mmx2;
        pf->deblock_chroma[1] = x264_deblock_v_chroma_mmx2;
        pf->deblock_h_chroma_420 = x264_deblock_h_chroma_mmx2;
        pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_mmx2;
        pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_mmx2;
        pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_mmx2;
        pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmx2;
        pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmx2;
        pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_mmx2;
        pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_mmx2;
        pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_mmx2;
#endif
#if !HIGH_BIT_DEPTH
        /* Same assignment as in the ARCH_X86 block above; here it applies to
         * every non-HIGH_BIT_DEPTH build regardless of ARCH_X86. */
        pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_mmx2;
#endif
        pf->deblock_strength = x264_deblock_strength_mmx2;
        if( cpu&X264_CPU_SSE2 )
        {
            pf->deblock_strength = x264_deblock_strength_sse2;
            pf->deblock_h_chroma_420 = x264_deblock_h_chroma_sse2;
            pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_sse2;
            pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_sse2;
            pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_sse2;
            pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
            pf->deblock_luma[0] = x264_deblock_h_luma_sse2;
            pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_sse2;
            pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_sse2;
            /* These entries are left at their previous value when
             * X264_CPU_STACK_MOD4 is set (presumably a stack-alignment
             * requirement of the routines -- confirm). */
            if( !(cpu&X264_CPU_STACK_MOD4) )
            {
                pf->deblock_chroma[1] = x264_deblock_v_chroma_sse2;
                pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_sse2;
                pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_sse2;
#if HIGH_BIT_DEPTH
                pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_sse2;
#endif
            }
        }
        /* SSSE3 replaces only the strength routine. */
        if( cpu&X264_CPU_SSSE3 )
            pf->deblock_strength = x264_deblock_strength_ssse3;
        if( cpu&X264_CPU_AVX )
        {
            pf->deblock_strength = x264_deblock_strength_avx;
            pf->deblock_h_chroma_420 = x264_deblock_h_chroma_avx;
            pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_avx;
            pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_avx;
            pf->deblock_luma[1] = x264_deblock_v_luma_avx;
            pf->deblock_luma[0] = x264_deblock_h_luma_avx;
            pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_avx;
            pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_avx;
            /* Same X264_CPU_STACK_MOD4 gate as in the SSE2 branch. */
            if( !(cpu&X264_CPU_STACK_MOD4) )
            {
                pf->deblock_chroma[1] = x264_deblock_v_chroma_avx;
                pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_avx;
                pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_avx;
#if HIGH_BIT_DEPTH
                pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_avx;
                pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_avx;
#endif
            }
        }
        /* AVX2 replaces only the strength routine. */
        if( cpu&X264_CPU_AVX2 )
        {
            pf->deblock_strength = x264_deblock_strength_avx2;
        }
    }
#endif

#if !HIGH_BIT_DEPTH
    /* The AltiVec and NEON overrides are compiled only without HIGH_BIT_DEPTH. */
#if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )
    {
        pf->deblock_luma[1] = x264_deblock_v_luma_altivec;
        pf->deblock_luma[0] = x264_deblock_h_luma_altivec;
    }
#endif // HAVE_ALTIVEC

#if HAVE_ARMV6 || ARCH_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        pf->deblock_luma[1] = x264_deblock_v_luma_neon;
        pf->deblock_luma[0] = x264_deblock_h_luma_neon;
        pf->deblock_chroma[1] = x264_deblock_v_chroma_neon;
        pf->deblock_h_chroma_420 = x264_deblock_h_chroma_neon;
#if ARCH_AARCH64
        pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_neon;
        pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_neon;
        pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_neon;
        pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon;
        pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_neon;
        pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_neon;
#endif
        pf->deblock_strength     = x264_deblock_strength_neon;
    }
#endif
#endif // !HIGH_BIT_DEPTH

    /* These functions are equivalent, so don't duplicate them. */
    /* Copied last so they pick up whichever 4:2:0 routine was selected above. */
    pf->deblock_chroma_422_mbaff = pf->deblock_h_chroma_420;
    pf->deblock_chroma_422_intra_mbaff = pf->deblock_h_chroma_420_intra;
}