frame.c 35.9 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3
/*****************************************************************************
 * frame.c: h264 encoder library
 *****************************************************************************
4
 * Copyright (C) 2003-2008 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5 6
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7
 *          Loren Merritt <lorenm@u.washington.edu>
8
 *          Fiona Glaser <fiona@x264.com>
Laurent Aimar's avatar
Laurent Aimar committed
9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Laurent Aimar's avatar
Laurent Aimar committed
23 24 25 26
 *****************************************************************************/

#include "common.h"

27 28
/* Round x up to the next multiple of a.  The mask arithmetic only rounds
 * correctly when a is a power of two.  Note that a is evaluated twice. */
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))

Laurent Aimar's avatar
Laurent Aimar committed
29 30
/* Allocate a frame and every per-frame buffer, sized from the encoder
 * parameters in h.
 *
 * Returns the new frame, or NULL if the frame struct or any buffer could not
 * be allocated.  On a buffer allocation failure control reaches the fail:
 * label (the only jumps to it come from CHECKED_MALLOC) and whatever was
 * already allocated is released through x264_frame_delete(). */
x264_frame_t *x264_frame_new( x264_t *h )
{
    x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
    int i, j;

    int i_mb_count = h->mb.i_mb_count;
    int i_stride, i_width, i_lines;
    int i_padv = PADV << h->param.b_interlaced;
    int luma_plane_size;
    int chroma_plane_size;
    int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;

    if( !frame ) return NULL;

    memset( frame, 0, sizeof(x264_frame_t) );

    /* x264_frame_delete() destroys the mutex and the condition variable, and
     * it is what the fail: path calls.  They must therefore be initialised
     * before the first allocation that can fail, not at the end. */
    x264_pthread_mutex_init( &frame->mutex, NULL );
    x264_pthread_cond_init( &frame->cv, NULL );

    /* allocate frame data (+64 for extra data for me) */
    i_width  = ALIGN( h->param.i_width, 16 );
    i_stride = ALIGN( i_width + 2*PADH, align );
    i_lines  = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );

    /* plane 0 is full size, planes 1 and 2 are halved in both dimensions */
    frame->i_plane = 3;
    for( i = 0; i < 3; i++ )
    {
        frame->i_stride[i] = ALIGN( i_stride >> !!i, align );
        frame->i_width[i] = i_width >> !!i;
        frame->i_lines[i] = i_lines >> !!i;
    }

    luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
    chroma_plane_size = (frame->i_stride[1] * ( frame->i_lines[1] + 2*i_padv ));
    for( i = 1; i < 3; i++ )
    {
        CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );
        frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
    }
    /* all 4 luma planes allocated together, since the cacheline split code
     * requires them to be in-phase wrt cacheline alignment. */
    if( h->param.analyse.i_subpel_refine )
    {
        CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
        for( i = 0; i < 4; i++ )
            frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
        frame->plane[0] = frame->filtered[0];
    }
    else
    {
        CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
        frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
    }

    if( h->frames.b_have_lowres )
    {
        frame->i_width_lowres = frame->i_width[0]/2;
        frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
        frame->i_lines_lowres = frame->i_lines[0]/2;

        /* from here on luma_plane_size is the size of one lowres plane */
        luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );

        CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
        for( i = 0; i < 4; i++ )
            frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;

        for( j = 0; j <= !!h->param.i_bframe; j++ )
            for( i = 0; i <= h->param.i_bframe; i++ )
            {
                CHECKED_MALLOC( frame->lowres_mvs[j][i], 2*i_mb_count*sizeof(int16_t) );
                memset( frame->lowres_mvs[j][i], 0, 2*i_mb_count*sizeof(int16_t) );
                CHECKED_MALLOC( frame->lowres_mv_costs[j][i], i_mb_count*sizeof(int) );
            }
    }

    if( h->param.analyse.i_me_method >= X264_ME_ESA )
    {
        /* buffer size is doubled when b_have_sub8x8_esa is set */
        CHECKED_MALLOC( frame->buffer[3],
                        frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
        frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
    }

    frame->i_poc = -1;
    frame->i_type = X264_TYPE_AUTO;
    frame->i_qpplus1 = 0;
    frame->i_pts = -1;
    frame->i_frame = -1;
    frame->i_frame_num = -1;
    frame->i_lines_completed = -1;

    CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
    CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
    CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
    if( h->param.i_bframe )
    {
        CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
        CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
    }
    else
    {
        frame->mv[1]  = NULL;
        frame->ref[1] = NULL;
    }

    CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
    CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
    for( i = 0; i < h->param.i_bframe + 2; i++ )
        for( j = 0; j < h->param.i_bframe + 2; j++ )
            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );

    if( h->param.rc.i_aq_mode )
    {
        CHECKED_MALLOC( frame->f_qp_offset, i_mb_count * sizeof(float) );
        if( h->frames.b_have_lowres )
            CHECKED_MALLOC( frame->i_inv_qscale_factor, i_mb_count * sizeof(uint16_t) );
    }

    return frame;

fail:
    x264_frame_delete( frame );
    return NULL;
}

/* Release every buffer owned by frame, destroy its mutex and condition
 * variable, then free the frame struct itself.
 * NOTE(review): x264_frame_new() calls this on partially built, zero-filled
 * frames, so x264_free() is presumably a no-op on NULL -- confirm. */
void x264_frame_delete( x264_frame_t *frame )
{
    int a, b;

    /* pixel buffers: full resolution and lowres */
    for( a = 0; a < 4; a++ )
    {
        x264_free( frame->buffer[a] );
        x264_free( frame->buffer_lowres[a] );
    }

    /* per-row SATD tables */
    for( a = 0; a < X264_BFRAME_MAX+2; a++ )
    {
        for( b = 0; b < X264_BFRAME_MAX+2; b++ )
            x264_free( frame->i_row_satds[a][b] );
    }

    /* lowres motion vectors and their costs */
    for( a = 0; a < 2; a++ )
    {
        for( b = 0; b <= X264_BFRAME_MAX; b++ )
        {
            x264_free( frame->lowres_mvs[a][b] );
            x264_free( frame->lowres_mv_costs[a][b] );
        }
    }

    /* per-macroblock and per-row side data */
    x264_free( frame->f_qp_offset );
    x264_free( frame->i_inv_qscale_factor );
    x264_free( frame->i_intra_cost );
    x264_free( frame->i_row_bits );
    x264_free( frame->i_row_qp );
    x264_free( frame->mb_type );
    for( a = 0; a < 2; a++ )
        x264_free( frame->mv[a] );
    for( a = 0; a < 2; a++ )
        x264_free( frame->ref[a] );

    x264_pthread_mutex_destroy( &frame->mutex );
    x264_pthread_cond_destroy( &frame->cv );
    x264_free( frame );
}

185
/* Copy the three planes of the input picture src into dst, together with its
 * type, qpplus1 and pts.
 *
 * Only I420 and YV12 input is accepted.  For YV12, source planes 1 and 2 are
 * swapped while copying.  If X264_CSP_VFLIP is set, the source is read from
 * the bottom row upwards.
 *
 * Returns 0 on success, -1 (after logging an error) for any other colorspace. */
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
{
    const int i_csp = src->img.i_csp & X264_CSP_MASK;
    int p;

    if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
    {
        x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
        return -1;
    }

    dst->i_type     = src->i_type;
    dst->i_qpplus1  = src->i_qpplus1;
    dst->i_pts      = src->i_pts;

    for( p = 0; p < 3; p++ )
    {
        const int b_chroma = !!p;
        const int width  = h->param.i_width  >> b_chroma;
        const int height = h->param.i_height >> b_chroma;
        int src_idx = p;
        uint8_t *plane;
        int stride;

        /* p^3 maps 1 -> 2 and 2 -> 1 */
        if( i_csp == X264_CSP_YV12 && p )
            src_idx = p ^ 3;

        plane  = src->img.plane[src_idx];
        stride = src->img.i_stride[src_idx];

        if( src->img.i_csp & X264_CSP_VFLIP )
        {
            /* start at the last row and walk backwards */
            plane += (height-1)*stride;
            stride = -stride;
        }
        h->mc.plane_copy( dst->plane[p], dst->i_stride[p], plane, stride, width, height );
    }
    return 0;
}



Loren Merritt's avatar
Loren Merritt committed
218
/* Replicate the edge pixels of an i_width x i_height region into its borders.
 *
 * pix points at the top-left pixel of the region; rows are i_stride apart.
 * Every row gets i_padh copies of its first pixel on the left and of its last
 * pixel on the right.  If b_pad_top is set, the already-padded first row is
 * copied into the i_padv rows above it; if b_pad_bottom is set, the padded
 * last row is copied into the i_padv rows below it. */
static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
{
    const int padded_width = i_width + 2*i_padh;
    int row;

    /* left and right bands */
    for( row = 0; row < i_height; row++ )
    {
        uint8_t *line = pix + row*i_stride;
        memset( line - i_padh, line[0], i_padh );
        memset( line + i_width, line[i_width-1], i_padh );
    }

    /* upper band */
    if( b_pad_top )
    {
        const uint8_t *first = pix - i_padh;
        for( row = 1; row <= i_padv; row++ )
            memcpy( pix - i_padh - row*i_stride, first, padded_width );
    }

    /* lower band */
    if( b_pad_bottom )
    {
        const uint8_t *last = pix - i_padh + (i_height-1)*i_stride;
        for( row = 0; row < i_padv; row++ )
            memcpy( pix - i_padh + (i_height+row)*i_stride, last, padded_width );
    }
}

Loren Merritt's avatar
Loren Merritt committed
240
/* Expand the borders of every plane of frame for the macroblock row mb_y.
 *
 * b_end marks the last call for the frame: the region then extends to the
 * bottom of the picture and the bottom band is padded as well.  The top band
 * is padded only for mb_y == 0.  With MBAFF, odd rows are skipped and the two
 * fields are padded separately using a doubled stride. */
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
    const int b_mbaff = h->sh.b_mbaff;
    const int b_start = !mb_y;
    int plane;

    if( mb_y & b_mbaff )
        return;

    for( plane = 0; plane < frame->i_plane; plane++ )
    {
        const int sub    = !!plane;   /* 1 for the half-size planes */
        const int stride = frame->i_stride[plane];
        const int width  = (16*h->sps->i_mb_width) >> sub;
        const int padh   = PADH >> sub;
        const int padv   = PADV >> sub;
        const int fields = b_mbaff ? 2 : 1;
        int height, f;
        uint8_t *pix;

        if( b_end )
            height = ((16*(h->sps->i_mb_height - mb_y)) >> b_mbaff) >> sub;
        else
            height = 16 >> sub;

        // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
        pix = frame->plane[plane] + X264_MAX(0, ((16*mb_y-4)*stride) >> sub);
        if( b_end && !b_start )
            height += 4 >> (sub + b_mbaff);

        /* one pass per field; a single pass with the plain stride otherwise */
        for( f = 0; f < fields; f++ )
            plane_expand_border( pix + f*stride, stride*fields, width, height, padh, padv, b_start, b_end );
    }
}

Loren Merritt's avatar
Loren Merritt committed
269
/* Expand the borders of the three filtered planes (filtered[1..3]) for
 * macroblock row mb_y.  b_end marks the last call for the frame.
 *
 * During filtering, 8 extra pixels were filtered on each edge, but up to 3 of
 * the horizontal ones may be wrong, so the border is expanded starting from
 * the last filtered pixel: the region begins 4 pixels to the left of the
 * plane and is 8 pixels wider, and the padding is reduced to match. */
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
    const int b_mbaff = h->sh.b_mbaff;
    const int b_start = !mb_y;
    const int stride  = frame->i_stride[0];
    const int width   = 16*h->sps->i_mb_width + 8;
    const int padh    = PADH - 4;
    const int padv    = PADV - 8;
    const int fields  = b_mbaff ? 2 : 1;
    int height = 16;
    int n, f;

    if( b_end )
        height = ((16*(h->sps->i_mb_height - mb_y)) >> b_mbaff) + 16;

    for( n = 1; n < 4; n++ )
    {
        // buffer: 8 luma, to match the hpel filter
        uint8_t *pix = frame->filtered[n] + (16*mb_y - (8 << b_mbaff)) * stride - 4;

        /* one pass per field; a single pass with the plain stride otherwise */
        for( f = 0; f < fields; f++ )
            plane_expand_border( pix + f*stride, stride*fields, width, height, padh, padv, b_start, b_end );
    }
}

297
/* Pad all four lowres planes on every side by PADH / PADV pixels.
 * The region width is taken as the lowres stride minus both horizontal pads. */
void x264_frame_expand_border_lowres( x264_frame_t *frame )
{
    int n = 0;
    while( n < 4 )
    {
        plane_expand_border( frame->lowres[n], frame->i_stride_lowres,
                             frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres,
                             PADH, PADV, 1, 1 );
        n++;
    }
}

Loren Merritt's avatar
Loren Merritt committed
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
/* Extend each plane from the configured picture size (h->param.i_width x
 * i_height) out to the macroblock-aligned size (16 * i_mb_width x
 * 16 * i_mb_height) by repeating the last column, then the last row. */
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
{
    int plane;
    for( plane = 0; plane < frame->i_plane; plane++ )
    {
        const int shift  = plane ? 1 : 0;
        const int stride = frame->i_stride[plane];
        const int cols   = h->param.i_width >> shift;
        const int rows   = h->param.i_height >> shift;
        const int pad_x  = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> shift;
        const int pad_y  = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> shift;
        uint8_t *base = frame->plane[plane];
        int y;

        if( pad_x )
        {
            /* repeat the last pixel of each row to the right */
            for( y = 0; y < rows; y++ )
            {
                uint8_t *row_end = base + y*stride + cols;
                memset( row_end, row_end[-1], pad_x );
            }
        }
        if( pad_y )
        {
            //FIXME interlace? or just let it pad using the wrong field
            const uint8_t *last_row = base + (rows-1)*stride;
            for( y = rows; y < rows + pad_y; y++ )
                memcpy( base + y*stride, last_row, cols + pad_x );
        }
    }
}

333

334 335
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 * entropy coding, but per 64 coeffs for the purpose of deblocking */
Loic Le Loarer's avatar
Loic Le Loarer committed
336
static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
337 338 339
{
    uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
340
    int x, nnz;
341 342 343 344 345
    for( x=0; x<h->sps->i_mb_width; x++ )
    {
        memcpy( buf+x, src+x, 16 );
        if( transform[x] )
        {
346 347 348 349
            nnz = src[x][0] | src[x][1];
            src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
            nnz = src[x][2] | src[x][3];
            src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
        }
    }
}

/* Copy the 16 saved nnz bytes of every macroblock in row mb_y from buf back
 * into h->mb.non_zero_count. */
static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
{
    const int mb_width = h->sps->i_mb_width;
    uint8_t (*row)[24] = h->mb.non_zero_count + mb_y * mb_width;
    int x = 0;

    while( x < mb_width )
    {
        memcpy( row[x], buf[x], 16 );
        x++;
    }
}

/* Apply func to macroblock row mb_y and, when mb_y > 0, to row mb_y-1.
 * With MBAFF it is also applied to row mb_y+1 and, when mb_y > 0, to row
 * mb_y-2.  Each row gets its own i_mb_width-sized slice of buf, in that
 * order (slices 0, 1, 2, 3). */
static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
{
    const int slice = h->sps->i_mb_width;
    const int b_have_above = mb_y > 0;

    func( h, mb_y, buf );
    if( b_have_above )
        func( h, mb_y-1, buf + slice );

    if( !h->sh.b_mbaff )
        return;

    func( h, mb_y+1, buf + slice * 2 );
    if( b_have_above )
        func( h, mb_y-2, buf + slice * 3 );
}


376
/* Deblocking filter */
377
/* Alpha threshold table.  The 52 real entries (index 0..51) sit between 12
 * guard entries on each side: the leading guards are 0 and the trailing
 * guards repeat the last value, so biased indices from -12 to 63 are valid. */
static const uint8_t i_alpha_table[52+12*2] =
{
    /* -12 .. -1 (guard) */
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    /*   0 .. 15 */
      0,   0,   0,   0,   0,   0,   0,   0,
      0,   0,   0,   0,   0,   0,   0,   0,
    /*  16 .. 25 */
      4,   4,   5,   6,   7,   8,   9,  10,  12,  13,
    /*  26 .. 35 */
     15,  17,  20,  22,  25,  28,  32,  36,  40,  45,
    /*  36 .. 45 */
     50,  56,  63,  71,  80,  90, 101, 113, 127, 144,
    /*  46 .. 51 */
    162, 182, 203, 226, 255, 255,
    /*  52 .. 63 (guard) */
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
};
388
/* Beta threshold table.  The 52 real entries (index 0..51) sit between 12
 * guard entries on each side: the leading guards are 0 and the trailing
 * guards repeat the last value, so biased indices from -12 to 63 are valid. */
static const uint8_t i_beta_table[52+12*2] =
{
    /* -12 .. -1 (guard) */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*   0 .. 15 */
     0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,
    /*  16 .. 25 */
     2,  2,  2,  3,  3,  3,  3,  4,  4,  4,
    /*  26 .. 35 */
     6,  6,  7,  7,  8,  8,  9,  9, 10, 10,
    /*  36 .. 45 */
    11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
    /*  46 .. 51 */
    16, 16, 17, 17, 18, 18,
    /*  52 .. 63 (guard) */
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
399
/* tc0 clipping table: one row per index, one column per bS value 0..3.
 * Column 0 is always -1.  The 52 real rows (index 0..51) sit between 12
 * guard rows on each side: the leading guards equal the first real row and
 * the trailing guards repeat the last one. */
static const int8_t i_tc0_table[52+12*2][4] =
{
    /* -12 .. -1 (guard) */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    /*   0 .. 16 */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 },
    /*  17 .. 22 */
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 },
    {-1, 0, 1, 1 }, {-1, 0, 1, 1 },
    /*  23 .. 30 */
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    /*  31 .. 36 */
    {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 },
    {-1, 2, 3, 4 }, {-1, 2, 3, 4 },
    /*  37 .. 41 */
    {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 },
    {-1, 4, 5, 8 },
    /*  42 .. 47 */
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 },
    {-1, 7,10,14 }, {-1, 8,11,16 },
    /*  48 .. 51 */
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    /*  52 .. 63 (guard) */
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};
415 416 417
/* Table accessors: bias the index by 12 to skip the leading guard entries,
 * so x may range from -12 to 63 given the 52+12*2 table size. */
#define alpha_table(x) i_alpha_table[(x)+12]
#define beta_table(x)  i_beta_table[(x)+12]
#define tc0_table(x)   i_tc0_table[(x)+12]
Laurent Aimar's avatar
Laurent Aimar committed
418 419

/* From ffmpeg */
Loren Merritt's avatar
Loren Merritt committed
420
/* Luma edge filter over 16 lines, handled as 4 groups of 4 lines.
 *
 * pix points at q0 of the first line; xstride steps across the edge (p side
 * at negative offsets) and ystride steps to the next line.  tc0[g] is the
 * clipping strength of group g; a negative value skips the whole group.
 * A line is filtered only if |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta. */
static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
    int group, line;
    for( group = 0; group < 4; group++ )
    {
        const int tc_base = tc0[group];
        if( tc_base < 0 )
        {
            pix += 4*ystride;
            continue;
        }
        for( line = 0; line < 4; line++, pix += ystride )
        {
            const int p2 = pix[-3*xstride];
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            const int q2 = pix[ 2*xstride];
            const int avg = (p0 + q0 + 1) >> 1;
            int tc = tc_base;
            int delta;

            if( abs( p0 - q0 ) >= alpha || abs( p1 - p0 ) >= beta || abs( q1 - q0 ) >= beta )
                continue;

            /* each side that also gets its second pixel adjusted widens
             * the clip range used for p0/q0 by one */
            if( abs( p2 - p0 ) < beta )
            {
                pix[-2*xstride] = p1 + x264_clip3( ((p2 + avg) >> 1) - p1, -tc_base, tc_base );
                tc++;
            }
            if( abs( q2 - q0 ) < beta )
            {
                pix[ 1*xstride] = q1 + x264_clip3( ((q2 + avg) >> 1) - q1, -tc_base, tc_base );
                tc++;
            }

            delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
            pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
            pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
        }
    }
}
Loren Merritt's avatar
Loren Merritt committed
462 463
/* Luma filter with samples across the edge `stride` bytes apart and
 * successive lines adjacent in memory. */
static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    const int across_edge = stride;
    const int along_edge  = 1;
    deblock_luma_c( pix, across_edge, along_edge, alpha, beta, tc0 );
}
/* Luma filter with samples across the edge adjacent in memory and
 * successive lines `stride` bytes apart. */
static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    const int across_edge = 1;
    const int along_edge  = stride;
    deblock_luma_c( pix, across_edge, along_edge, alpha, beta, tc0 );
}
Laurent Aimar's avatar
Laurent Aimar committed
470

Loren Merritt's avatar
Loren Merritt committed
471
/* Chroma edge filter over 8 lines, handled as 4 groups of 2 lines.
 *
 * pix points at q0 of the first line; xstride steps across the edge and
 * ystride steps to the next line.  tc0[g] is the clip range of group g; a
 * value <= 0 skips the whole group.  Only p0 and q0 are modified. */
static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
    int group, line;
    for( group = 0; group < 4; group++ )
    {
        const int tc = tc0[group];
        if( tc <= 0 )
        {
            pix += 2*ystride;
            continue;
        }
        for( line = 0; line < 2; line++, pix += ystride )
        {
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            int delta;

            if( abs( p0 - q0 ) >= alpha || abs( p1 - p0 ) >= beta || abs( q1 - q0 ) >= beta )
                continue;

            delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
            pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
            pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
        }
    }
}
Loren Merritt's avatar
Loren Merritt committed
499
/* Chroma filter with samples across the edge `stride` bytes apart and
 * successive lines adjacent in memory. */
static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    const int across_edge = stride;
    const int along_edge  = 1;
    deblock_chroma_c( pix, across_edge, along_edge, alpha, beta, tc0 );
}
/* Chroma filter with samples across the edge adjacent in memory and
 * successive lines `stride` bytes apart. */
static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    const int across_edge = 1;
    const int along_edge  = stride;
    deblock_chroma_c( pix, across_edge, along_edge, alpha, beta, tc0 );
}
Laurent Aimar's avatar
Laurent Aimar committed
507

Loren Merritt's avatar
Loren Merritt committed
508
/* Intra luma edge filter over 16 lines.
 *
 * pix points at q0 of the first line; xstride steps across the edge and
 * ystride steps to the next line.  A line is filtered only if
 * |p0-q0| < alpha, |p1-p0| < beta and |q1-q0| < beta.  When additionally
 * |p0-q0| < (alpha>>2)+2, each side whose third pixel is within beta of its
 * edge pixel gets the three-pixel filter (which also reads p3/q3); every
 * other case uses the one-pixel filter on that side. */
static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
{
    int line;
    for( line = 0; line < 16; line++, pix += ystride )
    {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];
        int b_strong;

        if( abs( p0 - q0 ) >= alpha || abs( p1 - p0 ) >= beta || abs( q1 - q0 ) >= beta )
            continue;

        b_strong = abs( p0 - q0 ) < ((alpha >> 2) + 2);

        /* p side */
        if( b_strong && abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
        {
            const int p3 = pix[-4*xstride];
            pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
            pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
            pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
        }
        else /* p0' */
            pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;

        /* q side */
        if( b_strong && abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
        {
            const int q3 = pix[3*xstride];
            pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
            pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
            pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
        }
        else /* q0' */
            pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
    }
}
Loren Merritt's avatar
Loren Merritt committed
552
/* Intra luma filter with samples across the edge `stride` bytes apart and
 * successive lines adjacent in memory. */
static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    const int across_edge = stride;
    const int along_edge  = 1;
    deblock_luma_intra_c( pix, across_edge, along_edge, alpha, beta );
}
/* Intra luma filter with samples across the edge adjacent in memory and
 * successive lines `stride` bytes apart. */
static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    const int across_edge = 1;
    const int along_edge  = stride;
    deblock_luma_intra_c( pix, across_edge, along_edge, alpha, beta );
}
Laurent Aimar's avatar
Laurent Aimar committed
560

Loren Merritt's avatar
Loren Merritt committed
561
/* Intra chroma edge filter over 8 lines.
 *
 * pix points at q0 of the first line; xstride steps across the edge and
 * ystride steps to the next line.  On lines where |p0-q0| < alpha,
 * |p1-p0| < beta and |q1-q0| < beta, p0 and q0 are replaced by a weighted
 * average of their neighbours; other lines are left untouched. */
static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
{
    int line;
    for( line = 0; line < 8; line++, pix += ystride )
    {
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];

        if( abs( p0 - q0 ) >= alpha || abs( p1 - p0 ) >= beta || abs( q1 - q0 ) >= beta )
            continue;

        pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
        pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
    }
}
/* Intra chroma filter with samples across the edge `stride` bytes apart and
 * successive lines adjacent in memory. */
static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    const int across_edge = stride;
    const int along_edge  = 1;
    deblock_chroma_intra_c( pix, across_edge, along_edge, alpha, beta );
}
/* Intra chroma filter with samples across the edge adjacent in memory and
 * successive lines `stride` bytes apart. */
static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    const int across_edge = 1;
    const int along_edge  = stride;
    deblock_chroma_intra_c( pix, across_edge, along_edge, alpha, beta );
}

Fiona Glaser's avatar
Fiona Glaser committed
588 589
/* Filter one non-intra edge.
 *
 * alpha and beta are looked up from i_qp plus the slice's alpha/beta offsets;
 * if either is zero nothing is filtered.  The four tc values come from the
 * tc0 table row for the alpha index, selected by bS[0..3], with b_chroma
 * added to each.  pf_inter is run on pix1, and also on pix2 when b_chroma
 * is set. */
static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
{
    const int index_a = i_qp + h->sh.i_alpha_c0_offset;
    const int index_b = i_qp + h->sh.i_beta_offset;
    const int alpha = alpha_table(index_a);
    const int beta  = beta_table(index_b);
    int8_t tc[4];
    int k;

    if( alpha == 0 || beta == 0 )
        return;

    for( k = 0; k < 4; k++ )
        tc[k] = tc0_table(index_a)[bS[k]] + b_chroma;

    pf_inter( pix1, i_stride, alpha, beta, tc );
    if( !b_chroma )
        return;
    pf_inter( pix2, i_stride, alpha, beta, tc );
}

/* Filter one intra edge.
 *
 * alpha and beta are looked up from i_qp plus the slice's alpha/beta offsets;
 * if either is zero nothing is filtered.  pf_intra is run on pix1, and also
 * on pix2 when b_chroma is set.  bS is not read here. */
static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
{
    const int index_a = i_qp + h->sh.i_alpha_c0_offset;
    const int index_b = i_qp + h->sh.i_beta_offset;
    const int alpha = alpha_table(index_a);
    const int beta  = beta_table(index_b);

    if( alpha == 0 || beta == 0 )
        return;

    pf_intra( pix1, i_stride, alpha, beta );
    if( !b_chroma )
        return;
    pf_intra( pix2, i_stride, alpha, beta );
}

Loren Merritt's avatar
Loren Merritt committed
621
/* Deblock every macroblock of one macroblock row of the reconstructed frame
 * (h->fdec), in place.
 *
 * h    - encoder context (slice header offsets, per-MB type/qp/nnz/ref/mv
 *        arrays, the frame planes and the loopf function table are read)
 * mb_y - macroblock row to process.  When h->sh.b_mbaff is set, the loop
 *        below alternates between mb_y and mb_y^1 for every column, so one
 *        call covers a pair of rows.
 *
 * For each macroblock the vertical edges (i_dir 0) are filtered first, then
 * the horizontal edges (i_dir 1).  Edge 0 is the boundary shared with the
 * left/top neighbour and is skipped on the frame border. */
void x264_frame_deblock_row( x264_t *h, int mb_y )
{
    /* Row strides of the 8x8 (ref) and 4x4 (mv) grids, in grid units. */
    const int s8x8 = 2 * h->mb.i_mb_stride;
    const int s4x4 = 4 * h->mb.i_mb_stride;
    /* NOTE(review): used as a shift count and xor mask below, so this
     * assumes b_mbaff is exactly 0 or 1 - confirm. */
    const int b_interlaced = h->sh.b_mbaff;
    /* Vertical mv difference threshold for bS 1: 4, or 2 when interlaced. */
    const int mvy_limit = 4 >> b_interlaced;
    /* Macroblocks whose QP is at or below this only get edge 0 considered.
     * NOTE(review): presumably the highest QP for which the alpha/beta
     * lookups are guaranteed to disable filtering - confirm against the
     * tables. */
    const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
    /* 1 when sub-8x8 P partitions are not enabled in the analysis flags. */
    const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
    int mb_x;
    /* The "2" strides are doubled when interlaced. */
    int stridey   = h->fdec->i_stride[0];
    int stride2y  = stridey << b_interlaced;
    int strideuv  = h->fdec->i_stride[1];
    int stride2uv = strideuv << b_interlaced;

    /* CAVLC with 8x8 transform: munge the nnz data for this row before
     * deriving strengths; it is restored at the end of the function.
     * NOTE(review): exact rewrite performed is in munge_cavlc_nnz - confirm. */
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );

    /* Non-interlaced: mb_x advances by one each iteration and mb_y is fixed.
     * Interlaced: the low bit of mb_y is flipped every iteration and mb_x
     * only advances after an iteration with odd mb_y, so (for an even entry
     * mb_y, as passed by x264_frame_deblock) each column is visited twice. */
    for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
    {
        /* Index of this macroblock in the per-MB, 8x8 and 4x4 arrays. */
        const int mb_xy  = mb_y * h->mb.i_mb_stride + mb_x;
        const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
        const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
        /* Also used as the extra edge step: with 8x8 transform only every
         * second edge is visited. */
        const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
        const int i_qp = h->mb.qp[mb_xy];
        /* P_SKIP macroblocks have no internal edges to filter. */
        int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
        uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
        uint8_t *pixu = h->fdec->plane[1] +  8*mb_y*strideuv +  8*mb_x;
        uint8_t *pixv = h->fdec->plane[2] +  8*mb_y*strideuv +  8*mb_x;
        if( b_interlaced && (mb_y&1) )
        {
            /* Odd row of a pair: step back to line 1 of the row above so
             * that, with the doubled stride, the odd lines are addressed. */
            pixy -= 15*stridey;
            pixu -=  7*strideuv;
            pixv -=  7*strideuv;
        }

        x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );

        if( i_qp <= qp_thresh )
            i_edge_end = 1;

        /* FILTER_DIR(intra, i_dir): filter edge i_edge of the current
         * macroblock in direction i_dir.  `intra` is either empty or
         * `_intra` and is pasted onto deblock_edge and onto the loopf
         * function name.  Luma is filtered on every visited edge, chroma
         * only on even edges.  Each QP is the rounded average of the two
         * sides of the edge. */
        #define FILTER_DIR(intra, i_dir)\
        {\
            /* Y plane */\
            i_qpn= h->mb.qp[mbn_xy];\
            if( i_dir == 0 )\
            {\
                /* vertical edge */\
                deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
                              stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
                              h->loopf.deblock_h_luma##intra );\
                if( !(i_edge & 1) )\
                {\
                    /* U/V planes */\
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
                                  stride2uv, bS, i_qpc, 1,\
                                  h->loopf.deblock_h_chroma##intra );\
                }\
            }\
            else\
            {\
                /* horizontal edge */\
                deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
                              stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
                              h->loopf.deblock_v_luma##intra );\
                /* U/V planes */\
                if( !(i_edge & 1) )\
                {\
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
                                  stride2uv, bS, i_qpc, 1,\
                                  h->loopf.deblock_v_chroma##intra );\
                }\
            }\
        }

        /* DEBLOCK_STRENGTH(i_dir): fill bS[0..3] for edge i_edge.
         *   3 - either side of the edge is an intra macroblock
         *   2 - either 4x4 block touching the segment has nonzero coefficients
         *   1 - reference or motion vectors differ across the segment
         *   0 - otherwise
         * (x,y) is the 4x4 block on the current side and (xn,yn) the one on
         * the neighbouring side; the &0x03 wraps to the last column/row of
         * the neighbouring macroblock when i_edge is 0.  When no_sub8x8 is
         * set, the ref/mv test is skipped on odd edges, and odd segments
         * reuse the previous segment's result unless that one was 2. */
        #define DEBLOCK_STRENGTH(i_dir)\
        {\
            /* *** Get bS for each 4px for the current edge *** */\
            if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                *(uint32_t*)bS = 0x03030303;\
            else\
            {\
                *(uint32_t*)bS = 0x00000000;\
                for( i = 0; i < 4; i++ )\
                {\
                    int x  = i_dir == 0 ? i_edge : i;\
                    int y  = i_dir == 0 ? i      : i_edge;\
                    int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
                    int yn = i_dir == 0 ? y : (y - 1)&0x03;\
                    if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
                        h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
                        bS[i] = 2;\
                    else if(!(i_edge&no_sub8x8))\
                    {\
                        if((i&no_sub8x8) && bS[i-1] != 2)\
                            bS[i] = bS[i-1];\
                        else\
                        {\
                            /* FIXME: A given frame may occupy more than one position in\
                             * the reference list. So we should compare the frame numbers,\
                             * not the indices in the ref list.\
                             * No harm yet, as we don't generate that case.*/\
                            int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
                            int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
                            int i4p= mb_4x4+x+y*s4x4;\
                            int i4q= mbn_4x4+xn+yn*s4x4;\
                            if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
                                abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
                                abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
                               (h->sh.i_type == SLICE_TYPE_B &&\
                               (h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
                                abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
                                abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
                            {\
                                bS[i] = 1;\
                            }\
                        }\
                    }\
                }\
            }\
        }

        /* i_dir == 0 -> vertical edge
         * i_dir == 1 -> horizontal edge
         *
         * DEBLOCK_DIR(i_dir): filter all edges of the current macroblock in
         * one direction.  i_edge starts as a "on the frame border" flag: if
         * set, edge 0 is skipped; otherwise edge 0 is filtered against the
         * left/top neighbour (one further row up for horizontal edges when
         * interlaced).  Outside that interlaced case, an intra macroblock on
         * either side sends edge 0 straight to the _intra filters, which do
         * not read bS.  The remaining edges are internal, so the neighbour
         * indices are reset to the current macroblock. */
        #define DEBLOCK_DIR(i_dir)\
        {\
            int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
            int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
            DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
            if( i_edge )\
                i_edge+= b_8x8_transform;\
            else\
            {\
                mbn_xy  = i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride;\
                mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
                mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
                if( b_interlaced && i_dir == 1 )\
                {\
                    mbn_xy -= h->mb.i_mb_stride;\
                    mbn_8x8 -= 2 * s8x8;\
                    mbn_4x4 -= 4 * s4x4;\
                }\
                else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                {\
                    FILTER_DIR( _intra, i_dir );\
                    goto end##i_dir;\
                }\
                DEBLOCK_STRENGTH(i_dir);\
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
                end##i_dir:\
                i_edge += b_8x8_transform+1;\
            }\
            mbn_xy  = mb_xy;\
            mbn_8x8 = mb_8x8;\
            mbn_4x4 = mb_4x4;\
            for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
            {\
                DEBLOCK_STRENGTH(i_dir);\
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
            }\
        }

        DEBLOCK_DIR(0);
        DEBLOCK_DIR(1);
    }

    /* Undo the nnz munging done at the top of the function. */
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
}

Loren Merritt's avatar
Loren Merritt committed
794 795 796 797 798 799 800
/* Deblock the whole frame by calling x264_frame_deblock_row() for each
 * macroblock row from the top.  The row index advances by 1, or by 2 when
 * h->sh.b_mbaff is 1, in which case only every other row index is passed. */
void x264_frame_deblock( x264_t *h )
{
    int row = 0;

    while( row < h->sps->i_mb_height )
    {
        x264_frame_deblock_row( h, row );
        row += 1 + h->sh.b_mbaff;
    }
}

801
#ifdef HAVE_MMX
/* Prototypes of the MMXEXT/SSE2 deblocking routines; they are only declared
 * in this section.  The non-intra variants take a tc0 array, the intra
 * variants do not. */
void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );

void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
#ifdef ARCH_X86
/* MMXEXT luma routines, declared only when ARCH_X86 is defined. */
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
Loren Merritt's avatar
Loren Merritt committed
816

Loic Le Loarer's avatar
Loic Le Loarer committed
817
/* v luma filter built from the v8 routine: it is called twice, the second
 * time 8 bytes further along pix and 2 entries further along tc0. */
static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    int half;

    for( half = 0; half < 2; half++ )
        x264_deblock_v8_luma_mmxext( pix + 8*half, stride, alpha, beta, tc0 + 2*half );
}
Loic Le Loarer's avatar
Loic Le Loarer committed
822
/* Intra v luma filter built from the v8 intra routine: it is called twice,
 * the second time 8 bytes further along pix. */
static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
{
    int half;

    for( half = 0; half < 2; half++ )
        x264_deblock_v8_luma_intra_mmxext( pix + 8*half, stride, alpha, beta );
}
Loren Merritt's avatar
Loren Merritt committed
827
#endif
Loren Merritt's avatar
Loren Merritt committed
828
#endif
Laurent Aimar's avatar
Laurent Aimar committed
829

830 831 832 833 834
#ifdef ARCH_PPC
/* Prototypes of the AltiVec luma deblocking routines (non-intra variants,
 * taking a tc0 array); they are only declared in this section and only when
 * ARCH_PPC is defined. */
void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
#endif // ARCH_PPC

Loren Merritt's avatar
Loren Merritt committed
835 836 837 838 839 840 841 842 843 844 845
void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
{
    pf->deblock_v_luma = deblock_v_luma_c;
    pf->deblock_h_luma = deblock_h_luma_c;
    pf->deblock_v_chroma = deblock_v_chroma_c;
    pf->deblock_h_chroma = deblock_h_chroma_c;
    pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
    pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
    pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
    pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;