frame.c 35.7 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3
/*****************************************************************************
 * frame.c: h264 encoder library
 *****************************************************************************
4
 * Copyright (C) 2003-2008 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5 6
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7
 *          Loren Merritt <lorenm@u.washington.edu>
8
 *          Fiona Glaser <fiona@x264.com>
Laurent Aimar's avatar
Laurent Aimar committed
9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Laurent Aimar's avatar
Laurent Aimar committed
23 24 25 26
 *****************************************************************************/

#include "common.h"

27 28
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))

Laurent Aimar's avatar
Laurent Aimar committed
29 30
x264_frame_t *x264_frame_new( x264_t *h )
{
31
    x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
32
    int i, j;
Laurent Aimar's avatar
Laurent Aimar committed
33

34
    int i_mb_count = h->mb.i_mb_count;
35
    int i_stride, i_width, i_lines;
36
    int i_padv = PADV << h->param.b_interlaced;
37
    int luma_plane_size;
38
    int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
Laurent Aimar's avatar
Laurent Aimar committed
39

40 41
    if( !frame ) return NULL;

42 43
    memset( frame, 0, sizeof(x264_frame_t) );

Laurent Aimar's avatar
Laurent Aimar committed
44
    /* allocate frame data (+64 for extra data for me) */
45 46 47
    i_width  = ALIGN( h->param.i_width, 16 );
    i_stride = ALIGN( i_width + 2*PADH, align );
    i_lines  = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );
48

Laurent Aimar's avatar
Laurent Aimar committed
49 50 51
    frame->i_plane = 3;
    for( i = 0; i < 3; i++ )
    {
52 53 54
        frame->i_stride[i] = i_stride >> !!i;
        frame->i_width[i] = i_width >> !!i;
        frame->i_lines[i] = i_lines >> !!i;
Laurent Aimar's avatar
Laurent Aimar committed
55 56
    }

57 58
    luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
    for( i = 1; i < 3; i++ )
59
    {
60
        CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
61
        frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
62
    }
63 64
    /* all 4 luma planes allocated together, since the cacheline split code
     * requires them to be in-phase wrt cacheline alignment. */
65 66 67 68 69 70 71 72 73 74 75 76
    if( h->param.analyse.i_subpel_refine )
    {
        CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
        for( i = 0; i < 4; i++ )
            frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
        frame->plane[0] = frame->filtered[0];
    }
    else
    {
        CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
        frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
    }
77

78
    if( h->frames.b_have_lowres )
79
    {
80
        frame->i_width_lowres = frame->i_width[0]/2;
81
        frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
82
        frame->i_lines_lowres = frame->i_lines[0]/2;
83 84 85 86

        luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );

        CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
87
        for( i = 0; i < 4; i++ )
88
            frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
89 90 91 92 93 94 95 96

        for( j = 0; j <= !!h->param.i_bframe; j++ )
            for( i = 0; i <= h->param.i_bframe; i++ )
            {
                CHECKED_MALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
                memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
                CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
            }
97
    }
98

99
    if( h->param.analyse.i_me_method >= X264_ME_ESA )
100
    {
101
        CHECKED_MALLOC( frame->buffer[3],
102
                        2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
103
        frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
104 105
    }

Laurent Aimar's avatar
Laurent Aimar committed
106 107 108
    frame->i_poc = -1;
    frame->i_type = X264_TYPE_AUTO;
    frame->i_qpplus1 = 0;
109 110
    frame->i_pts = -1;
    frame->i_frame = -1;
111
    frame->i_frame_num = -1;
Loren Merritt's avatar
Loren Merritt committed
112
    frame->i_lines_completed = -1;
Laurent Aimar's avatar
Laurent Aimar committed
113

114 115 116
    CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
    CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
117
    CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
118 119
    if( h->param.i_bframe )
    {
120 121
        CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
        CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
122 123 124 125 126 127 128
    }
    else
    {
        frame->mv[1]  = NULL;
        frame->ref[1] = NULL;
    }

129 130
    CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
    CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
131 132
    for( i = 0; i < h->param.i_bframe + 2; i++ )
        for( j = 0; j < h->param.i_bframe + 2; j++ )
133
            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
134

135
    if( h->param.rc.i_aq_mode )
136
    {
137
        CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
138 139 140
        if( h->frames.b_have_lowres )
            CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) );
    }
141

142 143
    x264_pthread_mutex_init( &frame->mutex, NULL );
    x264_pthread_cond_init( &frame->cv, NULL );
144

Laurent Aimar's avatar
Laurent Aimar committed
145
    return frame;
146 147 148 149

fail:
    x264_frame_delete( frame );
    return NULL;
Laurent Aimar's avatar
Laurent Aimar committed
150 151 152 153
}

void x264_frame_delete( x264_frame_t *frame )
{
154
    int i, j;
155
    for( i = 0; i < 4; i++ )
156
        x264_free( frame->buffer[i] );
157 158
    for( i = 0; i < 4; i++ )
        x264_free( frame->buffer_lowres[i] );
159 160 161
    for( i = 0; i < X264_BFRAME_MAX+2; i++ )
        for( j = 0; j < X264_BFRAME_MAX+2; j++ )
            x264_free( frame->i_row_satds[i][j] );
162 163 164 165 166 167
    for( j = 0; j < 2; j++ )
        for( i = 0; i <= X264_BFRAME_MAX; i++ )
        {
            x264_free( frame->lowres_mvs[j][i] );
            x264_free( frame->lowres_mv_costs[j][i] );
        }
168
    x264_free( frame->f_qp_offset );
Fiona Glaser's avatar
Fiona Glaser committed
169
    x264_free( frame->i_inv_qscale_factor );
170
    x264_free( frame->i_intra_cost );
171 172
    x264_free( frame->i_row_bits );
    x264_free( frame->i_row_qp );
173
    x264_free( frame->mb_type );
174 175 176 177
    x264_free( frame->mv[0] );
    x264_free( frame->mv[1] );
    x264_free( frame->ref[0] );
    x264_free( frame->ref[1] );
178 179
    x264_pthread_mutex_destroy( &frame->mutex );
    x264_pthread_cond_destroy( &frame->cv );
Laurent Aimar's avatar
Laurent Aimar committed
180 181 182
    x264_free( frame );
}

183
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
Laurent Aimar's avatar
Laurent Aimar committed
184
{
185
    int i_csp = src->img.i_csp & X264_CSP_MASK;
186 187 188 189 190 191 192
    int i;
    if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
    {
        x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
        return -1;
    }

Laurent Aimar's avatar
Laurent Aimar committed
193 194 195 196
    dst->i_type     = src->i_type;
    dst->i_qpplus1  = src->i_qpplus1;
    dst->i_pts      = src->i_pts;

197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
    for( i=0; i<3; i++ )
    {
        int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
        uint8_t *plane = src->img.plane[s];
        int stride = src->img.i_stride[s];
        int width = h->param.i_width >> !!i;
        int height = h->param.i_height >> !!i;
        if( src->img.i_csp & X264_CSP_VFLIP )
        {
            plane += (height-1)*stride;
            stride = -stride;
        }
        h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
    }
    return 0;
Laurent Aimar's avatar
Laurent Aimar committed
212 213 214 215
}



Loren Merritt's avatar
Loren Merritt committed
216
static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
217 218 219 220 221 222
{
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
    int y;
    for( y = 0; y < i_height; y++ )
    {
        /* left band */
223
        memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
224
        /* right band */
225
        memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
226 227
    }
    /* upper band */
Loren Merritt's avatar
Loren Merritt committed
228
    if( b_pad_top )
229 230
    for( y = 0; y < i_padv; y++ )
        memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
231
    /* lower band */
Loren Merritt's avatar
Loren Merritt committed
232
    if( b_pad_bottom )
233 234
    for( y = 0; y < i_padv; y++ )
        memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
235 236 237
#undef PPIXEL
}

Loren Merritt's avatar
Loren Merritt committed
238
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
Laurent Aimar's avatar
Laurent Aimar committed
239
{
240
    int i;
Loren Merritt's avatar
Loren Merritt committed
241 242 243
    int b_start = !mb_y;
    if( mb_y & h->sh.b_mbaff )
        return;
Laurent Aimar's avatar
Laurent Aimar committed
244 245
    for( i = 0; i < frame->i_plane; i++ )
    {
246 247
        int stride = frame->i_stride[i];
        int width = 16*h->sps->i_mb_width >> !!i;
Loren Merritt's avatar
Loren Merritt committed
248
        int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
249 250
        int padh = PADH >> !!i;
        int padv = PADV >> !!i;
Loren Merritt's avatar
Loren Merritt committed
251 252 253 254 255
        // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
        uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
        if( b_end && !b_start )
            height += 4 >> (!!i + h->sh.b_mbaff);
        if( h->sh.b_mbaff )
256
        {
Loren Merritt's avatar
Loren Merritt committed
257 258
            plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
            plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
259 260 261
        }
        else
        {
Loren Merritt's avatar
Loren Merritt committed
262
            plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
263
        }
Laurent Aimar's avatar
Laurent Aimar committed
264 265 266
    }
}

Loren Merritt's avatar
Loren Merritt committed
267
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
268
{
Loren Merritt's avatar
Loren Merritt committed
269
    /* during filtering, 8 extra pixels were filtered on each edge,
Loren Merritt's avatar
Loren Merritt committed
270
     * but up to 3 of the horizontal ones may be wrong.
271
       we want to expand border from the last filtered pixel */
Loren Merritt's avatar
Loren Merritt committed
272
    int b_start = !mb_y;
273
    int stride = frame->i_stride[0];
Loren Merritt's avatar
Loren Merritt committed
274
    int width = 16*h->sps->i_mb_width + 8;
Loren Merritt's avatar
Loren Merritt committed
275
    int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
Loren Merritt's avatar
Loren Merritt committed
276
    int padh = PADH - 4;
277
    int padv = PADV - 8;
278
    int i;
279
    for( i = 1; i < 4; i++ )
280
    {
Loren Merritt's avatar
Loren Merritt committed
281
        // buffer: 8 luma, to match the hpel filter
Loren Merritt's avatar
Loren Merritt committed
282
        uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
Loren Merritt's avatar
Loren Merritt committed
283
        if( h->sh.b_mbaff )
284
        {
Loren Merritt's avatar
Loren Merritt committed
285 286
            plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
            plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
287 288 289
        }
        else
        {
Loren Merritt's avatar
Loren Merritt committed
290
            plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
291 292
        }
    }
293 294
}

295
void x264_frame_expand_border_lowres( x264_frame_t *frame )
296
{
297
    int i;
298
    for( i = 0; i < 4; i++ )
Loren Merritt's avatar
Loren Merritt committed
299
        plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
300 301
}

Loren Merritt's avatar
Loren Merritt committed
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
{
    int i, y;
    for( i = 0; i < frame->i_plane; i++ )
    {
        int i_subsample = i ? 1 : 0;
        int i_width = h->param.i_width >> i_subsample;
        int i_height = h->param.i_height >> i_subsample;
        int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
        int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;

        if( i_padx )
        {
            for( y = 0; y < i_height; y++ )
                memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
                         frame->plane[i][y*frame->i_stride[i] + i_width - 1],
                         i_padx );
        }
        if( i_pady )
        {
322
            //FIXME interlace? or just let it pad using the wrong field
Loren Merritt's avatar
Loren Merritt committed
323
            for( y = i_height; y < i_height + i_pady; y++ )
Loren Merritt's avatar
Loren Merritt committed
324 325 326 327 328 329 330
                memcpy( &frame->plane[i][y*frame->i_stride[i]],
                        &frame->plane[i][(i_height-1)*frame->i_stride[i]],
                        i_width + i_padx );
        }
    }
}

331

332 333
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 * entropy coding, but per 64 coeffs for the purpose of deblocking */
Loic Le Loarer's avatar
Loic Le Loarer committed
334
static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
335 336 337
{
    uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
338
    int x, nnz;
339 340 341 342 343
    for( x=0; x<h->sps->i_mb_width; x++ )
    {
        memcpy( buf+x, src+x, 16 );
        if( transform[x] )
        {
344 345 346 347
            nnz = src[x][0] | src[x][1];
            src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
            nnz = src[x][2] | src[x][3];
            src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
        }
    }
}

static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
{
    uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    int x;
    for( x=0; x<h->sps->i_mb_width; x++ )
        memcpy( dst+x, buf+x, 16 );
}

static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
{
    func( h, mb_y, buf );
    if( mb_y > 0 )
        func( h, mb_y-1, buf + h->sps->i_mb_width );
    if( h->sh.b_mbaff )
    {
        func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
        if( mb_y > 0 )
            func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
    }
}


374
/* Deblocking filter */
375
static const uint8_t i_alpha_table[52+12*2] =
Laurent Aimar's avatar
Laurent Aimar committed
376
{
377
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
Laurent Aimar's avatar
Laurent Aimar committed
378 379 380 381 382
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
     7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
    80, 90,101,113,127,144,162,182,203,226,
383 384
   255,255,
   255,255,255,255,255,255,255,255,255,255,255,255,
Laurent Aimar's avatar
Laurent Aimar committed
385
};
386
static const uint8_t i_beta_table[52+12*2] =
Laurent Aimar's avatar
Laurent Aimar committed
387
{
388
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
Laurent Aimar's avatar
Laurent Aimar committed
389 390 391 392 393
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
     3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
394 395
    18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
Laurent Aimar's avatar
Laurent Aimar committed
396
};
397
static const int8_t i_tc0_table[52+12*2][4] =
Laurent Aimar's avatar
Laurent Aimar committed
398
{
399 400
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
Fiona Glaser's avatar
Fiona Glaser committed
401 402 403 404 405 406 407 408
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
409 410 411
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
Laurent Aimar's avatar
Laurent Aimar committed
412
};
413 414 415
#define alpha_table(x) i_alpha_table[(x)+12]
#define beta_table(x)  i_beta_table[(x)+12]
#define tc0_table(x)   i_tc0_table[(x)+12]
Laurent Aimar's avatar
Laurent Aimar committed
416 417

/* From ffmpeg */
Loren Merritt's avatar
Loren Merritt committed
418
static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
Laurent Aimar's avatar
Laurent Aimar committed
419 420
{
    int i, d;
421 422 423 424
    for( i = 0; i < 4; i++ )
    {
        if( tc0[i] < 0 )
        {
Loren Merritt's avatar
Loren Merritt committed
425
            pix += 4*ystride;
Laurent Aimar's avatar
Laurent Aimar committed
426 427
            continue;
        }
428 429
        for( d = 0; d < 4; d++ )
        {
Loren Merritt's avatar
Loren Merritt committed
430 431 432 433 434 435
            const int p2 = pix[-3*xstride];
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            const int q2 = pix[ 2*xstride];
436 437 438

            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            {
Loren Merritt's avatar
Loren Merritt committed
439 440
                int tc = tc0[i];
                int delta;
441 442
                if( abs( p2 - p0 ) < beta )
                {
Loren Merritt's avatar
Loren Merritt committed
443
                    pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
Fiona Glaser's avatar
Fiona Glaser committed
444
                    tc++;
Laurent Aimar's avatar
Laurent Aimar committed
445
                }
446 447
                if( abs( q2 - q0 ) < beta )
                {
Loren Merritt's avatar
Loren Merritt committed
448 449
                    pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
Laurent Aimar's avatar
Laurent Aimar committed
450
                }
451

Loren Merritt's avatar
Loren Merritt committed
452
                delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
453 454
                pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
455
            }
Loren Merritt's avatar
Loren Merritt committed
456
            pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
457 458 459
        }
    }
}
Loren Merritt's avatar
Loren Merritt committed
460 461
static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
Loren Merritt's avatar
Loren Merritt committed
462
    deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
Loren Merritt's avatar
Loren Merritt committed
463 464 465 466 467
}
static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
}
Laurent Aimar's avatar
Laurent Aimar committed
468

Loren Merritt's avatar
Loren Merritt committed
469
static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
Laurent Aimar's avatar
Laurent Aimar committed
470 471
{
    int i, d;
472 473
    for( i = 0; i < 4; i++ )
    {
Loren Merritt's avatar
Loren Merritt committed
474
        const int tc = tc0[i];
475 476
        if( tc <= 0 )
        {
Loren Merritt's avatar
Loren Merritt committed
477
            pix += 2*ystride;
Laurent Aimar's avatar
Laurent Aimar committed
478 479
            continue;
        }
480 481
        for( d = 0; d < 2; d++ )
        {
Loren Merritt's avatar
Loren Merritt committed
482 483 484 485 486
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];

487 488
            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            {
Loren Merritt's avatar
Loren Merritt committed
489
                int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
490 491
                pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
492
            }
Loren Merritt's avatar
Loren Merritt committed
493
            pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
494 495 496
        }
    }
}
Loren Merritt's avatar
Loren Merritt committed
497
static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
498
{
Loren Merritt's avatar
Loren Merritt committed
499 500 501
    deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
}
static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
502
{
Loren Merritt's avatar
Loren Merritt committed
503 504
    deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
}
Laurent Aimar's avatar
Laurent Aimar committed
505

Loren Merritt's avatar
Loren Merritt committed
506
static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
Laurent Aimar's avatar
Laurent Aimar committed
507
{
Loren Merritt's avatar
Loren Merritt committed
508
    int d;
509 510
    for( d = 0; d < 16; d++ )
    {
Loren Merritt's avatar
Loren Merritt committed
511 512 513 514 515 516 517
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

518 519 520 521 522
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
        {
            if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
            {
                if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
Laurent Aimar's avatar
Laurent Aimar committed
523
                {
Loren Merritt's avatar
Loren Merritt committed
524 525 526 527
                    const int p3 = pix[-4*xstride];
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
Laurent Aimar's avatar
Laurent Aimar committed
528
                }
529 530 531
                else /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
Laurent Aimar's avatar
Laurent Aimar committed
532
                {
Loren Merritt's avatar
Loren Merritt committed
533 534 535 536
                    const int q3 = pix[3*xstride];
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
Laurent Aimar's avatar
Laurent Aimar committed
537
                }
538 539 540 541 542
                else /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
            else /* p0', q0' */
            {
Loren Merritt's avatar
Loren Merritt committed
543 544
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
Laurent Aimar's avatar
Laurent Aimar committed
545 546
            }
        }
Loren Merritt's avatar
Loren Merritt committed
547
        pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
548 549
    }
}
Loren Merritt's avatar
Loren Merritt committed
550
static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
551
{
Loren Merritt's avatar
Loren Merritt committed
552 553 554
    deblock_luma_intra_c( pix, stride, 1, alpha, beta );
}
static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
555
{
Loren Merritt's avatar
Loren Merritt committed
556 557
    deblock_luma_intra_c( pix, 1, stride, alpha, beta );
}
Laurent Aimar's avatar
Laurent Aimar committed
558

Loren Merritt's avatar
Loren Merritt committed
559
static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
560
{
Loren Merritt's avatar
Loren Merritt committed
561
    int d;
562 563
    for( d = 0; d < 8; d++ )
    {
Loren Merritt's avatar
Loren Merritt committed
564 565 566 567 568
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];

569 570
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
        {
Loren Merritt's avatar
Loren Merritt committed
571 572
            pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
            pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
573
        }
Loren Merritt's avatar
Loren Merritt committed
574 575 576 577
        pix += ystride;
    }
}
static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
578
{
Loren Merritt's avatar
Loren Merritt committed
579 580 581
    deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
}
static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
582
{
Loren Merritt's avatar
Loren Merritt committed
583 584 585
    deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
}

Fiona Glaser's avatar
Fiona Glaser committed
586 587
static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
{
588 589 590
    const int index_a = i_qp + h->sh.i_alpha_c0_offset;
    const int alpha = alpha_table(index_a);
    const int beta  = beta_table(i_qp + h->sh.i_beta_offset);
Fiona Glaser's avatar
Fiona Glaser committed
591 592 593 594 595
    int8_t tc[4];

    if( !alpha || !beta )
        return;

596 597 598 599
    tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
    tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
    tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
    tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
Fiona Glaser's avatar
Fiona Glaser committed
600

601
    pf_inter( pix1, i_stride, alpha, beta, tc );
602
    if( b_chroma )
Fiona Glaser's avatar
Fiona Glaser committed
603 604 605 606
        pf_inter( pix2, i_stride, alpha, beta, tc );
}

static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
Loren Merritt's avatar
Loren Merritt committed
607
{
608 609
    const int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
    const int beta  = beta_table(i_qp + h->sh.i_beta_offset);
Loren Merritt's avatar
Loren Merritt committed
610

Fiona Glaser's avatar
Fiona Glaser committed
611 612 613
    if( !alpha || !beta )
        return;

614
    pf_intra( pix1, i_stride, alpha, beta );
615
    if( b_chroma )
Fiona Glaser's avatar
Fiona Glaser committed
616
        pf_intra( pix2, i_stride, alpha, beta );
Laurent Aimar's avatar
Laurent Aimar committed
617 618
}

Loren Merritt's avatar
Loren Merritt committed
619
void x264_frame_deblock_row( x264_t *h, int mb_y )
Laurent Aimar's avatar
Laurent Aimar committed
620 621 622
{
    const int s8x8 = 2 * h->mb.i_mb_stride;
    const int s4x4 = 4 * h->mb.i_mb_stride;
Loren Merritt's avatar
Loren Merritt committed
623
    const int b_interlaced = h->sh.b_mbaff;
624
    const int mvy_limit = 4 >> b_interlaced;
625
    const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
626
    const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
Loren Merritt's avatar
Loren Merritt committed
627
    int mb_x;
628 629 630 631
    int stridey   = h->fdec->i_stride[0];
    int stride2y  = stridey << b_interlaced;
    int strideuv  = h->fdec->i_stride[1];
    int stride2uv = strideuv << b_interlaced;
632

633 634 635
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );

636
    for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
Laurent Aimar's avatar
Laurent Aimar committed
637 638 639 640
    {
        const int mb_xy  = mb_y * h->mb.i_mb_stride + mb_x;
        const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
        const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
641
        const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
642 643 644 645 646
        const int i_qp = h->mb.qp[mb_xy];
        int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
        uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
        uint8_t *pixu = h->fdec->plane[1] +  8*mb_y*strideuv +  8*mb_x;
        uint8_t *pixv = h->fdec->plane[2] +  8*mb_y*strideuv +  8*mb_x;
647 648
        if( b_interlaced && (mb_y&1) )
        {
649 650 651
            pixy -= 15*stridey;
            pixu -=  7*strideuv;
            pixv -=  7*strideuv;
652 653
        }

654 655
        x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );

656 657 658
        if( i_qp <= qp_thresh )
            i_edge_end = 1;

Fiona Glaser's avatar
Fiona Glaser committed
659 660 661 662 663 664 665
        #define FILTER_DIR(intra, i_dir)\
        {\
            /* Y plane */\
            i_qpn= h->mb.qp[mbn_xy];\
            if( i_dir == 0 )\
            {\
                /* vertical edge */\
666 667
                deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
                              stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
668 669 670 671
                              h->loopf.deblock_h_luma##intra );\
                if( !(i_edge & 1) )\
                {\
                    /* U/V planes */\
672 673 674
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
                                  stride2uv, bS, i_qpc, 1,\
Fiona Glaser's avatar
Fiona Glaser committed
675 676 677 678 679 680
                                  h->loopf.deblock_h_chroma##intra );\
                }\
            }\
            else\
            {\
                /* horizontal edge */\
681 682
                deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
                              stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
683 684 685 686
                              h->loopf.deblock_v_luma##intra );\
                /* U/V planes */\
                if( !(i_edge & 1) )\
                {\
687 688 689
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
                                  stride2uv, bS, i_qpc, 1,\
Fiona Glaser's avatar
Fiona Glaser committed
690 691 692 693 694
                                  h->loopf.deblock_v_chroma##intra );\
                }\
            }\
        }

695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711
        #define DEBLOCK_STRENGTH(i_dir)\
        {\
            /* *** Get bS for each 4px for the current edge *** */\
            if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                *(uint32_t*)bS = 0x03030303;\
            else\
            {\
                *(uint32_t*)bS = 0x00000000;\
                for( i = 0; i < 4; i++ )\
                {\
                    int x  = i_dir == 0 ? i_edge : i;\
                    int y  = i_dir == 0 ? i      : i_edge;\
                    int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
                    int yn = i_dir == 0 ? y : (y - 1)&0x03;\
                    if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
                        h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
                        bS[i] = 2;\
712
                    else if(!(i_edge&no_sub8x8))\
713
                    {\
714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
                        if((i&no_sub8x8) && bS[i-1] != 2)\
                            bS[i] = bS[i-1];\
                        else\
                        {\
                            /* FIXME: A given frame may occupy more than one position in\
                             * the reference list. So we should compare the frame numbers,\
                             * not the indices in the ref list.\
                             * No harm yet, as we don't generate that case.*/\
                            int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
                            int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
                            int i4p= mb_4x4+x+y*s4x4;\
                            int i4q= mbn_4x4+xn+yn*s4x4;\
                            if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
                                abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
                                abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
                               (h->sh.i_type == SLICE_TYPE_B &&\
                               (h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
                                abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
                                abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
733 734 735
                            {\
                                bS[i] = 1;\
                            }\
736
                        }\
737 738 739 740 741
                    }\
                }\
            }\
        }

Laurent Aimar's avatar
Laurent Aimar committed
742 743
        /* i_dir == 0 -> vertical edge
         * i_dir == 1 -> horizontal edge */
744
        #define DEBLOCK_DIR(i_dir)\
Fiona Glaser's avatar
Fiona Glaser committed
745
        {\
Fiona Glaser's avatar
Fiona Glaser committed
746
            int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
747
            int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
Fiona Glaser's avatar
Fiona Glaser committed
748 749 750 751 752 753 754 755 756
            DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
            if( i_edge )\
                i_edge+= b_8x8_transform;\
            else\
            {\
                mbn_xy  = i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride;\
                mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
                mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
                if( b_interlaced && i_dir == 1 )\
Fiona Glaser's avatar
Fiona Glaser committed
757 758 759 760 761
                {\
                    mbn_xy -= h->mb.i_mb_stride;\
                    mbn_8x8 -= 2 * s8x8;\
                    mbn_4x4 -= 4 * s4x4;\
                }\
Fiona Glaser's avatar
Fiona Glaser committed
762 763 764
                else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                {\
                    FILTER_DIR( _intra, i_dir );\
765
                    goto end##i_dir;\
Fiona Glaser's avatar
Fiona Glaser committed
766
                }\
767 768 769 770 771
                DEBLOCK_STRENGTH(i_dir);\
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
                end##i_dir:\
                i_edge += b_8x8_transform+1;\
Fiona Glaser's avatar
Fiona Glaser committed
772
            }\
773 774 775
            mbn_xy  = mb_xy;\
            mbn_8x8 = mb_8x8;\
            mbn_4x4 = mb_4x4;\
Fiona Glaser's avatar
Fiona Glaser committed
776 777
            for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
            {\
778
                DEBLOCK_STRENGTH(i_dir);\
Fiona Glaser's avatar
Fiona Glaser committed
779 780
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
Fiona Glaser's avatar
Fiona Glaser committed
781
            }\
Laurent Aimar's avatar
Laurent Aimar committed
782 783
        }

784 785
        DEBLOCK_DIR(0);
        DEBLOCK_DIR(1);
Laurent Aimar's avatar
Laurent Aimar committed
786
    }
787 788 789

    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
Laurent Aimar's avatar
Laurent Aimar committed
790 791
}

Loren Merritt's avatar
Loren Merritt committed
792 793 794 795 796 797 798
void x264_frame_deblock( x264_t *h )
{
    int mb_y;
    for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
        x264_frame_deblock_row( h, mb_y );
}

799
#ifdef HAVE_MMX
Loren Merritt's avatar
Loren Merritt committed
800 801 802 803 804 805 806
void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );

void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
Loren Merritt's avatar
Loren Merritt committed
807 808
void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
809
#ifdef ARCH_X86
Loren Merritt's avatar
Loren Merritt committed
810 811
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
Loren Merritt's avatar
Loren Merritt committed
812 813
void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
Loren Merritt's avatar
Loren Merritt committed
814

Loic Le Loarer's avatar
Loic Le Loarer committed
815
static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
Loren Merritt's avatar
Loren Merritt committed
816 817 818 819
{
    x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
    x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
}
Loic Le Loarer's avatar
Loic Le Loarer committed
820
static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
Loren Merritt's avatar
Loren Merritt committed
821 822 823 824
{
    x264_deblock_v8_luma_intra_mmxext( pix,   stride, alpha, beta );
    x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
}
Loren Merritt's avatar
Loren Merritt committed
825
#endif
Loren Merritt's avatar
Loren Merritt committed
826
#endif
Laurent Aimar's avatar
Laurent Aimar committed
827

828 829 830 831 832
#ifdef ARCH_PPC
void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
#endif // ARCH_PPC

Loren Merritt's avatar
Loren Merritt committed
833 834 835 836 837 838 839 840 841 842 843
void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
{
    pf->deblock_v_luma = deblock_v_luma_c;
    pf->deblock_h_luma = deblock_h_luma_c;
    pf->deblock_v_chroma = deblock_v_chroma_c;
    pf->deblock_h_chroma = deblock_h_chroma_c;
    pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
    pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
    pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
    pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;

844
#ifdef HAVE_MMX
Loren Merritt's avatar
Loren Merritt committed
845 846 847 848 849 850
    if( cpu&X264_CPU_MMXEXT )
    {
        pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
        pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
        pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
        pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
851 852 853