frame.c 34.1 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3
/*****************************************************************************
 * frame.c: h264 encoder library
 *****************************************************************************
4
 * Copyright (C) 2003-2008 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5 6
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7
 *          Loren Merritt <lorenm@u.washington.edu>
Laurent Aimar's avatar
Laurent Aimar committed
8 9 10 11 12 13 14 15 16 17 18 19 20
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Laurent Aimar's avatar
Laurent Aimar committed
22 23 24 25
 *****************************************************************************/

#include "common.h"

26 27
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))

Laurent Aimar's avatar
Laurent Aimar committed
28 29
x264_frame_t *x264_frame_new( x264_t *h )
{
30
    x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
31
    int i, j;
Laurent Aimar's avatar
Laurent Aimar committed
32

33
    int i_mb_count = h->mb.i_mb_count;
34
    int i_stride, i_width, i_lines;
35
    int i_padv = PADV << h->param.b_interlaced;
36
    int luma_plane_size;
37
    int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
Laurent Aimar's avatar
Laurent Aimar committed
38

39 40
    if( !frame ) return NULL;

41 42
    memset( frame, 0, sizeof(x264_frame_t) );

Laurent Aimar's avatar
Laurent Aimar committed
43
    /* allocate frame data (+64 for extra data for me) */
44 45 46
    i_width  = ALIGN( h->param.i_width, 16 );
    i_stride = ALIGN( i_width + 2*PADH, align );
    i_lines  = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );
47

Laurent Aimar's avatar
Laurent Aimar committed
48 49 50
    frame->i_plane = 3;
    for( i = 0; i < 3; i++ )
    {
51 52 53
        frame->i_stride[i] = i_stride >> !!i;
        frame->i_width[i] = i_width >> !!i;
        frame->i_lines[i] = i_lines >> !!i;
Laurent Aimar's avatar
Laurent Aimar committed
54 55
    }

56 57
    luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
    for( i = 1; i < 3; i++ )
58
    {
59
        CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
60
        frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
61
    }
62 63 64 65
    /* all 4 luma planes allocated together, since the cacheline split code
     * requires them to be in-phase wrt cacheline alignment. */
    CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
    for( i = 0; i < 4; i++ )
66
        frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
67
    frame->plane[0] = frame->filtered[0];
68

69
    if( h->frames.b_have_lowres )
70
    {
71
        frame->i_width_lowres = frame->i_width[0]/2;
72
        frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
73
        frame->i_lines_lowres = frame->i_lines[0]/2;
74 75 76 77

        luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );

        CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
78
        for( i = 0; i < 4; i++ )
79
            frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
80
    }
81

82
    if( h->param.analyse.i_me_method >= X264_ME_ESA )
83
    {
84
        CHECKED_MALLOC( frame->buffer[3],
85
                        2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
86
        frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
87 88
    }

Laurent Aimar's avatar
Laurent Aimar committed
89 90 91
    frame->i_poc = -1;
    frame->i_type = X264_TYPE_AUTO;
    frame->i_qpplus1 = 0;
92 93
    frame->i_pts = -1;
    frame->i_frame = -1;
94
    frame->i_frame_num = -1;
Loren Merritt's avatar
Loren Merritt committed
95
    frame->i_lines_completed = -1;
Laurent Aimar's avatar
Laurent Aimar committed
96

97 98 99
    CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
    CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
100
    CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
101 102
    if( h->param.i_bframe )
    {
103 104
        CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
        CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
105 106 107 108 109 110 111
    }
    else
    {
        frame->mv[1]  = NULL;
        frame->ref[1] = NULL;
    }

112 113
    CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
    CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
114 115
    for( i = 0; i < h->param.i_bframe + 2; i++ )
        for( j = 0; j < h->param.i_bframe + 2; j++ )
116
            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
117

118 119 120
    if( h->param.rc.i_aq_mode )
        CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );

121 122
    x264_pthread_mutex_init( &frame->mutex, NULL );
    x264_pthread_cond_init( &frame->cv, NULL );
123

Laurent Aimar's avatar
Laurent Aimar committed
124
    return frame;
125 126 127 128

fail:
    x264_frame_delete( frame );
    return NULL;
Laurent Aimar's avatar
Laurent Aimar committed
129 130 131 132
}

void x264_frame_delete( x264_frame_t *frame )
{
133
    int i, j;
134
    for( i = 0; i < 4; i++ )
135
        x264_free( frame->buffer[i] );
136 137
    for( i = 0; i < 4; i++ )
        x264_free( frame->buffer_lowres[i] );
138 139 140 141 142
    for( i = 0; i < X264_BFRAME_MAX+2; i++ )
        for( j = 0; j < X264_BFRAME_MAX+2; j++ )
            x264_free( frame->i_row_satds[i][j] );
    x264_free( frame->i_row_bits );
    x264_free( frame->i_row_qp );
143
    x264_free( frame->mb_type );
144 145 146 147
    x264_free( frame->mv[0] );
    x264_free( frame->mv[1] );
    x264_free( frame->ref[0] );
    x264_free( frame->ref[1] );
148 149
    x264_pthread_mutex_destroy( &frame->mutex );
    x264_pthread_cond_destroy( &frame->cv );
Laurent Aimar's avatar
Laurent Aimar committed
150 151 152
    x264_free( frame );
}

153
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
Laurent Aimar's avatar
Laurent Aimar committed
154
{
155
    int i_csp = src->img.i_csp & X264_CSP_MASK;
156 157 158 159 160 161 162
    int i;
    if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
    {
        x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
        return -1;
    }

Laurent Aimar's avatar
Laurent Aimar committed
163 164 165 166
    dst->i_type     = src->i_type;
    dst->i_qpplus1  = src->i_qpplus1;
    dst->i_pts      = src->i_pts;

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
    for( i=0; i<3; i++ )
    {
        int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
        uint8_t *plane = src->img.plane[s];
        int stride = src->img.i_stride[s];
        int width = h->param.i_width >> !!i;
        int height = h->param.i_height >> !!i;
        if( src->img.i_csp & X264_CSP_VFLIP )
        {
            plane += (height-1)*stride;
            stride = -stride;
        }
        h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
    }
    return 0;
Laurent Aimar's avatar
Laurent Aimar committed
182 183 184 185
}



Loren Merritt's avatar
Loren Merritt committed
186
static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
187 188 189 190 191 192
{
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
    int y;
    for( y = 0; y < i_height; y++ )
    {
        /* left band */
193
        memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
194
        /* right band */
195
        memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
196 197
    }
    /* upper band */
Loren Merritt's avatar
Loren Merritt committed
198
    if( b_pad_top )
199 200
    for( y = 0; y < i_padv; y++ )
        memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
201
    /* lower band */
Loren Merritt's avatar
Loren Merritt committed
202
    if( b_pad_bottom )
203 204
    for( y = 0; y < i_padv; y++ )
        memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
205 206 207
#undef PPIXEL
}

Loren Merritt's avatar
Loren Merritt committed
208
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
Laurent Aimar's avatar
Laurent Aimar committed
209
{
210
    int i;
Loren Merritt's avatar
Loren Merritt committed
211 212 213
    int b_start = !mb_y;
    if( mb_y & h->sh.b_mbaff )
        return;
Laurent Aimar's avatar
Laurent Aimar committed
214 215
    for( i = 0; i < frame->i_plane; i++ )
    {
216 217
        int stride = frame->i_stride[i];
        int width = 16*h->sps->i_mb_width >> !!i;
Loren Merritt's avatar
Loren Merritt committed
218
        int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
219 220
        int padh = PADH >> !!i;
        int padv = PADV >> !!i;
Loren Merritt's avatar
Loren Merritt committed
221 222 223 224 225
        // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
        uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
        if( b_end && !b_start )
            height += 4 >> (!!i + h->sh.b_mbaff);
        if( h->sh.b_mbaff )
226
        {
Loren Merritt's avatar
Loren Merritt committed
227 228
            plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
            plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
229 230 231
        }
        else
        {
Loren Merritt's avatar
Loren Merritt committed
232
            plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
233
        }
Laurent Aimar's avatar
Laurent Aimar committed
234 235 236
    }
}

Loren Merritt's avatar
Loren Merritt committed
237
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
238
{
Loren Merritt's avatar
Loren Merritt committed
239
    /* during filtering, 8 extra pixels were filtered on each edge,
Loren Merritt's avatar
Loren Merritt committed
240
     * but up to 3 of the horizontal ones may be wrong.
241
       we want to expand border from the last filtered pixel */
Loren Merritt's avatar
Loren Merritt committed
242
    int b_start = !mb_y;
243
    int stride = frame->i_stride[0];
Loren Merritt's avatar
Loren Merritt committed
244
    int width = 16*h->sps->i_mb_width + 8;
Loren Merritt's avatar
Loren Merritt committed
245
    int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
Loren Merritt's avatar
Loren Merritt committed
246
    int padh = PADH - 4;
247
    int padv = PADV - 8;
248
    int i;
249
    for( i = 1; i < 4; i++ )
250
    {
Loren Merritt's avatar
Loren Merritt committed
251
        // buffer: 8 luma, to match the hpel filter
Loren Merritt's avatar
Loren Merritt committed
252
        uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
Loren Merritt's avatar
Loren Merritt committed
253
        if( h->sh.b_mbaff )
254
        {
Loren Merritt's avatar
Loren Merritt committed
255 256
            plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
            plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
257 258 259
        }
        else
        {
Loren Merritt's avatar
Loren Merritt committed
260
            plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
261 262
        }
    }
263 264
}

265
void x264_frame_expand_border_lowres( x264_frame_t *frame )
266
{
267
    int i;
268
    for( i = 0; i < 4; i++ )
Loren Merritt's avatar
Loren Merritt committed
269
        plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
270 271
}

Loren Merritt's avatar
Loren Merritt committed
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
{
    int i, y;
    for( i = 0; i < frame->i_plane; i++ )
    {
        int i_subsample = i ? 1 : 0;
        int i_width = h->param.i_width >> i_subsample;
        int i_height = h->param.i_height >> i_subsample;
        int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
        int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;

        if( i_padx )
        {
            for( y = 0; y < i_height; y++ )
                memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
                         frame->plane[i][y*frame->i_stride[i] + i_width - 1],
                         i_padx );
        }
        if( i_pady )
        {
292
            //FIXME interlace? or just let it pad using the wrong field
Loren Merritt's avatar
Loren Merritt committed
293
            for( y = i_height; y < i_height + i_pady; y++ )
Loren Merritt's avatar
Loren Merritt committed
294 295 296 297 298 299 300
                memcpy( &frame->plane[i][y*frame->i_stride[i]],
                        &frame->plane[i][(i_height-1)*frame->i_stride[i]],
                        i_width + i_padx );
        }
    }
}

301

302 303
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 * entropy coding, but per 64 coeffs for the purpose of deblocking */
Loic Le Loarer's avatar
Loic Le Loarer committed
304
static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
305 306 307
{
    uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
308
    int x, nnz;
309 310 311 312 313
    for( x=0; x<h->sps->i_mb_width; x++ )
    {
        memcpy( buf+x, src+x, 16 );
        if( transform[x] )
        {
314 315 316 317
            nnz = src[x][0] | src[x][1];
            src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
            nnz = src[x][2] | src[x][3];
            src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
        }
    }
}

static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
{
    uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    int x;
    for( x=0; x<h->sps->i_mb_width; x++ )
        memcpy( dst+x, buf+x, 16 );
}

static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
{
    func( h, mb_y, buf );
    if( mb_y > 0 )
        func( h, mb_y-1, buf + h->sps->i_mb_width );
    if( h->sh.b_mbaff )
    {
        func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
        if( mb_y > 0 )
            func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
    }
}


344
/* Deblocking filter */
345
static const uint8_t i_alpha_table[52+12*2] =
Laurent Aimar's avatar
Laurent Aimar committed
346
{
347
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
Laurent Aimar's avatar
Laurent Aimar committed
348 349 350 351 352
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
     7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
    80, 90,101,113,127,144,162,182,203,226,
353 354
   255,255,
   255,255,255,255,255,255,255,255,255,255,255,255,
Laurent Aimar's avatar
Laurent Aimar committed
355
};
356
static const uint8_t i_beta_table[52+12*2] =
Laurent Aimar's avatar
Laurent Aimar committed
357
{
358
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
Laurent Aimar's avatar
Laurent Aimar committed
359 360 361 362 363
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
     3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
364 365
    18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
Laurent Aimar's avatar
Laurent Aimar committed
366
};
367
static const int8_t i_tc0_table[52+12*2][4] =
Laurent Aimar's avatar
Laurent Aimar committed
368
{
369 370
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
Fiona Glaser's avatar
Fiona Glaser committed
371 372 373 374 375 376 377 378
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
379 380 381
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
Laurent Aimar's avatar
Laurent Aimar committed
382
};
383 384 385
#define alpha_table(x) i_alpha_table[(x)+12]
#define beta_table(x)  i_beta_table[(x)+12]
#define tc0_table(x)   i_tc0_table[(x)+12]
Laurent Aimar's avatar
Laurent Aimar committed
386 387

/* From ffmpeg */
Loren Merritt's avatar
Loren Merritt committed
388
static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
Laurent Aimar's avatar
Laurent Aimar committed
389 390
{
    int i, d;
391 392 393 394
    for( i = 0; i < 4; i++ )
    {
        if( tc0[i] < 0 )
        {
Loren Merritt's avatar
Loren Merritt committed
395
            pix += 4*ystride;
Laurent Aimar's avatar
Laurent Aimar committed
396 397
            continue;
        }
398 399
        for( d = 0; d < 4; d++ )
        {
Loren Merritt's avatar
Loren Merritt committed
400 401 402 403 404 405
            const int p2 = pix[-3*xstride];
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            const int q2 = pix[ 2*xstride];
406 407 408

            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            {
Loren Merritt's avatar
Loren Merritt committed
409 410
                int tc = tc0[i];
                int delta;
411 412
                if( abs( p2 - p0 ) < beta )
                {
Loren Merritt's avatar
Loren Merritt committed
413
                    pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
Fiona Glaser's avatar
Fiona Glaser committed
414
                    tc++;
Laurent Aimar's avatar
Laurent Aimar committed
415
                }
416 417
                if( abs( q2 - q0 ) < beta )
                {
Loren Merritt's avatar
Loren Merritt committed
418 419
                    pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
Laurent Aimar's avatar
Laurent Aimar committed
420
                }
421

Loren Merritt's avatar
Loren Merritt committed
422
                delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
423 424
                pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
425
            }
Loren Merritt's avatar
Loren Merritt committed
426
            pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
427 428 429
        }
    }
}
Loren Merritt's avatar
Loren Merritt committed
430 431
static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
Loren Merritt's avatar
Loren Merritt committed
432
    deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
Loren Merritt's avatar
Loren Merritt committed
433 434 435 436 437
}
static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
}
Laurent Aimar's avatar
Laurent Aimar committed
438

Loren Merritt's avatar
Loren Merritt committed
439
static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
Laurent Aimar's avatar
Laurent Aimar committed
440 441
{
    int i, d;
442 443
    for( i = 0; i < 4; i++ )
    {
Loren Merritt's avatar
Loren Merritt committed
444
        const int tc = tc0[i];
445 446
        if( tc <= 0 )
        {
Loren Merritt's avatar
Loren Merritt committed
447
            pix += 2*ystride;
Laurent Aimar's avatar
Laurent Aimar committed
448 449
            continue;
        }
450 451
        for( d = 0; d < 2; d++ )
        {
Loren Merritt's avatar
Loren Merritt committed
452 453 454 455 456
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];

457 458
            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            {
Loren Merritt's avatar
Loren Merritt committed
459
                int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
460 461
                pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
462
            }
Loren Merritt's avatar
Loren Merritt committed
463
            pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
464 465 466
        }
    }
}
Loren Merritt's avatar
Loren Merritt committed
467
static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
468
{
Loren Merritt's avatar
Loren Merritt committed
469 470 471
    deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
}
static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
472
{
Loren Merritt's avatar
Loren Merritt committed
473 474
    deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
}
Laurent Aimar's avatar
Laurent Aimar committed
475

Loren Merritt's avatar
Loren Merritt committed
476
static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
Laurent Aimar's avatar
Laurent Aimar committed
477
{
Loren Merritt's avatar
Loren Merritt committed
478
    int d;
479 480
    for( d = 0; d < 16; d++ )
    {
Loren Merritt's avatar
Loren Merritt committed
481 482 483 484 485 486 487
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

488 489 490 491 492
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
        {
            if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
            {
                if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
Laurent Aimar's avatar
Laurent Aimar committed
493
                {
Loren Merritt's avatar
Loren Merritt committed
494 495 496 497
                    const int p3 = pix[-4*xstride];
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
Laurent Aimar's avatar
Laurent Aimar committed
498
                }
499 500 501
                else /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
Laurent Aimar's avatar
Laurent Aimar committed
502
                {
Loren Merritt's avatar
Loren Merritt committed
503 504 505 506
                    const int q3 = pix[3*xstride];
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
Laurent Aimar's avatar
Laurent Aimar committed
507
                }
508 509 510 511 512
                else /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
            else /* p0', q0' */
            {
Loren Merritt's avatar
Loren Merritt committed
513 514
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
Laurent Aimar's avatar
Laurent Aimar committed
515 516
            }
        }
Loren Merritt's avatar
Loren Merritt committed
517
        pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
518 519
    }
}
Loren Merritt's avatar
Loren Merritt committed
520
static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
521
{
Loren Merritt's avatar
Loren Merritt committed
522 523 524
    deblock_luma_intra_c( pix, stride, 1, alpha, beta );
}
static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
525
{
Loren Merritt's avatar
Loren Merritt committed
526 527
    deblock_luma_intra_c( pix, 1, stride, alpha, beta );
}
Laurent Aimar's avatar
Laurent Aimar committed
528

Loren Merritt's avatar
Loren Merritt committed
529
static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
530
{
Loren Merritt's avatar
Loren Merritt committed
531
    int d;
532 533
    for( d = 0; d < 8; d++ )
    {
Loren Merritt's avatar
Loren Merritt committed
534 535 536 537 538
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];

539 540
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
        {
Loren Merritt's avatar
Loren Merritt committed
541 542
            pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
            pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
543
        }
Loren Merritt's avatar
Loren Merritt committed
544 545 546 547
        pix += ystride;
    }
}
static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
548
{
Loren Merritt's avatar
Loren Merritt committed
549 550 551
    deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
}
static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
552
{
Loren Merritt's avatar
Loren Merritt committed
553 554 555
    deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
}

Fiona Glaser's avatar
Fiona Glaser committed
556 557
static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
{
558 559 560
    const int index_a = i_qp + h->sh.i_alpha_c0_offset;
    const int alpha = alpha_table(index_a);
    const int beta  = beta_table(i_qp + h->sh.i_beta_offset);
Fiona Glaser's avatar
Fiona Glaser committed
561 562 563 564 565
    int8_t tc[4];

    if( !alpha || !beta )
        return;

566 567 568 569
    tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
    tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
    tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
    tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
Fiona Glaser's avatar
Fiona Glaser committed
570

571
    pf_inter( pix1, i_stride, alpha, beta, tc );
572
    if( b_chroma )
Fiona Glaser's avatar
Fiona Glaser committed
573 574 575 576
        pf_inter( pix2, i_stride, alpha, beta, tc );
}

static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
Loren Merritt's avatar
Loren Merritt committed
577
{
578 579
    const int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
    const int beta  = beta_table(i_qp + h->sh.i_beta_offset);
Loren Merritt's avatar
Loren Merritt committed
580

Fiona Glaser's avatar
Fiona Glaser committed
581 582 583
    if( !alpha || !beta )
        return;

584
    pf_intra( pix1, i_stride, alpha, beta );
585
    if( b_chroma )
Fiona Glaser's avatar
Fiona Glaser committed
586
        pf_intra( pix2, i_stride, alpha, beta );
Laurent Aimar's avatar
Laurent Aimar committed
587 588
}

Loren Merritt's avatar
Loren Merritt committed
589
void x264_frame_deblock_row( x264_t *h, int mb_y )
Laurent Aimar's avatar
Laurent Aimar committed
590 591 592
{
    const int s8x8 = 2 * h->mb.i_mb_stride;
    const int s4x4 = 4 * h->mb.i_mb_stride;
Loren Merritt's avatar
Loren Merritt committed
593
    const int b_interlaced = h->sh.b_mbaff;
594
    const int mvy_limit = 4 >> b_interlaced;
595
    const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
Loren Merritt's avatar
Loren Merritt committed
596
    int mb_x;
597 598 599 600
    int stridey   = h->fdec->i_stride[0];
    int stride2y  = stridey << b_interlaced;
    int strideuv  = h->fdec->i_stride[1];
    int stride2uv = strideuv << b_interlaced;
601

602 603 604
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );

605
    for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
Laurent Aimar's avatar
Laurent Aimar committed
606 607 608 609
    {
        const int mb_xy  = mb_y * h->mb.i_mb_stride + mb_x;
        const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
        const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
610
        const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
611 612 613 614 615
        const int i_qp = h->mb.qp[mb_xy];
        int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
        uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
        uint8_t *pixu = h->fdec->plane[1] +  8*mb_y*strideuv +  8*mb_x;
        uint8_t *pixv = h->fdec->plane[2] +  8*mb_y*strideuv +  8*mb_x;
616 617
        if( b_interlaced && (mb_y&1) )
        {
618 619 620
            pixy -= 15*stridey;
            pixu -=  7*strideuv;
            pixv -=  7*strideuv;
621 622
        }

623 624
        x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );

625 626 627
        if( i_qp <= qp_thresh )
            i_edge_end = 1;

Fiona Glaser's avatar
Fiona Glaser committed
628 629 630 631 632 633 634
        #define FILTER_DIR(intra, i_dir)\
        {\
            /* Y plane */\
            i_qpn= h->mb.qp[mbn_xy];\
            if( i_dir == 0 )\
            {\
                /* vertical edge */\
635 636
                deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
                              stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
637 638 639 640
                              h->loopf.deblock_h_luma##intra );\
                if( !(i_edge & 1) )\
                {\
                    /* U/V planes */\
641 642 643
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
                                  stride2uv, bS, i_qpc, 1,\
Fiona Glaser's avatar
Fiona Glaser committed
644 645 646 647 648 649
                                  h->loopf.deblock_h_chroma##intra );\
                }\
            }\
            else\
            {\
                /* horizontal edge */\
650 651
                deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
                              stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
652 653 654 655
                              h->loopf.deblock_v_luma##intra );\
                /* U/V planes */\
                if( !(i_edge & 1) )\
                {\
656 657 658
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
                                  stride2uv, bS, i_qpc, 1,\
Fiona Glaser's avatar
Fiona Glaser committed
659 660 661 662 663
                                  h->loopf.deblock_v_chroma##intra );\
                }\
            }\
        }

664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
        #define DEBLOCK_STRENGTH(i_dir)\
        {\
            /* *** Get bS for each 4px for the current edge *** */\
            if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                *(uint32_t*)bS = 0x03030303;\
            else\
            {\
                *(uint32_t*)bS = 0x00000000;\
                for( i = 0; i < 4; i++ )\
                {\
                    int x  = i_dir == 0 ? i_edge : i;\
                    int y  = i_dir == 0 ? i      : i_edge;\
                    int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
                    int yn = i_dir == 0 ? y : (y - 1)&0x03;\
                    if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
                        h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
                        bS[i] = 2;\
                    else\
                    {\
                        /* FIXME: A given frame may occupy more than one position in\
                         * the reference list. So we should compare the frame numbers,\
                         * not the indices in the ref list.\
                         * No harm yet, as we don't generate that case.*/\
                        int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
                        int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
                        int i4p= mb_4x4+x+y*s4x4;\
                        int i4q= mbn_4x4+xn+yn*s4x4;\
                        for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
                            if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
                                abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
                                abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
                            {\
                                bS[i] = 1;\
                                break;\
                            }\
                    }\
                }\
            }\
        }

Laurent Aimar's avatar
Laurent Aimar committed
704 705
        /* i_dir == 0 -> vertical edge
         * i_dir == 1 -> horizontal edge */
706
        #define DEBLOCK_DIR(i_dir)\
Fiona Glaser's avatar
Fiona Glaser committed
707
        {\
Fiona Glaser's avatar
Fiona Glaser committed
708
            int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
709
            int i_qpn, i, l, mbn_xy, mbn_8x8, mbn_4x4;\
Fiona Glaser's avatar
Fiona Glaser committed
710 711 712 713 714 715 716 717 718
            DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
            if( i_edge )\
                i_edge+= b_8x8_transform;\
            else\
            {\
                mbn_xy  = i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride;\
                mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
                mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
                if( b_interlaced && i_dir == 1 )\
Fiona Glaser's avatar
Fiona Glaser committed
719 720 721 722 723
                {\
                    mbn_xy -= h->mb.i_mb_stride;\
                    mbn_8x8 -= 2 * s8x8;\
                    mbn_4x4 -= 4 * s4x4;\
                }\
Fiona Glaser's avatar
Fiona Glaser committed
724 725 726
                else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                {\
                    FILTER_DIR( _intra, i_dir );\
727
                    goto end##i_dir;\
Fiona Glaser's avatar
Fiona Glaser committed
728
                }\
729 730 731 732 733
                DEBLOCK_STRENGTH(i_dir);\
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
                end##i_dir:\
                i_edge += b_8x8_transform+1;\
Fiona Glaser's avatar
Fiona Glaser committed
734
            }\
735 736 737
            mbn_xy  = mb_xy;\
            mbn_8x8 = mb_8x8;\
            mbn_4x4 = mb_4x4;\
Fiona Glaser's avatar
Fiona Glaser committed
738 739
            for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
            {\
740
                DEBLOCK_STRENGTH(i_dir);\
Fiona Glaser's avatar
Fiona Glaser committed
741 742
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
Fiona Glaser's avatar
Fiona Glaser committed
743
            }\
Laurent Aimar's avatar
Laurent Aimar committed
744 745
        }

746 747
        DEBLOCK_DIR(0);
        DEBLOCK_DIR(1);
Laurent Aimar's avatar
Laurent Aimar committed
748
    }
749 750 751

    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
Laurent Aimar's avatar
Laurent Aimar committed
752 753
}

Loren Merritt's avatar
Loren Merritt committed
754 755 756 757 758 759 760
void x264_frame_deblock( x264_t *h )
{
    int mb_y;
    for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
        x264_frame_deblock_row( h, mb_y );
}

761
#ifdef HAVE_MMX
Loren Merritt's avatar
Loren Merritt committed
762 763 764 765 766 767 768
void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );

void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
Loren Merritt's avatar
Loren Merritt committed
769 770
void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
771
#ifdef ARCH_X86
Loren Merritt's avatar
Loren Merritt committed
772 773
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
Loren Merritt's avatar
Loren Merritt committed
774 775
void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
Loren Merritt's avatar
Loren Merritt committed
776

Loic Le Loarer's avatar
Loic Le Loarer committed
777
static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
Loren Merritt's avatar
Loren Merritt committed
778 779 780 781
{
    x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
    x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
}
Loic Le Loarer's avatar
Loic Le Loarer committed
782
static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
Loren Merritt's avatar
Loren Merritt committed
783 784 785 786
{
    x264_deblock_v8_luma_intra_mmxext( pix,   stride, alpha, beta );
    x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
}
Loren Merritt's avatar
Loren Merritt committed
787
#endif
Loren Merritt's avatar
Loren Merritt committed
788
#endif
Laurent Aimar's avatar
Laurent Aimar committed
789

790 791 792 793 794
#ifdef ARCH_PPC
void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
#endif // ARCH_PPC

Loren Merritt's avatar
Loren Merritt committed
795 796 797 798 799 800 801 802 803 804 805
void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
{
    pf->deblock_v_luma = deblock_v_luma_c;
    pf->deblock_h_luma = deblock_h_luma_c;
    pf->deblock_v_chroma = deblock_v_chroma_c;
    pf->deblock_h_chroma = deblock_h_chroma_c;
    pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
    pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
    pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
    pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;

806
#ifdef HAVE_MMX
Loren Merritt's avatar
Loren Merritt committed
807 808 809 810 811 812
    if( cpu&X264_CPU_MMXEXT )
    {
        pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
        pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
        pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
        pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
813 814 815
#ifdef ARCH_X86
        pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
        pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
Loren Merritt's avatar
Loren Merritt committed
816 817
        pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
        pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
818
#endif
819
        if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
Loren Merritt's avatar
Loren Merritt committed
820 821 822
        {
            pf->deblock_v_luma = x264_deblock_v_luma_sse2;
            pf->deblock_h_luma = x264_deblock_h_luma_sse2;
Loren Merritt's avatar
Loren Merritt committed
823 824
            pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
            pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
Loren Merritt's avatar
Loren Merritt committed
825 826
        }
    }
827
#endif
828 829 830 831 832 833 834 835

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        pf->deblock_v_luma = x264_deblock_v_luma_altivec;
        pf->deblock_h_luma = x264_deblock_h_luma_altivec;
   }
#endif // ARCH_PPC
Loren Merritt's avatar
Loren Merritt committed
836
}
Laurent Aimar's avatar
Laurent Aimar committed
837

838 839

/* threading */
840
void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
841
{
842
    x264_pthread_mutex_lock( &frame->mutex );
843
    frame->i_lines_completed = i_lines_completed;
844 845
    x264_pthread_cond_broadcast( &frame->cv );
    x264_pthread_mutex_unlock( &frame->mutex );
846 847 848 849
}

void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
{
850
    x264_pthread_mutex_lock( &frame->mutex );
851
    while( frame->i_lines_completed < i_lines_completed )
Loren Merritt's avatar