frame.c 33.9 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3
/*****************************************************************************
 * frame.c: h264 encoder library
 *****************************************************************************
4
 * Copyright (C) 2003-2008 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5 6
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7
 *          Loren Merritt <lorenm@u.washington.edu>
Laurent Aimar's avatar
Laurent Aimar committed
8 9 10 11 12 13 14 15 16 17 18 19 20
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Laurent Aimar's avatar
Laurent Aimar committed
22 23 24 25
 *****************************************************************************/

#include "common.h"

26 27
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))

Laurent Aimar's avatar
Laurent Aimar committed
28 29
x264_frame_t *x264_frame_new( x264_t *h )
{
30
    x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
31
    int i, j;
Laurent Aimar's avatar
Laurent Aimar committed
32

33
    int i_mb_count = h->mb.i_mb_count;
34
    int i_stride, i_width, i_lines;
35
    int i_padv = PADV << h->param.b_interlaced;
36
    int luma_plane_size;
37
    int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
Laurent Aimar's avatar
Laurent Aimar committed
38

39 40
    if( !frame ) return NULL;

41 42
    memset( frame, 0, sizeof(x264_frame_t) );

Laurent Aimar's avatar
Laurent Aimar committed
43
    /* allocate frame data (+64 for extra data for me) */
44 45 46
    i_width  = ALIGN( h->param.i_width, 16 );
    i_stride = ALIGN( i_width + 2*PADH, align );
    i_lines  = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );
47

Laurent Aimar's avatar
Laurent Aimar committed
48 49 50
    frame->i_plane = 3;
    for( i = 0; i < 3; i++ )
    {
51 52 53
        frame->i_stride[i] = i_stride >> !!i;
        frame->i_width[i] = i_width >> !!i;
        frame->i_lines[i] = i_lines >> !!i;
Laurent Aimar's avatar
Laurent Aimar committed
54 55
    }

56 57
    luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
    for( i = 1; i < 3; i++ )
58
    {
59
        CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
60
        frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
61
    }
62 63 64 65
    /* all 4 luma planes allocated together, since the cacheline split code
     * requires them to be in-phase wrt cacheline alignment. */
    CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
    for( i = 0; i < 4; i++ )
66
        frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
67
    frame->plane[0] = frame->filtered[0];
68

69
    if( h->frames.b_have_lowres )
70
    {
71
        frame->i_width_lowres = frame->i_width[0]/2;
72
        frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
73
        frame->i_lines_lowres = frame->i_lines[0]/2;
74 75 76 77

        luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );

        CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
78
        for( i = 0; i < 4; i++ )
79
            frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
80
    }
81

82
    if( h->param.analyse.i_me_method >= X264_ME_ESA )
83
    {
84
        CHECKED_MALLOC( frame->buffer[3],
85
                        2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
86
        frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
87 88
    }

Laurent Aimar's avatar
Laurent Aimar committed
89 90 91
    frame->i_poc = -1;
    frame->i_type = X264_TYPE_AUTO;
    frame->i_qpplus1 = 0;
92 93
    frame->i_pts = -1;
    frame->i_frame = -1;
94
    frame->i_frame_num = -1;
Loren Merritt's avatar
Loren Merritt committed
95
    frame->i_lines_completed = -1;
Laurent Aimar's avatar
Laurent Aimar committed
96

97 98 99
    CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
    CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
100 101
    if( h->param.i_bframe )
    {
102 103
        CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
        CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
104 105 106 107 108 109 110
    }
    else
    {
        frame->mv[1]  = NULL;
        frame->ref[1] = NULL;
    }

111 112
    CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
    CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
113 114
    for( i = 0; i < h->param.i_bframe + 2; i++ )
        for( j = 0; j < h->param.i_bframe + 2; j++ )
115
            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
116

117 118
    x264_pthread_mutex_init( &frame->mutex, NULL );
    x264_pthread_cond_init( &frame->cv, NULL );
119

Laurent Aimar's avatar
Laurent Aimar committed
120
    return frame;
121 122 123 124

fail:
    x264_frame_delete( frame );
    return NULL;
Laurent Aimar's avatar
Laurent Aimar committed
125 126 127 128
}

void x264_frame_delete( x264_frame_t *frame )
{
129
    int i, j;
130
    for( i = 0; i < 4; i++ )
131
        x264_free( frame->buffer[i] );
132 133
    for( i = 0; i < 4; i++ )
        x264_free( frame->buffer_lowres[i] );
134 135 136 137 138
    for( i = 0; i < X264_BFRAME_MAX+2; i++ )
        for( j = 0; j < X264_BFRAME_MAX+2; j++ )
            x264_free( frame->i_row_satds[i][j] );
    x264_free( frame->i_row_bits );
    x264_free( frame->i_row_qp );
139
    x264_free( frame->mb_type );
140 141 142 143
    x264_free( frame->mv[0] );
    x264_free( frame->mv[1] );
    x264_free( frame->ref[0] );
    x264_free( frame->ref[1] );
144 145
    x264_pthread_mutex_destroy( &frame->mutex );
    x264_pthread_cond_destroy( &frame->cv );
Laurent Aimar's avatar
Laurent Aimar committed
146 147 148
    x264_free( frame );
}

149
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
Laurent Aimar's avatar
Laurent Aimar committed
150
{
151
    int i_csp = src->img.i_csp & X264_CSP_MASK;
152 153 154 155 156 157 158
    int i;
    if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
    {
        x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
        return -1;
    }

Laurent Aimar's avatar
Laurent Aimar committed
159 160 161 162
    dst->i_type     = src->i_type;
    dst->i_qpplus1  = src->i_qpplus1;
    dst->i_pts      = src->i_pts;

163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
    for( i=0; i<3; i++ )
    {
        int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
        uint8_t *plane = src->img.plane[s];
        int stride = src->img.i_stride[s];
        int width = h->param.i_width >> !!i;
        int height = h->param.i_height >> !!i;
        if( src->img.i_csp & X264_CSP_VFLIP )
        {
            plane += (height-1)*stride;
            stride = -stride;
        }
        h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
    }
    return 0;
Laurent Aimar's avatar
Laurent Aimar committed
178 179 180 181
}



Loren Merritt's avatar
Loren Merritt committed
182
static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
183 184 185 186 187 188
{
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
    int y;
    for( y = 0; y < i_height; y++ )
    {
        /* left band */
189
        memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
190
        /* right band */
191
        memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
192 193
    }
    /* upper band */
Loren Merritt's avatar
Loren Merritt committed
194
    if( b_pad_top )
195 196
    for( y = 0; y < i_padv; y++ )
        memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
197
    /* lower band */
Loren Merritt's avatar
Loren Merritt committed
198
    if( b_pad_bottom )
199 200
    for( y = 0; y < i_padv; y++ )
        memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
201 202 203
#undef PPIXEL
}

Loren Merritt's avatar
Loren Merritt committed
204
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
Laurent Aimar's avatar
Laurent Aimar committed
205
{
206
    int i;
Loren Merritt's avatar
Loren Merritt committed
207 208 209
    int b_start = !mb_y;
    if( mb_y & h->sh.b_mbaff )
        return;
Laurent Aimar's avatar
Laurent Aimar committed
210 211
    for( i = 0; i < frame->i_plane; i++ )
    {
212 213
        int stride = frame->i_stride[i];
        int width = 16*h->sps->i_mb_width >> !!i;
Loren Merritt's avatar
Loren Merritt committed
214
        int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
215 216
        int padh = PADH >> !!i;
        int padv = PADV >> !!i;
Loren Merritt's avatar
Loren Merritt committed
217 218 219 220 221
        // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
        uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
        if( b_end && !b_start )
            height += 4 >> (!!i + h->sh.b_mbaff);
        if( h->sh.b_mbaff )
222
        {
Loren Merritt's avatar
Loren Merritt committed
223 224
            plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
            plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
225 226 227
        }
        else
        {
Loren Merritt's avatar
Loren Merritt committed
228
            plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
229
        }
Laurent Aimar's avatar
Laurent Aimar committed
230 231 232
    }
}

Loren Merritt's avatar
Loren Merritt committed
233
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
234
{
Loren Merritt's avatar
Loren Merritt committed
235
    /* during filtering, 8 extra pixels were filtered on each edge,
Loren Merritt's avatar
Loren Merritt committed
236
     * but up to 3 of the horizontal ones may be wrong.
237
       we want to expand border from the last filtered pixel */
Loren Merritt's avatar
Loren Merritt committed
238
    int b_start = !mb_y;
239
    int stride = frame->i_stride[0];
Loren Merritt's avatar
Loren Merritt committed
240
    int width = 16*h->sps->i_mb_width + 8;
Loren Merritt's avatar
Loren Merritt committed
241
    int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
Loren Merritt's avatar
Loren Merritt committed
242
    int padh = PADH - 4;
243
    int padv = PADV - 8;
244
    int i;
245
    for( i = 1; i < 4; i++ )
246
    {
Loren Merritt's avatar
Loren Merritt committed
247
        // buffer: 8 luma, to match the hpel filter
Loren Merritt's avatar
Loren Merritt committed
248
        uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
Loren Merritt's avatar
Loren Merritt committed
249
        if( h->sh.b_mbaff )
250
        {
Loren Merritt's avatar
Loren Merritt committed
251 252
            plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
            plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
253 254 255
        }
        else
        {
Loren Merritt's avatar
Loren Merritt committed
256
            plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
257 258
        }
    }
259 260
}

261
void x264_frame_expand_border_lowres( x264_frame_t *frame )
262
{
263
    int i;
264
    for( i = 0; i < 4; i++ )
Loren Merritt's avatar
Loren Merritt committed
265
        plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
266 267
}

Loren Merritt's avatar
Loren Merritt committed
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
{
    int i, y;
    for( i = 0; i < frame->i_plane; i++ )
    {
        int i_subsample = i ? 1 : 0;
        int i_width = h->param.i_width >> i_subsample;
        int i_height = h->param.i_height >> i_subsample;
        int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
        int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;

        if( i_padx )
        {
            for( y = 0; y < i_height; y++ )
                memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
                         frame->plane[i][y*frame->i_stride[i] + i_width - 1],
                         i_padx );
        }
        if( i_pady )
        {
288
            //FIXME interlace? or just let it pad using the wrong field
Loren Merritt's avatar
Loren Merritt committed
289
            for( y = i_height; y < i_height + i_pady; y++ )
Loren Merritt's avatar
Loren Merritt committed
290 291 292 293 294 295 296
                memcpy( &frame->plane[i][y*frame->i_stride[i]],
                        &frame->plane[i][(i_height-1)*frame->i_stride[i]],
                        i_width + i_padx );
        }
    }
}

297

298 299
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 * entropy coding, but per 64 coeffs for the purpose of deblocking */
Loic Le Loarer's avatar
Loic Le Loarer committed
300
static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
301 302 303
{
    uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
304
    int x, nnz;
305 306 307 308 309
    for( x=0; x<h->sps->i_mb_width; x++ )
    {
        memcpy( buf+x, src+x, 16 );
        if( transform[x] )
        {
310 311 312 313
            nnz = src[x][0] | src[x][1];
            src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
            nnz = src[x][2] | src[x][3];
            src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
        }
    }
}

static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
{
    uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    int x;
    for( x=0; x<h->sps->i_mb_width; x++ )
        memcpy( dst+x, buf+x, 16 );
}

static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
{
    func( h, mb_y, buf );
    if( mb_y > 0 )
        func( h, mb_y-1, buf + h->sps->i_mb_width );
    if( h->sh.b_mbaff )
    {
        func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
        if( mb_y > 0 )
            func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
    }
}


340
/* Deblocking filter */
341
static const uint8_t i_alpha_table[52+12*2] =
Laurent Aimar's avatar
Laurent Aimar committed
342
{
343
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
Laurent Aimar's avatar
Laurent Aimar committed
344 345 346 347 348
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
     7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
    80, 90,101,113,127,144,162,182,203,226,
349 350
   255,255,
   255,255,255,255,255,255,255,255,255,255,255,255,
Laurent Aimar's avatar
Laurent Aimar committed
351
};
352
static const uint8_t i_beta_table[52+12*2] =
Laurent Aimar's avatar
Laurent Aimar committed
353
{
354
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
Laurent Aimar's avatar
Laurent Aimar committed
355 356 357 358 359
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
     3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
360 361
    18, 18,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
Laurent Aimar's avatar
Laurent Aimar committed
362
};
363
static const int8_t i_tc0_table[52+12*2][4] =
Laurent Aimar's avatar
Laurent Aimar committed
364
{
365 366
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
Fiona Glaser's avatar
Fiona Glaser committed
367 368 369 370 371 372 373 374
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
375 376 377
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
Laurent Aimar's avatar
Laurent Aimar committed
378
};
379 380 381
#define alpha_table(x) i_alpha_table[(x)+12]
#define beta_table(x)  i_beta_table[(x)+12]
#define tc0_table(x)   i_tc0_table[(x)+12]
Laurent Aimar's avatar
Laurent Aimar committed
382 383

/* From ffmpeg */
Loren Merritt's avatar
Loren Merritt committed
384
static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
Laurent Aimar's avatar
Laurent Aimar committed
385 386
{
    int i, d;
387 388 389 390
    for( i = 0; i < 4; i++ )
    {
        if( tc0[i] < 0 )
        {
Loren Merritt's avatar
Loren Merritt committed
391
            pix += 4*ystride;
Laurent Aimar's avatar
Laurent Aimar committed
392 393
            continue;
        }
394 395
        for( d = 0; d < 4; d++ )
        {
Loren Merritt's avatar
Loren Merritt committed
396 397 398 399 400 401
            const int p2 = pix[-3*xstride];
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            const int q2 = pix[ 2*xstride];
402 403 404

            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            {
Loren Merritt's avatar
Loren Merritt committed
405 406
                int tc = tc0[i];
                int delta;
407 408
                if( abs( p2 - p0 ) < beta )
                {
Loren Merritt's avatar
Loren Merritt committed
409
                    pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
Fiona Glaser's avatar
Fiona Glaser committed
410
                    tc++;
Laurent Aimar's avatar
Laurent Aimar committed
411
                }
412 413
                if( abs( q2 - q0 ) < beta )
                {
Loren Merritt's avatar
Loren Merritt committed
414 415
                    pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
Laurent Aimar's avatar
Laurent Aimar committed
416
                }
417

Loren Merritt's avatar
Loren Merritt committed
418
                delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
419 420
                pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
421
            }
Loren Merritt's avatar
Loren Merritt committed
422
            pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
423 424 425
        }
    }
}
Loren Merritt's avatar
Loren Merritt committed
426 427
static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
Loren Merritt's avatar
Loren Merritt committed
428
    deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
Loren Merritt's avatar
Loren Merritt committed
429 430 431 432 433
}
static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
}
Laurent Aimar's avatar
Laurent Aimar committed
434

Loren Merritt's avatar
Loren Merritt committed
435
static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
Laurent Aimar's avatar
Laurent Aimar committed
436 437
{
    int i, d;
438 439
    for( i = 0; i < 4; i++ )
    {
Loren Merritt's avatar
Loren Merritt committed
440
        const int tc = tc0[i];
441 442
        if( tc <= 0 )
        {
Loren Merritt's avatar
Loren Merritt committed
443
            pix += 2*ystride;
Laurent Aimar's avatar
Laurent Aimar committed
444 445
            continue;
        }
446 447
        for( d = 0; d < 2; d++ )
        {
Loren Merritt's avatar
Loren Merritt committed
448 449 450 451 452
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];

453 454
            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            {
Loren Merritt's avatar
Loren Merritt committed
455
                int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
456 457
                pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
458
            }
Loren Merritt's avatar
Loren Merritt committed
459
            pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
460 461 462
        }
    }
}
Loren Merritt's avatar
Loren Merritt committed
463
static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
464
{
Loren Merritt's avatar
Loren Merritt committed
465 466 467
    deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
}
static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
468
{
Loren Merritt's avatar
Loren Merritt committed
469 470
    deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
}
Laurent Aimar's avatar
Laurent Aimar committed
471

Loren Merritt's avatar
Loren Merritt committed
472
static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
Laurent Aimar's avatar
Laurent Aimar committed
473
{
Loren Merritt's avatar
Loren Merritt committed
474
    int d;
475 476
    for( d = 0; d < 16; d++ )
    {
Loren Merritt's avatar
Loren Merritt committed
477 478 479 480 481 482 483
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

484 485 486 487 488
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
        {
            if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
            {
                if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
Laurent Aimar's avatar
Laurent Aimar committed
489
                {
Loren Merritt's avatar
Loren Merritt committed
490 491 492 493
                    const int p3 = pix[-4*xstride];
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
Laurent Aimar's avatar
Laurent Aimar committed
494
                }
495 496 497
                else /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
Laurent Aimar's avatar
Laurent Aimar committed
498
                {
Loren Merritt's avatar
Loren Merritt committed
499 500 501 502
                    const int q3 = pix[3*xstride];
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
Laurent Aimar's avatar
Laurent Aimar committed
503
                }
504 505 506 507 508
                else /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
            else /* p0', q0' */
            {
Loren Merritt's avatar
Loren Merritt committed
509 510
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
Laurent Aimar's avatar
Laurent Aimar committed
511 512
            }
        }
Loren Merritt's avatar
Loren Merritt committed
513
        pix += ystride;
Laurent Aimar's avatar
Laurent Aimar committed
514 515
    }
}
Loren Merritt's avatar
Loren Merritt committed
516
static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
517
{
Loren Merritt's avatar
Loren Merritt committed
518 519 520
    deblock_luma_intra_c( pix, stride, 1, alpha, beta );
}
static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
521
{
Loren Merritt's avatar
Loren Merritt committed
522 523
    deblock_luma_intra_c( pix, 1, stride, alpha, beta );
}
Laurent Aimar's avatar
Laurent Aimar committed
524

Loren Merritt's avatar
Loren Merritt committed
525
static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
526
{
Loren Merritt's avatar
Loren Merritt committed
527
    int d;
528 529
    for( d = 0; d < 8; d++ )
    {
Loren Merritt's avatar
Loren Merritt committed
530 531 532 533 534
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];

535 536
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
        {
Loren Merritt's avatar
Loren Merritt committed
537 538
            pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
            pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
Laurent Aimar's avatar
Laurent Aimar committed
539
        }
Loren Merritt's avatar
Loren Merritt committed
540 541 542 543
        pix += ystride;
    }
}
static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
544
{
Loren Merritt's avatar
Loren Merritt committed
545 546 547
    deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
}
static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
548
{
Loren Merritt's avatar
Loren Merritt committed
549 550 551
    deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
}

Fiona Glaser's avatar
Fiona Glaser committed
552 553
static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
{
554 555 556
    const int index_a = i_qp + h->sh.i_alpha_c0_offset;
    const int alpha = alpha_table(index_a);
    const int beta  = beta_table(i_qp + h->sh.i_beta_offset);
Fiona Glaser's avatar
Fiona Glaser committed
557 558 559 560 561
    int8_t tc[4];

    if( !alpha || !beta )
        return;

562 563 564 565
    tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
    tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
    tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
    tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
Fiona Glaser's avatar
Fiona Glaser committed
566

567
    pf_inter( pix1, i_stride, alpha, beta, tc );
568
    if( b_chroma )
Fiona Glaser's avatar
Fiona Glaser committed
569 570 571 572
        pf_inter( pix2, i_stride, alpha, beta, tc );
}

static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
Loren Merritt's avatar
Loren Merritt committed
573
{
574 575
    const int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
    const int beta  = beta_table(i_qp + h->sh.i_beta_offset);
Loren Merritt's avatar
Loren Merritt committed
576

Fiona Glaser's avatar
Fiona Glaser committed
577 578 579
    if( !alpha || !beta )
        return;

580
    pf_intra( pix1, i_stride, alpha, beta );
581
    if( b_chroma )
Fiona Glaser's avatar
Fiona Glaser committed
582
        pf_intra( pix2, i_stride, alpha, beta );
Laurent Aimar's avatar
Laurent Aimar committed
583 584
}

Loren Merritt's avatar
Loren Merritt committed
585
void x264_frame_deblock_row( x264_t *h, int mb_y )
Laurent Aimar's avatar
Laurent Aimar committed
586 587 588
{
    const int s8x8 = 2 * h->mb.i_mb_stride;
    const int s4x4 = 4 * h->mb.i_mb_stride;
Loren Merritt's avatar
Loren Merritt committed
589
    const int b_interlaced = h->sh.b_mbaff;
590
    const int mvy_limit = 4 >> b_interlaced;
591
    const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
Loren Merritt's avatar
Loren Merritt committed
592
    int mb_x;
593 594 595 596
    int stridey   = h->fdec->i_stride[0];
    int stride2y  = stridey << b_interlaced;
    int strideuv  = h->fdec->i_stride[1];
    int stride2uv = strideuv << b_interlaced;
597

598 599 600
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );

601
    for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
Laurent Aimar's avatar
Laurent Aimar committed
602 603 604 605
    {
        const int mb_xy  = mb_y * h->mb.i_mb_stride + mb_x;
        const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
        const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
606
        const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
607 608 609 610 611
        const int i_qp = h->mb.qp[mb_xy];
        int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
        uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
        uint8_t *pixu = h->fdec->plane[1] +  8*mb_y*strideuv +  8*mb_x;
        uint8_t *pixv = h->fdec->plane[2] +  8*mb_y*strideuv +  8*mb_x;
612 613
        if( b_interlaced && (mb_y&1) )
        {
614 615 616
            pixy -= 15*stridey;
            pixu -=  7*strideuv;
            pixv -=  7*strideuv;
617 618
        }

619 620
        x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );

621 622 623
        if( i_qp <= qp_thresh )
            i_edge_end = 1;

Fiona Glaser's avatar
Fiona Glaser committed
624 625 626 627 628 629 630
        #define FILTER_DIR(intra, i_dir)\
        {\
            /* Y plane */\
            i_qpn= h->mb.qp[mbn_xy];\
            if( i_dir == 0 )\
            {\
                /* vertical edge */\
631 632
                deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
                              stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
633 634 635 636
                              h->loopf.deblock_h_luma##intra );\
                if( !(i_edge & 1) )\
                {\
                    /* U/V planes */\
637 638 639
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
                                  stride2uv, bS, i_qpc, 1,\
Fiona Glaser's avatar
Fiona Glaser committed
640 641 642 643 644 645
                                  h->loopf.deblock_h_chroma##intra );\
                }\
            }\
            else\
            {\
                /* horizontal edge */\
646 647
                deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
                              stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
Fiona Glaser's avatar
Fiona Glaser committed
648 649 650 651
                              h->loopf.deblock_v_luma##intra );\
                /* U/V planes */\
                if( !(i_edge & 1) )\
                {\
652 653 654
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
                                  stride2uv, bS, i_qpc, 1,\
Fiona Glaser's avatar
Fiona Glaser committed
655 656 657 658 659
                                  h->loopf.deblock_v_chroma##intra );\
                }\
            }\
        }

660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
        #define DEBLOCK_STRENGTH(i_dir)\
        {\
            /* *** Get bS for each 4px for the current edge *** */\
            if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                *(uint32_t*)bS = 0x03030303;\
            else\
            {\
                *(uint32_t*)bS = 0x00000000;\
                for( i = 0; i < 4; i++ )\
                {\
                    int x  = i_dir == 0 ? i_edge : i;\
                    int y  = i_dir == 0 ? i      : i_edge;\
                    int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
                    int yn = i_dir == 0 ? y : (y - 1)&0x03;\
                    if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
                        h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
                        bS[i] = 2;\
                    else\
                    {\
                        /* FIXME: A given frame may occupy more than one position in\
                         * the reference list. So we should compare the frame numbers,\
                         * not the indices in the ref list.\
                         * No harm yet, as we don't generate that case.*/\
                        int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
                        int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
                        int i4p= mb_4x4+x+y*s4x4;\
                        int i4q= mbn_4x4+xn+yn*s4x4;\
                        for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
                            if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
                                abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
                                abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
                            {\
                                bS[i] = 1;\
                                break;\
                            }\
                    }\
                }\
            }\
        }

Laurent Aimar's avatar
Laurent Aimar committed
700 701
        /* i_dir == 0 -> vertical edge
         * i_dir == 1 -> horizontal edge */
702
        #define DEBLOCK_DIR(i_dir)\
Fiona Glaser's avatar
Fiona Glaser committed
703
        {\
Fiona Glaser's avatar
Fiona Glaser committed
704
            int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
705
            int i_qpn, i, l, mbn_xy, mbn_8x8, mbn_4x4;\
Fiona Glaser's avatar
Fiona Glaser committed
706 707 708 709 710 711 712 713 714
            DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
            if( i_edge )\
                i_edge+= b_8x8_transform;\
            else\
            {\
                mbn_xy  = i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride;\
                mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
                mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
                if( b_interlaced && i_dir == 1 )\
Fiona Glaser's avatar
Fiona Glaser committed
715 716 717 718 719
                {\
                    mbn_xy -= h->mb.i_mb_stride;\
                    mbn_8x8 -= 2 * s8x8;\
                    mbn_4x4 -= 4 * s4x4;\
                }\
Fiona Glaser's avatar
Fiona Glaser committed
720 721 722
                else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                {\
                    FILTER_DIR( _intra, i_dir );\
723
                    goto end##i_dir;\
Fiona Glaser's avatar
Fiona Glaser committed
724
                }\
725 726 727 728 729
                DEBLOCK_STRENGTH(i_dir);\
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
                end##i_dir:\
                i_edge += b_8x8_transform+1;\
Fiona Glaser's avatar
Fiona Glaser committed
730
            }\
731 732 733
            mbn_xy  = mb_xy;\
            mbn_8x8 = mb_8x8;\
            mbn_4x4 = mb_4x4;\
Fiona Glaser's avatar
Fiona Glaser committed
734 735
            for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
            {\
736
                DEBLOCK_STRENGTH(i_dir);\
Fiona Glaser's avatar
Fiona Glaser committed
737 738
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
Fiona Glaser's avatar
Fiona Glaser committed
739
            }\
Laurent Aimar's avatar
Laurent Aimar committed
740 741
        }

742 743
        DEBLOCK_DIR(0);
        DEBLOCK_DIR(1);
Laurent Aimar's avatar
Laurent Aimar committed
744
    }
745 746 747

    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
Laurent Aimar's avatar
Laurent Aimar committed
748 749
}

Loren Merritt's avatar
Loren Merritt committed
750 751 752 753 754 755 756
void x264_frame_deblock( x264_t *h )
{
    int mb_y;
    for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
        x264_frame_deblock_row( h, mb_y );
}

757
#ifdef HAVE_MMX
Loren Merritt's avatar
Loren Merritt committed
758 759 760 761 762 763 764
void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );

void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
Loren Merritt's avatar
Loren Merritt committed
765 766
void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
767
#ifdef ARCH_X86
Loren Merritt's avatar
Loren Merritt committed
768 769
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
Loren Merritt's avatar
Loren Merritt committed
770 771
void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
Loren Merritt's avatar
Loren Merritt committed
772

Loic Le Loarer's avatar
Loic Le Loarer committed
773
static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
Loren Merritt's avatar
Loren Merritt committed
774 775 776 777
{
    x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
    x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
}
Loic Le Loarer's avatar
Loic Le Loarer committed
778
static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
Loren Merritt's avatar
Loren Merritt committed
779 780 781 782
{
    x264_deblock_v8_luma_intra_mmxext( pix,   stride, alpha, beta );
    x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
}
Loren Merritt's avatar
Loren Merritt committed
783
#endif
Loren Merritt's avatar
Loren Merritt committed
784
#endif
Laurent Aimar's avatar
Laurent Aimar committed
785

786 787 788 789 790
#ifdef ARCH_PPC
void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
#endif // ARCH_PPC

Loren Merritt's avatar
Loren Merritt committed
791 792 793 794 795 796 797 798 799 800 801
void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
{
    pf->deblock_v_luma = deblock_v_luma_c;
    pf->deblock_h_luma = deblock_h_luma_c;
    pf->deblock_v_chroma = deblock_v_chroma_c;
    pf->deblock_h_chroma = deblock_h_chroma_c;
    pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
    pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
    pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
    pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;

802
#ifdef HAVE_MMX
Loren Merritt's avatar
Loren Merritt committed
803 804 805 806 807 808
    if( cpu&X264_CPU_MMXEXT )
    {
        pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
        pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
        pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
        pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
809 810 811
#ifdef ARCH_X86
        pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
        pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
Loren Merritt's avatar
Loren Merritt committed
812 813
        pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
        pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
814
#endif
815
        if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
Loren Merritt's avatar
Loren Merritt committed
816 817 818
        {
            pf->deblock_v_luma = x264_deblock_v_luma_sse2;
            pf->deblock_h_luma = x264_deblock_h_luma_sse2;
Loren Merritt's avatar
Loren Merritt committed
819 820
            pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
            pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
Loren Merritt's avatar
Loren Merritt committed
821 822
        }
    }
823
#endif
824 825 826 827 828 829 830 831

#ifdef ARCH_PPC
    if( cpu&X264_CPU_ALTIVEC )
    {
        pf->deblock_v_luma = x264_deblock_v_luma_altivec;
        pf->deblock_h_luma = x264_deblock_h_luma_altivec;
   }
#endif // ARCH_PPC
Loren Merritt's avatar
Loren Merritt committed
832
}
Laurent Aimar's avatar
Laurent Aimar committed
833

834 835

/* threading */
836
void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
837
{
838
    x264_pthread_mutex_lock( &frame->mutex );
839
    frame->i_lines_completed = i_lines_completed;
840 841
    x264_pthread_cond_broadcast( &frame->cv );
    x264_pthread_mutex_unlock( &frame->mutex );
842 843 844 845
}

void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
{
846
    x264_pthread_mutex_lock( &frame->mutex );
847
    while( frame->i_lines_completed < i_lines_completed )
848 849
        x264_pthread_cond_wait( &frame->cv, &frame->mutex );
    x264_pthread_mutex_unlock( &frame->mutex );
850 851
}