/*****************************************************************************
 * frame.c: frame handling
 *****************************************************************************
 * Copyright (C) 2003-2019 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

#include "common.h"

30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
/* Compute a row stride from a requested width.
 *
 * x is first passed through ALIGN( x, align ); if the result has none of the
 * low bits selected by (disalign-1) set, one extra `align` step is added so
 * that the stride does not land exactly on a `disalign` boundary.
 * disalign is used as a bit mask, so it is expected to be a power of two. */
static int align_stride( int x, int align, int disalign )
{
    int stride = ALIGN( x, align );
    int b_on_boundary = (stride & (disalign - 1)) == 0;
    return b_on_boundary ? stride + align : stride;
}

/* Nudge a plane size off a `disalign` boundary.
 *
 * Returns x unchanged unless all bits selected by (disalign-1) are clear, in
 * which case 128 is added.  disalign is used as a bit mask, so it is expected
 * to be a power of two. */
static int align_plane_size( int x, int disalign )
{
    int low_bits = x & (disalign - 1);
    return low_bits ? x : x + 128;
}

45
static int frame_internal_csp( int external_csp )
46
{
47 48 49 50 51 52 53 54 55 56
    int csp = external_csp & X264_CSP_MASK;
    if( csp == X264_CSP_I400 )
        return X264_CSP_I400;
    if( csp >= X264_CSP_I420 && csp < X264_CSP_I422 )
        return X264_CSP_NV12;
    if( csp >= X264_CSP_I422 && csp < X264_CSP_I444 )
        return X264_CSP_NV16;
    if( csp >= X264_CSP_I444 && csp <= X264_CSP_RGB )
        return X264_CSP_I444;
    return X264_CSP_NONE;
Fiona Glaser's avatar
Fiona Glaser committed
57
}
58

59
/* Allocate a frame together with every per-frame buffer it needs.
 *
 * h:      encoder handle; frame geometry, colorspace, cpu flags and analysis
 *         settings are read from it.
 * b_fdec: non-zero selects the buffer set of an "fdec" frame (mb_type, mv,
 *         ref, row statistics, ...); zero selects the "fenc" set (lowres
 *         planes, lowres mvs/costs, AQ offsets, ...).
 *
 * Returns the new frame, or NULL on failure (unsupported internal colorspace,
 * allocation failure, or failure to create the frame's mutex/condvar).
 *
 * Buffer sizes are registered one by one with PREALLOC and are all backed by
 * the single allocation performed by PREALLOC_END( frame->base ); the plane
 * pointers are derived from those buffers afterwards.
 * NOTE(review): CHECKED_MALLOCZERO / PREALLOC* are defined outside this file;
 * they are assumed to `goto fail` on allocation failure, leaving the target
 * pointer NULL. */
static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
    x264_frame_t *frame;
    int i_csp = frame_internal_csp( h->param.i_csp );
    int i_mb_count = h->mb.i_mb_count;
    int i_stride, i_width, i_lines, luma_plane_count;
    int i_padv = PADV << PARAM_INTERLACED;
    int align = 16;
#if ARCH_X86 || ARCH_X86_64
    if( h->param.cpu&X264_CPU_CACHELINE_64 || h->param.cpu&X264_CPU_AVX512 )
        align = 64;
    else if( h->param.cpu&X264_CPU_CACHELINE_32 || h->param.cpu&X264_CPU_AVX )
        align = 32;
#endif
#if ARCH_PPC
    int disalign = 1<<9;
#else
    int disalign = 1<<10;
#endif

    /* ensure frame alignment after PADH is added.
     * The subtraction must be done in signed arithmetic: with sizeof() in the
     * expression it would be evaluated as size_t, so a padding larger than
     * `align` would wrap around instead of being clamped to 0. */
    int pixel_size = (int)sizeof(pixel);
    int padh_align = X264_MAX( align - PADH * pixel_size, 0 ) / pixel_size;

    CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
    PREALLOC_INIT

    /* allocate frame data (+64 for extra data for me) */
    i_width  = h->mb.i_mb_width*16;
    i_lines  = h->mb.i_mb_height*16;
    i_stride = align_stride( i_width + 2*PADH, align, disalign );

    if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
    {
        luma_plane_count = 1;
        frame->i_plane = 2;
        for( int i = 0; i < 2; i++ )
        {
            frame->i_width[i] = i_width >> i;
            /* only NV12 halves the number of chroma lines */
            frame->i_lines[i] = i_lines >> (i && i_csp == X264_CSP_NV12);
            frame->i_stride[i] = i_stride;
        }
    }
    else if( i_csp == X264_CSP_I444 )
    {
        luma_plane_count = 3;
        frame->i_plane = 3;
        for( int i = 0; i < 3; i++ )
        {
            frame->i_width[i] = i_width;
            frame->i_lines[i] = i_lines;
            frame->i_stride[i] = i_stride;
        }
    }
    else if( i_csp == X264_CSP_I400 )
    {
        luma_plane_count = 1;
        frame->i_plane = 1;
        frame->i_width[0] = i_width;
        frame->i_lines[0] = i_lines;
        frame->i_stride[0] = i_stride;
    }
    else
        goto fail;

    frame->i_csp = i_csp;
    frame->i_width_lowres = frame->i_width[0]/2;
    frame->i_lines_lowres = frame->i_lines[0]/2;
    frame->i_stride_lowres = align_stride( frame->i_width_lowres + 2*PADH, align, disalign<<1 );

    for( int i = 0; i < h->param.i_bframe + 2; i++ )
        for( int j = 0; j < h->param.i_bframe + 2; j++ )
            PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );

    frame->i_poc = -1;
    frame->i_type = X264_TYPE_AUTO;
    frame->i_qpplus1 = X264_QP_AUTO;
    frame->i_pts = -1;
    frame->i_frame = -1;
    frame->i_frame_num = -1;
    frame->i_lines_completed = -1;
    frame->b_fdec = b_fdec;
    frame->i_pic_struct = PIC_STRUCT_AUTO;
    frame->i_field_cnt = -1;
    frame->i_duration =
    frame->i_cpb_duration =
    frame->i_dpb_output_delay =
    frame->i_cpb_delay = 0;
    frame->i_coded_fields_lookahead =
    frame->i_cpb_delay_lookahead = -1;

    frame->orig = frame;

    if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
    {
        int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
        int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
        PREALLOC( frame->buffer[1], (chroma_plane_size + padh_align) * sizeof(pixel) );
        if( PARAM_INTERLACED )
            PREALLOC( frame->buffer_fld[1], (chroma_plane_size + padh_align) * sizeof(pixel) );
    }

    /* all 4 luma planes allocated together, since the cacheline split code
     * requires them to be in-phase wrt cacheline alignment. */

    for( int p = 0; p < luma_plane_count; p++ )
    {
        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
        if( h->param.analyse.i_subpel_refine && b_fdec )
            luma_plane_size *= 4;

        /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
        PREALLOC( frame->buffer[p], (luma_plane_size + padh_align) * sizeof(pixel) );
        if( PARAM_INTERLACED )
            PREALLOC( frame->buffer_fld[p], (luma_plane_size + padh_align) * sizeof(pixel) );
    }

    frame->b_duplicate = 0;

    if( b_fdec ) /* fdec frame */
    {
        PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) );
        PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) );
        PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
        PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
        PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
        if( h->param.i_bframe )
        {
            PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
            PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
        }
        else
        {
            frame->mv[1]  = NULL;
            frame->ref[1] = NULL;
        }
        PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
        PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
        PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
        if( h->param.analyse.i_me_method >= X264_ME_ESA )
            PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
        if( PARAM_INTERLACED )
            PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
        if( h->param.analyse.b_mb_info )
            PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
    }
    else /* fenc frame */
    {
        if( h->frames.b_have_lowres )
        {
            int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );

            PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) );

            for( int j = 0; j <= !!h->param.i_bframe; j++ )
                for( int i = 0; i <= h->param.i_bframe; i++ )
                {
                    PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
                    PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
                }
            PREALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint16_t) );
            for( int j = 0; j <= h->param.i_bframe+1; j++ )
                for( int i = 0; i <= h->param.i_bframe+1; i++ )
                    PREALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) );

            /* mbtree asm can overread the input buffers, make sure we don't read outside of allocated memory. */
            prealloc_size += NATIVE_ALIGN;
        }
        if( h->param.rc.i_aq_mode )
        {
            PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
            PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
            if( h->frames.b_have_lowres )
                PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
        }
    }

    PREALLOC_END( frame->base );

    if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
    {
        int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align;
        if( PARAM_INTERLACED )
            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align;
    }

    for( int p = 0; p < luma_plane_count; p++ )
    {
        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
        if( h->param.analyse.i_subpel_refine && b_fdec )
        {
            /* four consecutive planes inside the same buffer; plane 0 doubles as the frame plane */
            for( int i = 0; i < 4; i++ )
            {
                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align;
                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align;
            }
            frame->plane[p] = frame->filtered[p][0];
            frame->plane_fld[p] = frame->filtered_fld[p][0];
        }
        else
        {
            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH + padh_align;
            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH + padh_align;
        }
    }

    if( b_fdec )
    {
        /* one extra zeroed entry is kept in front of mv16x16 (index -1 after the increment) */
        M32( frame->mv16x16[0] ) = 0;
        frame->mv16x16++;

        if( h->param.analyse.i_me_method >= X264_ME_ESA )
            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
    }
    else
    {
        if( h->frames.b_have_lowres )
        {
            int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
            for( int i = 0; i < 4; i++ )
                frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;

            for( int j = 0; j <= !!h->param.i_bframe; j++ )
                for( int i = 0; i <= h->param.i_bframe; i++ )
                    memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );

            frame->i_intra_cost = frame->lowres_costs[0][0];
            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );

            if( h->param.rc.i_aq_mode )
                /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
                memset( frame->i_inv_qscale_factor, 0, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
        }
    }

    if( x264_pthread_mutex_init( &frame->mutex, NULL ) )
        goto fail;
    if( x264_pthread_cond_init( &frame->cv, NULL ) )
    {
        /* don't leave the already-created mutex behind */
        x264_pthread_mutex_destroy( &frame->mutex );
        goto fail;
    }

#if HAVE_OPENCL
    frame->opencl.ocl = h->opencl.ocl;
#endif

    return frame;

fail:
    /* frame is NULL if its own allocation failed; frame->base is only non-NULL
     * once PREALLOC_END has succeeded, in which case it must be released too. */
    if( frame && frame->base )
        x264_free( frame->base );
    x264_free( frame );
    return NULL;
}

/* Release a frame.
 *
 * For a regular frame this frees frame->base, invokes the optional
 * user-supplied free callbacks (param, mb_info, extra SEI payloads), destroys
 * the frame's mutex and condition variable and, with OpenCL enabled, calls
 * x264_opencl_frame_delete().  The frame structure itself is always freed. */
void x264_frame_delete( x264_frame_t *frame )
{
    /* Duplicate frames are blank copies of real frames (including pointers),
     * so only the structure itself may be released; freeing what it points to
     * would cause a double free later. */
    if( frame->b_duplicate )
    {
        x264_free( frame );
        return;
    }

    x264_free( frame->base );

    if( frame->param && frame->param->param_free )
        frame->param->param_free( frame->param );

    if( frame->mb_info_free )
        frame->mb_info_free( frame->mb_info );

    if( frame->extra_sei.sei_free )
    {
        /* each payload first, then the payload array */
        for( int idx = 0; idx < frame->extra_sei.num_payloads; idx++ )
            frame->extra_sei.sei_free( frame->extra_sei.payloads[idx].payload );
        frame->extra_sei.sei_free( frame->extra_sei.payloads );
    }

    x264_pthread_mutex_destroy( &frame->mutex );
    x264_pthread_cond_destroy( &frame->cv );
#if HAVE_OPENCL
    x264_opencl_frame_delete( frame );
#endif

    x264_free( frame );
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
/* Fetch the start pointer and stride of one plane of an input picture.
 *
 * pix/stride: outputs; receive the plane pointer and its stride as stored in
 *             src->img.  With X264_CSP_VFLIP set, the pointer is moved to the
 *             last row of the plane and the stride is negated.
 * plane:      index into src->img.plane[] / src->img.i_stride[].
 * xshift/yshift: right shifts applied to the configured picture width/height
 *             to obtain this plane's dimensions.
 *
 * Returns 0 on success, -1 (after logging an error) when the plane width
 * exceeds the absolute stride.  The outputs are written in both cases. */
static int get_plane_ptr( x264_t *h, x264_picture_t *src, uint8_t **pix, int *stride, int plane, int xshift, int yshift )
{
    const int plane_width  = h->param.i_width >> xshift;
    const int plane_height = h->param.i_height >> yshift;
    uint8_t *base = src->img.plane[plane];
    int pitch = src->img.i_stride[plane];

    if( src->img.i_csp & X264_CSP_VFLIP )
    {
        base += (plane_height-1) * pitch;
        pitch = -pitch;
    }

    *pix = base;
    *stride = pitch;

    if( plane_width > abs( pitch ) )
    {
        x264_log( h, X264_LOG_ERROR, "Input picture width (%d) is greater than stride (%d)\n", plane_width, pitch );
        return -1;
    }
    return 0;
}

Anton Mitrofanov's avatar
Anton Mitrofanov committed
356
/* From this point on, get_plane_ptr(...) is a statement-like wrapper around the
 * function of the same name: it forwards its arguments and makes the *calling*
 * function return -1 when the call reports failure (< 0).  The inner
 * get_plane_ptr is not expanded again, since a macro name is not re-expanded
 * inside its own replacement list, so it resolves to the function.
 * Only usable inside functions whose return type can carry -1. */
#define get_plane_ptr(...) do { if( get_plane_ptr(__VA_ARGS__) < 0 ) return -1; } while( 0 )
357

358
/* Copy a user-supplied picture into an internal frame.
 *
 * The input colorspace must map (via frame_internal_csp) to dst->i_csp and its
 * bit depth flag must match this build; otherwise an error is logged and -1 is
 * returned.  Per-picture metadata (forced type, qp, pts, param, pic_struct,
 * extra SEI, opaque, mb_info) is copied to dst, then the pixel data is
 * converted into dst's planes with the h->mc plane-copy functions.
 *
 * Returns 0 on success, -1 on error (including a plane whose width exceeds
 * its stride, detected by get_plane_ptr). */
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
{
    const int i_csp = src->img.i_csp & X264_CSP_MASK;
    const int width = h->param.i_width;
    const int height = h->param.i_height;

    if( dst->i_csp != frame_internal_csp( i_csp ) )
    {
        x264_log( h, X264_LOG_ERROR, "Invalid input colorspace\n" );
        return -1;
    }

#if HIGH_BIT_DEPTH
    if( !(src->img.i_csp & X264_CSP_HIGH_DEPTH) )
    {
        x264_log( h, X264_LOG_ERROR, "This build of x264 requires high depth input. Rebuild to support 8-bit input.\n" );
        return -1;
    }
#else
    if( src->img.i_csp & X264_CSP_HIGH_DEPTH )
    {
        x264_log( h, X264_LOG_ERROR, "This build of x264 requires 8-bit input. Rebuild to support high depth input.\n" );
        return -1;
    }
#endif

    if( BIT_DEPTH != 10 && i_csp == X264_CSP_V210 )
    {
        x264_log( h, X264_LOG_ERROR, "v210 input is only compatible with bit-depth of 10 bits\n" );
        return -1;
    }

    /* An out-of-range forced type is reported and replaced by AUTO. */
    if( src->i_type >= X264_TYPE_AUTO && src->i_type <= X264_TYPE_KEYFRAME )
        dst->i_forced_type = src->i_type;
    else
    {
        x264_log( h, X264_LOG_WARNING, "forced frame type (%d) at %d is unknown\n", src->i_type, h->frames.i_input );
        dst->i_forced_type = X264_TYPE_AUTO;
    }

    dst->i_type       = dst->i_forced_type;
    dst->i_qpplus1    = src->i_qpplus1;
    dst->i_pts        = dst->i_reordered_pts = src->i_pts;
    dst->param        = src->param;
    dst->i_pic_struct = src->i_pic_struct;
    dst->extra_sei    = src->extra_sei;
    dst->opaque       = src->opaque;

    /* mb_info is only taken over when the encoder was configured to use it */
    if( h->param.analyse.b_mb_info )
    {
        dst->mb_info      = src->prop.mb_info;
        dst->mb_info_free = src->prop.mb_info_free;
    }
    else
    {
        dst->mb_info      = NULL;
        dst->mb_info_free = NULL;
    }

    /* Packed 4:2:2 (YUYV / UYVY): split into the luma and interleaved chroma planes. */
    if( i_csp == X264_CSP_YUYV || i_csp == X264_CSP_UYVY )
    {
        int p = i_csp == X264_CSP_UYVY;
        h->mc.plane_copy_deinterleave_yuyv( dst->plane[p], dst->i_stride[p], dst->plane[p^1], dst->i_stride[p^1],
                                            (pixel*)src->img.plane[0], src->img.i_stride[0], width, height );
        return 0;
    }

    /* v210: stride is handed over in units of 32-bit words. */
    if( i_csp == X264_CSP_V210 )
    {
        h->mc.plane_copy_deinterleave_v210( dst->plane[0], dst->i_stride[0],
                                            dst->plane[1], dst->i_stride[1],
                                            (uint32_t *)src->img.plane[0], src->img.i_stride[0]/sizeof(uint32_t), width, height );
        return 0;
    }

    /* Packed RGB family (every colorspace value from X264_CSP_BGR upwards). */
    if( i_csp >= X264_CSP_BGR )
    {
        uint8_t *rgb = src->img.plane[0];
        int rgb_stride = src->img.i_stride[0];
        if( src->img.i_csp & X264_CSP_VFLIP )
        {
            rgb += (height-1) * rgb_stride;
            rgb_stride = -rgb_stride;
        }
        int b_rgb = i_csp==X264_CSP_RGB;
        int bytes_per_sample_group = i_csp==X264_CSP_BGRA ? 4 : 3;
        h->mc.plane_copy_deinterleave_rgb( dst->plane[1+b_rgb], dst->i_stride[1+b_rgb],
                                           dst->plane[0], dst->i_stride[0],
                                           dst->plane[2-b_rgb], dst->i_stride[2-b_rgb],
                                           (pixel*)rgb, rgb_stride/sizeof(pixel), bytes_per_sample_group, width, height );
        return 0;
    }

    /* Planar / semi-planar input: luma is always a straight copy.
     * Each get_plane_ptr() below returns -1 from this function on failure. */
    uint8_t *pix[3];
    int stride[3];
    int v_shift = CHROMA_V_SHIFT;

    get_plane_ptr( h, src, &pix[0], &stride[0], 0, 0, 0 );
    h->mc.plane_copy( dst->plane[0], dst->i_stride[0], (pixel*)pix[0],
                      stride[0]/sizeof(pixel), width, height );

    switch( i_csp )
    {
        case X264_CSP_NV12:
        case X264_CSP_NV16:
            /* chroma already interleaved in the wanted order */
            get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift );
            h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
                              stride[1]/sizeof(pixel), width, height>>v_shift );
            break;

        case X264_CSP_NV21:
            /* interleaved chroma that goes through plane_copy_swap */
            get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift );
            h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
                                   stride[1]/sizeof(pixel), width>>1, height>>v_shift );
            break;

        case X264_CSP_I420:
        case X264_CSP_I422:
        case X264_CSP_YV12:
        case X264_CSP_YV16:
        {
            /* separate chroma planes get interleaved; the YV* variants take source planes 1 and 2 in swapped order */
            int uv_swap = i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16;
            get_plane_ptr( h, src, &pix[1], &stride[1], uv_swap ? 2 : 1, 1, v_shift );
            get_plane_ptr( h, src, &pix[2], &stride[2], uv_swap ? 1 : 2, 1, v_shift );
            h->mc.plane_copy_interleave( dst->plane[1], dst->i_stride[1],
                                         (pixel*)pix[1], stride[1]/sizeof(pixel),
                                         (pixel*)pix[2], stride[2]/sizeof(pixel),
                                         width>>1, height>>v_shift );
            break;
        }

        case X264_CSP_I444:
        case X264_CSP_YV24:
        {
            /* full-resolution chroma planes are copied separately; YV24 takes source planes 1 and 2 in swapped order */
            int b_swap = i_csp == X264_CSP_YV24;
            get_plane_ptr( h, src, &pix[1], &stride[1], b_swap ? 2 : 1, 0, 0 );
            get_plane_ptr( h, src, &pix[2], &stride[2], b_swap ? 1 : 2, 0, 0 );
            h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
                              stride[1]/sizeof(pixel), width, height );
            h->mc.plane_copy( dst->plane[2], dst->i_stride[2], (pixel*)pix[2],
                              stride[2]/sizeof(pixel), width, height );
            break;
        }

        default:
            /* nothing besides luma to copy */
            break;
    }
    return 0;
}

478
/* Fill a destination with a repeated pattern read from src.
 *
 * dst:  start of the area to fill.
 * src:  location of the pattern; `size` bytes are read from it.
 * len:  number of pattern repetitions to write.
 * size: pattern size in bytes; the code distinguishes 1, 2 and larger
 *       (read as a 32-bit value).
 *
 * The pattern is widened to 16/32/64 bits so that most of the fill can be done
 * with word-sized stores; a short head brings the write position towards word
 * alignment and a short tail finishes the remaining bytes. */
static ALWAYS_INLINE void pixel_memset( pixel *dst, pixel *src, int len, int size )
{
    uint8_t *out = (uint8_t*)dst;
    uint32_t pat1 = *src;
    uint32_t pat2 = size == 1 ? pat1 + (pat1 <<  8) : M16( src );
    uint32_t pat4 = size <= 2 ? pat2 + (pat2 << 16) : M32( src );
    int pos = 0;
    len *= size; /* from here on, len is a byte count */

    /* Head: only entered when the destination is not word aligned */
    if( (intptr_t)out & (WORD_SIZE - 1) )
    {
        if( size <= 2 && ((intptr_t)out & 3) )
        {
            if( size == 1 && ((intptr_t)out & 1) )
            {
                out[pos] = pat1;
                pos++;
            }
            if( (intptr_t)out & 2 )
            {
                M16( out+pos ) = pat2;
                pos += 2;
            }
        }
        if( WORD_SIZE == 8 && (intptr_t)out & 4 )
        {
            M32( out+pos ) = pat4;
            pos += 4;
        }
    }

    /* Body: 64-bit stores where the word size allows it, then 32-bit stores */
    if( WORD_SIZE == 8 )
    {
        uint64_t pat8 = pat4 + ((uint64_t)pat4<<32);
        while( pos < len - 7 )
        {
            M64( out+pos ) = pat8;
            pos += 8;
        }
    }
    while( pos < len - 3 )
    {
        M32( out+pos ) = pat4;
        pos += 4;
    }

    /* Tail: at most one 16-bit store and, for byte patterns, one final byte */
    if( size <= 2 )
    {
        if( pos < len - 1 )
        {
            M16( out+pos ) = pat2;
            pos += 2;
        }
        if( size == 1 && pos != len )
            out[pos] = pat1;
    }
}
529

530
/* Replicate the edge pixels of a plane into its surrounding border.
 *
 * pix:       top-left pixel of the area (border lies at negative offsets).
 * i_stride:  distance between rows, in pixels.
 * i_width/i_height: size of the area.
 * i_padh/i_padv:    border width / height to fill.
 * b_pad_top/b_pad_bottom: whether the rows above / below are filled as well.
 * b_chroma:  when set, the horizontal fill repeats two-pixel units instead of
 *            single pixels (i_padh>>1 units of 2*sizeof(pixel) bytes).
 *
 * The left and right bands are filled first, so the rows copied for the top
 * and bottom bands already include their horizontal border. */
static ALWAYS_INLINE void plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
{
    for( int y = 0; y < i_height; y++ )
    {
        pixel *row = pix + y*i_stride;
        /* left band */
        pixel_memset( row - i_padh, row, i_padh>>b_chroma, sizeof(pixel)<<b_chroma );
        /* right band */
        pixel_memset( row + i_width, row + i_width-1-b_chroma, i_padh>>b_chroma, sizeof(pixel)<<b_chroma );
    }

    /* upper band: copies of the first (already widened) row */
    if( b_pad_top )
    {
        pixel *first_row = pix - i_padh;
        for( int n = 1; n <= i_padv; n++ )
            memcpy( first_row - n*i_stride, first_row, (i_width+2*i_padh) * sizeof(pixel) );
    }

    /* lower band: copies of the last (already widened) row */
    if( b_pad_bottom )
    {
        pixel *last_row = pix - i_padh + (i_height-1)*i_stride;
        for( int n = 1; n <= i_padv; n++ )
            memcpy( last_row + n*i_stride, last_row, (i_width+2*i_padh) * sizeof(pixel) );
    }
}

551
/* Expand the borders of all planes of `frame` for the macroblock row mb_y.
 *
 * With SLICE_MBAFF set, rows are handled in pairs: odd rows return
 * immediately, and for even rows both fields (plane_fld, with a doubled
 * stride) are expanded separately before the frame plane itself.
 * Top/bottom padding is added only for the first row and for the last row
 * (pair) of the picture. */
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y )
{
    if( mb_y & SLICE_MBAFF )
        return;

    const int pad_top = mb_y == 0;
    const int pad_bot = mb_y == h->mb.i_mb_height - (1 << SLICE_MBAFF);
    const int b_start = mb_y == h->i_threadslice_start;
    const int b_end   = mb_y == h->i_threadslice_end - (1 << SLICE_MBAFF);
    const int b_extra_rows = b_end && !b_start;
    const int width = 16*h->mb.i_mb_width;
    const int starty = 16*mb_y - 4*!b_start;

    for( int i = 0; i < frame->i_plane; i++ )
    {
        int h_shift = i && CHROMA_H_SHIFT;
        int v_shift = i && CHROMA_V_SHIFT;
        int stride = frame->i_stride[i];
        int padv = PADV >> v_shift;
        int offset = (starty*stride) >> v_shift;

        int height;
        if( pad_bot )
            height = (16*(h->mb.i_mb_height - mb_y)) >> SLICE_MBAFF;
        else
            height = 16;
        height >>= v_shift;
        // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
        if( b_extra_rows )
            height += 4 >> (v_shift + SLICE_MBAFF);

        if( SLICE_MBAFF )
        {
            // border samples for each field are extended separately
            pixel *fld = frame->plane_fld[i] + offset;
            plane_expand_border( fld, stride*2, width, height, PADH, padv, pad_top, pad_bot, h_shift );
            plane_expand_border( fld+stride, stride*2, width, height, PADH, padv, pad_top, pad_bot, h_shift );

            // the frame plane covers both rows of the pair
            height = (pad_bot ? 16*(h->mb.i_mb_height - mb_y) : 32) >> v_shift;
            if( b_extra_rows )
                height += 4 >> v_shift;
        }

        plane_expand_border( frame->plane[i] + offset, stride, width, height, PADH, padv, pad_top, pad_bot, h_shift );
    }
}

Loren Merritt's avatar
Loren Merritt committed
594
/* Expand the borders of the filtered planes (indices 1..3 of
 * frame->filtered / frame->filtered_fld) for the macroblock row mb_y.
 *
 * b_end marks the last call for the picture: the remaining rows are then
 * covered in one go and the bottom border is padded.  The top border is
 * padded when mb_y is 0.  One plane is processed, or three with CHROMA444. */
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
    /* during filtering, 8 extra pixels were filtered on each edge,
     * but up to 3 of the horizontal ones may be wrong.
       we want to expand border from the last filtered pixel */
    const int b_start = !mb_y;
    const int width = 16*h->mb.i_mb_width + 8;
    const int padh = PADH - 4;
    const int padv = PADV - 8;
    const int plane_count = CHROMA444 ? 3 : 1;

    int height = 16;
    if( b_end )
        height = ((16*(h->mb.i_mb_height - mb_y)) >> SLICE_MBAFF) + 16;

    for( int p = 0; p < plane_count; p++ )
    {
        int stride = frame->i_stride[p];
        for( int i = 1; i < 4; i++ )
        {
            // buffer: 8 luma, to match the hpel filter
            if( SLICE_MBAFF )
            {
                /* both fields separately, using a doubled stride */
                pixel *fld = frame->filtered_fld[p][i] + (16*mb_y - 16) * stride - 4;
                plane_expand_border( fld, stride*2, width, height, padh, padv, b_start, b_end, 0 );
                plane_expand_border( fld+stride, stride*2, width, height, padh, padv, b_start, b_end, 0 );
            }

            pixel *frm = frame->filtered[p][i] + (16*mb_y - 8) * stride - 4;
            plane_expand_border( frm, stride, width, height << SLICE_MBAFF, padh, padv, b_start, b_end, 0 );
        }
    }
}

622
/* Expand all four borders of each of the four lowres planes of `frame`,
 * using the lowres stride and dimensions and a PADH x PADV border. */
void x264_frame_expand_border_lowres( x264_frame_t *frame )
{
    const int stride = frame->i_stride_lowres;
    const int width  = frame->i_width_lowres;
    const int lines  = frame->i_lines_lowres;

    for( int plane = 0; plane < 4; plane++ )
    {
        plane_expand_border( frame->lowres[plane], stride, width, lines, PADH, PADV, 1, 1, 0 );
    }
}

628 629
/* Expand all four borders of one whole plane of `frame`, with the plane
 * height and vertical padding scaled by CHROMA_V_SHIFT and the horizontal
 * fill mode selected by CHROMA_H_SHIFT. */
void x264_frame_expand_border_chroma( x264_t *h, x264_frame_t *frame, int plane )
{
    const int v_shift = CHROMA_V_SHIFT;
    const int width   = 16*h->mb.i_mb_width;
    const int height  = (16*h->mb.i_mb_height) >> v_shift;
    const int padv    = PADV >> v_shift;

    plane_expand_border( frame->plane[plane], frame->i_stride[plane], width, height,
                         PADH, padv, 1, 1, CHROMA_H_SHIFT );
}

635 636
/* Fill the area between the configured picture size and the macroblock-aligned
 * size (multiples of 16) in every plane of `frame`.
 *
 * Columns to the right are filled by repeating the last pixel (or the last
 * two-pixel unit when CHROMA_H_SHIFT applies to the plane) of each row; rows
 * below are copies of the last picture row, or, with PARAM_INTERLACED, of
 * whichever of the last two rows is selected by the parity of the row being
 * filled. */
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
{
    for( int i = 0; i < frame->i_plane; i++ )
    {
        const int h_shift = i && CHROMA_H_SHIFT;
        const int v_shift = i && CHROMA_V_SHIFT;
        const int i_width = h->param.i_width;
        const int i_height = h->param.i_height >> v_shift;
        const int i_padx = (h->mb.i_mb_width * 16 - h->param.i_width);
        const int i_pady = (h->mb.i_mb_height * 16 - h->param.i_height) >> v_shift;
        const int stride = frame->i_stride[i];
        pixel *plane = frame->plane[i];

        if( i_padx )
        {
            for( int y = 0; y < i_height; y++ )
            {
                pixel *row_end = &plane[y*stride + i_width];
                pixel_memset( row_end, row_end - 1-h_shift,
                              i_padx>>h_shift, sizeof(pixel)<<h_shift );
            }
        }

        if( i_pady )
        {
            const int last_row = i_height + i_pady;
            for( int y = i_height; y < last_row; y++ )
            {
                /* (~y & PARAM_INTERLACED) is 1 only for even y in interlaced mode */
                int src_y = i_height-(~y&PARAM_INTERLACED)-1;
                memcpy( &plane[y*stride], &plane[src_y*stride],
                        (i_width + i_padx) * sizeof(pixel) );
            }
        }
    }
}

663 664 665 666
/* For the 16-pixel-wide column of h->fenc starting at x = 16*mb_x, fill the
 * rows between the configured picture height and the macroblock-aligned
 * height with copies of the last picture row. Applied to every plane, with
 * the heights shifted by CHROMA_V_SHIFT for planes other than plane 0.
 * mb_y is not used by this function. */
void x264_expand_border_mbpair( x264_t *h, int mb_x, int mb_y )
{
    x264_frame_t *frame = h->fenc;

    for( int p = 0; p < frame->i_plane; p++ )
    {
        const int v_shift = p && CHROMA_V_SHIFT;
        const int stride  = frame->i_stride[p];
        const int rows    = h->param.i_height >> v_shift;
        const int extra   = (h->mb.i_mb_height * 16 - h->param.i_height) >> v_shift;
        pixel *column     = frame->plane[p] + 16*mb_x;

        int y = rows;
        while( y < rows + extra )
        {
            memcpy( column + y*stride, column + (rows-1)*stride, 16*sizeof(pixel) );
            y++;
        }
    }
}

677
/* threading */
678
/* Store the new completed-line count in `frame` and broadcast on frame->cv.
 * Both the store and the broadcast happen with frame->mutex held. */
void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
{
    x264_pthread_mutex_lock( &frame->mutex );
    {
        frame->i_lines_completed = i_lines_completed;
        x264_pthread_cond_broadcast( &frame->cv );
    }
    x264_pthread_mutex_unlock( &frame->mutex );
}

/* Block until frame->i_lines_completed is at least `i_lines_completed`.
 * The counter is re-checked under frame->mutex after every wake-up on
 * frame->cv. */
void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
{
    x264_pthread_mutex_lock( &frame->mutex );
    for( ;; )
    {
        if( frame->i_lines_completed >= i_lines_completed )
            break;
        x264_pthread_cond_wait( &frame->cv, &frame->mutex );
    }
    x264_pthread_mutex_unlock( &frame->mutex );
}

694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
/* Set h->i_threadslice_pass to `pass` under h->mutex. Waiters on h->cv are
 * only woken when `pass` is positive; a non-positive value is stored without
 * a broadcast. */
void x264_threadslice_cond_broadcast( x264_t *h, int pass )
{
    x264_pthread_mutex_lock( &h->mutex );
    {
        h->i_threadslice_pass = pass;
        if( pass > 0 )
        {
            x264_pthread_cond_broadcast( &h->cv );
        }
    }
    x264_pthread_mutex_unlock( &h->mutex );
}

/* Block until h->i_threadslice_pass has reached at least `pass`.
 * The value is re-checked under h->mutex after every wake-up on h->cv. */
void x264_threadslice_cond_wait( x264_t *h, int pass )
{
    x264_pthread_mutex_lock( &h->mutex );
    for( ;; )
    {
        if( h->i_threadslice_pass >= pass )
            break;
        x264_pthread_cond_wait( &h->cv, &h->mutex );
    }
    x264_pthread_mutex_unlock( &h->mutex );
}

Fiona Glaser's avatar
Fiona Glaser committed
711 712 713 714 715 716 717 718 719 720 721 722 723 724 725
/* Account for one more slice in `frame`.
 *
 * When h->param.i_slice_count_max is 0 nothing is counted and 0 is returned.
 * Otherwise frame->i_slice_count is incremented (through
 * x264_pthread_fetch_and_add() with frame->mutex when sliced threads are
 * enabled, a plain post-increment otherwise) and the value it had before the
 * increment is compared against the limit.
 *
 * Returns 0 if the slice is allowed, -1 if the pre-increment count had already
 * reached i_slice_count_max. */
int x264_frame_new_slice( x264_t *h, x264_frame_t *frame )
{
    const int limit = h->param.i_slice_count_max;
    if( !limit )
        return 0;

    int previous = h->param.b_sliced_threads
                 ? x264_pthread_fetch_and_add( &frame->i_slice_count, 1, &frame->mutex )
                 : frame->i_slice_count++;

    return previous >= limit ? -1 : 0;
}

726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769
/* list operators */

/* Append `frame` to a NULL-terminated frame list by writing it over the
 * first NULL entry. No terminator is written after it and no capacity check
 * is made: the caller must guarantee the following slot exists and is NULL. */
void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
{
    x264_frame_t **slot = list;
    while( *slot )
        slot++;
    *slot = frame;
}

/* Remove and return the last entry of a NULL-terminated frame list.
 * The list must not be empty (asserted). */
x264_frame_t *x264_frame_pop( x264_frame_t **list )
{
    assert( list[0] );

    /* Advance until the entry after `last` is the terminator. */
    x264_frame_t **last = list;
    while( last[1] )
        last++;

    x264_frame_t *frame = *last;
    *last = NULL;
    return frame;
}

/* Insert `frame` at the front of a NULL-terminated frame list, moving every
 * existing entry one slot towards the end. The terminator itself is not
 * rewritten: the caller must guarantee that the slot after the old terminator
 * exists and is NULL. */
void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
{
    int count = 0;
    while( list[count] )
        count++;

    /* Move from the back so no entry is overwritten before it is copied. */
    for( int i = count; i > 0; i-- )
        list[i] = list[i-1];

    list[0] = frame;
}

/* Remove and return the first entry of a NULL-terminated frame list, moving
 * every remaining entry (and the terminator) one slot towards the front.
 * The list must not be empty; this is asserted after the shift. */
x264_frame_t *x264_frame_shift( x264_frame_t **list )
{
    x264_frame_t *frame = list[0];

    x264_frame_t **slot = list;
    while( *slot )
    {
        slot[0] = slot[1];
        slot++;
    }

    assert( frame );
    return frame;
}

/* Drop one reference to `frame`. When the count reaches zero the frame is
 * appended to h->frames.unused[], indexed by the frame's own b_fdec flag.
 * The reference count must be positive on entry (asserted). */
void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
{
    assert( frame->i_reference_count > 0 );
    if( --frame->i_reference_count == 0 )
    {
        x264_frame_push( h->frames.unused[frame->b_fdec], frame );
    }
}

Steven Walters's avatar
Steven Walters committed
773
/* Get a frame ready for use: take one from h->frames.unused[b_fdec] if that
 * list is non-empty, otherwise create one with frame_new().
 *
 * The returned frame has its reference count set to 1, its per-use flags
 * reset (note that b_scenecut is reset to 1, the others to 0), its slice
 * count initialised, and its weight tables zeroed.
 *
 * Returns NULL if no frame could be obtained. */
x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec )
{
    x264_frame_t *frame = h->frames.unused[b_fdec][0]
                        ? x264_frame_pop( h->frames.unused[b_fdec] )
                        : frame_new( h, b_fdec );
    if( !frame )
        return NULL;

    frame->i_reference_count = 1;

    frame->b_last_minigop_bframe = 0;
    frame->b_intra_calculated = 0;
    frame->b_scenecut = 1;
    frame->b_keyframe = 0;
    frame->b_corrupt = 0;

    /* With sliced threads the count starts at the thread count, else at 1. */
    if( h->param.b_sliced_threads )
        frame->i_slice_count = h->param.i_threads;
    else
        frame->i_slice_count = 1;

    memset( frame->weight, 0, sizeof(frame->weight) );
    memset( frame->f_weighted_cost_delta, 0, sizeof(frame->f_weighted_cost_delta) );

    return frame;
}

/* Drop one reference to a blank frame. When the count reaches zero the frame
 * is appended to h->frames.blank_unused.
 * The reference count must be positive on entry (asserted). */
void x264_frame_push_blank_unused( x264_t *h, x264_frame_t *frame )
{
    assert( frame->i_reference_count > 0 );
    if( --frame->i_reference_count == 0 )
    {
        x264_frame_push( h->frames.blank_unused, frame );
    }
}

/* Get a blank frame: take one from h->frames.blank_unused if available,
 * otherwise allocate a bare x264_frame_t with x264_malloc(). Only b_duplicate
 * and i_reference_count are initialised here; a freshly allocated frame has
 * every other field left unset.
 *
 * Returns NULL on allocation failure. */
x264_frame_t *x264_frame_pop_blank_unused( x264_t *h )
{
    x264_frame_t *frame = NULL;

    if( !h->frames.blank_unused[0] )
        frame = x264_malloc( sizeof(x264_frame_t) );
    else
        frame = x264_frame_pop( h->frames.blank_unused );

    if( !frame )
        return NULL;

    frame->i_reference_count = 1;
    frame->b_duplicate = 1;
    return frame;
}

818 819
/* Apply the weight functions in `w` to an i_width x i_height region, reading
 * from `src` and writing to `dst`.
 *
 * The region is processed in horizontal strips of up to 16 rows. Within a
 * strip, w->weightfn[16>>2] is called in steps of 16 columns while more than 8
 * columns remain, then w->weightfn[8>>2] is called once if any columns are
 * left. `h` is not used by this function. */
void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                              int i_width, int i_height, x264_weight_t *w )
{
    /* Weight horizontal strips of height 16. This was found to be the optimal height
     * in terms of the cache loads. */
    for( ; i_height > 0; i_height -= 16, dst += 16 * i_dst_stride, src += 16 * i_src_stride )
    {
        int col = 0;
        while( col < i_width-8 )
        {
            w->weightfn[16>>2]( dst+col, i_dst_stride, src+col, i_src_stride, w, X264_MIN( i_height, 16 ) );
            col += 16;
        }
        if( col < i_width )
            w->weightfn[ 8>>2]( dst+col, i_dst_stride, src+col, i_src_stride, w, X264_MIN( i_height, 16 ) );
    }
}

Steven Walters's avatar
Steven Walters committed
836 837 838
/* Delete every frame in a NULL-terminated list with x264_frame_delete(), then
 * free the list array itself. A NULL `list` is accepted and ignored. */
void x264_frame_delete_list( x264_frame_t **list )
{
    if( !list )
        return;

    for( x264_frame_t **entry = list; *entry; entry++ )
        x264_frame_delete( *entry );

    x264_free( list );
}

846
/* Initialise a synchronised frame list that can hold up to `max_size` frames.
 *
 * Allocates a zeroed array of max_size+1 frame pointers (one extra slot for
 * the NULL terminator) and initialises the mutex and both condition variables.
 *
 * Returns 0 on success, -1 if max_size is negative, the allocation fails, or
 * any of the synchronisation objects cannot be initialised. On the last kind
 * of failure the pointer array is released and slist->list is set to NULL, so
 * the array is not leaked and a later x264_sync_frame_list_delete() does not
 * free it a second time. */
int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int max_size )
{
    if( max_size < 0 )
        return -1;
    slist->i_max_size = max_size;
    slist->i_size = 0;
    /* Jumps to `fail` if the allocation fails. */
    CHECKED_MALLOCZERO( slist->list, (max_size+1) * sizeof(x264_frame_t*) );
    if( x264_pthread_mutex_init( &slist->mutex, NULL ) ||
        x264_pthread_cond_init( &slist->cv_fill, NULL ) ||
        x264_pthread_cond_init( &slist->cv_empty, NULL ) )
    {
        /* Do not leak the array that was just allocated. */
        x264_free( slist->list );
        slist->list = NULL;
        return -1;
    }
    return 0;
fail:
    return -1;
}

862
/* Tear down a synchronised frame list: destroy both condition variables and
 * the mutex, then delete every frame still in the list and free the list
 * array via x264_frame_delete_list(). */
void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist )
{
    /* The three sync objects are independent; release in reverse init order. */
    x264_pthread_cond_destroy( &slist->cv_empty );
    x264_pthread_cond_destroy( &slist->cv_fill );
    x264_pthread_mutex_destroy( &slist->mutex );

    x264_frame_delete_list( slist->list );
}

870
/* Append `frame` to the synchronised list, blocking on cv_empty while the
 * list holds i_max_size entries. After the mutex is released, cv_fill is
 * broadcast to wake any thread waiting for a frame. */
void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame )
{
    x264_pthread_mutex_lock( &slist->mutex );
    for( ;; )
    {
        if( slist->i_size != slist->i_max_size )
            break;
        x264_pthread_cond_wait( &slist->cv_empty, &slist->mutex );
    }
    slist->list[slist->i_size] = frame;
    slist->i_size++;
    x264_pthread_mutex_unlock( &slist->mutex );

    x264_pthread_cond_broadcast( &slist->cv_fill );
}
879 880 881 882 883 884 885 886 887 888 889 890 891

/* Remove and return the most recently pushed frame of the synchronised list,
 * blocking on cv_fill while the list is empty. The vacated slot is reset to
 * NULL and cv_empty is broadcast (with the mutex still held) to wake any
 * thread waiting for free space. */
x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist )
{
    x264_pthread_mutex_lock( &slist->mutex );
    for( ;; )
    {
        if( slist->i_size )
            break;
        x264_pthread_cond_wait( &slist->cv_fill, &slist->mutex );
    }

    const int last = slist->i_size - 1;
    x264_frame_t *frame = slist->list[last];
    slist->list[last] = NULL;
    slist->i_size = last;

    x264_pthread_cond_broadcast( &slist->cv_empty );
    x264_pthread_mutex_unlock( &slist->mutex );
    return frame;
}