ratecontrol.c 30.5 KB
Newer Older
1
/***************************************************-*- coding: iso-8859-1 -*-
 * ratecontrol.c: h264 encoder library (Rate Control)
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
 *
 * Authors: Måns Rullgård <mru@mru.ath.cx>
 * 2 pass code: Michael Niedermayer <michaelni@gmx.at>
 *              Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

26
#define _ISOC99_SOURCE
27
#undef NDEBUG // always check asserts, the speed effect is far too small to disable them
Laurent Aimar's avatar
Laurent Aimar committed
28 29 30
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
31 32
#include <math.h>
#include <limits.h>
33
#include <assert.h>
Laurent Aimar's avatar
Laurent Aimar committed
34

35 36 37
#include "common/common.h"
#include "common/cpu.h"
#include "common/macroblock.h"
Laurent Aimar's avatar
Laurent Aimar committed
38 39
#include "ratecontrol.h"

Eric Petit's avatar
Eric Petit committed
40 41 42
#ifdef SYS_MACOSX
#define exp2f(x) ( (float) exp2( (x) ) )
#endif
43 44 45
#ifdef SYS_FREEBSD
#define exp2f(x) powf( 2, (x) )
#endif
Eric Petit's avatar
Eric Petit committed
46

47 48 49 50

/* Per-frame record used by the 2-pass code: the statistics parsed from the
 * first-pass stats file plus the quantizer chosen for the second pass. */
typedef struct
{
    int pict_type;              /* SLICE_TYPE_I, SLICE_TYPE_P or SLICE_TYPE_B */
    int idr;                    /* set when the stats file records type 'I' (as opposed to 'i') */
    float qscale;               /* qscale derived from the "q:" field of the stats file */
    int mv_bits;                /* "mv:" field of the stats file */
    int i_tex_bits;             /* "itex:" field */
    int p_tex_bits;             /* "ptex:" field */
    int misc_bits;              /* "misc:" field */
    uint64_t expected_bits;     /* compared against the bits actually spent so far */
    float new_qscale;           /* qscale selected for this frame in the second pass */
    int new_qp;                 /* new_qscale converted to a qp and rounded */
    int i_count;                /* "imb:" field */
    int p_count;                /* "pmb:" field */
    int s_count;                /* "smb:" field */
    float blurred_complexity;   /* exposed to the rate-control equation as "blurTex" */
} ratecontrol_entry_t;

66 67 68
struct x264_ratecontrol_t
{
    /* constants */
69
    double fps;
70 71
    int gop_size;
    int bitrate;
72
    int nmb;                    /* number of macroblocks in a frame */
73 74 75
    int buffer_size;
    int rcbufrate;
    int init_qp;
76
    int qp_constant[5];
77

78
    /* 1st pass stuff */
79 80 81 82 83 84 85 86 87 88
    int gop_qp;
    int buffer_fullness;
    int frames;                 /* frames in current gop */
    int pframes;
    int slice_type;
    int mb;                     /* MBs processed in current frame */
    int bits_gop;               /* allocated bits current gop */
    int bits_last_gop;          /* bits consumed in gop */
    int qp;                     /* qp for current frame */
    int qpm;                    /* qp for next MB */
89
    float qpa;                  /* average qp for last frame */
90
    int qps;
91 92
    float qp_avg_p;             /* average QP for P frames */
    float qp_last_p;
93 94 95 96 97
    int fbits;                  /* bits allocated for current frame */
    int ufbits;                 /* bits used for current frame */
    int nzcoeffs;               /* # of 0-quantized coefficients */
    int ncoeffs;                /* total # of coefficients */
    int overhead;
98 99 100 101 102

    /* 2pass stuff */
    FILE *p_stat_file_out;

    int num_entries;            /* number of ratecontrol_entry_ts */
Loren Merritt's avatar
Loren Merritt committed
103
    ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
104 105 106
    double last_qscale;
    double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
    int last_non_b_pict_type;
107 108
    double accum_p_qp;          /* for determining I-frame quant */
    double accum_p_norm;
109
    double lmin[5];             /* min qscale by frame type */
110
    double lmax[5];
Loren Merritt's avatar
Loren Merritt committed
111
    double lstep;               /* max change (multiply) in qscale per frame */
112 113 114 115
    double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
    double p_cplx_sum[5];
    double mv_bits_sum[5];
    int frame_count[5];         /* number of frames of each type */
116
};
Laurent Aimar's avatar
Laurent Aimar committed
117

118 119 120 121 122 123

/* Forward declarations of 2-pass helpers that are defined later in this file
 * but called from the public entry points above their definitions. */
static int init_pass2(x264_t *);
static float rate_estimate_qscale( x264_t *h, int pict_type );

/* Terminology:
 * qp = h.264's quantizer
 * qscale = linearized quantizer = Lagrange multiplier
 */
static inline double qp2qscale(double qp)
{
    return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
}
static inline double qscale2qp(double qscale)
{
    return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
}

/* Estimate the texture bits a first-pass frame would take at another qscale,
 * assuming bits * qscale stays constant.  qscale values below 0.1 are raised
 * to 0.1 so the division stays bounded. */
static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
{
    if(qscale<0.1)
        qscale = 0.1;
    return (double)(rce->i_tex_bits + rce->p_tex_bits + .1) * rce->qscale / qscale;
}

/* Inverse of qscale2bits(): the qscale at which a first-pass frame is expected
 * to take the given number of texture bits.  Bit budgets below 0.9 are treated
 * as 1.0 so the division stays bounded. */
static inline double bits2qscale(ratecontrol_entry_t *rce, double bits)
{
    if(bits<0.9)
        bits = 1.0;
    return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits + .1) / bits;
}


156
/* Allocate the rate-control state, store it in h->rc and initialise it from
 * h->param: constant-qp table, 1-pass CBR buffer model, qscale limits and,
 * when requested, the first-pass statistics (b_stat_read) and the stats
 * output file (b_stat_write).
 *
 * Returns 0 on success and -1 on failure (allocation failure, stats file that
 * cannot be opened/read or is damaged, init_pass2() failure, or stats output
 * file that cannot be opened).  On failure h->rc may already be set. */
int x264_ratecontrol_new( x264_t *h )
{
    x264_ratecontrol_t *rc;
    float bpp;
    int i;

    /* Needed(?) for 2 pass */
    x264_cpu_restore( h->param.cpu );

    h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
    if( !rc )
        return -1;
    memset(rc, 0, sizeof(*rc));

    /* FIXME: use integers */
    if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
        rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
    else
        rc->fps = 25.0;

    rc->gop_size = h->param.i_keyint_max;
    rc->bitrate = h->param.rc.i_bitrate * 1000;
    rc->nmb = h->mb.i_mb_count;

    /* I and B quantizers are derived from the P quantizer through the ip/pb factors */
    rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );

    /* Init 1pass CBR algo */
    if( h->param.rc.b_cbr ){
        rc->buffer_size = h->param.rc.i_rc_buffer_size * 1000;
        rc->buffer_fullness = h->param.rc.i_rc_init_buffer;
        rc->rcbufrate = rc->bitrate / rc->fps;

        if(rc->buffer_size < rc->rcbufrate){
            x264_log(h, X264_LOG_WARNING, "rc buffer size %i too small\n",
                     rc->buffer_size);
            rc->buffer_size = 0;
        }

        if(rc->buffer_size <= 0)
            rc->buffer_size = rc->bitrate / 2;

        if(rc->buffer_fullness > rc->buffer_size || rc->buffer_fullness < 0){
            x264_log(h, X264_LOG_WARNING, "invalid initial buffer fullness %i\n",
                     rc->buffer_fullness);
            rc->buffer_fullness = 0;
        }

        /* pick the starting qp from the bits-per-pixel budget */
        bpp = rc->bitrate / (rc->fps * h->param.i_width * h->param.i_height);
        if(bpp <= 0.6)
            rc->init_qp = 31;
        else if(bpp <= 1.4)
            rc->init_qp = 25;
        else if(bpp <= 2.4)
            rc->init_qp = 20;
        else
            rc->init_qp = 10;
        rc->gop_qp = rc->init_qp;

        rc->bits_last_gop = 0;

        x264_log(h, X264_LOG_DEBUG, "%f fps, %i bps, bufsize %i\n",
                 rc->fps, rc->bitrate, rc->buffer_size);
    }


    rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
    for( i = 0; i < 5; i++ )
    {
        rc->last_qscale_for[i] = qp2qscale(26);
        rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
        rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
    }
#if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
    rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
    rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
    rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
    rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
#endif

    /* Load stat file and init 2pass algo */
    if( h->param.rc.b_stat_read )
    {
        long file_size;
        int stats_size;
        char *p, *stats_in;
        FILE *stats_file;

        /* read 1st pass stats */
        assert( h->param.rc.psz_stat_in );
        stats_file = fopen( h->param.rc.psz_stat_in, "rb");
        if(!stats_file)
        {
            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
            return -1;
        }

        /* determine the file size; every step may fail and must be checked */
        if( fseek(stats_file, 0, SEEK_END) != 0
            || (file_size = ftell(stats_file)) < 0
            || file_size > INT_MAX - 10
            || fseek(stats_file, 0, SEEK_SET) != 0 )
        {
            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't read stats file\n");
            fclose(stats_file);
            return -1;
        }
        stats_size = (int)file_size;

        stats_in = x264_malloc(stats_size+10);
        if( !stats_in )
        {
            fclose(stats_file);
            return -1;
        }
        if( fread(stats_in, 1, stats_size, stats_file) != (size_t)stats_size )
        {
            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't read stats file\n");
            fclose(stats_file);
            x264_free(stats_in);
            return -1;
        }
        fclose(stats_file);
        /* strchr()/sscanf() below need a terminated string: clear the padding */
        memset(stats_in + stats_size, 0, 10);

        /* find number of pics */
        p = stats_in;
        for(i=-1; p; i++){
            p = strchr(p+1, ';');
        }
        i += h->param.i_bframe;
        rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
        if( !rc->entry )
        {
            x264_free(stats_in);
            return -1;
        }
        memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
        rc->num_entries= i;

        /* init all to skipped p frames */
        for(i=0; i<rc->num_entries; i++){
            ratecontrol_entry_t *rce = &rc->entry[i];
            rce->pict_type = SLICE_TYPE_P;
            rce->qscale = rce->new_qscale = qp2qscale(20);
            rce->misc_bits = rc->nmb + 10;
            rce->new_qp = 0;
        }

        /* read stats */
        p = stats_in;
        for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
            ratecontrol_entry_t *rce;
            int frame_number = -1;
            char pict_type = 0;
            int e;
            char *next;
            float qp = 0;

            next= strchr(p, ';');
            if(next){
                (*next)=0; //sscanf is unbelievably slow on looong strings
                next++;
            }
            e = sscanf(p, " in:%d ", &frame_number);

            /* a record without a valid frame index cannot be stored anywhere */
            if(e != 1 || frame_number < 0 || frame_number >= rc->num_entries){
                x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
                x264_free(stats_in);
                return -1;
            }
            rce = &rc->entry[frame_number];

            e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
                   &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
                   &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);

            switch(pict_type){
                case 'I': rce->idr = 1; /* fallthrough: an IDR frame is also an I frame */
                case 'i': rce->pict_type = SLICE_TYPE_I; break;
                case 'P': rce->pict_type = SLICE_TYPE_P; break;
                case 'B': rce->pict_type = SLICE_TYPE_B; break;
                default:  e = -1; break;
            }
            /* 1 frame index + 9 fields must have been converted */
            if(e != 10){
                x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
                x264_free(stats_in);
                return -1;
            }
            rce->qscale = qp2qscale(qp);
            p = next;
        }

        x264_free(stats_in);

        if(init_pass2(h) < 0) return -1;
    }

    /* Open output file */
    if( h->param.rc.b_stat_write )
    {
        rc->p_stat_file_out = fopen( h->param.rc.psz_stat_out, "wb" );
        if( rc->p_stat_file_out == NULL )
        {
            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
            return -1;
        }
    }

    return 0;
}

336
/* Release everything x264_ratecontrol_new() set up: close the stats output
 * file if one is open, free the 2-pass entry table if present, then free the
 * rate-control state itself.  h->rc is not reset by this function. */
void x264_ratecontrol_delete( x264_t *h )
{
    x264_ratecontrol_t *rc = h->rc;

    if( rc->p_stat_file_out )
        fclose( rc->p_stat_file_out );
    if( rc->entry )
        x264_free(rc->entry);
    x264_free( rc );
}

347
/* Choose the quantizer for the frame that is about to be encoded.
 *
 *  - CBR off:             use the constant qp for the slice type.
 *  - CBR on + stats read: use the 2-pass estimate from rate_estimate_qscale().
 *  - otherwise (1-pass):  split the GOP bit budget over the remaining frames,
 *                         derive a qp from the previous frame's coefficient
 *                         statistics and nudge it according to how close the
 *                         frame budget is to the buffer limits.
 *
 * The per-frame accumulators (ufbits, ncoeffs, nzcoeffs, mb, qps) are reset on
 * the 1-pass path before returning. */
void x264_ratecontrol_start( x264_t *h, int i_slice_type )
{
    x264_ratecontrol_t *rc = h->rc;
    int gframes, iframes, pframes, bframes;
    int minbits, maxbits;
    int gbits, fbits;
    int zn = 0;
    float kp;
    int gbuf;

    rc->slice_type = i_slice_type;

    x264_cpu_restore( h->param.cpu );

    if( !h->param.rc.b_cbr )
    {
        rc->qpm = rc->qpa = rc->qp =
            rc->qp_constant[ i_slice_type ];
        return;
    }
    else if( h->param.rc.b_stat_read )
    {
        int frame = h->fenc->i_frame;
        ratecontrol_entry_t *rce;
        assert( frame >= 0 && frame < rc->num_entries );
        rce = &h->rc->entry[frame];

        rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
        rc->qpm = rc->qpa = rc->qp = rce->new_qp =
            (int)(qscale2qp(rce->new_qscale) + 0.5);
        return;
    }

    switch(i_slice_type){
    case SLICE_TYPE_I:
        /* a new GOP starts: compute its bit budget and its base qp */
        gbuf = rc->buffer_fullness + (rc->gop_size-1) * rc->rcbufrate;
        rc->bits_gop = gbuf - rc->buffer_size / 2;

        if(!rc->mb && rc->pframes){
            int qp = rc->qp_avg_p / rc->pframes + 0.5;
#if 0 /* JM does this without explaining why */
            int gdq = (float) rc->gop_size / 15 + 0.5;
            if(gdq > 2)
                gdq = 2;
            qp -= gdq;
            if(qp > rc->qp_last_p - 2)
                qp--;
#endif
            qp = x264_clip3(qp, rc->gop_qp - 4, rc->gop_qp + 4);
            qp = x264_clip3(qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
            rc->gop_qp = qp;
        } else if(rc->frames > 4){
            rc->gop_qp = rc->init_qp;
        }

        kp = h->param.rc.f_ip_factor * h->param.rc.f_pb_factor;

        x264_log(h, X264_LOG_DEBUG,"gbuf=%i bits_gop=%i frames=%i gop_qp=%i\n",
                 gbuf, rc->bits_gop, rc->frames, rc->gop_qp);

        rc->bits_last_gop = 0;
        rc->frames = 0;
        rc->pframes = 0;
        rc->qp_avg_p = 0;
        break;

    case SLICE_TYPE_P:
        kp = h->param.rc.f_pb_factor;
        break;

    case SLICE_TYPE_B:
        kp = 1.0;
        break;

    default:
        x264_log(h, X264_LOG_WARNING, "ratecontrol: unknown slice type %i\n",
                 i_slice_type);
        kp = 1.0;
        break;
    }

    /* frames of each type still to be coded in this GOP */
    gframes = rc->gop_size - rc->frames;
    iframes = gframes / rc->gop_size;
    pframes = gframes / (h->param.i_bframe + 1) - iframes;
    bframes = gframes - pframes - iframes;

    /* share of the remaining GOP bits for this frame, weighted by frame type */
    gbits = rc->bits_gop - rc->bits_last_gop;
    fbits = kp * gbits /
        (h->param.rc.f_ip_factor * h->param.rc.f_pb_factor * iframes +
         h->param.rc.f_pb_factor * pframes + bframes);

    minbits = rc->buffer_fullness + rc->rcbufrate - rc->buffer_size;
    if(minbits < 0)
        minbits = 0;
    maxbits = rc->buffer_fullness;
    rc->fbits = x264_clip3(fbits, minbits, maxbits);

    if(i_slice_type == SLICE_TYPE_I){
        rc->qp = rc->gop_qp;
    } else if(rc->ncoeffs && rc->ufbits){
        int dqp, nonzc;

        /* zn: number of zero coefficients needed to hit fbits, extrapolated
         * from the previous frame's bits per non-zero coefficient */
        nonzc = (rc->ncoeffs - rc->nzcoeffs);
        if(nonzc == 0)
            zn = rc->ncoeffs;
        else if(rc->fbits < INT_MAX / nonzc)
            zn = rc->ncoeffs - rc->fbits * nonzc / rc->ufbits;
        else
            zn = 0;
        zn = x264_clip3(zn, 0, rc->ncoeffs);

        if(rc->nzcoeffs > 0){
            /* clip in floating point first: converting an out-of-range float
             * to int is undefined behaviour */
            float fdqp = h->param.rc.i_rc_sens * exp2f(rc->qpa / 6) *
                (zn - rc->nzcoeffs) / rc->nzcoeffs;
            dqp = (int)x264_clip3f(fdqp, -h->param.rc.i_qp_step, h->param.rc.i_qp_step);
        } else {
            /* the previous frame had no zero coefficients at all: the ratio
             * above would divide by zero, so saturate instead */
            dqp = zn > 0 ? h->param.rc.i_qp_step : 0;
        }
        dqp = x264_clip3(dqp, -h->param.rc.i_qp_step, h->param.rc.i_qp_step);
        rc->qp = (int)(rc->qpa + dqp + .5);
    }

    /* push the qp away from the buffer limits */
    if(rc->fbits > 0.9 * maxbits)
        rc->qp += 2;
    else if(rc->fbits > 0.8 * maxbits)
        rc->qp += 1;
    else if(rc->fbits < 1.1 * minbits)
        rc->qp -= 2;
    else if(rc->fbits < 1.2 * minbits)
        rc->qp -= 1;

    rc->qp = x264_clip3(rc->qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
    rc->qpm = rc->qp;

    x264_log(h, X264_LOG_DEBUG, "fbits=%i, qp=%i, z=%i, min=%i, max=%i\n",
             rc->fbits, rc->qpm, zn, minbits, maxbits);

    /* reset the per-frame accumulators */
    rc->fbits -= rc->overhead;
    rc->ufbits = 0;
    rc->ncoeffs = 0;
    rc->nzcoeffs = 0;
    rc->mb = 0;
    rc->qps = 0;
}

486
/* Per-macroblock rate control for 1-pass CBR: account for the bits and
 * coefficients of the macroblock just coded and adapt rc->qpm, the qp of the
 * next macroblock, within +-3 of the frame qp.
 *
 * bits: number of bits produced by the macroblock just coded.
 * Does nothing when CBR is off or when 2-pass statistics are in use. */
void x264_ratecontrol_mb( x264_t *h, int bits )
{
    x264_ratecontrol_t *rc = h->rc;
    int rbits;
    int zn, enz, nonz;
    int rcoeffs;
    int dqp;
    int i;

    if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
        return;

    x264_cpu_restore( h->param.cpu );

    rc->qps += rc->qpm;
    rc->ufbits += bits;
    rc->mb++;

    /* 16 luma + 8 chroma 4x4 blocks, 16 coefficients each */
    for(i = 0; i < 16 + 8; i++)
        rc->nzcoeffs += 16 - h->mb.cache.non_zero_count[x264_scan8[i]];
    rc->ncoeffs += 16 * (16 + 8);

    /* no adaptation during the first 1/16th of the frame nor after the last MB */
    if(rc->mb < rc->nmb / 16)
        return;
    else if(rc->mb == rc->nmb)
        return;

    rcoeffs = (rc->nmb - rc->mb) * 16 * 24;
    rbits = rc->fbits - rc->ufbits;
/*     if(rbits < 0) */
/*      rbits = 0; */

/*     zn = (rc->nmb - rc->mb) * 16 * 24; */
    /* zn: zero coefficients needed in the remaining MBs to stay within rbits */
    nonz = (rc->ncoeffs - rc->nzcoeffs);
    if(nonz == 0)
        zn = rcoeffs;
    else if(rc->ufbits && rbits < INT_MAX / nonz)
        zn = rcoeffs - rbits * nonz / rc->ufbits;
    else
        zn = 0;
    zn = x264_clip3(zn, 0, rcoeffs);

    /* enz: zero coefficients expected in the remaining MBs at the current
     * rate.  The product overflows int on large frames, so use 64 bits; the
     * quotient is bounded by 384 * (nmb - mb) and fits in int again. */
    enz = (int)( (int64_t)rc->nzcoeffs * (rc->nmb - rc->mb) / rc->mb );

    if(enz > 0){
        /* Clamp before the float->int conversion (out-of-range conversion is
         * undefined).  +-128 is far outside the +-3 window applied below, so
         * the clamp never changes the final qpm. */
        float fdqp = (float) 2*h->param.rc.i_rc_sens * exp2f((float) rc->qps / rc->mb / 6) *
            (zn - enz) / enz;
        dqp = (int)x264_clip3f(fdqp, -128, 128);
    } else {
        /* no zeros expected: avoid dividing by zero; +6 saturates the
         * +-3 window below whenever more zeros are needed */
        dqp = zn > 0 ? 6 : 0;
    }
    rc->qpm = x264_clip3(rc->qpm + dqp, rc->qp - 3, rc->qp + 3);
    if(rbits <= 0)
        rc->qpm++;
    rc->qpm = x264_clip3(rc->qpm, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
}

536
/* Return the quantizer to use for the next macroblock (rc->qpm). */
int  x264_ratecontrol_qp( x264_t *h )
{
    return h->rc->qpm;
}
Laurent Aimar's avatar
Laurent Aimar committed
540

541 542 543 544
/* Return the frame type decided for frame_num.
 *
 * With first-pass statistics loaded (b_stat_read) the type recorded in the
 * stats is returned as X264_TYPE_IDR / _I / _B / _P; a frame beyond the end of
 * the stats is logged as an error and treated as X264_TYPE_P.  Without stats
 * the decision is left to the caller (X264_TYPE_AUTO). */
int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
{
    if( h->param.rc.b_stat_read )
    {
        if( frame_num >= h->rc->num_entries )
        {
            x264_log(h, X264_LOG_ERROR, "More input frames than in the 1st pass\n");
            return X264_TYPE_P;
        }
        switch( h->rc->entry[frame_num].pict_type )
        {
            case SLICE_TYPE_I:
                return h->rc->entry[frame_num].idr ? X264_TYPE_IDR : X264_TYPE_I;

            case SLICE_TYPE_B:
                return X264_TYPE_B;

            case SLICE_TYPE_P:
            default:
                return X264_TYPE_P;
        }
    }
    else
    {
        return X264_TYPE_AUTO;
    }
}

569 570 571
/* Finish rate control for the frame just encoded.
 *
 * bits: total number of bits produced for the frame.
 *
 * Always updates the skip / inter macroblock totals in h->stat.frame.  When
 * b_stat_write is set, appends one record for the frame to the stats file.
 * For 1-pass CBR it then updates the buffer model, the average-qp statistics
 * and the per-GOP counters. */
void x264_ratecontrol_end( x264_t *h, int bits )
{
    x264_ratecontrol_t *rc = h->rc;
    int i;

    x264_cpu_restore( h->param.cpu );

    h->stat.frame.i_mb_count_skip = h->stat.frame.i_mb_count[P_SKIP] + h->stat.frame.i_mb_count[B_SKIP];
    h->stat.frame.i_mb_count_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
    for( i = B_DIRECT; i < B_8x8; i++ )
        h->stat.frame.i_mb_count_p += h->stat.frame.i_mb_count[i];

    if( h->param.rc.b_stat_write )
    {
        /* 'I' marks an I frame with poc 0, 'i' any other I frame */
        char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
                    : rc->slice_type==SLICE_TYPE_P ? 'P' : 'B';
        fprintf( rc->p_stat_file_out,
                 "in:%d out:%d type:%c q:%.3f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
                 h->fenc->i_frame, h->i_frame-1,
                 c_type, rc->qpa,
                 h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
                 h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
                 h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16],
                 h->stat.frame.i_mb_count_p,
                 h->stat.frame.i_mb_count_skip);
    }

    /* everything below is 1-pass CBR bookkeeping */
    if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
        return;

    rc->buffer_fullness += rc->rcbufrate - bits;
    if(rc->buffer_fullness < 0){
        x264_log(h, X264_LOG_WARNING, "buffer underflow %i\n",
                 rc->buffer_fullness);
        rc->buffer_fullness = 0;
    }

    rc->qpa = (float)rc->qps / rc->mb;
    if(rc->slice_type == SLICE_TYPE_P){
        rc->qp_avg_p += rc->qpa;
        rc->qp_last_p = rc->qpa;
        rc->pframes++;
    } else if(rc->slice_type == SLICE_TYPE_I){
        /* adjust the GOP qp when the I frame missed its budget by more than 10% */
        float err = (float) rc->ufbits / rc->fbits;
        if(err > 1.1)
            rc->gop_qp++;
        else if(err < 0.9)
            rc->gop_qp--;
    }

    /* bits of the frame that were not reported through x264_ratecontrol_mb() */
    rc->overhead = bits - rc->ufbits;

    x264_log(h, X264_LOG_DEBUG, "bits=%i, qp=%.1f, z=%i, zr=%6.3f, buf=%i\n",
             bits, rc->qpa, rc->nzcoeffs, (float) rc->nzcoeffs / rc->ncoeffs,
             rc->buffer_fullness);

    rc->bits_last_gop += bits;
    rc->frames++;
    rc->mb = 0;
}
629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644

/****************************************************************************
 * 2 pass functions
 ***************************************************************************/
/* Expression evaluator; only declared here, its definition is not part of the
 * visible portion of this file.  get_qscale() calls it with the rate-control
 * equation string, parallel NULL-terminated tables of constant values/names
 * and unary functions/names, no binary functions, and the current
 * ratecontrol_entry_t as the opaque argument. */
double x264_eval( char *s, double *const_value, const char **const_name,
                  double (**func1)(void *, double), const char **func1_name,
                  double (**func2)(void *, double, double), char **func2_name,
                  void *opaque );

/**
 * modifies the bitrate curve from pass1 for one frame
 *
 * Evaluates the rate-control equation h->param.rc.psz_rc_eq for the frame
 * described by rce, scales the resulting bit budget by rate_factor and
 * converts it to a qscale with bits2qscale().
 *
 * A NaN result of the equation, or a frame without texture bits, is treated
 * as a budget of 0 bits; 1 bit is always added to avoid a division by zero.
 */
static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor)
{
    x264_ratecontrol_t *rcc= h->rc;
    double bits;
    const int pict_type = rce->pict_type;

    /* values of the names in const_names[], in the same order */
    double const_values[]={
        rce->i_tex_bits * rce->qscale,
        rce->p_tex_bits * rce->qscale,
        (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
        /* per-macroblock average like the counts below: must not truncate */
        (double)rce->mv_bits / rcc->nmb,
        (double)rce->i_count / rcc->nmb,
        (double)rce->p_count / rcc->nmb,
        (double)rce->s_count / rcc->nmb,
        rce->pict_type == SLICE_TYPE_I,
        rce->pict_type == SLICE_TYPE_P,
        rce->pict_type == SLICE_TYPE_B,
        h->param.rc.f_qcompress,
        rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
        rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
        rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
        rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
        (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
        rce->blurred_complexity,
        0
    };
    static const char *const_names[]={
        "iTex",
        "pTex",
        "tex",
        "mv",
        "iCount",
        "pCount",
        "sCount",
        "isI",
        "isP",
        "isB",
        "qComp",
        "avgIITex",
        "avgPITex",
        "avgPPTex",
        "avgBPTex",
        "avgTex",
        "blurTex",
        NULL
    };
    /* unary functions available to the equation; rce is passed as the opaque argument */
    static double (*func1[])(void *, double)={
        (void *)bits2qscale,
        (void *)qscale2bits,
        NULL
    };
    static const char *func1_names[]={
        "bits2qp",
        "qp2bits",
        NULL
    };

    bits = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);

    // avoid NaN's in the rc_eq
    if(bits != bits || rce->i_tex_bits + rce->p_tex_bits == 0)
        bits = 0;

    bits *= rate_factor;
    if(bits<0.0) bits=0.0;
    bits += 1.0; //avoid 1/0 issues

    return bits2qscale(rce, bits);
}

/* Apply the frame-type relations and the per-frame step limit to a qscale.
 *
 * q is first overridden for I and B frames (derived from preceding P quants
 * through the ip/pb factors) and for P frames without texture bits, then
 * limited to within a factor lstep of the last qscale of the same type.  The
 * history in h->rc (last_qscale_for, last_non_b_pict_type, accum_p_qp,
 * accum_p_norm) is updated as a side effect, so calls must follow frame order.
 *
 * Returns the adjusted qscale. */
static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
{
    x264_ratecontrol_t *rcc = h->rc;
    const int pict_type = rce->pict_type;

    // force I/B quants as a function of P quants
    const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
    const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
    if( pict_type == SLICE_TYPE_I )
    {
        if( rcc->accum_p_norm > 0 && h->param.rc.f_ip_factor > 0 )
            q = qp2qscale(rcc->accum_p_qp / rcc->accum_p_norm);
        q /= fabs( h->param.rc.f_ip_factor );
    }
    else if( pict_type == SLICE_TYPE_B )
    {
        if( h->param.rc.f_pb_factor > 0 )
            q = last_non_b_q;
        q  *= fabs( h->param.rc.f_pb_factor );
    }
    else if( pict_type == SLICE_TYPE_P
             && rcc->last_non_b_pict_type == SLICE_TYPE_P
             && rce->i_tex_bits + rce->p_tex_bits == 0 )
    {
        q = last_p_q;
    }

    /* last qscale / qdiff stuff */
    if(rcc->last_non_b_pict_type==pict_type || pict_type!=SLICE_TYPE_I)
    {
        double last_q = rcc->last_qscale_for[pict_type];
        double max_qscale = last_q * rcc->lstep;
        double min_qscale = last_q / rcc->lstep;

        if     (q > max_qscale) q = max_qscale;
        else if(q < min_qscale) q = min_qscale;
    }

    rcc->last_qscale_for[pict_type] = q;
    if(pict_type!=SLICE_TYPE_B)
        rcc->last_non_b_pict_type = pict_type;
    if(pict_type==SLICE_TYPE_I)
    {
        rcc->accum_p_norm = 0;
        rcc->accum_p_qp = 0;
    }
    if(pict_type==SLICE_TYPE_P)
    {
        /* decaying average of P-frame qp; frames with many intra MBs
         * (small mask) discount the history accumulated so far */
        float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
        rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
        rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
    }
    return q;
}

Loren Merritt's avatar
Loren Merritt committed
766 767
// clip a qscale to between lmin and lmax
//
// The clip is soft: q is normalised to the [lmin, lmax] range in the log
// domain, passed through a logistic curve and mapped back, so the result
// always lies strictly between the limits of the frame's type.  When both
// limits are equal that single value is returned.
static double clip_qscale( x264_t *h, ratecontrol_entry_t *rce, double q )
{
    double lmin = h->rc->lmin[rce->pict_type];
    double lmax = h->rc->lmax[rce->pict_type];

    if(lmin==lmax){
        return lmin;
    }else{
        double min2 = log(lmin);
        double max2 = log(lmax);
        q = (log(q) - min2)/(max2-min2) - 0.5;
        q = 1.0/(1.0 + exp(-4*q));
        q = q*(max2-min2) + min2;
        return exp(q);
    }
}

// update qscale for 1 frame based on actual bits used so far
//
// B frames reuse the qscale of the last non-B frame scaled by f_pb_factor.
// For other frames the planned qscale (rce->new_qscale) is divided by a
// compensation factor derived from the difference between the bits spent so
// far and the bits expected at this point, relative to the buffer size; the
// factor is limited to [0.5, 2] and the result to [lmin, lmax] of the type.
// The returned value is remembered in rcc->last_qscale for following B frames.
static float rate_estimate_qscale(x264_t *h, int pict_type)
{
    float q;
    float br_compensation;
    double diff;
    int picture_number = h->fenc->i_frame;
    x264_ratecontrol_t *rcc = h->rc;
    ratecontrol_entry_t *rce;
    double lmin = rcc->lmin[pict_type];
    double lmax = rcc->lmax[pict_type];
    /* slice sizes are accumulated in bytes */
    int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
                          + h->stat.i_slice_size[SLICE_TYPE_P]
                          + h->stat.i_slice_size[SLICE_TYPE_B]);

    rce = &rcc->entry[picture_number];

    assert(pict_type == rce->pict_type);

    if(rce->pict_type == SLICE_TYPE_B)
    {
        return rcc->last_qscale * h->param.rc.f_pb_factor;
    }
    else
    {
        /* positive diff: more bits spent than planned -> raise the qscale */
        diff = (int64_t)total_bits - (int64_t)rce->expected_bits;
        br_compensation = (rcc->buffer_size - diff) / rcc->buffer_size;
        br_compensation = x264_clip3f(br_compensation, .5, 2);

        q = rce->new_qscale / br_compensation;
        q = x264_clip3f(q, lmin, lmax);
        rcc->last_qscale = q;
        return q;
    }
}

/* Plan the second encoding pass from the first-pass statistics in
 * h->rc->entry[0 .. num_entries-1].
 *
 * Steps:
 *   1. Sum the bits that do not depend on qscale (mv_bits + misc_bits) and
 *      accumulate per-picture-type statistics in rcc.
 *   2. Store a blurred complexity for every frame (weighted average over
 *      neighbouring frames).
 *   3. Search for the rate_factor whose resulting qscale curve spends the
 *      available bit budget, by successive halving of the step size.
 *
 * On return every entry has new_qscale and expected_bits (the cumulative
 * bits planned for all frames before it) filled in from the last search
 * iteration.
 *
 * Returns 0 on success, -1 if the budget cannot even cover the constant
 * bits, if a work buffer cannot be allocated, or if the final plan misses
 * the budget by more than 1%.
 */
static int init_pass2( x264_t *h )
{
    x264_ratecontrol_t *rcc = h->rc;
    uint64_t all_const_bits = 0;
    /* total budget: i_bitrate * 1000 bits per second over num_entries/fps
     * seconds (i_bitrate presumably in kbit/s, matching the kbps log below) */
    uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
    double rate_factor, step, step_mult;
    double qblur = h->param.rc.f_qblur;
    double cplxblur = h->param.rc.f_complexity_blur;
    /* "| 1" forces an odd window so it is centred on the current frame */
    const int filter_size = (int)(qblur*4) | 1;
    double expected_bits;
    double *qscale, *blurred_qscale;
    int i;

    /* find total/average complexity & const_bits */
    for(i=0; i<rcc->num_entries; i++){
        ratecontrol_entry_t *rce = &rcc->entry[i];
        all_const_bits += rce->mv_bits + rce->misc_bits;
        rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
        rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
        rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits;
        rcc->frame_count[rce->pict_type] ++;
    }

    if( all_available_bits < all_const_bits)
    {
        x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
                 (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
        return -1;
    }

    /* blur complexities */
    for(i=0; i<rcc->num_entries; i++){
        ratecontrol_entry_t *rce = &rcc->entry[i];
        double weight_sum = 0;
        double cplx_sum = 0;
        double weight = 1.0;
        int j;
        /* weighted average of cplx of future frames; the weight decays with
         * the squared fraction i_count/nmb of each frame that is passed.
         * NOTE(review): the texture bits of the neighbour (rcj) are scaled by
         * the qscale of the current frame (rce), not by rcj's own -- confirm
         * this is intended. */
        for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
            ratecontrol_entry_t *rcj = &rcc->entry[i+j];
            weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
            if(weight < .0001)
                break;
            weight_sum += weight;
            cplx_sum += weight * (rcj->i_tex_bits + rcj->p_tex_bits) * rce->qscale;
        }
        /* weighted average of cplx of past frames (j=0 is the frame itself) */
        weight = 1.0;
        for(j=0; j<cplxblur*2 && j<=i; j++){
            ratecontrol_entry_t *rcj = &rcc->entry[i-j];
            weight_sum += weight;
            cplx_sum += weight * (rcj->i_tex_bits + rcj->p_tex_bits) * rce->qscale;
            weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
            if(weight < .0001)
                break;
        }
        /* With f_complexity_blur <= 0 neither loop runs and weight_sum stays
         * 0; use the frame's own complexity (what the j=0 term would have
         * contributed) instead of storing 0/0 = NaN. */
        if(weight_sum > 0)
            rce->blurred_complexity = cplx_sum / weight_sum;
        else
            rce->blurred_complexity = (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale;
    }

    qscale = x264_malloc(sizeof(double)*rcc->num_entries);
    if(!qscale)
    {
        x264_log(h, X264_LOG_ERROR, "init_pass2: failed to allocate qscale buffer\n");
        return -1;
    }
    if(filter_size > 1)
    {
        blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
        if(!blurred_qscale)
        {
            x264_free(qscale);
            x264_log(h, X264_LOG_ERROR, "init_pass2: failed to allocate qscale buffer\n");
            return -1;
        }
    }
    else
        blurred_qscale = qscale; /* no smoothing: both names share one buffer */

    /* Bits spent at rate_factor 1.0 give the scale for the search steps.
     * Starting from 1 rather than 0 keeps the division below well defined. */
    expected_bits = 1;
    for(i=0; i<rcc->num_entries; i++)
        expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0));
    step_mult = all_available_bits / expected_bits;

    /* Successive approximation: try rate_factor + step, keep the increase
     * only if the plan still fits the budget, then halve the step. */
    rate_factor = 0;
    for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
        expected_bits = 0;
        rate_factor += step;

        /* find qscale */
        for(i=0; i<rcc->num_entries; i++){
            qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor);
        }

        /* fixed I/B QP relative to P mode; entries are visited last to first */
        for(i=rcc->num_entries-1; i>=0; i--){
            qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
            assert(qscale[i] >= 0);
        }

        /* smooth curve: Gaussian-weighted average over a window of
         * filter_size frames, using only frames of the same picture type */
        if(filter_size > 1){
            assert(filter_size%2==1);
            for(i=0; i<rcc->num_entries; i++){
                ratecontrol_entry_t *rce = &rcc->entry[i];
                int j;
                double q=0.0, sum=0.0;

                for(j=0; j<filter_size; j++){
                    int index = i+j-filter_size/2;
                    double d = index-i;
                    double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
                    if(index < 0 || index >= rcc->num_entries) continue;
                    if(rce->pict_type != rcc->entry[index].pict_type) continue;
                    q += qscale[index] * coeff;
                    sum += coeff;
                }
                /* sum > 0: the centre tap (index == i) is always included */
                blurred_qscale[i] = q/sum;
            }
        }

        /* find expected bits */
        for(i=0; i<rcc->num_entries; i++){
            ratecontrol_entry_t *rce = &rcc->entry[i];
            double bits;
            rce->new_qscale = clip_qscale(h, rce, blurred_qscale[i]);
            assert(rce->new_qscale >= 0);
            bits = qscale2bits(rce, rce->new_qscale) + rce->mv_bits + rce->misc_bits;

            /* cumulative bits planned for all frames before this one */
            rce->expected_bits = expected_bits;
            expected_bits += bits;
        }

        /* over budget: undo this step */
        if(expected_bits > all_available_bits) rate_factor -= step;
    }

    x264_free(qscale);
    if(filter_size > 1)
        x264_free(blurred_qscale);

    /* accept the plan only if it lands within 1% of the budget */
    if(fabs(expected_bits/all_available_bits - 1.0) > 0.01)
    {
        double avgq = 0;
        for(i=0; i<rcc->num_entries; i++)
            avgq += rcc->entry[i].new_qscale;
        avgq = qscale2qp(avgq / rcc->num_entries);

        x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
        /* %llu requires unsigned long long; uint64_t may be unsigned long */
        x264_log(h, X264_LOG_ERROR, "expected bits: %llu, available: %llu, avg QP: %.4lf\n",
                 (unsigned long long)expected_bits, (unsigned long long)all_available_bits, avgq);
        /* undershoot while already at the lowest allowed QP */
        if(expected_bits < all_available_bits && avgq < h->param.rc.i_qp_min + 1)
            x264_log(h, X264_LOG_ERROR, "try reducing bitrate or reducing qp_min\n");
        /* overshoot while already at the highest allowed QP */
        if(expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 1)
            x264_log(h, X264_LOG_ERROR, "try increasing bitrate or increasing qp_max\n");
        return -1;
    }

    return 0;
}