/***************************************************-*- coding: iso-8859-1 -*-
 * ratecontrol.c: h264 encoder library (Rate Control)
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
 *
 * Authors: Måns Rullgård <mru@mru.ath.cx>
 * 2 pass code: Michael Niedermayer <michaelni@gmx.at>
 *              Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

26
#define _ISOC99_SOURCE
27
#undef NDEBUG // always check asserts, the speed effect is far too small to disable them
Laurent Aimar's avatar
Laurent Aimar committed
28 29 30
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
31 32
#include <math.h>
#include <limits.h>
33
#include <assert.h>
Laurent Aimar's avatar
Laurent Aimar committed
34

35 36 37
#include "common/common.h"
#include "common/cpu.h"
#include "common/macroblock.h"
Laurent Aimar's avatar
Laurent Aimar committed
38 39
#include "ratecontrol.h"

Eric Petit's avatar
Eric Petit committed
40 41 42
#ifdef SYS_MACOSX
#define exp2f(x) ( (float) exp2( (x) ) )
#endif
43
#if defined(SYS_FREEBSD) || defined(SYS_BEOS)
44 45
#define exp2f(x) powf( 2, (x) )
#endif
46 47 48
#ifdef _MSC_VER
#define exp2f(x) pow( 2, (x) )
#endif
49 50 51
#ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't.
#define rename(src,dst) (unlink(dst), rename(src,dst))
#endif
52 53 54 55

/* Per-frame record used by the 2-pass rate control.
 * Most fields are filled from one line of the 1st-pass stats file
 * (see the parser in x264_ratecontrol_new). */
typedef struct
{
    int pict_type;              /* SLICE_TYPE_I / SLICE_TYPE_P / SLICE_TYPE_B */
    int kept_as_ref;            /* set for stats types 'I' and 'B' (upper case) */
    float qscale;               /* 1st-pass quantizer, converted with qp2qscale() */
    int mv_bits;                /* "mv:" field of the stats line */
    int i_tex_bits;             /* "itex:" field of the stats line */
    int p_tex_bits;             /* "ptex:" field of the stats line */
    int misc_bits;              /* "misc:" field of the stats line */
    uint64_t expected_bits;     /* compared against the bits actually produced so far */
    float new_qscale;           /* qscale chosen for this frame in the 2nd pass */
    int new_qp;                 /* new_qscale converted back to a qp */
    int i_count;                /* "imb:" field of the stats line */
    int p_count;                /* "pmb:" field of the stats line */
    int s_count;                /* "smb:" field of the stats line */
    float blurred_complexity;   /* exposed to the rc equation as "blurCplx" */
} ratecontrol_entry_t;

71 72 73
struct x264_ratecontrol_t
{
    /* constants */
74
    double fps;
75 76
    int gop_size;
    int bitrate;
77
    int nmb;                    /* number of macroblocks in a frame */
78 79 80
    int buffer_size;
    int rcbufrate;
    int init_qp;
81
    int qp_constant[5];
82

83
    /* 1st pass stuff */
84 85 86 87 88 89 90 91 92 93
    int gop_qp;
    int buffer_fullness;
    int frames;                 /* frames in current gop */
    int pframes;
    int slice_type;
    int mb;                     /* MBs processed in current frame */
    int bits_gop;               /* allocated bits current gop */
    int bits_last_gop;          /* bits consumed in gop */
    int qp;                     /* qp for current frame */
    int qpm;                    /* qp for next MB */
94
    float qpa;                  /* average qp for last frame */
95
    int qps;
96 97
    float qp_avg_p;             /* average QP for P frames */
    float qp_last_p;
98 99 100 101 102
    int fbits;                  /* bits allocated for current frame */
    int ufbits;                 /* bits used for current frame */
    int nzcoeffs;               /* # of 0-quantized coefficients */
    int ncoeffs;                /* total # of coefficients */
    int overhead;
Måns Rullgård's avatar
Måns Rullgård committed
103
    int qp_force;
104 105 106

    /* 2pass stuff */
    FILE *p_stat_file_out;
107
    char *psz_stat_file_tmpname;
108 109

    int num_entries;            /* number of ratecontrol_entry_ts */
Loren Merritt's avatar
Loren Merritt committed
110
    ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
111 112 113
    double last_qscale;
    double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
    int last_non_b_pict_type;
114 115
    double accum_p_qp;          /* for determining I-frame quant */
    double accum_p_norm;
116
    double last_accum_p_norm;
117
    double lmin[5];             /* min qscale by frame type */
118
    double lmax[5];
Loren Merritt's avatar
Loren Merritt committed
119
    double lstep;               /* max change (multiply) in qscale per frame */
120 121 122 123
    double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
    double p_cplx_sum[5];
    double mv_bits_sum[5];
    int frame_count[5];         /* number of frames of each type */
124
};
Laurent Aimar's avatar
Laurent Aimar committed
125

/* 2-pass helpers that are called before their definitions. */
static int init_pass2(x264_t *);
static float rate_estimate_qscale( x264_t *h, int pict_type );

/* Terminology:
 * qp = h.264's quantizer
 * qscale = linearized quantizer = Lagrange multiplier
 */
static inline double qp2qscale(double qp)
{
    return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
}
static inline double qscale2qp(double qscale)
{
    return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
}

143 144 145 146
/* Texture bitrate is not quite inversely proportional to qscale,
 * probably due the the changing number of SKIP blocks.
 * MV bits level off at about qp<=12, because the lambda used
 * for motion estimation is constant there. */
147 148
static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
{
Loren Merritt's avatar
Loren Merritt committed
149
    if(qscale<0.1)
150
        qscale = 0.1;
151 152
    return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
           + rce->mv_bits * pow( X264_MAX(rce->qscale, 12) / X264_MAX(qscale, 12), 0.5 );
153 154
}

/* There is no analytical inverse to the above formula. */
/* Disabled approximation, kept for reference: treats total bits as
 * inversely proportional to qscale. `bits` is raised to 1.0 if smaller. */
#if 0
static inline double bits2qscale(ratecontrol_entry_t *rce, double bits)
{
    if(bits<1.0)
        bits = 1.0;
    return (rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits + .1) * rce->qscale / bits;
}
#endif


166
int x264_ratecontrol_new( x264_t *h )
Laurent Aimar's avatar
Laurent Aimar committed
167
{
168
    x264_ratecontrol_t *rc;
169
    float bpp;
170
    int i;
171

172 173 174 175
    /* Needed(?) for 2 pass */
    x264_cpu_restore( h->param.cpu );

    h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
176 177
    memset(rc, 0, sizeof(*rc));

Måns Rullgård's avatar
Måns Rullgård committed
178 179 180 181 182 183
    /* FIXME: use integers */
    if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
        rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
    else
        rc->fps = 25.0;

184
    rc->gop_size = h->param.i_keyint_max;
185
    rc->bitrate = h->param.rc.i_bitrate * 1000;
186
    rc->nmb = h->mb.i_mb_count;
187

188 189 190 191
    rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );

192 193 194
    /* Currently there is no adaptive quant, and per-MB ratecontrol is used only in CBR. */
    h->mb.b_variable_qp = h->param.rc.b_cbr && !h->param.rc.b_stat_read;

195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
    /* Init 1pass CBR algo */
    if( h->param.rc.b_cbr ){
        rc->buffer_size = h->param.rc.i_rc_buffer_size * 1000;
        rc->buffer_fullness = h->param.rc.i_rc_init_buffer;
        rc->rcbufrate = rc->bitrate / rc->fps;

        if(rc->buffer_size < rc->rcbufrate){
            x264_log(h, X264_LOG_WARNING, "rc buffer size %i too small\n",
                     rc->buffer_size);
            rc->buffer_size = 0;
        }

        if(rc->buffer_size <= 0)
            rc->buffer_size = rc->bitrate / 2;

        if(rc->buffer_fullness > rc->buffer_size || rc->buffer_fullness < 0){
            x264_log(h, X264_LOG_WARNING, "invalid initial buffer fullness %i\n",
                     rc->buffer_fullness);
            rc->buffer_fullness = 0;
        }

        bpp = rc->bitrate / (rc->fps * h->param.i_width * h->param.i_height);
        if(bpp <= 0.6)
            rc->init_qp = 31;
        else if(bpp <= 1.4)
            rc->init_qp = 25;
        else if(bpp <= 2.4)
            rc->init_qp = 20;
        else
            rc->init_qp = 10;
        rc->gop_qp = rc->init_qp;

        rc->bits_last_gop = 0;

        x264_log(h, X264_LOG_DEBUG, "%f fps, %i bps, bufsize %i\n",
                 rc->fps, rc->bitrate, rc->buffer_size);
231 232
    }

233

Loren Merritt's avatar
Loren Merritt committed
234
    rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
235
    rc->last_qscale = qp2qscale(26);
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
    for( i = 0; i < 5; i++ )
    {
        rc->last_qscale_for[i] = qp2qscale(26);
        rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
        rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
    }
#if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
    rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
    rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
    rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
    rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
#endif

    /* Load stat file and init 2pass algo */
    if( h->param.rc.b_stat_read )
    {
        int stats_size;
        char *p, *stats_in;
        FILE *stats_file;

        /* read 1st pass stats */
        assert( h->param.rc.psz_stat_in );
258
        stats_file = fopen( h->param.rc.psz_stat_in, "rb");
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
        if(!stats_file)
        {
            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
            return -1;
        }
        // FIXME: error checking
        fseek(stats_file, 0, SEEK_END);
        stats_size = ftell(stats_file);
        fseek(stats_file, 0, SEEK_SET);
        stats_in = x264_malloc(stats_size+10);
        fread(stats_in, 1, stats_size, stats_file);
        fclose(stats_file);

        /* find number of pics */
        p = stats_in;
        for(i=-1; p; i++){
            p = strchr(p+1, ';');
        }
        i += h->param.i_bframe;
        rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
        memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
        rc->num_entries= i;

        /* init all to skipped p frames */
        for(i=0; i<rc->num_entries; i++){
            ratecontrol_entry_t *rce = &rc->entry[i];
285
            rce->pict_type = SLICE_TYPE_P;
286 287 288 289 290 291 292 293 294
            rce->qscale = rce->new_qscale = qp2qscale(20);
            rce->misc_bits = rc->nmb + 10;
            rce->new_qp = 0;
        }

        /* read stats */
        p = stats_in;
        for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
            ratecontrol_entry_t *rce;
295 296
            int frame_number;
            char pict_type;
297 298 299 300 301 302 303 304 305
            int e;
            char *next;
            float qp;

            next= strchr(p, ';');
            if(next){
                (*next)=0; //sscanf is unbelievably slow on looong strings
                next++;
            }
306
            e = sscanf(p, " in:%d ", &frame_number);
307

308 309 310
            assert(frame_number >= 0);
            assert(frame_number < rc->num_entries);
            rce = &rc->entry[frame_number];
311

312 313
            e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
                   &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
314
                   &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
315 316

            switch(pict_type){
317
                case 'I': rce->kept_as_ref = 1;
318 319
                case 'i': rce->pict_type = SLICE_TYPE_I; break;
                case 'P': rce->pict_type = SLICE_TYPE_P; break;
320 321
                case 'B': rce->kept_as_ref = 1;
                case 'b': rce->pict_type = SLICE_TYPE_B; break;
322 323
                default:  e = -1; break;
            }
324 325 326 327 328 329 330 331 332 333
            if(e != 10){
                x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
                return -1;
            }
            rce->qscale = qp2qscale(qp);
            p = next;
        }

        x264_free(stats_in);

334 335 336 337 338
        /* If using 2pass with constant quant, no need to run the bitrate allocation */
        if(h->param.rc.b_cbr)
        {
            if(init_pass2(h) < 0) return -1;
        }
339 340 341
    }

    /* Open output file */
342 343
    /* If input and output files are the same, output to a temp file
     * and move it to the real name only when it's complete */
344 345
    if( h->param.rc.b_stat_write )
    {
346 347 348
        rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
        strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
        strcat( rc->psz_stat_file_tmpname, ".temp" );
349 350

        rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
351 352 353 354 355 356
        if( rc->p_stat_file_out == NULL )
        {
            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
            return -1;
        }
    }
357 358

    return 0;
Laurent Aimar's avatar
Laurent Aimar committed
359 360
}

361
void x264_ratecontrol_delete( x264_t *h )
Laurent Aimar's avatar
Laurent Aimar committed
362
{
363
    x264_ratecontrol_t *rc = h->rc;
364 365

    if( rc->p_stat_file_out )
366
    {
367
        fclose( rc->p_stat_file_out );
368 369 370 371 372 373 374
        if( h->i_frame >= rc->num_entries - h->param.i_bframe )
            if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
            {
                x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                          rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
            }
        x264_free( rc->psz_stat_file_tmpname );
375
    }
376 377
    if( rc->entry )
        x264_free(rc->entry);
Laurent Aimar's avatar
Laurent Aimar committed
378 379 380
    x264_free( rc );
}

Måns Rullgård's avatar
Måns Rullgård committed
381
void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
Laurent Aimar's avatar
Laurent Aimar committed
382
{
383 384 385 386 387 388 389 390
    x264_ratecontrol_t *rc = h->rc;
    int gframes, iframes, pframes, bframes;
    int minbits, maxbits;
    int gbits, fbits;
    int zn = 0;
    float kp;
    int gbuf;

391
    rc->slice_type = i_slice_type;
392 393 394

    x264_cpu_restore( h->param.cpu );

Måns Rullgård's avatar
Måns Rullgård committed
395 396
    rc->qp_force = i_force_qp;

397 398
    if( !h->param.rc.b_cbr )
    {
399
        int q;
Måns Rullgård's avatar
Måns Rullgård committed
400 401 402
        if( i_force_qp )
            q = i_force_qp - 1;
        else if( i_slice_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
403 404 405 406
            q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
        else
            q = rc->qp_constant[ i_slice_type ];
        rc->qpm = rc->qpa = rc->qp = q;
407 408 409
        return;
    }
    else if( h->param.rc.b_stat_read )
410 411
    {
        int frame = h->fenc->i_frame;
412
        ratecontrol_entry_t *rce;
413
        assert( frame >= 0 && frame < rc->num_entries );
414
        rce = &h->rc->entry[frame];
415 416

        rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
417 418
        rc->qpm = rc->qpa = rc->qp = rce->new_qp =
            (int)(qscale2qp(rce->new_qscale) + 0.5);
419 420
        return;
    }
421 422 423 424 425 426 427

    switch(i_slice_type){
    case SLICE_TYPE_I:
        gbuf = rc->buffer_fullness + (rc->gop_size-1) * rc->rcbufrate;
        rc->bits_gop = gbuf - rc->buffer_size / 2;

        if(!rc->mb && rc->pframes){
428
            int qp = rc->qp_avg_p / rc->pframes + 0.5;
429 430 431 432 433 434 435 436 437
#if 0 /* JM does this without explaining why */
            int gdq = (float) rc->gop_size / 15 + 0.5;
            if(gdq > 2)
                gdq = 2;
            qp -= gdq;
            if(qp > rc->qp_last_p - 2)
                qp--;
#endif
            qp = x264_clip3(qp, rc->gop_qp - 4, rc->gop_qp + 4);
Loren Merritt's avatar
Loren Merritt committed
438
            qp = x264_clip3(qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
439
            rc->gop_qp = qp;
440
        } else if(rc->frames > 4){
441 442 443
            rc->gop_qp = rc->init_qp;
        }

444
        kp = h->param.rc.f_ip_factor * h->param.rc.f_pb_factor;
445

446 447
        x264_log(h, X264_LOG_DEBUG,"gbuf=%i bits_gop=%i frames=%i gop_qp=%i\n",
                 gbuf, rc->bits_gop, rc->frames, rc->gop_qp);
448

449 450 451 452 453 454 455
        rc->bits_last_gop = 0;
        rc->frames = 0;
        rc->pframes = 0;
        rc->qp_avg_p = 0;
        break;

    case SLICE_TYPE_P:
456
        kp = h->param.rc.f_pb_factor;
457 458 459 460 461 462 463
        break;

    case SLICE_TYPE_B:
        kp = 1.0;
        break;

    default:
464 465
        x264_log(h, X264_LOG_WARNING, "ratecontrol: unknown slice type %i\n",
                 i_slice_type);
466 467 468 469 470 471 472 473 474 475 476
        kp = 1.0;
        break;
    }

    gframes = rc->gop_size - rc->frames;
    iframes = gframes / rc->gop_size;
    pframes = gframes / (h->param.i_bframe + 1) - iframes;
    bframes = gframes - pframes - iframes;

    gbits = rc->bits_gop - rc->bits_last_gop;
    fbits = kp * gbits /
477 478
        (h->param.rc.f_ip_factor * h->param.rc.f_pb_factor * iframes +
         h->param.rc.f_pb_factor * pframes + bframes);
479 480 481 482 483 484 485 486 487

    minbits = rc->buffer_fullness + rc->rcbufrate - rc->buffer_size;
    if(minbits < 0)
        minbits = 0;
    maxbits = rc->buffer_fullness;
    rc->fbits = x264_clip3(fbits, minbits, maxbits);

    if(i_slice_type == SLICE_TYPE_I){
        rc->qp = rc->gop_qp;
488
    } else if(rc->ncoeffs && rc->ufbits){
489 490 491 492 493 494 495 496 497 498
        int dqp, nonzc;

        nonzc = (rc->ncoeffs - rc->nzcoeffs);
        if(nonzc == 0)
            zn = rc->ncoeffs;
        else if(rc->fbits < INT_MAX / nonzc)
            zn = rc->ncoeffs - rc->fbits * nonzc / rc->ufbits;
        else
            zn = 0;
        zn = x264_clip3(zn, 0, rc->ncoeffs);
499
        dqp = h->param.rc.i_rc_sens * exp2f(rc->qpa / 6) *
500
            (zn - rc->nzcoeffs) / rc->nzcoeffs;
501
        dqp = x264_clip3(dqp, -h->param.rc.i_qp_step, h->param.rc.i_qp_step);
502
        rc->qp = (int)(rc->qpa + dqp + .5);
503 504
    }

505
    if(rc->fbits > 0.9 * maxbits)
506
        rc->qp += 2;
507 508
    else if(rc->fbits > 0.8 * maxbits)
        rc->qp += 1;
509 510
    else if(rc->fbits < 1.1 * minbits)
        rc->qp -= 2;
511 512
    else if(rc->fbits < 1.2 * minbits)
        rc->qp -= 1;
513

Måns Rullgård's avatar
Måns Rullgård committed
514 515 516 517 518 519
    if( i_force_qp > 0 ) {
        rc->qpm = rc->qpa = rc->qp = i_force_qp - 1;
    } else {
        rc->qp = rc->qpm =
            x264_clip3(rc->qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
    }
520

521 522
    x264_log(h, X264_LOG_DEBUG, "fbits=%i, qp=%i, z=%i, min=%i, max=%i\n",
             rc->fbits, rc->qpm, zn, minbits, maxbits);
523 524 525 526 527 528 529

    rc->fbits -= rc->overhead;
    rc->ufbits = 0;
    rc->ncoeffs = 0;
    rc->nzcoeffs = 0;
    rc->mb = 0;
    rc->qps = 0;
Laurent Aimar's avatar
Laurent Aimar committed
530 531
}

532
void x264_ratecontrol_mb( x264_t *h, int bits )
Laurent Aimar's avatar
Laurent Aimar committed
533
{
534 535
    x264_ratecontrol_t *rc = h->rc;
    int rbits;
536 537
    int zn, enz, nonz;
    int rcoeffs;
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
    int dqp;
    int i;

    x264_cpu_restore( h->param.cpu );

    rc->qps += rc->qpm;
    rc->ufbits += bits;
    rc->mb++;

    for(i = 0; i < 16 + 8; i++)
        rc->nzcoeffs += 16 - h->mb.cache.non_zero_count[x264_scan8[i]];
    rc->ncoeffs += 16 * (16 + 8);

    if(rc->mb < rc->nmb / 16)
        return;
    else if(rc->mb == rc->nmb)
        return;
Måns Rullgård's avatar
Måns Rullgård committed
555 556
    else if(rc->qp_force > 0)
        return;
557

558
    rcoeffs = (rc->nmb - rc->mb) * 16 * 24;
559 560 561 562
    rbits = rc->fbits - rc->ufbits;
/*     if(rbits < 0) */
/*      rbits = 0; */

563 564 565 566 567 568 569 570 571
/*     zn = (rc->nmb - rc->mb) * 16 * 24; */
    nonz = (rc->ncoeffs - rc->nzcoeffs);
    if(nonz == 0)
        zn = rcoeffs;
    else if(rc->ufbits && rbits < INT_MAX / nonz)
        zn = rcoeffs - rbits * nonz / rc->ufbits;
    else
        zn = 0;
    zn = x264_clip3(zn, 0, rcoeffs);
572
    enz = rc->nzcoeffs * (rc->nmb - rc->mb) / rc->mb;
573
    dqp = (float) 2*h->param.rc.i_rc_sens * exp2f((float) rc->qps / rc->mb / 6) *
574 575 576 577
        (zn - enz) / enz;
    rc->qpm = x264_clip3(rc->qpm + dqp, rc->qp - 3, rc->qp + 3);
    if(rbits <= 0)
        rc->qpm++;
578
    rc->qpm = x264_clip3(rc->qpm, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
Laurent Aimar's avatar
Laurent Aimar committed
579 580
}

581
int  x264_ratecontrol_qp( x264_t *h )
Laurent Aimar's avatar
Laurent Aimar committed
582
{
583 584
    return h->rc->qpm;
}
Laurent Aimar's avatar
Laurent Aimar committed
585

586 587 588 589
int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
{
    if( h->param.rc.b_stat_read )
    {
590 591 592 593 594
        if( frame_num >= h->rc->num_entries )
        {
            x264_log(h, X264_LOG_ERROR, "More input frames than in the 1st pass\n");
            return X264_TYPE_P;
        }
595
        switch( h->rc->entry[frame_num].pict_type )
596 597
        {
            case SLICE_TYPE_I:
598
                return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
599 600

            case SLICE_TYPE_B:
601
                return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
602 603 604 605 606 607 608 609 610 611 612 613

            case SLICE_TYPE_P:
            default:
                return X264_TYPE_P;
        }
    }
    else
    {
        return X264_TYPE_AUTO;
    }
}

614 615 616
void x264_ratecontrol_end( x264_t *h, int bits )
{
    x264_ratecontrol_t *rc = h->rc;
Loren Merritt's avatar
Loren Merritt committed
617
    int i;
Laurent Aimar's avatar
Laurent Aimar committed
618

619 620
    x264_cpu_restore( h->param.cpu );

Loren Merritt's avatar
Loren Merritt committed
621 622 623 624 625
    h->stat.frame.i_mb_count_skip = h->stat.frame.i_mb_count[P_SKIP] + h->stat.frame.i_mb_count[B_SKIP];
    h->stat.frame.i_mb_count_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
    for( i = B_DIRECT; i < B_8x8; i++ )
        h->stat.frame.i_mb_count_p += h->stat.frame.i_mb_count[i];

626 627
    if( h->param.rc.b_stat_write )
    {
628
        char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
629 630
                    : rc->slice_type==SLICE_TYPE_P ? 'P'
                    : h->fenc->b_kept_as_ref ? 'B' : 'b';
631
        fprintf( rc->p_stat_file_out,
632
                 "in:%d out:%d type:%c q:%.3f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
633
                 h->fenc->i_frame, h->i_frame-1,
634
                 c_type, rc->qpa,
635 636 637
                 h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
                 h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
                 h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16],
Loren Merritt's avatar
Loren Merritt committed
638 639
                 h->stat.frame.i_mb_count_p,
                 h->stat.frame.i_mb_count_skip);
640 641 642
    }

    if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
643
        return;
Laurent Aimar's avatar
Laurent Aimar committed
644

645 646
    rc->buffer_fullness += rc->rcbufrate - bits;
    if(rc->buffer_fullness < 0){
647 648
        x264_log(h, X264_LOG_WARNING, "buffer underflow %i\n",
                 rc->buffer_fullness);
649
        rc->buffer_fullness = 0;
Laurent Aimar's avatar
Laurent Aimar committed
650 651
    }

652
    rc->qpa = (float)rc->qps / rc->mb;
653 654 655 656
    if(rc->slice_type == SLICE_TYPE_P){
        rc->qp_avg_p += rc->qpa;
        rc->qp_last_p = rc->qpa;
        rc->pframes++;
657 658 659 660 661 662
    } else if(rc->slice_type == SLICE_TYPE_I){
        float err = (float) rc->ufbits / rc->fbits;
        if(err > 1.1)
            rc->gop_qp++;
        else if(err < 0.9)
            rc->gop_qp--;
Laurent Aimar's avatar
Laurent Aimar committed
663 664
    }

665 666
    rc->overhead = bits - rc->ufbits;

667
    x264_log(h, X264_LOG_DEBUG, "bits=%i, qp=%.1f, z=%i, zr=%6.3f, buf=%i\n",
668 669
             bits, rc->qpa, rc->nzcoeffs, (float) rc->nzcoeffs / rc->ncoeffs,
             rc->buffer_fullness);
670 671 672 673

    rc->bits_last_gop += bits;
    rc->frames++;
    rc->mb = 0;
Laurent Aimar's avatar
Laurent Aimar committed
674
}
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689

/****************************************************************************
 * 2 pass functions
 ***************************************************************************/
/* Expression evaluator used by get_qscale() to evaluate the rate control
 * equation string. Only declared here; the definition is not part of this
 * file section.
 * As used by get_qscale(): const_value/const_name are parallel lists of
 * named constants, func1/func1_name parallel lists of one-argument
 * functions, both name lists NULL-terminated; `opaque` is passed as the
 * rate control entry. NOTE(review): exact semantics of func2 and of the
 * terminators should be confirmed against the definition. */
double x264_eval( char *s, double *const_value, const char **const_name,
                  double (**func1)(void *, double), const char **func1_name,
                  double (**func2)(void *, double, double), char **func2_name,
                  void *opaque );

/**
 * modifies the bitrate curve from pass1 for one frame
 *
 * Evaluates the rate control equation (param.rc.psz_rc_eq) with the
 * statistics of `rce` bound to named constants, and divides the result by
 * `rate_factor`. If the result is NaN, or the frame has no texture and no
 * mv bits, the previously returned qscale is reused; otherwise the new
 * value is remembered in rcc->last_qscale.
 */
static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor)
{
    x264_ratecontrol_t *rcc= h->rc;
    const int pict_type = rce->pict_type;
    double q;

    /* values and names below are parallel arrays: keep them in the same order */
    double const_values[]={
        rce->i_tex_bits * rce->qscale,
        rce->p_tex_bits * rce->qscale,
        (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
        rce->mv_bits * rce->qscale,
        (double)rce->i_count / rcc->nmb,
        (double)rce->p_count / rcc->nmb,
        (double)rce->s_count / rcc->nmb,
        rce->pict_type == SLICE_TYPE_I,
        rce->pict_type == SLICE_TYPE_P,
        rce->pict_type == SLICE_TYPE_B,
        h->param.rc.f_qcompress,
        rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
        rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
        rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
        rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
        (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
        rce->blurred_complexity,
        0
    };
    static const char *const_names[]={
        "iTex",
        "pTex",
        "tex",
        "mv",
        "iCount",
        "pCount",
        "sCount",
        "isI",
        "isP",
        "isB",
        "qComp",
        "avgIITex",
        "avgPITex",
        "avgPPTex",
        "avgBPTex",
        "avgTex",
        "blurCplx",
        NULL
    };
    /* one-argument functions available to the equation; the evaluator
     * receives `rce` as their first (opaque) argument */
    static double (*func1[])(void *, double)={
//      (void *)bits2qscale,
        (void *)qscale2bits,
        NULL
    };
    static const char *func1_names[]={
//      "bits2qp",
        "qp2bits",
        NULL
    };

    q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
    q /= rate_factor;

    // avoid NaN's in the rc_eq
    if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
        q = rcc->last_qscale;
    else
        rcc->last_qscale = q;

    return q;
}

/* Adjust the qscale `q` proposed for frame `rce`:
 *  - I and B frames are tied to the quantizers of neighbouring P / non-B
 *    frames through ip_factor and pb_factor;
 *  - a P frame without texture bits that follows a P frame reuses the
 *    last P qscale;
 *  - the change relative to the last frame of the same type is limited to
 *    a factor of rcc->lstep.
 * Updates the per-type history in h->rc and returns the adjusted qscale. */
static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
{
    x264_ratecontrol_t *rcc = h->rc;
    const int pict_type = rce->pict_type;

    // force I/B quants as a function of P quants
    const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
    const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
    if( pict_type == SLICE_TYPE_I )
    {
        double iq = q;
        /* weighted mean qp of the P frames since the last I frame */
        double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
        double ip_factor = fabs( h->param.rc.f_ip_factor );
        /* don't apply ip_factor if the following frame is also I */
        if( rcc->accum_p_norm <= 0 )
            q = iq;
        else if( h->param.rc.f_ip_factor < 0 )
            q = iq / ip_factor;
        else if( rcc->accum_p_norm >= 1 )
            q = pq / ip_factor;
        else
            /* less than one P frame of weight: blend the two estimates */
            q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
    }
    else if( pict_type == SLICE_TYPE_B )
    {
        if( h->param.rc.f_pb_factor > 0 )
            q = last_non_b_q;
        if( !rce->kept_as_ref )
            q *= fabs( h->param.rc.f_pb_factor );
    }
    else if( pict_type == SLICE_TYPE_P
             && rcc->last_non_b_pict_type == SLICE_TYPE_P
             && rce->i_tex_bits + rce->p_tex_bits == 0 )
    {
        q = last_p_q;
    }

    /* last qscale / qdiff stuff */
    /* TODO take into account whether the I-frame is a scene cut
     * or just a seek point */
    if(rcc->last_non_b_pict_type==pict_type
       && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
    {
        double last_q = rcc->last_qscale_for[pict_type];
        double max_qscale = last_q * rcc->lstep;
        double min_qscale = last_q / rcc->lstep;

        if     (q > max_qscale) q = max_qscale;
        else if(q < min_qscale) q = min_qscale;
    }

    rcc->last_qscale_for[pict_type] = q;
    if(pict_type!=SLICE_TYPE_B)
        rcc->last_non_b_pict_type = pict_type;
    if(pict_type==SLICE_TYPE_I)
    {
        /* an I frame restarts the P-frame accumulators */
        rcc->last_accum_p_norm = rcc->accum_p_norm;
        rcc->accum_p_norm = 0;
        rcc->accum_p_qp = 0;
    }
    if(pict_type==SLICE_TYPE_P)
    {
        /* P frames with many intra MBs count less, and also decay the history */
        float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
        rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
        rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
    }
    return q;
}

Loren Merritt's avatar
Loren Merritt committed
825 826
/* Soft-clip a qscale into the range [lmin, lmax] of the entry's picture type.
 *
 * The limits are read from h->rc->lmin / h->rc->lmax, indexed by
 * rce->pict_type.  When both limits are equal that value is returned
 * directly.  Otherwise q is normalised in the log domain relative to the
 * limits, passed through a logistic curve, and mapped back, so the result
 * approaches the limits smoothly instead of being cut off hard. */
static double clip_qscale( x264_t *h, ratecontrol_entry_t *rce, double q )
{
    const double lo = h->rc->lmin[rce->pict_type];
    const double hi = h->rc->lmax[rce->pict_type];
    double log_lo, log_hi, log_range, t;

    /* degenerate range: nothing to interpolate */
    if( lo == hi )
        return lo;

    log_lo = log( lo );
    log_hi = log( hi );
    log_range = log_hi - log_lo;

    /* position of q inside the log range, centred on 0 */
    t = (log( q ) - log_lo) / log_range - 0.5;
    /* logistic squash into (0,1) */
    t = 1.0 / (1.0 + exp( -4 * t ));
    /* back to the log-qscale domain */
    t = t * log_range + log_lo;

    return exp( t );
}

/* Update the qscale for one frame based on the bits actually used so far.
 *
 * h:         encoder context; reads h->fenc, h->stat, h->param.rc and h->rc.
 * pict_type: slice type of the frame being encoded.  It must match the type
 *            recorded in the rate-control entry for this frame (asserted).
 *
 * B-frames:  returns rcc->last_qscale scaled by the magnitude of
 *            f_pb_factor, or by the square root of that magnitude when the
 *            frame is kept as a reference.  rcc->last_qscale is not updated.
 * Otherwise: divides rce->new_qscale by a compensation factor derived from
 *            the gap between bits written so far and rce->expected_bits,
 *            limits the result with lmin/lmax for this picture type, stores
 *            it in rcc->last_qscale and returns it. */
static float rate_estimate_qscale(x264_t *h, int pict_type)
{
    float q;
    float br_compensation;
    double diff;
    int picture_number = h->fenc->i_frame;
    x264_ratecontrol_t *rcc = h->rc;
    ratecontrol_entry_t *rce;
    double lmin = rcc->lmin[pict_type];
    double lmax = rcc->lmax[pict_type];
    /* slice sizes are multiplied by 8 to obtain a bit count */
    int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
                          + h->stat.i_slice_size[SLICE_TYPE_P]
                          + h->stat.i_slice_size[SLICE_TYPE_B]);

    rce = &rcc->entry[picture_number];

    assert(pict_type == rce->pict_type);

    if(rce->pict_type == SLICE_TYPE_B)
    {
        /* The sign of f_pb_factor is only a mode flag (get_diff_limited_q
         * scales by fabs() of it), so use the magnitude here as well:
         * sqrtf() of a negative factor would yield NaN and a plain multiply
         * would yield a negative qscale. */
        double pb_factor = fabs(h->param.rc.f_pb_factor);

        if(h->fenc->b_kept_as_ref)
            return rcc->last_qscale * sqrtf(pb_factor);
        else
            return rcc->last_qscale * pb_factor;
    }
    else
    {
        /* positive diff: more bits were spent than planned */
        diff = total_bits - (int64_t)rce->expected_bits;
        br_compensation = (rcc->buffer_size - diff) / rcc->buffer_size;
        /* limit the correction with bounds .5 and 2 */
        br_compensation = x264_clip3f(br_compensation, .5, 2);

        q = rce->new_qscale / br_compensation;
        q = x264_clip3f(q, lmin, lmax);
        rcc->last_qscale = q;
        return q;
    }
}

static int init_pass2( x264_t *h )
{
    x264_ratecontrol_t *rcc = h->rc;
    uint64_t all_const_bits = 0;
    uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
    double rate_factor, step, step_mult;
    double qblur = h->param.rc.f_qblur;
Loren Merritt's avatar
Loren Merritt committed
891
    double cplxblur = h->param.rc.f_complexity_blur;
892 893 894 895 896 897 898 899
    const int filter_size = (int)(qblur*4) | 1;
    double expected_bits;
    double *qscale, *blurred_qscale;
    int i;

    /* find total/average complexity & const_bits */
    for(i=0; i<rcc->num_entries; i++){
        ratecontrol_entry_t *rce = &rcc->entry[i];
900
        all_const_bits += rce->misc_bits;
901 902
        rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
        rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
903
        rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
904
        rcc->frame_count[rce->pict_type] ++;
905 906 907 908 909 910 911 912 913
    }

    if( all_available_bits < all_const_bits)
    {
        x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
                 (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
        return -1;
    }

Loren Merritt's avatar
Loren Merritt committed
914 915
    for(i=0; i<rcc->num_entries; i++){
        ratecontrol_entry_t *rce = &rcc->entry[i];
916
        double weight_sum = 0;
Loren Merritt's avatar
Loren Merritt committed
917
        double cplx_sum = 0;
918
        double weight = 1.0;
Loren Merritt's avatar
Loren Merritt committed
919 920
        int j;
        /* weighted average of cplx of future frames */
921
        for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
Loren Merritt's avatar
Loren Merritt committed
922
            ratecontrol_entry_t *rcj = &rcc->entry[i+j];
923
            weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
Loren Merritt's avatar
Loren Merritt committed
924 925 926
            if(weight < .0001)
                break;
            weight_sum += weight;
927
            cplx_sum += weight * qscale2bits(rcj, 1);
Loren Merritt's avatar
Loren Merritt committed
928 929
        }
        /* weighted average of cplx of past frames */
930
        weight = 1.0;
931
        for(j=0; j<=cplxblur*2 && j<=i; j++){
Loren Merritt's avatar
Loren Merritt committed
932 933
            ratecontrol_entry_t *rcj = &rcc->entry[i-j];
            weight_sum += weight;
934
            cplx_sum += weight * qscale2bits(rcj, 1);
935
            weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
Loren Merritt's avatar
Loren Merritt committed
936 937 938 939 940 941
            if(weight < .0001)
                break;
        }
        rce->blurred_complexity = cplx_sum / weight_sum;
    }

942 943 944 945 946 947
    qscale = x264_malloc(sizeof(double)*rcc->num_entries);
    if(filter_size > 1)
        blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
    else
        blurred_qscale = qscale;

Loren Merritt's avatar
Loren Merritt committed
948
    expected_bits = 1;
949 950 951 952 953
    for(i=0; i<rcc->num_entries; i++)
        expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0));
    step_mult = all_available_bits / expected_bits;

    rate_factor = 0;
Loren Merritt's avatar
Loren Merritt committed
954
    for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
955 956 957
        expected_bits = 0;
        rate_factor += step;

958 959 960
        rcc->last_non_b_pict_type = -1;
        rcc->last_accum_p_norm = 1;

961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984
        /* find qscale */
        for(i=0; i<rcc->num_entries; i++){
            qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor);
        }

        /* fixed I/B QP relative to P mode */
        for(i=rcc->num_entries-1; i>=0; i--){
            qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
            assert(qscale[i] >= 0);
        }

        /* smooth curve */
        if(filter_size > 1){
            assert(filter_size%2==1);
            for(i=0; i<rcc->num_entries; i++){
                ratecontrol_entry_t *rce = &rcc->entry[i];
                int j;
                double q=0.0, sum=0.0;

                for(j=0; j<filter_size; j++){
                    int index = i+j-filter_size/2;
                    double d = index-i;
                    double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
                    if(index < 0 || index >= rcc->num_entries) continue;
Loren Merritt's avatar