/***************************************************-*- coding: iso-8859-1 -*-
 * ratecontrol.c: h264 encoder library (Rate Control)
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
 *
 * Authors: Måns Rullgård <mru@mru.ath.cx>
 * 2 pass code: Michael Niedermayer <michaelni@gmx.at>
 *              Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

26
#define _ISOC99_SOURCE
27
#undef NDEBUG // always check asserts, the speed effect is far too small to disable them
Laurent Aimar's avatar
Laurent Aimar committed
28
29
30
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
31
32
#include <math.h>
#include <limits.h>
33
#include <assert.h>
Laurent Aimar's avatar
Laurent Aimar committed
34

35
36
37
#include "common/common.h"
#include "common/cpu.h"
#include "common/macroblock.h"
Laurent Aimar's avatar
Laurent Aimar committed
38
39
#include "ratecontrol.h"

Eric Petit's avatar
Eric Petit committed
40
41
42
/* Per-platform substitutes for exp2f(), built from exp2()/powf()/pow().
 * NOTE(review): presumably these targets' math libraries lacked exp2f()
 * at the time -- confirm before removing any of them. */
#ifdef SYS_MACOSX
#define exp2f(x) ( (float) exp2( (x) ) )
#endif
#if defined(SYS_FREEBSD) || defined(SYS_BEOS)
#define exp2f(x) powf( 2, (x) )
#endif
#ifdef _MSC_VER
#define exp2f(x) pow( 2, (x) )
#endif

/* POSIX says that rename() removes the destination, but win32 doesn't,
 * so remove the destination explicitly before renaming. */
#ifdef WIN32
#define rename(src,dst) (unlink(dst), rename(src,dst))
#endif
52
53
54
55

/* Per-frame record for 2-pass rate control.  One entry per frame is filled
 * from the first-pass stats file in x264_ratecontrol_new(). */
typedef struct
{
    int pict_type;            /* SLICE_TYPE_I / _P / _B, decoded from the "type:" field */
    int kept_as_ref;          /* set for stats types 'I' and 'B' (upper case) */
    float qscale;             /* first-pass quantizer ("q:" field) converted with qp2qscale() */
    int mv_bits;              /* "mv:" field of the stats line */
    int i_tex_bits;           /* "itex:" field of the stats line */
    int p_tex_bits;           /* "ptex:" field of the stats line */
    int misc_bits;            /* "misc:" field of the stats line */
    uint64_t expected_bits;   /* compared against the bits actually produced so far
                               * in rate_estimate_qscale() */
    float new_qscale;         /* qscale chosen for this frame in the second pass */
    int new_qp;               /* new_qscale converted to a qp and rounded */
    int i_count;              /* "imb:" field of the stats line */
    int p_count;              /* "pmb:" field of the stats line */
    int s_count;              /* "smb:" field of the stats line */
    float blurred_complexity; /* exposed to the rate-control equation as "blurCplx" */
} ratecontrol_entry_t;

71
72
73
struct x264_ratecontrol_t
{
    /* constants */
74
    double fps;
75
76
    int gop_size;
    int bitrate;
77
    int nmb;                    /* number of macroblocks in a frame */
78
79
80
    int buffer_size;
    int rcbufrate;
    int init_qp;
81
    int qp_constant[5];
82

83
    /* 1st pass stuff */
84
85
86
87
88
89
90
91
92
93
    int gop_qp;
    int buffer_fullness;
    int frames;                 /* frames in current gop */
    int pframes;
    int slice_type;
    int mb;                     /* MBs processed in current frame */
    int bits_gop;               /* allocated bits current gop */
    int bits_last_gop;          /* bits consumed in gop */
    int qp;                     /* qp for current frame */
    int qpm;                    /* qp for next MB */
94
    float qpa;                  /* average qp for last frame */
95
    int qps;
96
97
    float qp_avg_p;             /* average QP for P frames */
    float qp_last_p;
98
99
100
101
102
    int fbits;                  /* bits allocated for current frame */
    int ufbits;                 /* bits used for current frame */
    int nzcoeffs;               /* # of 0-quantized coefficients */
    int ncoeffs;                /* total # of coefficients */
    int overhead;
Måns Rullgård's avatar
Måns Rullgård committed
103
    int qp_force;
104
105
106

    /* 2pass stuff */
    FILE *p_stat_file_out;
107
    char *psz_stat_file_tmpname;
108
109

    int num_entries;            /* number of ratecontrol_entry_ts */
Loren Merritt's avatar
Loren Merritt committed
110
    ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
111
112
113
    double last_qscale;
    double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
    int last_non_b_pict_type;
114
115
    double accum_p_qp;          /* for determining I-frame quant */
    double accum_p_norm;
116
    double last_accum_p_norm;
117
    double lmin[5];             /* min qscale by frame type */
118
    double lmax[5];
Loren Merritt's avatar
Loren Merritt committed
119
    double lstep;               /* max change (multiply) in qscale per frame */
120
121
122
123
    double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
    double p_cplx_sum[5];
    double mv_bits_sum[5];
    int frame_count[5];         /* number of frames of each type */
124
};
Laurent Aimar's avatar
Laurent Aimar committed
125

126
127
128
129
130
131

/* Forward declarations of 2-pass helpers that are called before their
 * definitions: init_pass2() from x264_ratecontrol_new() and
 * rate_estimate_qscale() from x264_ratecontrol_start(). */
static int init_pass2(x264_t *);
static float rate_estimate_qscale( x264_t *h, int pict_type );

/* Terminology:
 * qp = h.264's quantizer
 * qscale = linearized quantizer = Lagrange multiplier
 */
static inline double qp2qscale(double qp)
{
    return 0.85 * pow(2.0, ( qp - 12.0 ) / 6.0);
}
static inline double qscale2qp(double qscale)
{
    return 12.0 + 6.0 * log(qscale/0.85) / log(2.0);
}

143
144
145
146
/* Texture bitrate is not quite inversely proportional to qscale,
 * probably due to the changing number of SKIP blocks.
 * MV bits level off at about qp<=12, because the lambda used
 * for motion estimation is constant there. */
147
148
/* Predict the size of a frame if it were encoded at `qscale`, given the
 * first-pass statistics in `rce`.
 *
 * rce:    first-pass entry (texture bits, mv bits and the qscale they were
 *         measured at)
 * qscale: candidate qscale; values below 0.1 are treated as 0.1
 *
 * Returns the estimated texture + motion-vector bits. */
static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
{
    /* MV bits stop growing below qp 12.  The comparison is done in the
     * qscale domain, so the knee has to be converted from qp as well. */
    const double mv_knee = qp2qscale( 12.0 );

    if(qscale<0.1)
        qscale = 0.1;
    return (rce->i_tex_bits + rce->p_tex_bits + .1) * pow( rce->qscale / qscale, 1.1 )
           + rce->mv_bits * pow( X264_MAX(rce->qscale, mv_knee) / X264_MAX(qscale, mv_knee), 0.5 );
}

155
/* There is no analytical inverse to the above formula. */
156
#if 0
/* Disabled: linear approximation of the qscale that yields `bits`, which
 * treats texture and mv bits as inversely proportional to qscale.
 * `bits` below 1.0 is treated as 1.0. */
static inline double bits2qscale(ratecontrol_entry_t *rce, double bits)
{
    if(bits<1.0)
        bits = 1.0;
    return (rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits + .1) * rce->qscale / bits;
}
#endif
164
165


166
int x264_ratecontrol_new( x264_t *h )
Laurent Aimar's avatar
Laurent Aimar committed
167
{
168
    x264_ratecontrol_t *rc;
169
    float bpp;
170
    int i;
171

172
173
174
175
    /* Needed(?) for 2 pass */
    x264_cpu_restore( h->param.cpu );

    h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
176
177
    memset(rc, 0, sizeof(*rc));

Måns Rullgård's avatar
Måns Rullgård committed
178
179
180
181
182
183
    /* FIXME: use integers */
    if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
        rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
    else
        rc->fps = 25.0;

184
    rc->gop_size = h->param.i_keyint_max;
185
    rc->bitrate = h->param.rc.i_bitrate * 1000;
186
    rc->nmb = h->mb.i_mb_count;
187

188
189
190
191
    rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );

192
193
194
    /* Currently there is no adaptive quant, and per-MB ratecontrol is used only in CBR. */
    h->mb.b_variable_qp = h->param.rc.b_cbr && !h->param.rc.b_stat_read;

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
    /* Init 1pass CBR algo */
    if( h->param.rc.b_cbr ){
        rc->buffer_size = h->param.rc.i_rc_buffer_size * 1000;
        rc->buffer_fullness = h->param.rc.i_rc_init_buffer;
        rc->rcbufrate = rc->bitrate / rc->fps;

        if(rc->buffer_size < rc->rcbufrate){
            x264_log(h, X264_LOG_WARNING, "rc buffer size %i too small\n",
                     rc->buffer_size);
            rc->buffer_size = 0;
        }

        if(rc->buffer_size <= 0)
            rc->buffer_size = rc->bitrate / 2;

        if(rc->buffer_fullness > rc->buffer_size || rc->buffer_fullness < 0){
            x264_log(h, X264_LOG_WARNING, "invalid initial buffer fullness %i\n",
                     rc->buffer_fullness);
            rc->buffer_fullness = 0;
        }

        bpp = rc->bitrate / (rc->fps * h->param.i_width * h->param.i_height);
        if(bpp <= 0.6)
            rc->init_qp = 31;
        else if(bpp <= 1.4)
            rc->init_qp = 25;
        else if(bpp <= 2.4)
            rc->init_qp = 20;
        else
            rc->init_qp = 10;
        rc->gop_qp = rc->init_qp;

        rc->bits_last_gop = 0;

        x264_log(h, X264_LOG_DEBUG, "%f fps, %i bps, bufsize %i\n",
                 rc->fps, rc->bitrate, rc->buffer_size);
231
232
    }

233

Loren Merritt's avatar
Loren Merritt committed
234
    rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0);
235
    rc->last_qscale = qp2qscale(26);
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
    for( i = 0; i < 5; i++ )
    {
        rc->last_qscale_for[i] = qp2qscale(26);
        rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
        rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
    }
#if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
    rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
    rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
    rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
    rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
#endif

    /* Load stat file and init 2pass algo */
    if( h->param.rc.b_stat_read )
    {
        int stats_size;
        char *p, *stats_in;
        FILE *stats_file;

        /* read 1st pass stats */
        assert( h->param.rc.psz_stat_in );
258
        stats_file = fopen( h->param.rc.psz_stat_in, "rb");
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
        if(!stats_file)
        {
            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
            return -1;
        }
        // FIXME: error checking
        fseek(stats_file, 0, SEEK_END);
        stats_size = ftell(stats_file);
        fseek(stats_file, 0, SEEK_SET);
        stats_in = x264_malloc(stats_size+10);
        fread(stats_in, 1, stats_size, stats_file);
        fclose(stats_file);

        /* find number of pics */
        p = stats_in;
        for(i=-1; p; i++){
            p = strchr(p+1, ';');
        }
        i += h->param.i_bframe;
        rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
        memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
        rc->num_entries= i;

        /* init all to skipped p frames */
        for(i=0; i<rc->num_entries; i++){
            ratecontrol_entry_t *rce = &rc->entry[i];
285
            rce->pict_type = SLICE_TYPE_P;
286
287
288
289
290
291
292
293
294
            rce->qscale = rce->new_qscale = qp2qscale(20);
            rce->misc_bits = rc->nmb + 10;
            rce->new_qp = 0;
        }

        /* read stats */
        p = stats_in;
        for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
            ratecontrol_entry_t *rce;
295
296
            int frame_number;
            char pict_type;
297
298
299
300
301
302
303
304
305
            int e;
            char *next;
            float qp;

            next= strchr(p, ';');
            if(next){
                (*next)=0; //sscanf is unbelievably slow on looong strings
                next++;
            }
306
            e = sscanf(p, " in:%d ", &frame_number);
307

308
309
310
            assert(frame_number >= 0);
            assert(frame_number < rc->num_entries);
            rce = &rc->entry[frame_number];
311

312
313
            e += sscanf(p, " in:%*d out:%*d type:%c q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
                   &pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
314
                   &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
315
316

            switch(pict_type){
317
                case 'I': rce->kept_as_ref = 1;
318
319
                case 'i': rce->pict_type = SLICE_TYPE_I; break;
                case 'P': rce->pict_type = SLICE_TYPE_P; break;
320
321
                case 'B': rce->kept_as_ref = 1;
                case 'b': rce->pict_type = SLICE_TYPE_B; break;
322
323
                default:  e = -1; break;
            }
324
325
326
327
328
329
330
331
332
333
            if(e != 10){
                x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
                return -1;
            }
            rce->qscale = qp2qscale(qp);
            p = next;
        }

        x264_free(stats_in);

334
335
336
337
338
        /* If using 2pass with constant quant, no need to run the bitrate allocation */
        if(h->param.rc.b_cbr)
        {
            if(init_pass2(h) < 0) return -1;
        }
339
340
341
    }

    /* Open output file */
342
343
    /* If input and output files are the same, output to a temp file
     * and move it to the real name only when it's complete */
344
345
    if( h->param.rc.b_stat_write )
    {
346
347
348
        rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
        strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
        strcat( rc->psz_stat_file_tmpname, ".temp" );
349
350

        rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
351
352
353
354
355
356
        if( rc->p_stat_file_out == NULL )
        {
            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
            return -1;
        }
    }
357
358

    return 0;
Laurent Aimar's avatar
Laurent Aimar committed
359
360
}

361
/* Release the rate-control state created by x264_ratecontrol_new().
 *
 * If a stats file was being written, it is closed and the temporary file
 * is renamed to rc.psz_stat_out -- but only when it closed cleanly and
 * h->i_frame has reached num_entries - i_bframe. */
void x264_ratecontrol_delete( x264_t *h )
{
    x264_ratecontrol_t *rc = h->rc;

    if( rc->p_stat_file_out )
    {
        /* fclose() flushes; a failure means the temp file is incomplete
         * and must not replace the real stats file */
        if( fclose( rc->p_stat_file_out ) != 0 )
        {
            x264_log( h, X264_LOG_ERROR, "failed to finish writing \"%s\"\n",
                      rc->psz_stat_file_tmpname );
        }
        else if( h->i_frame >= rc->num_entries - h->param.i_bframe )
        {
            if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
            {
                x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                          rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
            }
        }
    }

    /* the name is allocated before the file is opened, so it can exist
     * even when the open failed */
    if( rc->psz_stat_file_tmpname )
        x264_free( rc->psz_stat_file_tmpname );
    if( rc->entry )
        x264_free(rc->entry);
    x264_free( rc );
}

Måns Rullgård's avatar
Måns Rullgård committed
381
/* Choose the quantizer for the frame that is about to be encoded.
 *
 * h:            encoder instance
 * i_slice_type: SLICE_TYPE_I / _P / _B of the frame
 * i_force_qp:   when > 0 the frame is encoded at qp i_force_qp - 1;
 *               otherwise rate control decides
 *
 * Three modes, selected from h->param.rc:
 *  - constant quantizer (b_cbr unset): qp comes from rc->qp_constant[]
 *  - 2-pass (b_cbr and b_stat_read): qp comes from rate_estimate_qscale()
 *  - 1-pass CBR: a bit budget for the frame is derived from the buffer
 *    state and the qp is adjusted from the previous frame's statistics
 *
 * Results are left in rc->qp, rc->qpm and rc->qpa. */
void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp )
{
    x264_ratecontrol_t *rc = h->rc;
    int gframes, iframes, pframes, bframes;
    int minbits, maxbits;
    int gbits, fbits;
    int zn = 0;
    float kp;
    float weight;
    int gbuf;

    rc->slice_type = i_slice_type;

    x264_cpu_restore( h->param.cpu );

    rc->qp_force = i_force_qp;

    if( !h->param.rc.b_cbr )
    {
        int q;
        if( i_force_qp > 0 )
            q = i_force_qp - 1;
        else if( i_slice_type == SLICE_TYPE_B && h->fdec->b_kept_as_ref )
            q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2;
        else
            q = rc->qp_constant[ i_slice_type ];
        rc->qpm = rc->qpa = rc->qp = q;
        return;
    }
    else if( h->param.rc.b_stat_read )
    {
        int frame = h->fenc->i_frame;
        ratecontrol_entry_t *rce;
        assert( frame >= 0 && frame < rc->num_entries );
        rce = &h->rc->entry[frame];

        rce->new_qscale = rate_estimate_qscale( h, i_slice_type );
        rc->qpm = rc->qpa = rc->qp = rce->new_qp =
            (int)(qscale2qp(rce->new_qscale) + 0.5);
        return;
    }

    switch(i_slice_type){
    case SLICE_TYPE_I:
        /* an I frame starts a new gop: work out the gop's bit budget */
        gbuf = rc->buffer_fullness + (rc->gop_size-1) * rc->rcbufrate;
        rc->bits_gop = gbuf - rc->buffer_size / 2;

        if(!rc->mb && rc->pframes){
            int qp = rc->qp_avg_p / rc->pframes + 0.5;
#if 0 /* JM does this without explaining why */
            int gdq = (float) rc->gop_size / 15 + 0.5;
            if(gdq > 2)
                gdq = 2;
            qp -= gdq;
            if(qp > rc->qp_last_p - 2)
                qp--;
#endif
            qp = x264_clip3(qp, rc->gop_qp - 4, rc->gop_qp + 4);
            qp = x264_clip3(qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
            rc->gop_qp = qp;
        } else if(rc->frames > 4){
            rc->gop_qp = rc->init_qp;
        }

        kp = h->param.rc.f_ip_factor * h->param.rc.f_pb_factor;

        x264_log(h, X264_LOG_DEBUG,"gbuf=%i bits_gop=%i frames=%i gop_qp=%i\n",
                 gbuf, rc->bits_gop, rc->frames, rc->gop_qp);

        rc->bits_last_gop = 0;
        rc->frames = 0;
        rc->pframes = 0;
        rc->qp_avg_p = 0;
        break;

    case SLICE_TYPE_P:
        kp = h->param.rc.f_pb_factor;
        break;

    case SLICE_TYPE_B:
        kp = 1.0;
        break;

    default:
        x264_log(h, X264_LOG_WARNING, "ratecontrol: unknown slice type %i\n",
                 i_slice_type);
        kp = 1.0;
        break;
    }

    /* frames of each type still to come in this gop */
    gframes = rc->gop_size - rc->frames;
    iframes = gframes / rc->gop_size;
    pframes = gframes / (h->param.i_bframe + 1) - iframes;
    bframes = gframes - pframes - iframes;

    /* share the remaining gop bits between the remaining frames, weighted
     * by frame type */
    gbits = rc->bits_gop - rc->bits_last_gop;
    weight = h->param.rc.f_ip_factor * h->param.rc.f_pb_factor * iframes +
             h->param.rc.f_pb_factor * pframes + bframes;
    if( weight != 0 )
    {
        /* saturate: converting an out-of-range float to int is undefined */
        float target = kp * gbits / weight;
        if( target >= (float)INT_MAX )
            fbits = INT_MAX;
        else if( target <= (float)INT_MIN )
            fbits = INT_MIN;
        else
            fbits = (int)target;
    }
    else
        fbits = 0;  /* no frames left to weight; the clip below applies minbits */

    minbits = rc->buffer_fullness + rc->rcbufrate - rc->buffer_size;
    if(minbits < 0)
        minbits = 0;
    maxbits = rc->buffer_fullness;
    rc->fbits = x264_clip3(fbits, minbits, maxbits);

    if(i_slice_type == SLICE_TYPE_I){
        rc->qp = rc->gop_qp;
    } else if(rc->ncoeffs && rc->ufbits){
        int dqp, nonzc;
        float f_dqp;

        /* zn = number of zero coefficients that would have made the
         * previous frame hit this frame's bit budget */
        nonzc = (rc->ncoeffs - rc->nzcoeffs);
        if(nonzc == 0)
            zn = rc->ncoeffs;
        else if(rc->fbits < INT_MAX / nonzc)
            zn = rc->ncoeffs - rc->fbits * nonzc / rc->ufbits;
        else
            zn = 0;
        zn = x264_clip3(zn, 0, rc->ncoeffs);

        if( rc->nzcoeffs > 0 )
            f_dqp = h->param.rc.i_rc_sens * exp2f(rc->qpa / 6) *
                (zn - rc->nzcoeffs) / rc->nzcoeffs;
        else /* limit of the expression above as nzcoeffs -> 0 */
            f_dqp = zn > 0 ? h->param.rc.i_qp_step : 0;
        /* clip while still in floating point so the conversion to int
         * cannot overflow */
        dqp = x264_clip3f(f_dqp, -h->param.rc.i_qp_step, h->param.rc.i_qp_step);
        rc->qp = (int)(rc->qpa + dqp + .5);
    }

    /* nudge the qp when the budget is close to a buffer limit */
    if(rc->fbits > 0.9 * maxbits)
        rc->qp += 2;
    else if(rc->fbits > 0.8 * maxbits)
        rc->qp += 1;
    else if(rc->fbits < 1.1 * minbits)
        rc->qp -= 2;
    else if(rc->fbits < 1.2 * minbits)
        rc->qp -= 1;

    if( i_force_qp > 0 ) {
        rc->qpm = rc->qpa = rc->qp = i_force_qp - 1;
    } else {
        rc->qp = rc->qpm =
            x264_clip3(rc->qp, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
    }

    x264_log(h, X264_LOG_DEBUG, "fbits=%i, qp=%i, z=%i, min=%i, max=%i\n",
             rc->fbits, rc->qpm, zn, minbits, maxbits);

    /* reset the per-frame accumulators */
    rc->fbits -= rc->overhead;
    rc->ufbits = 0;
    rc->ncoeffs = 0;
    rc->nzcoeffs = 0;
    rc->mb = 0;
    rc->qps = 0;
}

532
/* Per-macroblock rate control: account for the macroblock just encoded and
 * update rc->qpm, the qp for the next macroblock.
 *
 * h:    encoder instance
 * bits: number of bits the macroblock took
 *
 * The qp is left unchanged during the first nmb/16 macroblocks of a frame,
 * after the last macroblock, and when the frame's qp is forced. */
void x264_ratecontrol_mb( x264_t *h, int bits )
{
    /* Bound for the raw qp delta: far more than the +/-3 window applied
     * below, yet small enough that qpm + dqp cannot overflow. */
    const float dqp_limit = 1.0e6f;
    x264_ratecontrol_t *rc = h->rc;
    int rbits;
    int zn, enz, nonz;
    int rcoeffs;
    float f_dqp;
    int dqp;
    int i;

    x264_cpu_restore( h->param.cpu );

    rc->qps += rc->qpm;
    rc->ufbits += bits;
    rc->mb++;

    /* 16 luma + 8 chroma 4x4 blocks of 16 coefficients each */
    for(i = 0; i < 16 + 8; i++)
        rc->nzcoeffs += 16 - h->mb.cache.non_zero_count[x264_scan8[i]];
    rc->ncoeffs += 16 * (16 + 8);

    if(rc->mb < rc->nmb / 16)
        return;
    else if(rc->mb == rc->nmb)
        return;
    else if(rc->qp_force > 0)
        return;

    rcoeffs = (rc->nmb - rc->mb) * 16 * 24;
    rbits = rc->fbits - rc->ufbits;
/*     if(rbits < 0) */
/*      rbits = 0; */

/*     zn = (rc->nmb - rc->mb) * 16 * 24; */
    /* zn = zero coefficients needed in the rest of the frame to stay
     * within the remaining bits */
    nonz = (rc->ncoeffs - rc->nzcoeffs);
    if(nonz == 0)
        zn = rcoeffs;
    else if(rc->ufbits && rbits < INT_MAX / nonz)
    {
        /* 64-bit product: rbits can be a large negative number */
        int64_t z = rcoeffs - (int64_t)rbits * nonz / rc->ufbits;
        zn = z > rcoeffs ? rcoeffs : z < 0 ? 0 : (int)z;
    }
    else
        zn = 0;
    zn = x264_clip3(zn, 0, rcoeffs);

    /* enz = zero coefficients expected in the rest of the frame at the
     * current rate; the product does not fit in an int for large frames */
    enz = (int)( (int64_t)rc->nzcoeffs * (rc->nmb - rc->mb) / rc->mb );

    if( enz != 0 )
        f_dqp = (float) 2*h->param.rc.i_rc_sens * exp2f((float) rc->qps / rc->mb / 6) *
            (zn - enz) / enz;
    else /* limit of the expression above as enz -> 0 */
        f_dqp = zn > 0 ? dqp_limit : 0;
    /* clip while still in floating point so the conversion to int cannot
     * overflow */
    dqp = x264_clip3f(f_dqp, -dqp_limit, dqp_limit);

    rc->qpm = x264_clip3(rc->qpm + dqp, rc->qp - 3, rc->qp + 3);
    if(rbits <= 0)
        rc->qpm++;
    rc->qpm = x264_clip3(rc->qpm, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
}

581
/* Return the qp to use for the next macroblock (rc->qpm). */
int  x264_ratecontrol_qp( x264_t *h )
{
    return h->rc->qpm;
}
Laurent Aimar's avatar
Laurent Aimar committed
585

586
587
588
589
/* Return the frame type to use for input frame `frame_num`.
 *
 * When reading first-pass stats the type recorded for that frame is
 * returned (IDR/I, P, BREF/B); a frame beyond the recorded ones is
 * reported as an error and treated as P.  Without stats the decision is
 * left to the encoder (X264_TYPE_AUTO). */
int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
{
    if( h->param.rc.b_stat_read )
    {
        if( frame_num >= h->rc->num_entries )
        {
            x264_log(h, X264_LOG_ERROR, "More input frames than in the 1st pass\n");
            return X264_TYPE_P;
        }
        switch( h->rc->entry[frame_num].pict_type )
        {
            case SLICE_TYPE_I:
                return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;

            case SLICE_TYPE_B:
                return h->rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;

            case SLICE_TYPE_P:
            default:
                return X264_TYPE_P;
        }
    }
    else
    {
        return X264_TYPE_AUTO;
    }
}

614
615
616
/* Called after a frame has been encoded.
 *
 * h:    encoder instance
 * bits: total size of the encoded frame in bits
 *
 * Finishes the per-frame macroblock statistics, appends one line to the
 * stats file when rc.b_stat_write is set and, in 1-pass CBR mode only,
 * updates the buffer model and the per-gop qp state. */
void x264_ratecontrol_end( x264_t *h, int bits )
{
    x264_ratecontrol_t *rc = h->rc;
    int i;

    x264_cpu_restore( h->param.cpu );

    h->stat.frame.i_mb_count_skip = h->stat.frame.i_mb_count[P_SKIP] + h->stat.frame.i_mb_count[B_SKIP];
    h->stat.frame.i_mb_count_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
    for( i = B_DIRECT; i < B_8x8; i++ )
        h->stat.frame.i_mb_count_p += h->stat.frame.i_mb_count[i];

    if( h->param.rc.b_stat_write )
    {
        /* upper case marks I frames at poc 0 and B frames kept as
         * reference; x264_ratecontrol_new() relies on this when parsing */
        char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i')
                    : rc->slice_type==SLICE_TYPE_P ? 'P'
                    : h->fenc->b_kept_as_ref ? 'B' : 'b';
        /* note: the "mv:" field carries i_hdr_bits */
        fprintf( rc->p_stat_file_out,
                 "in:%d out:%d type:%c q:%.3f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
                 h->fenc->i_frame, h->i_frame-1,
                 c_type, rc->qpa,
                 h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits,
                 h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits,
                 h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16],
                 h->stat.frame.i_mb_count_p,
                 h->stat.frame.i_mb_count_skip);
    }

    /* everything below belongs to the 1-pass CBR algorithm */
    if( !h->param.rc.b_cbr || h->param.rc.b_stat_read )
        return;

    rc->buffer_fullness += rc->rcbufrate - bits;
    if(rc->buffer_fullness < 0){
        x264_log(h, X264_LOG_WARNING, "buffer underflow %i\n",
                 rc->buffer_fullness);
        rc->buffer_fullness = 0;
    }

    /* keep the previous average if no macroblock was accounted for, rather
     * than storing the NaN that 0/0 would produce */
    if( rc->mb > 0 )
        rc->qpa = (float)rc->qps / rc->mb;
    if(rc->slice_type == SLICE_TYPE_P){
        rc->qp_avg_p += rc->qpa;
        rc->qp_last_p = rc->qpa;
        rc->pframes++;
    } else if(rc->slice_type == SLICE_TYPE_I){
        /* adapt the gop qp when the I frame missed its budget by > 10% */
        float err = (float) rc->ufbits / rc->fbits;
        if(err > 1.1)
            rc->gop_qp++;
        else if(err < 0.9)
            rc->gop_qp--;
    }

    rc->overhead = bits - rc->ufbits;

    x264_log(h, X264_LOG_DEBUG, "bits=%i, qp=%.1f, z=%i, zr=%6.3f, buf=%i\n",
             bits, rc->qpa, rc->nzcoeffs, (float) rc->nzcoeffs / rc->ncoeffs,
             rc->buffer_fullness);

    rc->bits_last_gop += bits;
    rc->frames++;
    rc->mb = 0;
}
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689

/****************************************************************************
 * 2 pass functions
 ***************************************************************************/
/* Expression evaluator, called by get_qscale() to evaluate the
 * rate-control equation (rc.psz_rc_eq) against tables of named constants
 * and named one- and two-argument functions; `opaque` is the first
 * argument of those function types.
 * NOTE(review): the definition is not in this part of the file; the
 * parameter semantics above are read off the call site -- confirm against
 * the implementation. */
double x264_eval( char *s, double *const_value, const char **const_name,
                  double (**func1)(void *, double), const char **func1_name,
                  double (**func2)(void *, double, double), char **func2_name,
                  void *opaque );

/**
 * modifies the bitrate curve from pass1 for one frame
 *
 * Evaluates the user's rate-control equation (rc.psz_rc_eq) with the
 * frame's first-pass statistics bound to the names in const_names[], then
 * divides the result by rate_factor.
 *
 * h:           encoder instance
 * rce:         first-pass entry of the frame
 * rate_factor: divisor applied to the equation's result
 *
 * Returns the qscale for the frame.  If the equation yields NaN, or the
 * frame has no texture and no mv bits, the previous result
 * (rcc->last_qscale) is returned instead.
 */
static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor)
{
    x264_ratecontrol_t *rcc= h->rc;
    const int pict_type = rce->pict_type;
    double q;

    /* values and names are matched by position */
    double const_values[]={
        rce->i_tex_bits * rce->qscale,
        rce->p_tex_bits * rce->qscale,
        (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
        rce->mv_bits * rce->qscale,
        (double)rce->i_count / rcc->nmb,
        (double)rce->p_count / rcc->nmb,
        (double)rce->s_count / rcc->nmb,
        rce->pict_type == SLICE_TYPE_I,
        rce->pict_type == SLICE_TYPE_P,
        rce->pict_type == SLICE_TYPE_B,
        h->param.rc.f_qcompress,
        rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
        rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
        rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
        rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
        (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
        rce->blurred_complexity,
        0
    };
    static const char *const_names[]={
        "iTex",
        "pTex",
        "tex",
        "mv",
        "iCount",
        "pCount",
        "sCount",
        "isI",
        "isP",
        "isB",
        "qComp",
        "avgIITex",
        "avgPITex",
        "avgPPTex",
        "avgBPTex",
        "avgTex",
        "blurCplx",
        NULL
    };
    /* qscale2bits() takes a ratecontrol_entry_t* where the table expects a
     * void*; `rce` is passed as the opaque argument below */
    static double (*func1[])(void *, double)={
//      (double (*)(void *, double))bits2qscale,
        (double (*)(void *, double))qscale2bits,
        NULL
    };
    static const char *func1_names[]={
//      "bits2qp",
        "qp2bits",
        NULL
    };

    q = x264_eval((char*)h->param.rc.psz_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
    q /= rate_factor;

    // avoid NaN's in the rc_eq (q != q is true only for NaN)
    if(q != q || rce->i_tex_bits + rce->p_tex_bits + rce->mv_bits == 0)
        q = rcc->last_qscale;
    else
        rcc->last_qscale = q;

    return q;
}

/* Apply the I/P/B quantizer relations and the per-frame step limit to a
 * qscale, and update the running state they depend on.
 *
 * h:   encoder instance
 * rce: first-pass entry of the frame
 * q:   qscale proposed for the frame
 *
 * Returns the adjusted qscale.
 *
 * Side effects on h->rc: last_qscale_for[], last_non_b_pict_type,
 * accum_p_qp, accum_p_norm and last_accum_p_norm are updated, so the result
 * depends on the order in which frames are passed in. */
static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
{
    x264_ratecontrol_t *rcc = h->rc;
    const int pict_type = rce->pict_type;

    // force I/B quants as a function of P quants
    const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
    const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
    if( pict_type == SLICE_TYPE_I )
    {
        double iq = q;
        double ip_factor = fabs( h->param.rc.f_ip_factor );
        /* don't apply ip_factor if the following frame is also I */
        if( rcc->accum_p_norm <= 0 )
            q = iq;
        else if( h->param.rc.f_ip_factor < 0 )
            q = iq / ip_factor;
        else
        {
            /* qscale of the weighted average qp of the preceding P frames */
            double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
            if( rcc->accum_p_norm >= 1 )
                q = pq / ip_factor;
            else
                q = rcc->accum_p_norm * pq / ip_factor + (1 - rcc->accum_p_norm) * iq;
        }
    }
    else if( pict_type == SLICE_TYPE_B )
    {
        if( h->param.rc.f_pb_factor > 0 )
            q = last_non_b_q;
        if( !rce->kept_as_ref )
            q *= fabs( h->param.rc.f_pb_factor );
    }
    else if( pict_type == SLICE_TYPE_P
             && rcc->last_non_b_pict_type == SLICE_TYPE_P
             && rce->i_tex_bits + rce->p_tex_bits == 0 )
    {
        q = last_p_q;
    }

    /* last qscale / qdiff stuff */
    /* TODO take into account whether the I-frame is a scene cut
     * or just a seek point */
    if(rcc->last_non_b_pict_type==pict_type
       && (pict_type!=SLICE_TYPE_I || rcc->last_accum_p_norm < 1))
    {
        /* limit the change from the last frame of the same type to a
         * factor of lstep in either direction */
        double last_q = rcc->last_qscale_for[pict_type];
        double max_qscale = last_q * rcc->lstep;
        double min_qscale = last_q / rcc->lstep;

        if     (q > max_qscale) q = max_qscale;
        else if(q < min_qscale) q = min_qscale;
    }

    rcc->last_qscale_for[pict_type] = q;
    if(pict_type!=SLICE_TYPE_B)
        rcc->last_non_b_pict_type = pict_type;
    if(pict_type==SLICE_TYPE_I)
    {
        rcc->last_accum_p_norm = rcc->accum_p_norm;
        rcc->accum_p_norm = 0;
        rcc->accum_p_qp = 0;
    }
    if(pict_type==SLICE_TYPE_P)
    {
        /* the more intra MBs a P frame has, the less it and the P frames
         * before it contribute to the average */
        float mask = 1 - pow( (float)rce->i_count / rcc->nmb, 2 );
        rcc->accum_p_qp   = mask * (qscale2qp(q) + rcc->accum_p_qp);
        rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
    }
    return q;
}

Loren Merritt's avatar
Loren Merritt committed
825
826
// clip a qscale to between lmin and lmax
//
// This is a soft clip: q is mapped through a logistic curve in the log
// domain, so the result approaches lmin/lmax without reaching them.
// lmin and lmax are the limits for the frame's picture type; if they are
// equal, that value is returned.
static double clip_qscale( x264_t *h, ratecontrol_entry_t *rce, double q )
{
    double lmin = h->rc->lmin[rce->pict_type];
    double lmax = h->rc->lmax[rce->pict_type];

    if(lmin==lmax){
        return lmin;
    }else{
        double min2 = log(lmin);
        double max2 = log(lmax);
        /* position of q within [lmin,lmax] in the log domain, centred on 0 */
        q = (log(q) - min2)/(max2-min2) - 0.5;
        /* logistic squashing into (0,1) */
        q = 1.0/(1.0 + exp(-4*q));
        /* back to a log-qscale between min2 and max2 */
        q = q*(max2-min2) + min2;
        return exp(q);
    }
}

/* Update the qscale for one frame based on the bits actually used so far.
 *
 * h:         encoder context; h->fenc->i_frame selects the entry in
 *            h->rc->entry[] that holds the planned values for this frame.
 * pict_type: slice type of the frame; must match the type stored in that
 *            entry (checked by assert).
 *
 * B-frames: returns rcc->last_qscale scaled by the magnitude of the pb
 * factor (by its square root when the frame is kept as a reference).
 * rcc->last_qscale is left untouched.
 *
 * Other frames: the planned rce->new_qscale is divided by a compensation
 * factor derived from the difference between the bits produced so far and
 * rce->expected_bits, clipped to [lmin, lmax] for this picture type, stored
 * in rcc->last_qscale and returned. */
static float rate_estimate_qscale(x264_t *h, int pict_type)
{
    float q;
    float br_compensation;
    double diff;
    int picture_number = h->fenc->i_frame;
    x264_ratecontrol_t *rcc = h->rc;
    ratecontrol_entry_t *rce;
    double lmin = rcc->lmin[pict_type];
    double lmax = rcc->lmax[pict_type];
    /* total output so far over all slice types; the factor 8 presumably
     * converts a byte count to bits -- TODO confirm against h->stat */
    int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
                          + h->stat.i_slice_size[SLICE_TYPE_P]
                          + h->stat.i_slice_size[SLICE_TYPE_B]);

    rce = &rcc->entry[picture_number];

    assert(pict_type == rce->pict_type);

    if(rce->pict_type == SLICE_TYPE_B)
    {
        /* f_pb_factor may be non-positive: get_diff_limited_q() tests its
         * sign and scales by fabs() of it.  Use the magnitude here as well,
         * otherwise a negative factor gives sqrtf() of a negative number
         * (NaN) or a negative qscale. */
        const float pb_factor = fabsf(h->param.rc.f_pb_factor);

        if(h->fenc->b_kept_as_ref)
            return rcc->last_qscale * sqrtf(pb_factor);
        else
            return rcc->last_qscale * pb_factor;
    }
    else
    {
        /* positive diff: more bits were spent than planned up to here */
        diff = (int64_t)total_bits - (int64_t)rce->expected_bits;
        /* overspending lowers the compensation below 1, which raises q */
        br_compensation = (rcc->buffer_size - diff) / rcc->buffer_size;
        br_compensation = x264_clip3f(br_compensation, .5, 2);

        q = rce->new_qscale / br_compensation;
        q = x264_clip3f(q, lmin, lmax);
        rcc->last_qscale = q;
        return q;
    }
}

static int init_pass2( x264_t *h )
{
    x264_ratecontrol_t *rcc = h->rc;
    uint64_t all_const_bits = 0;
    uint64_t all_available_bits = (uint64_t)(h->param.rc.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
    double rate_factor, step, step_mult;
    double qblur = h->param.rc.f_qblur;
Loren Merritt's avatar
Loren Merritt committed
891
    double cplxblur = h->param.rc.f_complexity_blur;
892
893
894
895
896
897
898
899
    const int filter_size = (int)(qblur*4) | 1;
    double expected_bits;
    double *qscale, *blurred_qscale;
    int i;

    /* find total/average complexity & const_bits */
    for(i=0; i<rcc->num_entries; i++){
        ratecontrol_entry_t *rce = &rcc->entry[i];
900
        all_const_bits += rce->misc_bits;
901
902
        rcc->i_cplx_sum[rce->pict_type] += rce->i_tex_bits * rce->qscale;
        rcc->p_cplx_sum[rce->pict_type] += rce->p_tex_bits * rce->qscale;
903
        rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits * rce->qscale;
904
        rcc->frame_count[rce->pict_type] ++;
905
906
907
908
909
910
911
912
913
    }

    if( all_available_bits < all_const_bits)
    {
        x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
                 (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
        return -1;
    }

Loren Merritt's avatar
Loren Merritt committed
914
915
    for(i=0; i<rcc->num_entries; i++){
        ratecontrol_entry_t *rce = &rcc->entry[i];
916
        double weight_sum = 0;
Loren Merritt's avatar
Loren Merritt committed
917
        double cplx_sum = 0;
918
        double weight = 1.0;
Loren Merritt's avatar
Loren Merritt committed
919
920
        int j;
        /* weighted average of cplx of future frames */
921
        for(j=1; j<cplxblur*2 && j<rcc->num_entries-i; j++){
Loren Merritt's avatar
Loren Merritt committed
922
            ratecontrol_entry_t *rcj = &rcc->entry[i+j];
923
            weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
Loren Merritt's avatar
Loren Merritt committed
924
925
926
            if(weight < .0001)
                break;
            weight_sum += weight;
927
            cplx_sum += weight * qscale2bits(rcj, 1);
Loren Merritt's avatar
Loren Merritt committed
928
929
        }
        /* weighted average of cplx of past frames */
930
        weight = 1.0;
931
        for(j=0; j<=cplxblur*2 && j<=i; j++){
Loren Merritt's avatar
Loren Merritt committed
932
933
            ratecontrol_entry_t *rcj = &rcc->entry[i-j];
            weight_sum += weight;
934
            cplx_sum += weight * qscale2bits(rcj, 1);
935
            weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
Loren Merritt's avatar
Loren Merritt committed
936
937
938
939
940
941
            if(weight < .0001)
                break;
        }
        rce->blurred_complexity = cplx_sum / weight_sum;
    }

942
943
944
945
946
947
    qscale = x264_malloc(sizeof(double)*rcc->num_entries);
    if(filter_size > 1)
        blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
    else
        blurred_qscale = qscale;

Loren Merritt's avatar
Loren Merritt committed
948
    expected_bits = 1;
949
950
951
952
953
    for(i=0; i<rcc->num_entries; i++)
        expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0));
    step_mult = all_available_bits / expected_bits;

    rate_factor = 0;
Loren Merritt's avatar
Loren Merritt committed
954
    for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step *= 0.5){
955
956
957
        expected_bits = 0;
        rate_factor += step;

958
959
960
        rcc->last_non_b_pict_type = -1;
        rcc->last_accum_p_norm = 1;

961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
        /* find qscale */
        for(i=0; i<rcc->num_entries; i++){
            qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor);
        }

        /* fixed I/B QP relative to P mode */
        for(i=rcc->num_entries-1; i>=0; i--){
            qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
            assert(qscale[i] >= 0);
        }

        /* smooth curve */
        if(filter_size > 1){
            assert(filter_size%2==1);
            for(i=0; i<rcc->num_entries; i++){
                ratecontrol_entry_t *rce = &rcc->entry[i];
                int j;
                double q=0.0, sum=0.0;

                for(j=0; j<filter_size; j++){
                    int index = i+j-filter_size/2;
                    double d = index-i;
                    double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
                    if(index < 0 || index >= rcc->num_entries) continue;
985
                    if(rce->pict_type != rcc->entry[index].pict_type) continue;
986
987
988
989
990
991
992
993
994
995
996
                    q += qscale[index] * coeff;
                    sum += coeff;
                }
                blurred_qscale[i] = q/sum;
            }
        }

        /* find expected bits */
        for(i=0; i<rcc->num_entries; i++){
            ratecontrol_entry_t *rce = &rcc->entry[i];
            double bits;
Loren Merritt's avatar
Loren Merritt committed
997
            rce->new_qscale = clip_qscale(h, rce, blurred_qscale[i]);
998
            assert(rce->new_qscale >= 0);
999
            bits = qscale2bits(rce, rce->new_qscale) + rce->misc_bits;
1000