slicetype.c 72.7 KB
Newer Older
1
/*****************************************************************************
Fiona Glaser's avatar
Fiona Glaser committed
2
 * slicetype.c: lookahead analysis
3
 *****************************************************************************
Sean McGovern's avatar
Sean McGovern committed
4
 * Copyright (C) 2005-2011 x264 project
5
 *
Dylan Yudaken's avatar
Dylan Yudaken committed
6
7
8
 * Authors: Fiona Glaser <fiona@x264.com>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Dylan Yudaken <dyudaken@gmail.com>
9
10
11
12
13
14
15
16
17
18
19
20
21
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Fiona Glaser's avatar
Fiona Glaser committed
23
24
25
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
26
27
28
29
30
31
 *****************************************************************************/

#include "common/common.h"
#include "macroblock.h"
#include "me.h"

32
33
34
// Divisor lookup table, indexed by pic_struct values (10 entries; entry 0 is 0).
// NOTE(review): not referenced in the visible part of this file; presumably used
// when deriving delta_tfi timing further down -- confirm at the use site.
static const uint8_t delta_tfi_divisor[10] = { 0, 2, 1, 1, 2, 2, 3, 3, 4, 6 };

Dylan Yudaken's avatar
Dylan Yudaken committed
35
36
37
/* Forward declaration: x264_weights_analyse() calls this (with p0 = p1 = b = 0)
 * when fenc->b_intra_calculated is not set, before the definition is visible. */
static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
                                      x264_frame_t **frames, int p0, int p1, int b,
                                      int b_intra_penalty );
38

39
/* Prepare an analysis context and the motion-estimation settings in h->mb for
 * lowres analysis.
 *
 * a: receives the fixed X264_LOOKAHEAD_QP, the matching entry of x264_lambda_tab,
 *    and whatever x264_mb_analyse_load_costs() derives from those.
 * h: h->mb.i_me_method and h->mb.i_subpel_refine are overridden based on the
 *    user's i_subpel_refine setting, and chroma ME is switched off. */
static void x264_lowres_context_init( x264_t *h, x264_mb_analysis_t *a )
{
    a->i_qp = X264_LOOKAHEAD_QP;
    a->i_lambda = x264_lambda_tab[ a->i_qp ];
    x264_mb_analyse_load_costs( h, a );

    /* i_subpel_refine > 1: cap the ME method at X264_ME_HEX and refine at level 4;
     * otherwise use X264_ME_DIA with refine level 2. */
    const int b_refine = h->param.analyse.i_subpel_refine > 1;
    h->mb.i_me_method     = b_refine ? X264_MIN( X264_ME_HEX, h->param.analyse.i_me_method )
                                     : X264_ME_DIA;
    h->mb.i_subpel_refine = b_refine ? 4 : 2;
    h->mb.b_chroma_me     = 0;
}
56

Loren Merritt's avatar
Loren Merritt committed
57
/* makes a non-h264 weight (i.e. fix7), into an h264 weight */
58
static void x264_weight_get_h264( int weight_nonh264, int offset, x264_weight_t *w )
Dylan Yudaken's avatar
Dylan Yudaken committed
59
60
61
62
63
64
65
66
67
68
69
70
{
    w->i_offset = offset;
    w->i_denom = 7;
    w->i_scale = weight_nonh264;
    while( w->i_denom > 0 && (w->i_scale > 127 || !(w->i_scale & 1)) )
    {
        w->i_denom--;
        w->i_scale >>= 1;
    }
    w->i_scale = X264_MIN( w->i_scale, 127 );
}

71
/* Build the luma reference used for weight cost estimation.
 *
 * If lowres motion vectors from fenc to ref are stored (the first MV is not the
 * 0x7FFF marker), motion-compensate ref's lowres planes 8x8 block by block into
 * dest and return dest.  Otherwise return ref's plain lowres plane.
 *
 * Note: the motion-compensated path will never run during lookahead, as
 * weights_analyse is only called there if no motion search has been done. */
static NOINLINE pixel *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dest )
{
    const int ref0_distance = fenc->i_frame - ref->i_frame - 1;
    int16_t (*mvs)[2] = fenc->lowres_mvs[0][ref0_distance];

    if( mvs[0][0] == 0x7FFF )
    {
        x264_emms();
        return ref->lowres[0];
    }

    const int stride = fenc->i_stride_lowres;
    const int height = fenc->i_lines_lowres;
    const int width  = fenc->i_width_lowres;
    int mb_idx = 0;

    for( int y = 0; y < height; y += 8 )
    {
        pixel *row = dest + y*stride;
        for( int x = 0; x < width; x += 8, mb_idx++ )
        {
            /* MVs are relative to the block; mc_luma takes quarter-pel frame coordinates. */
            const int mvx = mvs[mb_idx][0];
            const int mvy = mvs[mb_idx][1];
            h->mc.mc_luma( row+x, stride, ref->lowres, stride,
                           mvx+(x<<2), mvy+(y<<2), 8, 8, x264_weight_none );
        }
    }
    x264_emms();
    return dest;
}

Fiona Glaser's avatar
Fiona Glaser committed
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/* How data is organized for chroma weightp:
 * [U: ref] [U: fenc]
 * [V: ref] [V: fenc]
 * fenc = ref + offset
 * v = u + stride * chroma height
 * We'll need more room if we do 4:2:2 or 4:4:4. */

/* Fill the chroma scratch buffers dstu/dstv for weight cost estimation.
 *
 * Left half of each row: the reference chroma, either motion-compensated in
 * 8x8 blocks with the stored lowres MVs (when the first MV is not the 0x7FFF
 * marker) or simply deinterleaved from ref->plane[1].
 * Right half (at stride/2): fenc's chroma deinterleaved from fenc->plane[1]. */
static NOINLINE void x264_weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dstu, pixel *dstv )
{
    const int ref0_distance = fenc->i_frame - ref->i_frame - 1;
    const int stride      = fenc->i_stride[1];
    const int fenc_offset = stride / 2;
    const int plane_w     = h->mb.i_mb_width  << 3;
    const int plane_h     = h->mb.i_mb_height << 3;
    int16_t (*mvs)[2] = fenc->lowres_mvs[0][ref0_distance];

    if( mvs[0][0] == 0x7FFF )
        h->mc.plane_copy_deinterleave( dstu, stride, dstv, stride, ref->plane[1], stride, plane_w, plane_h );
    else
    {
        const int height = fenc->i_lines[1];
        const int width  = fenc->i_width[1];
        int mb_idx = 0;

        x264_frame_expand_border_chroma( h, ref, 1 );
        for( int y = 0; y < height; y += 8 )
        {
            const int row = y*stride;
            for( int x = 0; x < width; x += 8, mb_idx++ )
            {
                /* NV12: the source advances two samples per chroma pixel. */
                pixel *src = ref->plane[1] + row + x*2;
                h->mc.mc_chroma( dstu + row + x, dstv + row + x, stride, src, stride,
                                 mvs[mb_idx][0], mvs[mb_idx][1], 8, 8 );
            }
        }
    }

    h->mc.plane_copy_deinterleave( dstu+fenc_offset, stride, dstv+fenc_offset, stride,
                                   fenc->plane[1], stride, plane_w, plane_h );
    x264_emms();
}

Fiona Glaser's avatar
Fiona Glaser committed
136
137
138
139
140
141
142
143
144
/* Build the reference for plane p used by the 4:4:4 chroma weight cost.
 *
 * If lowres MVs are stored (first MV is not the 0x7FFF marker), copy 16x16
 * blocks of ref->plane[p], displaced by the halved MVs, into dst and return
 * dst.  Otherwise return ref->plane[p] directly. */
static NOINLINE pixel *x264_weight_cost_init_chroma444( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dst, int p )
{
    const int ref0_distance = fenc->i_frame - ref->i_frame - 1;
    int16_t (*mvs)[2] = fenc->lowres_mvs[0][ref0_distance];

    if( mvs[0][0] == 0x7FFF )
    {
        x264_emms();
        return ref->plane[p];
    }

    const int stride = fenc->i_stride[p];
    const int height = fenc->i_lines[p];
    const int width  = fenc->i_width[p];
    int mb_idx = 0;

    x264_frame_expand_border_chroma( h, ref, p );
    for( int y = 0; y < height; y += 16 )
        for( int x = 0; x < width; x += 16, mb_idx++ )
        {
            const int off = y*stride + x;
            /* We don't want to calculate hpels for fenc frames, so we round the motion
             * vectors to fullpel here.  It's not too bad, I guess? */
            const int mvx = mvs[mb_idx][0] / 2;
            const int mvy = mvs[mb_idx][1] / 2;
            h->mc.copy_16x16_unaligned( dst + off, stride,
                                        ref->plane[p] + off + mvx + mvy*stride, stride, 16 );
        }
    x264_emms();
    return dst;
}

Fiona Glaser's avatar
Fiona Glaser committed
164
165
166
167
/* Estimate the cost of signalling weight w[0] in the slice headers of a frame.
 *
 * b_chroma: nonzero when costing a chroma weight.
 * Returns lambda * number of slices * estimated header bits. */
static int x264_weight_slice_header_cost( x264_t *h, x264_weight_t *w, int b_chroma )
{
    /* 4 times higher lambda for chroma, because chroma is analyzed at full resolution. */
    int lambda = x264_lambda_tab[X264_LOOKAHEAD_QP];
    if( b_chroma )
        lambda *= 4;

    /* FIXME: find a way to account for --slice-max-size? */
    int numslices = 1;
    if( h->param.i_slice_count )
        numslices = h->param.i_slice_count;
    else if( h->param.i_slice_max_mbs )
    {
        const int mb_count = h->mb.i_mb_width * h->mb.i_mb_height;
        numslices = (mb_count + h->param.i_slice_max_mbs-1) / h->param.i_slice_max_mbs;
    }

    /* Scale and offset are multiplied by 2 as there will be a duplicate.  10 bits are
     * added as if there is a weighted frame, then an additional duplicate is used.
     * Denom cost is cut in half if chroma, since it's shared between the two chroma planes. */
    const int denom_cost = bs_size_ue( w[0].i_denom ) * (2 - b_chroma);
    const int param_cost = bs_size_se( w[0].i_scale ) + bs_size_se( w[0].i_offset );
    return lambda * numslices * ( 10 + denom_cost + 2 * param_cost );
}

/* Estimate the cost of predicting fenc's lowres luma from src.
 *
 * src: reference plane laid out with fenc's lowres stride.
 * w:   weight to apply to src before comparison, or NULL for no weighting.
 *
 * Each 8x8 block contributes min( mbcmp, stored intra cost of that block ).
 * When a weight is given, its slice header cost is added as well. */
static NOINLINE unsigned int x264_weight_cost_luma( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w )
{
    const int stride = fenc->i_stride_lowres;
    const int height = fenc->i_lines_lowres;
    const int width  = fenc->i_width_lowres;
    pixel *fenc_plane = fenc->lowres[0];
    unsigned int cost = 0;
    int mb_idx = 0;
    ALIGNED_ARRAY_16( pixel, buf,[8*8] );

    for( int y = 0; y < height; y += 8 )
        for( int x = 0; x < width; x += 8, mb_idx++ )
        {
            const int off = y*stride + x;
            pixel *pred = &src[off];
            int pred_stride = stride;

            if( w )
            {
                /* Compare against the weighted block written to the scratch buffer. */
                w->weightfn[8>>2]( buf, 8, pred, stride, w, 8 );
                pred = buf;
                pred_stride = 8;
            }

            int cmp = h->pixf.mbcmp[PIXEL_8x8]( pred, pred_stride, &fenc_plane[off], stride );
            cost += X264_MIN( cmp, fenc->i_intra_cost[mb_idx] );
        }

    if( w )
        cost += x264_weight_slice_header_cost( h, w, 0 );

    x264_emms();
    return cost;
}

Fiona Glaser's avatar
Fiona Glaser committed
218
219
220
221
222
223
224
225
/* Estimate the cost of predicting one chroma plane of fenc from ref.
 *
 * ref: scratch buffer holding the reference chroma, with fenc's chroma for the
 *      same plane stored half a stride to the right of it.
 * w:   weight to apply to the reference, or NULL for no weighting.
 *
 * The naive and seemingly sensible algorithm is to use mbcmp as in luma.
 * But testing shows that for chroma the DC coefficient is by far the most
 * important part of the coding cost.  Thus a more useful chroma weight is
 * obtained by comparing each block's DC coefficient instead of the actual
 * pixels: the cost is the absolute difference of the two blocks' SADs against
 * an all-zero row.
 *
 * FIXME: add a (faster) asm sum function to replace sad. */
static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w )
{
    const int stride = fenc->i_stride[1];
    const int height = fenc->i_lines[1];
    const int width  = fenc->i_width[1];
    pixel *src = ref + stride / 2;
    unsigned int cost = 0;
    ALIGNED_ARRAY_16( pixel, buf, [8*8] );
    ALIGNED_16( static pixel flat[8] ) = {0};

    for( int y = 0; y < height; y += 8 )
        for( int x = 0; x < width; x += 8 )
        {
            const int off = y*stride + x;
            pixel *pred = &ref[off];
            int pred_stride = stride;

            if( w )
            {
                w->weightfn[8>>2]( buf, 8, pred, stride, w, 8 );
                pred = buf;
                pred_stride = 8;
            }

            const int pred_sum = h->pixf.sad_aligned[PIXEL_8x8]( pred, pred_stride, flat, 0 );
            const int src_sum  = h->pixf.sad_aligned[PIXEL_8x8]( &src[off], stride, flat, 0 );
            cost += abs( pred_sum - src_sum );
        }

    if( w )
        cost += x264_weight_slice_header_cost( h, w, 1 );

    x264_emms();
    return cost;
}

Fiona Glaser's avatar
Fiona Glaser committed
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
/* Estimate the cost of predicting plane p of fenc from ref in 4:4:4 mode.
 *
 * ref: reference plane laid out with fenc's stride for plane p.
 * w:   weight to apply to ref before comparison, or NULL for no weighting.
 *
 * Sums mbcmp over 16x16 blocks; when a weight is given, its slice header
 * cost (costed as chroma) is added. */
static NOINLINE unsigned int x264_weight_cost_chroma444( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w, int p )
{
    const int stride = fenc->i_stride[p];
    const int height = fenc->i_lines[p];
    const int width  = fenc->i_width[p];
    pixel *src = fenc->plane[p];
    unsigned int cost = 0;
    ALIGNED_ARRAY_16( pixel, buf, [16*16] );

    for( int y = 0; y < height; y += 16 )
        for( int x = 0; x < width; x += 16 )
        {
            const int off = y*stride + x;
            pixel *pred = &ref[off];
            int pred_stride = stride;

            if( w )
            {
                w->weightfn[16>>2]( buf, 16, pred, stride, w, 16 );
                pred = buf;
                pred_stride = 16;
            }

            cost += h->pixf.mbcmp[PIXEL_16x16]( pred, pred_stride, &src[off], stride );
        }

    if( w )
        cost += x264_weight_slice_header_cost( h, w, 1 );

    x264_emms();
    return cost;
}

Dylan Yudaken's avatar
Dylan Yudaken committed
283
/* Search for explicit weighted-prediction parameters for predicting fenc from ref.
 *
 * The result is stored in fenc->weight[0][plane] for plane 0..2; a plane whose
 * weightfn ends up NULL is left unweighted.
 *
 * Per plane:
 *   - the scale is guessed as sqrt( fenc ssd / ref ssd ) and the offset from
 *     the difference of the plane means;
 *   - a small range of offsets is scored (a single offset in lookahead, the
 *     guessed offset +/- 1 otherwise);
 *   - the weight is kept only if it was found to improve on the unweighted
 *     score by more than 0.2% and is not the identity weight.
 *
 * Chroma planes are analysed only outside lookahead and only when a luma
 * weight was found.
 *
 * b_lookahead: nonzero when called from lookahead.  In that case, if a luma
 * weight was found, a weighted copy of ref's lowres buffer is written to
 * h->mb.p_weight_buf[0] and fenc->weighted[0] is pointed into it. */
void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
{
    int i_delta_index = fenc->i_frame - ref->i_frame - 1;
    /* epsilon is chosen to require at least a numerator of 127 (with denominator = 128) */
    const float epsilon = 1.f/128.f;
    x264_weight_t *weights = fenc->weight[0];
    /* Set once the shared non-4:4:4 chroma scratch buffer has been filled.  Plane 1
     * can leave the loop body early (early termination) before reaching the
     * initialization, in which case plane 2 has to perform it instead of scoring
     * against a buffer that was never written. */
    int chroma_initted = 0;

    SET_WEIGHT( weights[0], 0, 1, 0, 0 );
    SET_WEIGHT( weights[1], 0, 1, 0, 0 );
    SET_WEIGHT( weights[2], 0, 1, 0, 0 );
    /* Don't check chroma in lookahead, or if there wasn't a luma weight. */
    for( int plane = 0; plane <= 2 && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
    {
        int cur_offset, start_offset, end_offset;
        int minoff, minscale, mindenom;
        unsigned int minscore, origscore;
        int found;
        /* Adding !ref_ssd to both variances avoids a division by zero when ref's ssd is 0. */
        float fenc_var = fenc->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
        float ref_var  =  ref->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
        float guess_scale = sqrtf( fenc_var / ref_var );
        float fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
        float ref_mean  = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));

        //early termination
        if( fabsf( ref_mean - fenc_mean ) < 0.5f && fabsf( 1.f - guess_scale ) < epsilon )
        {
            SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
            continue;
        }

        if( plane )
        {
            /* Chroma uses a fixed denominator of 6; give up on chroma weights
             * entirely if the guessed scale does not fit in 127. */
            weights[plane].i_denom = 6;
            weights[plane].i_scale = x264_clip3( round( guess_scale * 64 ), 0, 255 );
            if( weights[plane].i_scale > 127 )
            {
                weights[1].weightfn = weights[2].weightfn = NULL;
                break;
            }
        }
        else
            x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );

        found = 0;
        mindenom = weights[plane].i_denom;
        minscale = weights[plane].i_scale;
        minoff = 0;

        /* Build the reference to score against and compute the unweighted score. */
        pixel *mcbuf;
        if( !plane )
        {
            if( !fenc->b_intra_calculated )
            {
                x264_mb_analysis_t a;
                x264_lowres_context_init( h, &a );
                x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
            }
            mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
            origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, NULL );
        }
        else
        {
            if( CHROMA444 )
            {
                mcbuf = x264_weight_cost_init_chroma444( h, fenc, ref, h->mb.p_weight_buf[0], plane );
                origscore = minscore = x264_weight_cost_chroma444( h, fenc, mcbuf, NULL, plane );
            }
            else
            {
                pixel *dstu = h->mb.p_weight_buf[0];
                pixel *dstv = h->mb.p_weight_buf[0]+fenc->i_stride[1]*fenc->i_lines[1];
                /* Only initialize chroma data once, by whichever chroma plane
                 * gets here first (plane 1 may have been skipped above). */
                if( !chroma_initted )
                {
                    x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
                    chroma_initted = 1;
                }
                mcbuf = plane == 1 ? dstu : dstv;
                origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, NULL );
            }
        }

        /* Nothing to improve (this also keeps origscore nonzero for the ratio below). */
        if( !minscore )
            continue;

        // This gives a slight improvement due to rounding errors but only tests one offset in lookahead.
        // Currently only searches within +/- 1 of the best offset found so far.
        // TODO: Try other offsets/multipliers/combinations thereof?
        cur_offset = fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f * b_lookahead;
        start_offset = x264_clip3( cur_offset - !b_lookahead, -128, 127 );
        end_offset   = x264_clip3( cur_offset + !b_lookahead, -128, 127 );
        for( int i_off = start_offset; i_off <= end_offset; i_off++ )
        {
            SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
            unsigned int s;
            if( plane )
            {
                if( CHROMA444 )
                    s = x264_weight_cost_chroma444( h, fenc, mcbuf, &weights[plane], plane );
                else
                    s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
            }
            else
                s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
            COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );

            // Don't check any more offsets if the previous one had a lower cost than the current one
            if( minoff == start_offset && i_off != start_offset )
                break;
        }
        x264_emms();

        /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
        /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
        if( !found || (minscale == 1 << mindenom && minoff == 0) || (float)minscore / origscore > 0.998f )
        {
            SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
            continue;
        }
        else
            SET_WEIGHT( weights[plane], 1, minscale, mindenom, minoff );

        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn && !plane )
            fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
    }

    //FIXME, what is the correct way to deal with this?
    /* Both chroma weights are forced onto the smaller of the two denominators. */
    if( weights[1].weightfn && weights[2].weightfn && weights[1].i_denom != weights[2].i_denom )
    {
        int denom = X264_MIN( weights[1].i_denom, weights[2].i_denom );
        int i;
        for( i = 1; i <= 2; i++ )
        {
            weights[i].i_scale = x264_clip3( weights[i].i_scale >> ( weights[i].i_denom - denom ), 0, 255 );
            weights[i].i_denom = denom;
            h->mc.weight_cache( h, &weights[i] );
        }
    }

    if( weights[0].weightfn && b_lookahead )
    {
        //scale lowres in lookahead for slicetype_frame_cost
        pixel *src = ref->buffer_lowres[0];
        pixel *dst = h->mb.p_weight_buf[0];
        int width = ref->i_width_lowres + PADH*2;
        int height = ref->i_lines_lowres + PADV*2;
        x264_weight_scale_plane( h, dst, ref->i_stride_lowres, src, ref->i_stride_lowres,
                                 width, height, &weights[0] );
        fenc->weighted[0] = h->mb.p_weight_buf[0] + PADH + ref->i_stride_lowres * PADV;
    }
}

Fiona Glaser's avatar
Fiona Glaser committed
431
432
433
static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
                                    x264_frame_t **frames, int p0, int p1, int b,
                                    int dist_scale_factor, int do_search[2], const x264_weight_t *w )
434
435
436
437
438
439
440
{
    x264_frame_t *fref0 = frames[p0];
    x264_frame_t *fref1 = frames[p1];
    x264_frame_t *fenc  = frames[b];
    const int b_bidir = (b < p1);
    const int i_mb_x = h->mb.i_mb_x;
    const int i_mb_y = h->mb.i_mb_y;
441
    const int i_mb_stride = h->mb.i_mb_width;
442
443
    const int i_mb_xy = i_mb_x + i_mb_y * i_mb_stride;
    const int i_stride = fenc->i_stride_lowres;
Loren Merritt's avatar
Loren Merritt committed
444
    const int i_pel_offset = 8 * (i_mb_x + i_mb_y * i_stride);
445
    const int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
446
447
    int16_t (*fenc_mvs[2])[2] = { &fenc->lowres_mvs[0][b-p0-1][i_mb_xy], &fenc->lowres_mvs[1][p1-b-1][i_mb_xy] };
    int (*fenc_costs[2]) = { &fenc->lowres_mv_costs[0][b-p0-1][i_mb_xy], &fenc->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
448
449
450
    int b_frame_score_mb = (i_mb_x > 0 && i_mb_x < h->mb.i_mb_width - 1 &&
                            i_mb_y > 0 && i_mb_y < h->mb.i_mb_height - 1) ||
                            h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2;
451

452
    ALIGNED_ARRAY_16( pixel, pix1,[9*FDEC_STRIDE] );
453
    pixel *pix2 = pix1+8;
454
    x264_me_t m[2];
Loren Merritt's avatar
Loren Merritt committed
455
    int i_bcost = COST_MAX;
Fiona Glaser's avatar
Fiona Glaser committed
456
    int list_used = 0;
457

458
459
460
    h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
    h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 );

Fiona Glaser's avatar
Fiona Glaser committed
461
    if( p0 == p1 )
Loren Merritt's avatar
Loren Merritt committed
462
463
        goto lowres_intra_mb;

464
    // no need for h->mb.mv_min[]
Loren Merritt's avatar
Loren Merritt committed
465
    h->mb.mv_min_fpel[0] = -8*h->mb.i_mb_x - 4;
466
    h->mb.mv_max_fpel[0] = 8*( h->mb.i_mb_width - h->mb.i_mb_x - 1 ) + 4;
Loren Merritt's avatar
Loren Merritt committed
467
468
    h->mb.mv_min_spel[0] = 4*( h->mb.mv_min_fpel[0] - 8 );
    h->mb.mv_max_spel[0] = 4*( h->mb.mv_max_fpel[0] + 8 );
469
    if( h->mb.i_mb_x >= h->mb.i_mb_width - 2 )
470
471
    {
        h->mb.mv_min_fpel[1] = -8*h->mb.i_mb_y - 4;
472
        h->mb.mv_max_fpel[1] = 8*( h->mb.i_mb_height - h->mb.i_mb_y - 1 ) + 4;
473
474
475
        h->mb.mv_min_spel[1] = 4*( h->mb.mv_min_fpel[1] - 8 );
        h->mb.mv_max_spel[1] = 4*( h->mb.mv_max_fpel[1] + 8 );
    }
476

Loren Merritt's avatar
Loren Merritt committed
477
478
479
480
481
482
483
#define LOAD_HPELS_LUMA(dst, src) \
    { \
        (dst)[0] = &(src)[0][i_pel_offset]; \
        (dst)[1] = &(src)[1][i_pel_offset]; \
        (dst)[2] = &(src)[2][i_pel_offset]; \
        (dst)[3] = &(src)[3][i_pel_offset]; \
    }
Dylan Yudaken's avatar
Dylan Yudaken committed
484
485
486
#define LOAD_WPELS_LUMA(dst,src) \
    (dst) = &(src)[i_pel_offset];

Loren Merritt's avatar
Loren Merritt committed
487
488
489
490
491
#define CLIP_MV( mv ) \
    { \
        mv[0] = x264_clip3( mv[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); \
        mv[1] = x264_clip3( mv[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] ); \
    }
492
493
494
#define TRY_BIDIR( mv0, mv1, penalty ) \
    { \
        int i_cost; \
Fiona Glaser's avatar
Fiona Glaser committed
495
496
497
498
        if( h->param.analyse.i_subpel_refine <= 1 ) \
        { \
            int hpel_idx1 = (((mv0)[0]&2)>>1) + ((mv0)[1]&2); \
            int hpel_idx2 = (((mv1)[0]&2)>>1) + ((mv1)[1]&2); \
499
500
            pixel *src1 = m[0].p_fref[hpel_idx1] + ((mv0)[0]>>2) + ((mv0)[1]>>2) * m[0].i_stride[0]; \
            pixel *src2 = m[1].p_fref[hpel_idx2] + ((mv1)[0]>>2) + ((mv1)[1]>>2) * m[1].i_stride[0]; \
Fiona Glaser's avatar
Fiona Glaser committed
501
502
503
504
505
            h->mc.avg[PIXEL_8x8]( pix1, 16, src1, m[0].i_stride[0], src2, m[1].i_stride[0], i_bipred_weight ); \
        } \
        else \
        { \
            int stride1 = 16, stride2 = 16; \
506
            pixel *src1, *src2; \
Fiona Glaser's avatar
Fiona Glaser committed
507
508
509
510
511
512
            src1 = h->mc.get_ref( pix1, &stride1, m[0].p_fref, m[0].i_stride[0], \
                                  (mv0)[0], (mv0)[1], 8, 8, w ); \
            src2 = h->mc.get_ref( pix2, &stride2, m[1].p_fref, m[1].i_stride[0], \
                                  (mv1)[0], (mv1)[1], 8, 8, w ); \
            h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
        } \
513
        i_cost = penalty * a->i_lambda + h->pixf.mbcmp[PIXEL_8x8]( \
514
                           m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
Fiona Glaser's avatar
Fiona Glaser committed
515
        COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
516
517
518
519
    }

    m[0].i_pixel = PIXEL_8x8;
    m[0].p_cost_mv = a->p_cost_mv;
Loren Merritt's avatar
Loren Merritt committed
520
    m[0].i_stride[0] = i_stride;
521
    m[0].p_fenc[0] = h->mb.pic.p_fenc[0];
Dylan Yudaken's avatar
Dylan Yudaken committed
522
    m[0].weight = w;
Fiona Glaser's avatar
Fiona Glaser committed
523
    m[0].i_ref = 0;
Loren Merritt's avatar
Loren Merritt committed
524
    LOAD_HPELS_LUMA( m[0].p_fref, fref0->lowres );
Dylan Yudaken's avatar
Dylan Yudaken committed
525
526
527
    m[0].p_fref_w = m[0].p_fref[0];
    if( w[0].weightfn )
        LOAD_WPELS_LUMA( m[0].p_fref_w, fenc->weighted[0] );
528
529
530

    if( b_bidir )
    {
531
        int16_t *mvr = fref1->lowres_mvs[0][p1-p0-1][i_mb_xy];
532
        ALIGNED_ARRAY_8( int16_t, dmv,[2],[2] );
533

Fiona Glaser's avatar
Fiona Glaser committed
534
535
536
537
538
        m[1].i_pixel = PIXEL_8x8;
        m[1].p_cost_mv = a->p_cost_mv;
        m[1].i_stride[0] = i_stride;
        m[1].p_fenc[0] = h->mb.pic.p_fenc[0];
        m[1].i_ref = 0;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
539
        m[1].weight = x264_weight_none;
Loren Merritt's avatar
Loren Merritt committed
540
        LOAD_HPELS_LUMA( m[1].p_fref, fref1->lowres );
Dylan Yudaken's avatar
Dylan Yudaken committed
541
        m[1].p_fref_w = m[1].p_fref[0];
542
543
544
545
546

        dmv[0][0] = ( mvr[0] * dist_scale_factor + 128 ) >> 8;
        dmv[0][1] = ( mvr[1] * dist_scale_factor + 128 ) >> 8;
        dmv[1][0] = dmv[0][0] - mvr[0];
        dmv[1][1] = dmv[0][1] - mvr[1];
Loren Merritt's avatar
Loren Merritt committed
547
548
        CLIP_MV( dmv[0] );
        CLIP_MV( dmv[1] );
Fiona Glaser's avatar
Fiona Glaser committed
549
550
        if( h->param.analyse.i_subpel_refine <= 1 )
            M64( dmv ) &= ~0x0001000100010001ULL; /* mv & ~1 */
551
552

        TRY_BIDIR( dmv[0], dmv[1], 0 );
Fiona Glaser's avatar
Fiona Glaser committed
553
        if( M64( dmv ) )
554
555
556
557
        {
            int i_cost;
            h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight );
            i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 );
Fiona Glaser's avatar
Fiona Glaser committed
558
            COPY2_IF_LT( i_bcost, i_cost, list_used, 3 );
559
        }
560
561
    }

562
    for( int l = 0; l < 1 + b_bidir; l++ )
563
    {
564
        if( do_search[l] )
565
        {
566
567
            int i_mvc = 0;
            int16_t (*fenc_mv)[2] = fenc_mvs[l];
568
            ALIGNED_4( int16_t mvc[4][2] );
569

570
            /* Reverse-order MV prediction. */
Fiona Glaser's avatar
Fiona Glaser committed
571
572
573
            M32( mvc[0] ) = 0;
            M32( mvc[2] ) = 0;
#define MVC(mv) { CP32( mvc[i_mvc], mv ); i_mvc++; }
574
            if( i_mb_x < h->mb.i_mb_width - 1 )
575
                MVC( fenc_mv[1] );
576
            if( i_mb_y < h->mb.i_mb_height - 1 )
577
            {
578
                MVC( fenc_mv[i_mb_stride] );
579
                if( i_mb_x > 0 )
580
                    MVC( fenc_mv[i_mb_stride-1] );
581
                if( i_mb_x < h->mb.i_mb_width - 1 )
582
                    MVC( fenc_mv[i_mb_stride+1] );
583
            }
Loren Merritt's avatar
Loren Merritt committed
584
#undef MVC
585
586
587
588
            if( i_mvc <= 1 )
                CP32( m[l].mvp, mvc[0] );
            else
                x264_median_mv( m[l].mvp, mvc[0], mvc[1], mvc[2] );
589

590
591
592
593
594
595
596
597
598
599
600
601
602
            /* Fast skip for cases of near-zero residual.  Shortcut: don't bother except in the mv0 case,
             * since anything else is likely to have enough residual to not trigger the skip. */
            if( !M32( m[l].mvp ) )
            {
                m[l].cost = h->pixf.mbcmp[PIXEL_8x8]( m[l].p_fenc[0], FENC_STRIDE, m[l].p_fref[0], m[l].i_stride[0] );
                if( m[l].cost < 64 )
                {
                    M32( m[l].mv ) = 0;
                    goto skip_motionest;
                }
            }

            x264_me_search( h, &m[l], mvc, i_mvc );
603
            m[l].cost -= a->p_cost_mv[0]; // remove mvcost from skip mbs
Fiona Glaser's avatar
Fiona Glaser committed
604
            if( M32( m[l].mv ) )
605
                m[l].cost += 5 * a->i_lambda;
606
607

skip_motionest:
Fiona Glaser's avatar
Fiona Glaser committed
608
            CP32( fenc_mvs[l], m[l].mv );
609
610
611
612
            *fenc_costs[l] = m[l].cost;
        }
        else
        {
Fiona Glaser's avatar
Fiona Glaser committed
613
            CP32( m[l].mv, fenc_mvs[l] );
614
615
            m[l].cost = *fenc_costs[l];
        }
Fiona Glaser's avatar
Fiona Glaser committed
616
        COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
617
618
    }

Fiona Glaser's avatar
Fiona Glaser committed
619
    if( b_bidir && ( M32( m[0].mv ) || M32( m[1].mv ) ) )
620
621
        TRY_BIDIR( m[0].mv, m[1].mv, 5 );

Loren Merritt's avatar
Loren Merritt committed
622
lowres_intra_mb:
Fiona Glaser's avatar
Fiona Glaser committed
623
    if( !fenc->b_intra_calculated )
624
    {
625
626
627
        ALIGNED_ARRAY_16( pixel, edge,[33] );
        pixel *pix = &pix1[8+FDEC_STRIDE - 1];
        pixel *src = &fenc->lowres[0][i_pel_offset - 1];
628
        const int intra_penalty = 5 * a->i_lambda;
Fiona Glaser's avatar
Fiona Glaser committed
629
630
        int satds[3];

631
        memcpy( pix-FDEC_STRIDE, src-i_stride, 17 * sizeof(pixel) );
Fiona Glaser's avatar
Fiona Glaser committed
632
633
634
635
        for( int i = 0; i < 8; i++ )
            pix[i*FDEC_STRIDE] = src[i*i_stride];
        pix++;

Fiona Glaser's avatar
Fiona Glaser committed
636
        h->pixf.intra_mbcmp_x3_8x8c( h->mb.pic.p_fenc[0], pix, satds );
Fiona Glaser's avatar
Fiona Glaser committed
637
        int i_icost = X264_MIN3( satds[0], satds[1], satds[2] );
Loren Merritt's avatar
Loren Merritt committed
638

Fiona Glaser's avatar
Fiona Glaser committed
639
640
641
642
643
644
645
        if( h->param.analyse.i_subpel_refine > 1 )
        {
            h->predict_8x8c[I_PRED_CHROMA_P]( pix );
            int satd = h->pixf.mbcmp[PIXEL_8x8]( pix, FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE );
            i_icost = X264_MIN( i_icost, satd );
            h->predict_8x8_filter( pix, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
            for( int i = 3; i < 9; i++ )
646
            {
Fiona Glaser's avatar
Fiona Glaser committed
647
648
                h->predict_8x8[i]( pix, edge );
                satd = h->pixf.mbcmp[PIXEL_8x8]( pix, FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE );
Loren Merritt's avatar
Loren Merritt committed
649
                i_icost = X264_MIN( i_icost, satd );
650
            }
Fiona Glaser's avatar
Fiona Glaser committed
651
        }
Loren Merritt's avatar
Loren Merritt committed
652

Fiona Glaser's avatar
Fiona Glaser committed
653
654
        i_icost += intra_penalty;
        fenc->i_intra_cost[i_mb_xy] = i_icost;
655
656
657
658
        int i_icost_aq = i_icost;
        if( h->param.rc.i_aq_mode )
            i_icost_aq = (i_icost_aq * fenc->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
        fenc->i_row_satds[0][0][h->mb.i_mb_y] += i_icost_aq;
Fiona Glaser's avatar
Fiona Glaser committed
659
660
661
662
        if( b_frame_score_mb )
        {
            fenc->i_cost_est[0][0] += i_icost;
            fenc->i_cost_est_aq[0][0] += i_icost_aq;
663
        }
Fiona Glaser's avatar
Fiona Glaser committed
664
665
666
667
668
669
670
671
672
    }

    /* forbid intra-mbs in B-frames, because it's rare and not worth checking */
    /* FIXME: Should we still forbid them now that we cache intra scores? */
    if( !b_bidir )
    {
        int i_icost = fenc->i_intra_cost[i_mb_xy];
        int b_intra = i_icost < i_bcost;
        if( b_intra )
673
        {
Fiona Glaser's avatar
Fiona Glaser committed
674
            i_bcost = i_icost;
675
676
            list_used = 0;
        }
Fiona Glaser's avatar
Fiona Glaser committed
677
678
679
680
681
682
683
684
685
        if( b_frame_score_mb )
            fenc->i_intra_mbs[b-p0] += b_intra;
    }

    /* In an I-frame, we've already added the results above in the intra section. */
    if( p0 != p1 )
    {
        int i_bcost_aq = i_bcost;
        if( h->param.rc.i_aq_mode )
686
            i_bcost_aq = (i_bcost_aq * fenc->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
Fiona Glaser's avatar
Fiona Glaser committed
687
688
        fenc->i_row_satds[b-p0][p1-b][h->mb.i_mb_y] += i_bcost_aq;
        if( b_frame_score_mb )
Loren Merritt's avatar
Loren Merritt committed
689
        {
Fiona Glaser's avatar
Fiona Glaser committed
690
            /* Don't use AQ-weighted costs for slicetype decision, only for ratecontrol. */
691
692
            fenc->i_cost_est[b-p0][p1-b] += i_bcost;
            fenc->i_cost_est_aq[b-p0][p1-b] += i_bcost_aq;
693
694
695
        }
    }

696
    fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = X264_MIN( i_bcost, LOWRES_COST_MASK ) + (list_used << LOWRES_COST_SHIFT);
697
698
699
}
#undef TRY_BIDIR

700
/* Number of macroblocks counted towards a frame's score.  Expects a pointer
 * named `h` with mb.i_mb_width / mb.i_mb_height in scope.  Frames that are at
 * most 2 macroblocks wide or high count every macroblock; larger frames leave
 * out the one-macroblock border on each side. */
#define NUM_MBS\
   (h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 ?\
    h->mb.i_mb_width * h->mb.i_mb_height :\
   (h->mb.i_mb_width - 2) * (h->mb.i_mb_height - 2))
704

Loic Le Loarer's avatar
Loic Le Loarer committed
705
static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
Dylan Yudaken's avatar
Dylan Yudaken committed
706
707
                                      x264_frame_t **frames, int p0, int p1, int b,
                                      int b_intra_penalty )
708
709
{
    int i_score = 0;
710
    int do_search[2];
Anton Mitrofanov's avatar
Anton Mitrofanov committed
711
    const x264_weight_t *w = x264_weight_none;
712
713
714
    /* Check whether we already evaluated this frame
     * If we have tried this frame as P, then we have also tried
     * the preceding frames as B. (is this still true?) */
715
    /* Also check that we already calculated the row SATDs for the current frame. */
Fiona Glaser's avatar
Fiona Glaser committed
716
    if( frames[b]->i_cost_est[b-p0][p1-b] >= 0 && (!h->param.rc.i_vbv_buffer_size || frames[b]->i_row_satds[b-p0][p1-b][0] != -1) )
Loren Merritt's avatar
Loren Merritt committed
717
718
719
720
721
        i_score = frames[b]->i_cost_est[b-p0][p1-b];
    else
    {
        int dist_scale_factor = 128;
        int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
Fiona Glaser's avatar
Fiona Glaser committed
722
        int *row_satd_intra = frames[b]->i_row_satds[0][0];
Loren Merritt's avatar
Loren Merritt committed
723

724
725
726
        /* For each list, check to see whether we have lowres motion-searched this reference frame before. */
        do_search[0] = b != p0 && frames[b]->lowres_mvs[0][b-p0-1][0][0] == 0x7FFF;
        do_search[1] = b != p1 && frames[b]->lowres_mvs[1][p1-b-1][0][0] == 0x7FFF;
Dylan Yudaken's avatar
Dylan Yudaken committed
727
728
        if( do_search[0] )
        {
729
            if( h->param.analyse.i_weighted_pred && b == p1 )
Dylan Yudaken's avatar
Dylan Yudaken committed
730
            {
Fiona Glaser's avatar
Fiona Glaser committed
731
                x264_emms();
Dylan Yudaken's avatar
Dylan Yudaken committed
732
                x264_weights_analyse( h, frames[b], frames[p0], 1 );
Dylan Yudaken's avatar
Dylan Yudaken committed
733
734
735
736
                w = frames[b]->weight[0];
            }
            frames[b]->lowres_mvs[0][b-p0-1][0][0] = 0;
        }
737
        if( do_search[1] ) frames[b]->lowres_mvs[1][p1-b-1][0][0] = 0;
Loren Merritt's avatar
Loren Merritt committed
738
739
740

        if( b == p1 )
            frames[b]->i_intra_mbs[b-p0] = 0;
Fiona Glaser's avatar
Fiona Glaser committed
741
742
        if( !frames[b]->b_intra_calculated )
        {
Loren Merritt's avatar
Loren Merritt committed
743
            frames[b]->i_cost_est[0][0] = 0;
744
            frames[b]->i_cost_est_aq[0][0] = 0;
Loren Merritt's avatar
Loren Merritt committed
745
746
747
748
        }
        if( p1 != p0 )
            dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);

Fiona Glaser's avatar
Fiona Glaser committed
749
750
751
        frames[b]->i_cost_est[b-p0][p1-b] = 0;
        frames[b]->i_cost_est_aq[b-p0][p1-b] = 0;

752
753
754
        /* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
         * This considerably improves MV prediction overall. */

Fiona Glaser's avatar
Fiona Glaser committed
755
        /* The edge mbs seem to reduce the predictive quality of the
Loren Merritt's avatar
Loren Merritt committed
756
         * whole frame's score, but are needed for a spatial distribution. */
Fiona Glaser's avatar
Fiona Glaser committed
757
        if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
758
            h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 )
Loren Merritt's avatar
Loren Merritt committed
759
        {
760
            for( h->mb.i_mb_y = h->mb.i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
761
            {
Fiona Glaser's avatar
Fiona Glaser committed
762
763
764
                row_satd[h->mb.i_mb_y] = 0;
                if( !frames[b]->b_intra_calculated )
                    row_satd_intra[h->mb.i_mb_y] = 0;
765
                for( h->mb.i_mb_x = h->mb.i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
Fiona Glaser's avatar
Fiona Glaser committed
766
                    x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
767
768
            }
        }
769
        else
Loren Merritt's avatar
Loren Merritt committed
770
        {
771
772
            for( h->mb.i_mb_y = h->mb.i_mb_height - 2; h->mb.i_mb_y >= 1; h->mb.i_mb_y-- )
                for( h->mb.i_mb_x = h->mb.i_mb_width - 2; h->mb.i_mb_x >= 1; h->mb.i_mb_x-- )
Fiona Glaser's avatar
Fiona Glaser committed
773
                    x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
774
775
        }

Fiona Glaser's avatar
Fiona Glaser committed
776
        i_score = frames[b]->i_cost_est[b-p0][p1-b];
Loren Merritt's avatar
Loren Merritt committed
777
        if( b != p1 )
778
            i_score = (uint64_t)i_score * 100 / (120 + h->param.i_bframe_bias);
779
780
        else
            frames[b]->b_intra_calculated = 1;
Loren Merritt's avatar
Loren Merritt committed
781
782

        frames[b]->i_cost_est[b-p0][p1-b] = i_score;
Loren Merritt's avatar
Loren Merritt committed
783
        x264_emms();
784
    }
Loren Merritt's avatar
Loren Merritt committed
785
786

    if( b_intra_penalty )
787
    {
Loren Merritt's avatar
Loren Merritt committed
788
        // arbitrary penalty for I-blocks after B-frames
789
        int nmb = NUM_MBS;
Loren Merritt's avatar
Loren Merritt committed
790
        i_score += i_score * frames[b]->i_intra_mbs[b-p0] / (nmb * 8);
791
    }
Loren Merritt's avatar
Loren Merritt committed
792
793
    return i_score;
}
794

Fiona Glaser's avatar
Fiona Glaser committed
795
796
/* If MB-tree changes the quantizers, we need to recalculate the frame cost without
 * re-running lookahead. */
Fiona Glaser's avatar
Fiona Glaser committed
797
static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
Fiona Glaser's avatar
Fiona Glaser committed
798
799
800
{
    int i_score = 0;
    int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
801
    float *qp_offset = IS_X264_TYPE_B(frames[b]->i_type) ? frames[b]->f_qp_offset_aq : frames[b]->f_qp_offset;
Fiona Glaser's avatar
Fiona Glaser committed
802
    x264_emms();
803
    for( h->mb.i_mb_y = h->mb.i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
Fiona Glaser's avatar
Fiona Glaser committed
804
805
    {
        row_satd[ h->mb.i_mb_y ] = 0;
806
        for( h->mb.i_mb_x = h->mb.i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
Fiona Glaser's avatar
Fiona Glaser committed
807
808
        {
            int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
809
            int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] & LOWRES_COST_MASK;
810
            float qp_adj = qp_offset[i_mb_xy];
Fiona Glaser's avatar
Fiona Glaser committed
811
            i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj) + 128) >> 8;
Fiona Glaser's avatar
Fiona Glaser committed
812
            row_satd[ h->mb.i_mb_y ] += i_mb_cost;
813
814
815
            if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->mb.i_mb_height - 1 &&
                 h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->mb.i_mb_width - 1) ||
                 h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 )
Fiona Glaser's avatar
Fiona Glaser committed
816
817
818
819
820
821
822
823
            {
                i_score += i_mb_cost;
            }
        }
    }
    return i_score;
}

824
/* Turn the accumulated MB-tree propagate costs of `frame` into per-macroblock
 * QP offsets.
 *
 * For every macroblock whose AQ-scaled intra cost is non-zero, f_qp_offset is
 * set to f_qp_offset_aq minus strength * log2((intra + propagate) / intra),
 * plus a weight delta taken from f_weighted_cost_delta[ref0_distance-1] when
 * that entry is positive.  Macroblocks with zero intra cost are left as is. */
static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
{
    /* 8-bit fixed-point ratio of the average duration to this frame's duration. */
    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 );

    float weightdelta = 0.0;
    if( ref0_distance )
    {
        if( frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
            weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
    }

    /* Allow the strength to be adjusted via qcompress, since the two
     * concepts are very similar. */
    float strength = 5.0f * (1.0f - h->param.rc.f_qcompress);

    for( int i = 0; i < h->mb.i_mb_count; i++ )
    {
        int intra_cost = (frame->i_intra_cost[i] * frame->i_inv_qscale_factor[i] + 128) >> 8;
        if( !intra_cost )
            continue;

        int propagate_cost = (frame->i_propagate_cost[i] * fps_factor + 128) >> 8;
        float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost) + weightdelta;
        frame->f_qp_offset[i] = frame->f_qp_offset_aq[i] - strength * log2_ratio;
    }
}

846
static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, float average_duration, int p0, int p1, int b, int referenced )
Fiona Glaser's avatar
Fiona Glaser committed
847
{
848
    uint16_t *ref_costs[2] = {frames[p0]->i_propagate_cost,frames[p1]->i_propagate_cost};
Loren Merritt's avatar
Loren Merritt committed
849
    int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
Fiona Glaser's avatar
Fiona Glaser committed
850
    int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
Loren Merritt's avatar
Loren Merritt committed
851
    int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
852
    int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
853
    int *buf = h->scratch_buffer;
854
855
    uint16_t *propagate_cost = frames[b]->i_propagate_cost;

856
857
858
    x264_emms();
    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration);

859
860
    /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
    if( !referenced )
861
        memset( frames[b]->i_propagate_cost, 0, h->mb.i_mb_width * sizeof(uint16_t) );
Fiona Glaser's avatar
Fiona Glaser committed
862

863
    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->mb.i_mb_height; h->mb.i_mb_y++ )
Fiona Glaser's avatar
Fiona Glaser committed
864
    {
Loren Merritt's avatar
Loren Merritt committed
865
        int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
866
        h->mc.mbtree_propagate_cost( buf, propagate_cost,
867
            frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
868
            frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
869
        if( referenced )
870
871
            propagate_cost += h->mb.i_mb_width;
        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
Fiona Glaser's avatar
Fiona Glaser committed
872
        {
873
            int propagate_amount = buf[h->mb.i_mb_x];
Fiona Glaser's avatar
Fiona Glaser committed
874
            /* Don't propagate for an intra block. */
875
            if( propagate_amount > 0 )
Fiona Glaser's avatar
Fiona Glaser committed
876
            {
877
                /* Access width-2 bitfield. */
878
                int lists_used = frames[b]->lowres_costs[b-p0][p1-b][mb_index] >> LOWRES_COST_SHIFT;
Fiona Glaser's avatar
Fiona Glaser committed
879
                /* Follow the MVs to the previous frame(s). */
880
                for( int list = 0; list < 2; list++ )
Fiona Glaser's avatar
Fiona Glaser committed
881
882
                    if( (lists_used >> list)&1 )
                    {
883
884
885
886
887
888
889
890
891
892
893
894
895
#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
                        int listamount = propagate_amount;
                        /* Apply bipred weighting. */
                        if( lists_used == 3 )
                            listamount = (listamount * bipred_weights[list] + 32) >> 6;

                        /* Early termination for simple case of mv0. */
                        if( !M32( mvs[list][mb_index] ) )
                        {
                            CLIP_ADD( ref_costs[list][mb_index], listamount );
                            continue;
                        }

Loren Merritt's avatar
Loren Merritt committed
896
897
                        int x = mvs[list][mb_index][0];
                        int y = mvs[list][mb_index][1];
Fiona Glaser's avatar
Fiona Glaser committed
898
                        int mbx = (x>>5)+h->mb.i_mb_x;
899
                        int mby = (y>>5)+h->mb.i_mb_y;
900
                        int idx0 = mbx + mby * h->mb.i_mb_stride;
Fiona Glaser's avatar
Fiona Glaser committed
901
902
903
                        int idx1 = idx0 + 1;
                        int idx2 = idx0 + h->mb.i_mb_stride;
                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
Loren Merritt's avatar
Loren Merritt committed
904
905
906
907
908
909
                        x &= 31;
                        y &= 31;
                        int idx0weight = (32-y)*(32-x);
                        int idx1weight = (32-y)*x;
                        int idx2weight = y*(32-x);
                        int idx3weight = y*x;
Fiona Glaser's avatar
Fiona Glaser committed
910
911
912

                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
                         * be counted. */
913
                        if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 )
Fiona Glaser's avatar
Fiona Glaser committed
914
                        {
Fiona Glaser's avatar