Commit 68c13530 authored by Loren Merritt

Preliminary adaptive B-frame decision (not yet tuned).

Fix flushing of delayed frames when the encode finishes.


git-svn-id: svn://svn.videolan.org/x264/trunk@137 df754926-b1dd-0310-bc7b-ec298dee348c
parent e2efb4b7
......@@ -64,6 +64,8 @@ void x264_param_default( x264_param_t *param )
param->i_keyint_min = 100;
param->i_bframe = 0;
param->i_scenecut_threshold = 40;
param->b_bframe_adaptive = 1;
param->i_bframe_bias = 0;
param->b_deblocking_filter = 1;
param->i_deblocking_filter_alphac0 = 0;
......
......@@ -52,6 +52,8 @@
/* Function-like min/max/abs helpers.
 * Each argument may be expanded (and therefore evaluated) more than once,
 * so do not pass expressions with side effects such as i++ or f(). */
#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) )
#define X264_MAX(a,b) ( (a)>(b) ? (a) : (b) )
#define X264_ABS(a) ( (a)< 0 ? -(a) : (a) )
/* Minimum of three / four values, built by nesting X264_MIN. */
#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c)))
#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d)))
/****************************************************************************
* General functions
......@@ -80,6 +82,23 @@ static inline float x264_clip3f( float v, float f_min, float f_max )
return ( (v < f_min) ? f_min : (v > f_max) ? f_max : v );
}
/* Return the median (middle value) of three integers.
 * The smallest and largest of the three are tracked and subtracted from
 * the sum, which leaves the remaining middle value. */
static inline int x264_median( int a, int b, int c )
{
    /* Order a and b first: one of them is the running low, the other the running high. */
    int lo = ( b < a ) ? b : a;
    int hi = ( b < a ) ? a : b;

    /* Fold c into the running extremes. */
    if( c < lo )
        lo = c;
    else if( c > hi )
        hi = c;

    return a + b + c - lo - hi;
}
/****************************************************************************
*
****************************************************************************/
......@@ -215,6 +234,8 @@ struct x264_t
x264_frame_t *next[X264_BFRAME_MAX+1];
/* Unused frames */
x264_frame_t *unused[X264_BFRAME_MAX+1];
/* For adaptive B decision */
x264_frame_t *last_nonb;
/* frames used for reference +1 for decoding */
x264_frame_t *reference[16+1];
......@@ -222,6 +243,8 @@ struct x264_t
int i_last_idr; /* Frame number of the last IDR */
int i_input; /* Number of input frames already accepted */
int i_delay; /* Number of frames buffered for B reordering */
} frames;
/* current frame being encoded */
......@@ -266,6 +289,9 @@ struct x264_t
int i_mb_xy;
int i_b8_xy;
int i_b4_xy;
/* Search parameters */
int i_subpel_refine;
/* Allowed qpel MV range to stay within the picture + emulated edge pixels */
int mv_min[2];
......
......@@ -73,9 +73,18 @@ x264_frame_t *x264_frame_new( x264_t *h )
( frame->i_lines[0] + 64 ) );
frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
frame->i_stride[0] * 32 + 32;
frame->i_stride[0] * 32 + 32;
}
frame->i_stride_lowres = frame->i_stride[0]/2;
frame->i_lines_lowres = frame->i_lines[0]/2;
for( i = 0; i < 4; i++ )
{
frame->buffer[7+i] = x264_malloc( frame->i_stride_lowres *
( frame->i_lines[0]/2 + 64 ) );
frame->lowres[i] = ((uint8_t*)frame->buffer[7+i]) +
frame->i_stride_lowres * 32 + 32;
}
frame->i_poc = -1;
frame->i_type = X264_TYPE_AUTO;
......@@ -106,7 +115,7 @@ void x264_frame_delete( x264_frame_t *frame )
{
x264_free( frame->buffer[i] );
}
for( i = 4; i < 7; i++ ) /* filtered planes */
for( i = 4; i < 11; i++ ) /* filtered planes */
{
x264_free( frame->buffer[i] );
}
......@@ -235,6 +244,43 @@ void x264_frame_expand_border_filtered( x264_frame_t *frame )
}
}
/* Replicate the edge pixels of each of the four lowres planes into the
 * 32-pixel border that surrounds the picture area.
 *
 * The visible width is taken as i_stride_lowres minus 2*32, i.e. the stride
 * is assumed to contain 32 pixels of padding on each side.
 *
 * NOTE(review): the header hunk in this change declares
 * x264_frame_expand_border_lowres(); confirm which name is intended. */
void x264_frame_expand_lowres( x264_frame_t *frame )
{
    const int pad    = 32;
    const int stride = frame->i_stride_lowres;
    const int lines  = frame->i_lines_lowres;
    const int width  = stride - 2 * pad;
    int plane, row;

    for( plane = 0; plane < 4; plane++ )
    {
        uint8_t *pix    = frame->lowres[plane];
        uint8_t *top    = pix;                          /* first picture row */
        uint8_t *bottom = pix + (lines - 1) * stride;   /* last picture row  */

        /* Rows above and below the picture, including the four corners. */
        for( row = 1; row <= pad; row++ )
        {
            uint8_t *above = top    - row * stride;
            uint8_t *below = bottom + row * stride;

            /* upper band, then its left and right corners */
            memcpy( above, top, width );
            memset( above - pad, top[0], pad );
            memset( above + width, top[width - 1], pad );

            /* lower band, then its left and right corners */
            memcpy( below, bottom, width );
            memset( below - pad, bottom[0], pad );
            memset( below + width, bottom[width - 1], pad );
        }

        /* Left and right bands beside every picture row. */
        for( row = 0; row < lines; row++ )
        {
            uint8_t *line = pix + row * stride;

            memset( line - pad, line[0], pad );
            memset( line + width, line[width - 1], pad );
        }
    }
}
/* FIXME these tables are duplicated with the ones in macroblock.c */
static const uint8_t block_idx_xy[4][4] =
......
......@@ -37,12 +37,15 @@ typedef struct
int i_plane;
int i_stride[4];
int i_lines[4];
int i_stride_lowres;
int i_lines_lowres;
uint8_t *plane[4];
uint8_t *filtered[4]; /* plane[0], H, V, HV */
uint8_t *lowres[4]; /* half-size copy of input frame: Orig, H, V, HV */
/* for unrestricted mv we allocate more data than needed
* allocated data are stored in buffer */
void *buffer[7];
void *buffer[11];
/* motion data */
int16_t (*mv[2])[2];
......@@ -50,6 +53,12 @@ typedef struct
int i_ref[2];
int ref_poc[2][16];
/* for adaptive B-frame decision.
* contains the SATD cost of the lowres frame encoded in various modes
* FIXME: how big an array do we need? */
int i_cost_est[16][16];
int i_intra_mbs[16];
} x264_frame_t;
x264_frame_t *x264_frame_new( x264_t *h );
......@@ -58,11 +67,12 @@ void x264_frame_delete( x264_frame_t *frame );
void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src );
void x264_frame_expand_border( x264_frame_t *frame );
void x264_frame_expand_border_filtered( x264_frame_t *frame );
void x264_frame_expand_border_lowres( x264_frame_t *frame );
void x264_frame_deblocking_filter( x264_t *h, int i_slice_type );
void x264_frame_filter( int cpu, x264_frame_t *frame );
void x264_frame_init_lowres( int cpu, x264_frame_t *frame );
#endif
......@@ -166,22 +166,6 @@ void x264_mb_dequant_4x4( int16_t dct[4][4], int i_qscale )
}
}
/* Return the median (middle value) of three integers.
 * The smallest and largest of the three are tracked and subtracted from
 * the sum, which leaves the remaining middle value. */
static inline int x264_median( int a, int b, int c )
{
    /* Order a and b first: one of them is the running low, the other the running high. */
    int lo = ( b < a ) ? b : a;
    int hi = ( b < a ) ? a : b;

    /* Fold c into the running extremes. */
    if( c < lo )
        lo = c;
    else if( c > hi )
        hi = c;

    return a + b + c - lo - hi;
}
void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] )
{
const int i8 = x264_scan8[idx];
......
......@@ -444,3 +444,40 @@ void x264_frame_filter( int cpu, x264_frame_t *frame )
}
}
}
/* Build the four half-resolution planes of a frame from its luma plane and
 * reset the cached cost estimates.
 *
 * lowres[0] averages each 2x2 block of luma; lowres[1], lowres[2] and
 * lowres[3] use the same 2x2 average shifted by one full-resolution pixel
 * horizontally, vertically, and in both directions respectively.
 * The last column and last row are handled separately so that no source
 * pixel beyond the visible area is read.
 *
 * cpu:   not used by this implementation.
 * frame: frame whose plane[0] is read and whose lowres[] / i_cost_est[][]
 *        are written. */
void x264_frame_init_lowres( int cpu, x264_frame_t *frame )
{
    // FIXME: tapfilter?
    const int src_stride = frame->i_stride[0];
    const int dst_stride = frame->i_stride_lowres;
    const int dst_width  = dst_stride - 64;   /* stride minus 32 pixels of padding per side */
    int row, col, plane;

    (void)cpu;

    for( row = 0; row < frame->i_lines_lowres - 1; row++ )
    {
        /* Three consecutive source rows feed one destination row. */
        const uint8_t *r0 = frame->plane[0] + 2 * row * src_stride;
        const uint8_t *r1 = r0 + src_stride;
        const uint8_t *r2 = r1 + src_stride;
        uint8_t *out_o = frame->lowres[0] + row * dst_stride;
        uint8_t *out_h = frame->lowres[1] + row * dst_stride;
        uint8_t *out_v = frame->lowres[2] + row * dst_stride;
        uint8_t *out_c = frame->lowres[3] + row * dst_stride;
        const uint8_t *p0, *p1, *p2;

        for( col = 0; col < dst_width - 1; col++ )
        {
            p0 = r0 + 2 * col;
            p1 = r1 + 2 * col;
            p2 = r2 + 2 * col;
            out_o[col] = ( p0[0] + p0[1] + p1[0] + p1[1] + 2 ) >> 2;
            out_h[col] = ( p0[1] + p0[2] + p1[1] + p1[2] + 2 ) >> 2;
            out_v[col] = ( p1[0] + p1[1] + p2[0] + p2[1] + 2 ) >> 2;
            out_c[col] = ( p1[1] + p1[2] + p2[1] + p2[2] + 2 ) >> 2;
        }

        /* Last column: the horizontally shifted planes have no pixel to the
         * right, so they average only the two vertically adjacent pixels. */
        p0 = r0 + 2 * col;
        p1 = r1 + 2 * col;
        p2 = r2 + 2 * col;
        out_o[col] = ( p0[0] + p0[1] + p1[0] + p1[1] + 2 ) >> 2;
        out_v[col] = ( p1[0] + p1[1] + p2[0] + p2[1] + 2 ) >> 2;
        out_h[col] = ( p0[1] + p1[1] + 1 ) >> 1;
        out_c[col] = ( p1[1] + p2[1] + 1 ) >> 1;
    }

    /* Last row: duplicate the row above it in every plane. */
    for( plane = 0; plane < 4; plane++ )
    {
        uint8_t *base = frame->lowres[plane];
        memcpy( &base[row * dst_stride], &base[(row - 1) * dst_stride], dst_width );
    }

    /* Mark every cached cost estimate as "not computed yet". */
    for( row = 0; row < 16; row++ )
        for( col = 0; col < 16; col++ )
            frame->i_cost_est[row][col] = -1;
}
......@@ -176,6 +176,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
int i;
int i_fmv_range = h->param.analyse.i_mv_range - 16;
h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
/* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range )
h->mb.mv_min_fpel[0] = CLIP_FMV( -16*h->mb.i_mb_x - 8 );
......@@ -1795,3 +1797,5 @@ void x264_macroblock_analyse( x264_t *h )
}
}
#include "voptype_decision.c"
......@@ -25,5 +25,6 @@
#define _ANALYSE_H 1
void x264_macroblock_analyse( x264_t *h );
void x264_voptype_analyse( x264_t *h );
#endif
......@@ -341,7 +341,10 @@ x264_t *x264_encoder_open ( x264_param_t *param )
if( h->param.i_keyint_max <= 0 )
h->param.i_keyint_max = 1;
h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
h->frames.i_delay = h->param.i_bframe;
h->param.i_deblocking_filter_alphac0 = x264_clip3( h->param.i_deblocking_filter_alphac0, -6, 6 );
h->param.i_deblocking_filter_beta = x264_clip3( h->param.i_deblocking_filter_beta, -6, 6 );
......@@ -424,7 +427,7 @@ x264_t *x264_encoder_open ( x264_param_t *param )
h->frames.next[i] = NULL;
h->frames.unused[i] = NULL;
}
for( i = 0; i < 1 + h->param.i_bframe; i++ )
for( i = 0; i < 1 + h->frames.i_delay; i++ )
{
h->frames.unused[i] = x264_frame_new( h );
}
......@@ -678,6 +681,18 @@ static inline void x264_reference_update( x264_t *h )
/* expand border of filtered images */
x264_frame_expand_border_filtered( h->fdec );
/* move lowres copy of the image to the ref frame */
for( i = 0; i < 4; i++)
{
uint8_t *tmp = h->fdec->lowres[i];
h->fdec->lowres[i] = h->fenc->lowres[i];
h->fenc->lowres[i] = tmp;
}
/* adaptive B decision needs a pointer, since it can't use the ref lists */
if( h->sh.i_type != SLICE_TYPE_B )
h->frames.last_nonb = h->fdec;
/* move frame in the buffer */
h->fdec = h->frames.reference[h->param.i_frame_reference+1];
for( i = h->param.i_frame_reference+1; i > 0; i-- )
......@@ -911,7 +926,8 @@ static inline void x264_slice_write( x264_t *h, int i_nal_type, int i_nal_ref_id
****************************************************************************/
int x264_encoder_encode( x264_t *h,
x264_nal_t **pp_nal, int *pi_nal,
x264_picture_t *pic )
x264_picture_t *pic_in,
x264_picture_t *pic_out )
{
x264_frame_t *frame_psnr = h->fdec; /* just to keep the current decoded frame for psnr calculation */
int i_nal_type;
......@@ -931,22 +947,24 @@ int x264_encoder_encode( x264_t *h,
/* ------------------- Setup new frame from picture -------------------- */
TIMER_START( i_mtime_encode_frame );
if( pic != NULL )
if( pic_in != NULL )
{
/* 1: Copy the picture to a frame and move it to a buffer */
x264_frame_t *fenc = x264_frame_get( h->frames.unused );
x264_frame_copy_picture( h, fenc, pic );
x264_frame_copy_picture( h, fenc, pic_in );
fenc->i_frame = h->frames.i_input++;
x264_frame_put( h->frames.next, fenc );
if( h->frames.i_input <= h->param.i_bframe )
x264_frame_init_lowres( h->param.cpu, fenc );
if( h->frames.i_input <= h->frames.i_delay )
{
/* Nothing yet to encode */
/* waiting for filling bframe buffer */
pic->i_type = X264_TYPE_AUTO;
pic_out->i_type = X264_TYPE_AUTO;
return 0;
}
}
......@@ -967,6 +985,8 @@ int x264_encoder_encode( x264_t *h,
h->frames.next[i]->i_type =
x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame );
}
else if( h->param.i_bframe && h->param.b_bframe_adaptive )
x264_voptype_analyse( h );
for( bframes = 0;; bframes++ )
{
......@@ -1027,7 +1047,7 @@ int x264_encoder_encode( x264_t *h,
{
/* Nothing yet to encode (ex: waiting for I/P with B frames) */
/* waiting for filling bframe buffer */
pic->i_type = X264_TYPE_AUTO;
pic_out->i_type = X264_TYPE_AUTO;
return 0;
}
......@@ -1069,7 +1089,6 @@ do_encode:
i_slice_type = SLICE_TYPE_B;
}
pic->i_type =
h->fdec->i_type = h->fenc->i_type;
h->fdec->i_poc = h->fenc->i_poc;
h->fdec->i_frame = h->fenc->i_frame;
......@@ -1080,10 +1099,8 @@ do_encode:
/* Init the rate control */
x264_ratecontrol_start( h, i_slice_type );
i_global_qp = x264_ratecontrol_qp( h );
if( h->fenc->i_qpplus1 > 0 )
{
i_global_qp = x264_clip3( h->fenc->i_qpplus1 - 1, 0, 51 );
}
pic_out->i_qpplus1 =
h->fdec->i_qpplus1 = i_global_qp + 1;
/* build ref list 0/1 */
x264_reference_build_list( h, h->fdec->i_poc );
......@@ -1251,12 +1268,12 @@ do_encode:
/* Set output picture properties */
if( i_slice_type == SLICE_TYPE_I )
pic->i_type = i_nal_type == NAL_SLICE_IDR ? X264_TYPE_IDR : X264_TYPE_I;
pic_out->i_type = i_nal_type == NAL_SLICE_IDR ? X264_TYPE_IDR : X264_TYPE_I;
else if( i_slice_type == SLICE_TYPE_P )
pic->i_type = X264_TYPE_P;
pic_out->i_type = X264_TYPE_P;
else
pic->i_type = X264_TYPE_B;
pic->i_pts = h->fenc->i_pts;
pic_out->i_type = X264_TYPE_B;
pic_out->i_pts = h->fenc->i_pts;
/* ---------------------- Update encoder state ------------------------- */
/* update cabac */
......
......@@ -99,7 +99,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int
COST_MV( 0, 0 );
if( h->param.analyse.i_subpel_refine >= 2 )
if( h->mb.i_subpel_refine >= 2 )
{
/* hexagon search */
/* Don't need to test mv_range each time, we won't go outside picture+padding */
......@@ -157,7 +157,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int
+ m->cost_mv;
/* subpel refine */
if( h->param.analyse.i_subpel_refine >= 3 )
if( h->mb.i_subpel_refine >= 3 )
{
int hpel, qpel;
......@@ -172,8 +172,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int
*p_fullpel_thresh = m->cost;
}
hpel = subpel_iterations[h->param.analyse.i_subpel_refine][2];
qpel = subpel_iterations[h->param.analyse.i_subpel_refine][3];
hpel = subpel_iterations[h->mb.i_subpel_refine][2];
qpel = subpel_iterations[h->mb.i_subpel_refine][3];
refine_subpel( h, m, hpel, qpel );
}
}
......@@ -181,8 +181,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int
void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
{
int hpel = subpel_iterations[h->param.analyse.i_subpel_refine][0];
int qpel = subpel_iterations[h->param.analyse.i_subpel_refine][1];
int hpel = subpel_iterations[h->mb.i_subpel_refine][0];
int qpel = subpel_iterations[h->mb.i_subpel_refine][1];
// if( hpel || qpel )
refine_subpel( h, m, hpel, qpel );
}
......
/*****************************************************************************
* voptype_decision.c: h264 encoder library
*****************************************************************************
* Copyright (C) 2005 Loren Merritt
*
* Authors: Loren Merritt <lorenm@u.washington.edu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include "common/common.h"
#include "common/macroblock.h"
#include "common/cpu.h"
#include "macroblock.h"
#include "me.h"
/* Point a->p_cost_mv at the motion-vector cost table used for lowres analysis.
 *
 * The table is built once, on the first call, and kept in a function-local
 * static for the rest of the process. Entry [i] (and [-i]) holds
 * lambda * (1 + 2*log2(2*i+1)) for i in 0 .. 2*i_mv_range, so the pointer
 * stored in a->p_cost_mv can be indexed with negative offsets.
 *
 * On allocation failure a->p_cost_mv is set to NULL and nothing is cached,
 * so a later call can retry.
 *
 * NOTE(review): because the table is cached in a static, it reflects the
 * i_lambda and i_mv_range of the first caller only, and the lazy
 * initialisation is not synchronised between threads. */
static void x264_mb_analyse_load_costs_lowres( x264_t *h, x264_mb_analysis_t *a )
{
    static int16_t *p_cost_mv;
    if( !p_cost_mv )
    {
        const int i_range = 2 * h->param.analyse.i_mv_range;
        int16_t *p_table;
        int i;

        x264_cpu_restore( h->param.cpu );

        /* Allocate into a local first: offsetting and caching an unchecked
         * result would turn a failed allocation into a bogus non-NULL pointer. */
        p_table = x264_malloc( (2*i_range + 1) * sizeof(int16_t) );
        if( !p_table )
        {
            a->p_cost_mv = NULL;
            return;
        }

        /* Centre the pointer so that indices -i_range .. +i_range are valid. */
        p_table += i_range;
        for( i = 0; i <= i_range; i++ )
            p_table[-i] =
            p_table[i] = (int)( a->i_lambda * (1 + 2*log(2*i+1)/log(2)) );

        p_cost_mv = p_table;
    }
    a->p_cost_mv = p_cost_mv;
}
/* Estimate the SATD cost of coding one 8x8 lowres block of frames[b].
 *
 * The block is selected by h->mb.i_mb_x / h->mb.i_mb_y. frames[p0] is the
 * past reference and frames[p1] the future reference; when b < p1 the block
 * is evaluated as part of a B-frame (both references, plus bidirectional
 * averages), otherwise as part of a P-frame (frames[p0] only).
 * Intra prediction is always tried as well.
 *
 * Side effects: the best motion vectors found are stored in
 * frames[b]->mv[0..1][i_mb_xy]; frames[b]->i_intra_mbs[b-p0] is incremented
 * when intra wins in the P case.
 *
 * Requires p0 < p1 (p1-p0 is used as a divisor) and a block that is not on
 * the picture edge (neighbouring MVs and pixels are read unconditionally).
 *
 * Returns the lowest cost found, including the mode penalties applied here. */
int x264_voptype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
                          x264_frame_t **frames, int p0, int p1, int b )
{
    x264_frame_t *fref0 = frames[p0];
    x264_frame_t *fref1 = frames[p1];
    x264_frame_t *fenc = frames[b];
    const int b_bidir = (b < p1);
    const int i_mb_x = h->mb.i_mb_x;
    const int i_mb_y = h->mb.i_mb_y;
    const int i_mb_stride = h->sps->i_mb_width;
    const int i_mb_xy = i_mb_x + i_mb_y * i_mb_stride;
    const int i_stride = fenc->i_stride_lowres;
    const int i_pel_offset = 8 * ( i_mb_x + i_mb_y * i_stride );
    /* Temporal position of b between p0 and p1, as an 8-bit fixed-point fraction. */
    const int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
    /* pix1 is a 9x9 buffer: row 0 and column 0 hold the intra neighbours,
     * the 8x8 block itself starts at pix1[10]. */
    uint8_t pix1[9*9], pix2[8*8];
    x264_me_t m[2];
    /* Four MV candidates are filled in below (left, top, top-right, top-left). */
    int mvc[4][2], i_mvc;
    int i_bcost = INT_MAX;
    int i_cost_bak;
    int l, i;

#define SAVE_MVS( mv0, mv1 ) \
    { \
        fenc->mv[0][i_mb_xy][0] = mv0[0]; \
        fenc->mv[0][i_mb_xy][1] = mv0[1]; \
        fenc->mv[1][i_mb_xy][0] = mv1[0]; \
        fenc->mv[1][i_mb_xy][1] = mv1[1]; \
    }
#define TRY_BIDIR( mv0, mv1, penalty ) \
    { \
        int stride2 = 8; \
        uint8_t *src2; \
        int i_cost; \
        h->mc.mc_luma( m[0].p_fref, m[0].i_stride, pix1, 8, \
                       (mv0)[0], (mv0)[1], 8, 8 ); \
        src2 = h->mc.get_ref( m[1].p_fref, m[1].i_stride, pix2, &stride2, \
                              (mv1)[0], (mv1)[1], 8, 8 ); \
        h->pixf.avg[PIXEL_8x8]( pix1, 8, src2, stride2 ); \
        i_cost = penalty + h->pixf.satd[PIXEL_8x8]( \
                           m[0].p_fenc, m[0].i_stride, pix1, 8 ); \
        if( i_bcost > i_cost ) \
        { \
            i_bcost = i_cost; \
            SAVE_MVS( mv0, mv1 ); \
        } \
    }

    /* m[1] is only set up in the bidirectional case, but SAVE_MVS below reads
     * m[1].mv in both cases; start from zero so no indeterminate value is stored. */
    memset( m, 0, sizeof(m) );

    m[0].i_pixel = PIXEL_8x8;
    m[0].p_cost_mv = a->p_cost_mv;
    m[0].i_stride = i_stride;
    m[0].p_fenc = &fenc->lowres[0][ i_pel_offset ];
    LOAD_HPELS( m[0].p_fref, fref0->lowres, i_pel_offset );

    if( b_bidir )
    {
        /* Try two cheap bidirectional candidates first: MVs scaled from the
         * co-located MV of the future reference, and zero MVs. */
        int16_t *mvr = fref1->mv[0][i_mb_xy];
        int dmv[2][2];
        int mv0[2] = {0,0};

        m[1] = m[0];
        LOAD_HPELS( m[1].p_fref, fref1->lowres, i_pel_offset );

        dmv[0][0] = ( mvr[0] * dist_scale_factor + 128 ) >> 8;
        dmv[0][1] = ( mvr[1] * dist_scale_factor + 128 ) >> 8;
        dmv[1][0] = dmv[0][0] - mvr[0];
        dmv[1][1] = dmv[0][1] - mvr[1];
        TRY_BIDIR( dmv[0], dmv[1], 0 );
        TRY_BIDIR( mv0, mv0, 0 );
//      if( i_bcost < 60 ) // arbitrary threshold
//          return i_bcost;
    }

    i_cost_bak = i_bcost;
    for( l = 0; l < 1 + b_bidir; l++ )
    {
        /* NOTE(review): predictors are taken from list 0 for both l = 0 and
         * l = 1; confirm whether fenc->mv[l] was intended. */
        int16_t (*fenc_mv)[2] = &fenc->mv[0][i_mb_xy];
        mvc[0][0] = fenc_mv[-1][0];
        mvc[0][1] = fenc_mv[-1][1];
        mvc[1][0] = fenc_mv[-i_mb_stride][0];
        mvc[1][1] = fenc_mv[-i_mb_stride][1];
        mvc[2][0] = fenc_mv[-i_mb_stride+1][0];
        mvc[2][1] = fenc_mv[-i_mb_stride+1][1];
        mvc[3][0] = fenc_mv[-i_mb_stride-1][0];
        mvc[3][1] = fenc_mv[-i_mb_stride-1][1];
        m[l].mvp[0] = x264_median( mvc[0][0], mvc[1][0], mvc[2][0] );
        m[l].mvp[1] = x264_median( mvc[0][1], mvc[1][1], mvc[2][1] );
        i_mvc = 4;

        x264_me_search( h, &m[l], mvc, i_mvc );
        i_bcost = X264_MIN( i_bcost, m[l].cost + 3 );
    }

    if( b_bidir )
        TRY_BIDIR( m[0].mv, m[1].mv, 5 );

    if( i_bcost < i_cost_bak )
        SAVE_MVS( m[0].mv, m[1].mv );

    /* intra */
//  if( i_bcost > 100 ) // arbitrary threshold
    {
        uint8_t *src = &fenc->lowres[0][ i_pel_offset - i_stride - 1 ];
        int intra_penalty = 5 + 10 * b_bidir;
        i_cost_bak = i_bcost;

        /* Top neighbours: the row above the block, starting at the top-left pixel. */
        memcpy( pix1, src, 9 );
        /* Left neighbours: column -1 of block rows 0..7, so step down to row 0 first. */
        src += i_stride;
        for( i=1; i<9; i++, src += i_stride )
            pix1[i*9] = src[0];
        src = &fenc->lowres[0][ i_pel_offset ];

        for( i = I_PRED_CHROMA_DC; i <= I_PRED_CHROMA_P; i++ )
        {
            int i_cost;
            /* Predict into the block area at (1,1) so the neighbour row and
             * column stay intact for the next mode. */
            h->predict_8x8[i]( &pix1[10], 9 );
            i_cost = h->pixf.satd[PIXEL_8x8]( &pix1[10], 9, src, i_stride ) + intra_penalty;
            i_bcost = X264_MIN( i_bcost, i_cost );
        }
        if( i_bcost != i_cost_bak )
        {
            if( !b_bidir )
                fenc->i_intra_mbs[b-p0]++;
            if( p1 > p0+1 )
                i_bcost = i_bcost * 9 / 8; // arbitrary penalty for I-blocks in and after B-frames
        }
    }

    return i_bcost;
}
#undef TRY_BIDIR
#undef SAVE_MVS
/* Estimate the total lowres SATD cost of coding frames[b] with frames[p0]
 * as past reference and frames[p1] as future reference.
 *
 * b == p1 evaluates the frame as a P-frame, b < p1 as a B-frame. The result
 * is cached in frames[b]->i_cost_est[b-p0][p1-b] and returned directly on
 * later calls. B-frame scores are scaled by 100 / (120 + i_bframe_bias).
 *
 * Side effects: clears frames[p1]->mv[0..1] on every call, overwrites
 * h->mb.i_mb_x / i_mb_y while iterating, and resets
 * frames[b]->i_intra_mbs[b-p0] when b == p1.
 *
 * Returns the (possibly cached) frame score. */
int x264_voptype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
                             x264_frame_t **frames, int p0, int p1, int b )
{
    /* One mv entry per macroblock; size it from the element type instead of
     * assuming a pair of ints. */
    const size_t i_mv_bytes = (size_t)h->sps->i_mb_height * h->sps->i_mb_width
                              * sizeof( frames[p1]->mv[0][0] );
    int i_score = 0;

    /* Init MVs so that we don't have to check edge conditions when loading predictors. */
    /* FIXME: not needed every time */
    /* NOTE(review): this clears frames[p1], while predictors are read from
     * frames[b]; confirm which frame was meant when b != p1. */
    memset( frames[p1]->mv[0], 0, i_mv_bytes );
    memset( frames[p1]->mv[1], 0, i_mv_bytes );

    /* Check whether we already evaluated this frame
     * If we have tried this frame as P, then we have also tried
     * the preceding frames as B. (is this still true?) */
    if( frames[b]->i_cost_est[b-p0][p1-b] >= 0 )
        return frames[b]->i_cost_est[b-p0][p1-b];

    if( b == p1 )
        frames[b]->i_intra_mbs[b-p0] = 0;

    /* Skip the outermost ring of macroblocks, to simplify mv range and intra prediction. */
    for( h->mb.i_mb_y = 1; h->mb.i_mb_y < h->sps->i_mb_height - 1; h->mb.i_mb_y++ )
        for( h->mb.i_mb_x = 1; h->mb.i_mb_x < h->sps->i_mb_width - 1; h->mb.i_mb_x++ )
            i_score += x264_voptype_mb_cost( h, a, frames, p0, p1, b );

    /* Scale B-frame scores; widen first so the multiply cannot overflow int. */
    if( b != p1 )
        i_score = (int)( (int64_t)i_score * 100 / (120 + h->param.i_bframe_bias) );

    frames[b]->i_cost_est[b-p0][p1-b] = i_score;
//  fprintf( stderr, "frm %d %c(%d,%d): %6d I:%d \n", frames[b]->i_frame,
//           (b<p1?'B':'P'), b-p0, p1-b, i_score, frames[b]->i_intra_mbs[b-p0] );
    return i_score;
}
void x264_voptype_analyse( x264_t *h )
{
x264_mb_analysis_t a;
x264_frame_t *frames[X264_BFRAME_MAX+3] = { NULL, };
int num_frames;
int j;
if( !h->frames.last_nonb )
return;
frames[0] = h->frames.last_nonb;
for( j = 0; h->frames.next[j]; j++ )
frames[j+1] = h->frames.next[j];
num_frames = j;
if( num_frames == 0 )
return;
if( num_frames == 1 )
{
no_b_frames:
frames[1]->i_type = X264_TYPE_P;
return;
}
a.i_qp = 12; // arbitrary, but low because SATD scores are 1/4 normal
a.i_lambda = i_qp0_cost_table[ a.i_qp ];
x264_mb_analyse_load_costs_lowres( h, &a );
h->mb.i_subpel_refine = 4; // 3 should be enough, but not tweaking for speed now
h->mb.mv_min_fpel[0] =
h->mb.mv_min_fpel[1] = -16;
h->mb.mv_max_fpel[0] =
h->mb.mv_max_fpel[1] = 16;
h->mb.mv_min[0] =
h->mb.mv_min[1] = -4*32;
h->mb.mv_max[0] =
h->mb.mv_max[1] = 4*32;
#if 0
/* BFS over possible frame types for minimum total SATD cost.
* requires higher encoding delay to be effective. */
{
int p0, p1, b;
struct {
int64_t score;
char path[X264_BFRAME_MAX+1];
} paths[X264_BFRAME_MAX+1];
for( p1 = 1; frames[p1]; p1++ )
for( p0 = X264_MAX(0, p1 - h->param.i_bframe - 1); p0 < p1; p0++ )
for( b = p0+1; b <= p1; b++ )
x264_voptype_frame_cost( h, &a, frames, p0, p1, b );
p1--;
paths[0].score = 0;
for( j = 1; j <= p1; j++ )
{
int k, i;