Commit 15ecd54f authored by Loren Merritt

rate-distortion optimized MB types in I- and P-frames (--subme 6)


git-svn-id: svn://svn.videolan.org/x264/trunk@262 df754926-b1dd-0310-bc7b-ec298dee348c
parent 41c37d9e
...@@ -75,5 +75,9 @@ void x264_cabac_encode_bypass( x264_cabac_t *cb, int b ); ...@@ -75,5 +75,9 @@ void x264_cabac_encode_bypass( x264_cabac_t *cb, int b );
void x264_cabac_encode_terminal( x264_cabac_t *cb, int b ); void x264_cabac_encode_terminal( x264_cabac_t *cb, int b );
void x264_cabac_encode_flush( x264_cabac_t *cb ); void x264_cabac_encode_flush( x264_cabac_t *cb );
/* Position of the CABAC output: the value bs_pos() reports for the
 * underlying stream cb->s, advanced by cb->i_bits_outstanding
 * (presumably bits the arithmetic coder has not emitted yet -- confirm
 * against the encoder implementation). */
static inline int x264_cabac_pos( x264_cabac_t *cb )
{
    const int i_stream_pos = bs_pos( cb->s );

    return i_stream_pos + cb->i_bits_outstanding;
}
#endif #endif
...@@ -404,6 +404,7 @@ struct x264_t ...@@ -404,6 +404,7 @@ struct x264_t
} cache; } cache;
/* */ /* */
int i_qp; /* current qp */
int i_last_qp; /* last qp */ int i_last_qp; /* last qp */
int i_last_dqp; /* last delta qp */ int i_last_dqp; /* last delta qp */
int b_variable_qp; /* whether qp is allowed to vary per macroblock */ int b_variable_qp; /* whether qp is allowed to vary per macroblock */
......
...@@ -1485,7 +1485,7 @@ void x264_macroblock_cache_save( x264_t *h ) ...@@ -1485,7 +1485,7 @@ void x264_macroblock_cache_save( x264_t *h )
if( h->param.b_cabac ) if( h->param.b_cabac )
{ {
if( i_mb_type == I_4x4 || i_mb_type == I_16x16 ) if( i_mb_type == I_4x4 || i_mb_type == I_16x16 )
h->mb.chroma_pred_mode[i_mb_xy] = h->mb.i_chroma_pred_mode; h->mb.chroma_pred_mode[i_mb_xy] = x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ];
else else
h->mb.chroma_pred_mode[i_mb_xy] = I_PRED_CHROMA_DC; h->mb.chroma_pred_mode[i_mb_xy] = I_PRED_CHROMA_DC;
......
/***************************************************************************** /*****************************************************************************
* analyse.c: h264 encoder library * analyse.c: h264 encoder library
***************************************************************************** *****************************************************************************
* Copyright (C) 2003 Laurent Aimar * Copyright (C) 2003 x264 project
* $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $ * $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
* *
* Authors: Laurent Aimar <fenrir@via.ecp.fr> * Authors: Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -32,6 +33,8 @@ ...@@ -32,6 +33,8 @@
#include "macroblock.h" #include "macroblock.h"
#include "me.h" #include "me.h"
#include "ratecontrol.h" #include "ratecontrol.h"
#include "analyse.h"
#include "rdo.c"
typedef struct typedef struct
{ {
...@@ -69,13 +72,16 @@ typedef struct ...@@ -69,13 +72,16 @@ typedef struct
{ {
/* conduct the analysis using this lambda and QP */ /* conduct the analysis using this lambda and QP */
int i_lambda; int i_lambda;
int i_lambda2;
int i_qp; int i_qp;
int16_t *p_cost_mv; int16_t *p_cost_mv;
int b_mbrd;
/* I: Intra part */ /* I: Intra part */
/* Take some shortcuts in intra search if intra is deemed unlikely */ /* Take some shortcuts in intra search if intra is deemed unlikely */
int b_fast_intra; int b_fast_intra;
int i_best_satd;
/* Luma part */ /* Luma part */
int i_sad_i16x16; int i_sad_i16x16;
...@@ -111,6 +117,7 @@ typedef struct ...@@ -111,6 +117,7 @@ typedef struct
} x264_mb_analysis_t; } x264_mb_analysis_t;
/* lambda = pow(2,qp/6-2) */
static const int i_qp0_cost_table[52] = { static const int i_qp0_cost_table[52] = {
1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */ 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
1, 1, 1, 1, /* 8-11 */ 1, 1, 1, 1, /* 8-11 */
...@@ -121,6 +128,19 @@ static const int i_qp0_cost_table[52] = { ...@@ -121,6 +128,19 @@ static const int i_qp0_cost_table[52] = {
40,45,51,57,64,72,81,91 /* 44-51 */ 40,45,51,57,64,72,81,91 /* 44-51 */
}; };
/* Squared-lambda cost indexed by QP: 0.9 * lambda^2 with
 * lambda = pow(2, qp/6 - 2), rounded to the nearest integer and never
 * below 1.  Each row covers six QP steps, over which lambda^2 grows
 * about fourfold. */
static const int i_qp0_cost2_table[52] = {
       1,    1,    1,    1,    1,    1, /*  0- 5 */
       1,    1,    1,    1,    1,    1, /*  6-11 */
       1,    1,    1,    2,    2,    3, /* 12-17 */
       4,    5,    6,    7,    9,   11, /* 18-23 */
      14,   18,   23,   29,   36,   46, /* 24-29 */
      58,   73,   91,  115,  145,  183, /* 30-35 */
     230,  290,  366,  461,  581,  731, /* 36-41 */
     922, 1161, 1463, 1843, 2322, 2926, /* 42-47 */
    3686, 4645, 5852, 7373              /* 48-51 */
};
static const uint8_t block_idx_x[16] = { static const uint8_t block_idx_x[16] = {
0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
}; };
...@@ -142,6 +162,8 @@ static const int i_sub_mb_p_cost_table[4] = { ...@@ -142,6 +162,8 @@ static const int i_sub_mb_p_cost_table[4] = {
5, 3, 3, 1 5, 3, 3, 1
}; };
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
/* initialize an array of lambda*nbits for all possible mvs */ /* initialize an array of lambda*nbits for all possible mvs */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a ) static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{ {
...@@ -171,12 +193,13 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) ...@@ -171,12 +193,13 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
/* conduct the analysis using this lambda and QP */ /* conduct the analysis using this lambda and QP */
a->i_qp = i_qp; a->i_qp = i_qp;
a->i_lambda = i_qp0_cost_table[i_qp]; a->i_lambda = i_qp0_cost_table[i_qp];
a->i_lambda2 = i_qp0_cost2_table[i_qp];
a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 && h->sh.i_type != SLICE_TYPE_B;
h->mb.i_me_method = h->param.analyse.i_me_method; h->mb.i_me_method = h->param.analyse.i_me_method;
h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine; h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
&& h->mb.i_subpel_refine >= 5; && h->mb.i_subpel_refine >= 5;
a->b_fast_intra = 0;
h->mb.b_transform_8x8 = 0; h->mb.b_transform_8x8 = 0;
...@@ -186,6 +209,9 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) ...@@ -186,6 +209,9 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
a->i_sad_i4x4 = a->i_sad_i4x4 =
a->i_sad_i8x8chroma = COST_MAX; a->i_sad_i8x8chroma = COST_MAX;
a->b_fast_intra = 0;
a->i_best_satd = COST_MAX;
/* II: Inter part P/B frame */ /* II: Inter part P/B frame */
if( h->sh.i_type != SLICE_TYPE_I ) if( h->sh.i_type != SLICE_TYPE_I )
{ {
...@@ -245,7 +271,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) ...@@ -245,7 +271,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
/* Fast intra decision */ /* Fast intra decision */
if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 ) if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
{ {
if( IS_INTRA( h->mb.i_mb_type_left ) if( a->b_mbrd
|| IS_INTRA( h->mb.i_mb_type_left )
|| IS_INTRA( h->mb.i_mb_type_top ) || IS_INTRA( h->mb.i_mb_type_top )
|| IS_INTRA( h->mb.i_mb_type_topleft ) || IS_INTRA( h->mb.i_mb_type_topleft )
|| IS_INTRA( h->mb.i_mb_type_topright ) || IS_INTRA( h->mb.i_mb_type_topright )
...@@ -382,18 +409,73 @@ static void predict_4x4_mode_available( unsigned int i_neighbour, ...@@ -382,18 +409,73 @@ static void predict_4x4_mode_available( unsigned int i_neighbour,
} }
} }
/* Choose the 8x8 chroma intra prediction mode with the lowest cost.
 *
 * For every candidate mode reported by predict_8x8chroma_mode_available(),
 * both chroma planes (picture planes 1 and 2) are predicted and the cost is
 *   SATD(plane 1) + SATD(plane 2) + lambda * bs_size_ue( fixed mode ).
 *
 * Results: a->i_predict8x8chroma and a->i_sad_i8x8chroma receive the best
 * mode and its cost, and h->mb.i_chroma_pred_mode is set to the best mode.
 * The function returns immediately, changing nothing, when
 * a->i_sad_i8x8chroma is already below COST_MAX. */
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cost_inter ) static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
{
int i;
int i_max;
/* candidate modes; the loop below reads the first i_max entries */
int predict_mode[9];
uint8_t *p_dstc[2], *p_srcc[2];
int i_stride[2];
/* a cost below COST_MAX means a chroma mode was already selected
 * (presumably by an earlier call for this macroblock): nothing to do */
if( a->i_sad_i8x8chroma < COST_MAX )
return;
/* 8x8 prediction selection for chroma: index 0/1 of the local arrays map to
 * picture planes 1/2; p_fdec is the prediction target, p_fenc the source */
p_dstc[0] = h->mb.pic.p_fdec[1];
p_dstc[1] = h->mb.pic.p_fdec[2];
p_srcc[0] = h->mb.pic.p_fenc[1];
p_srcc[1] = h->mb.pic.p_fenc[2];
i_stride[0] = h->mb.pic.i_stride[1];
i_stride[1] = h->mb.pic.i_stride[2];
predict_8x8chroma_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
/* start the search from the "no mode chosen yet" sentinel */
a->i_sad_i8x8chroma = COST_MAX;
/* NOTE(review): if i_max were 0, a->i_predict8x8chroma would be read below
 * without being written here -- presumably at least one mode is always
 * available; confirm in predict_8x8chroma_mode_available() */
for( i = 0; i < i_max; i++ )
{
/* despite the name, i_sad holds SATD plus the mode signalling cost */
int i_sad;
int i_mode;
i_mode = predict_mode[i];
/* build the prediction for both chroma planes; only the destination is
 * passed, so the predictor presumably works in place on the p_fdec data */
h->predict_8x8c[i_mode]( p_dstc[0], i_stride[0] );
h->predict_8x8c[i_mode]( p_dstc[1], i_stride[1] );
/* cost = SATD of both planes + lambda * size of the ue-coded mode;
 * the same stride is passed for the prediction and the source buffer */
i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
p_srcc[0], i_stride[0] ) +
h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
p_srcc[1], i_stride[1] ) +
a->i_lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[i_mode] );
/* keep the cheapest mode; on a tie the earlier candidate is kept */
if( a->i_sad_i8x8chroma > i_sad )
{
a->i_predict8x8chroma = i_mode;
a->i_sad_i8x8chroma = i_sad;
}
}
/* publish the winner; note the p_fdec chroma buffers still hold the
 * prediction of the last mode tried, not necessarily the best one */
h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
}
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_cost_inter )
{ {
const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter; const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
const int i_stride = h->mb.pic.i_stride[0]; const int i_stride = h->mb.pic.i_stride[0];
uint8_t *p_src = h->mb.pic.p_fenc[0]; uint8_t *p_src = h->mb.pic.p_fenc[0];
uint8_t *p_dst = h->mb.pic.p_fdec[0]; uint8_t *p_dst = h->mb.pic.p_fdec[0];
int f8_satd_rd_ratio = 0;
int i, idx; int i, idx;
int i_max; int i_max;
int predict_mode[9]; int predict_mode[9];
const int i_satd_thresh = a->i_best_satd * 5/4 + a->i_lambda * 10;
/*---------------- Try all mode and calculate their score ---------------*/ /*---------------- Try all mode and calculate their score ---------------*/
/* 16x16 prediction selection */ /* 16x16 prediction selection */
...@@ -404,34 +486,45 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cos ...@@ -404,34 +486,45 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cos
int i_mode; int i_mode;
i_mode = predict_mode[i]; i_mode = predict_mode[i];
/* we do the prediction */
h->predict_16x16[i_mode]( p_dst, i_stride ); h->predict_16x16[i_mode]( p_dst, i_stride );
/* we calculate the diff and get the square sum of the diff */
i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) + i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_stride, p_src, i_stride ) +
res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] ); a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
/* if i_score is lower it is better */ if( a->i_sad_i16x16 > i_sad )
if( res->i_sad_i16x16 > i_sad )
{ {
res->i_predict16x16 = i_mode; a->i_predict16x16 = i_mode;
res->i_sad_i16x16 = i_sad; a->i_sad_i16x16 = i_sad;
} }
} }
/* cavlc mb type prefix */
if( h->sh.i_type == SLICE_TYPE_B )
res->i_sad_i16x16 += res->i_lambda * i_mb_b_cost_table[I_16x16];
if( res->b_fast_intra ) if( a->b_mbrd )
{ {
if( res->i_sad_i16x16 > 2*i_cost_inter ) f8_satd_rd_ratio = ((unsigned)i_cost_inter << 8) / a->i_best_satd + 1;
x264_mb_analyse_intra_chroma( h, a );
if( h->mb.b_chroma_me )
a->i_sad_i16x16 += a->i_sad_i8x8chroma;
if( a->i_sad_i16x16 < i_satd_thresh )
{
h->mb.i_type = I_16x16;
h->mb.i_intra16x16_pred_mode = a->i_predict16x16;
a->i_sad_i16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
}
else
a->i_sad_i16x16 = a->i_sad_i16x16 * f8_satd_rd_ratio >> 8;
}
else
{
if( h->sh.i_type == SLICE_TYPE_B )
/* cavlc mb type prefix */
a->i_sad_i16x16 += a->i_lambda * i_mb_b_cost_table[I_16x16];
if( a->b_fast_intra && a->i_sad_i16x16 > 2*i_cost_inter )
return; return;
} }
/* 4x4 prediction selection */ /* 4x4 prediction selection */
if( flags & X264_ANALYSE_I4x4 ) if( flags & X264_ANALYSE_I4x4 )
{ {
res->i_sad_i4x4 = 0; a->i_sad_i4x4 = 0;
for( idx = 0; idx < 16; idx++ ) for( idx = 0; idx < 16; idx++ )
{ {
uint8_t *p_src_by; uint8_t *p_src_by;
...@@ -455,40 +548,51 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cos ...@@ -455,40 +548,51 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cos
int i_mode; int i_mode;
i_mode = predict_mode[i]; i_mode = predict_mode[i];
/* we do the prediction */
h->predict_4x4[i_mode]( p_dst_by, i_stride ); h->predict_4x4[i_mode]( p_dst_by, i_stride );
/* we calculate diff and get the square sum of the diff */
i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride, i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_stride,
p_src_by, i_stride ); p_src_by, i_stride )
+ a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);
i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
/* if i_score is lower it is better */
if( i_best > i_sad ) if( i_best > i_sad )
{ {
res->i_predict4x4[x][y] = i_mode; a->i_predict4x4[x][y] = i_mode;
i_best = i_sad; i_best = i_sad;
} }
} }
res->i_sad_i4x4 += i_best; a->i_sad_i4x4 += i_best;
/* we need to encode this block now (for next ones) */ /* we need to encode this block now (for next ones) */
h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride ); h->predict_4x4[a->i_predict4x4[x][y]]( p_dst_by, i_stride );
x264_mb_encode_i4x4( h, idx, res->i_qp ); x264_mb_encode_i4x4( h, idx, a->i_qp );
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = res->i_predict4x4[x][y]; h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[x][y];
}
a->i_sad_i4x4 += a->i_lambda * 24; /* from JVT (SATD0) */
if( a->b_mbrd )
{
if( h->mb.b_chroma_me )
a->i_sad_i4x4 += a->i_sad_i8x8chroma;
if( a->i_sad_i4x4 < i_satd_thresh )
{
h->mb.i_type = I_4x4;
a->i_sad_i4x4 = x264_rd_cost_mb( h, a->i_lambda2 );
}
else
a->i_sad_i4x4 = a->i_sad_i4x4 * f8_satd_rd_ratio >> 8;
}
else
{
if( h->sh.i_type == SLICE_TYPE_B )
a->i_sad_i4x4 += a->i_lambda * i_mb_b_cost_table[I_4x4];
} }
res->i_sad_i4x4 += res->i_lambda * 24; /* from JVT (SATD0) */
if( h->sh.i_type == SLICE_TYPE_B )
res->i_sad_i4x4 += res->i_lambda * i_mb_b_cost_table[I_4x4];
} }
/* 8x8 prediction selection */ /* 8x8 prediction selection */
if( flags & X264_ANALYSE_I8x8 ) if( flags & X264_ANALYSE_I8x8 )
{ {
res->i_sad_i8x8 = 0; a->i_sad_i8x8 = 0;
for( idx = 0; idx < 4; idx++ ) for( idx = 0; idx < 4; idx++ )
{ {
uint8_t *p_src_by; uint8_t *p_src_by;
...@@ -516,81 +620,43 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cos ...@@ -516,81 +620,43 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cos
/* could use sa8d, but it doesn't seem worth the speed cost (without mmx at least) */ /* could use sa8d, but it doesn't seem worth the speed cost (without mmx at least) */
i_sad = h->pixf.satd[PIXEL_8x8]( p_dst_by, i_stride, i_sad = h->pixf.satd[PIXEL_8x8]( p_dst_by, i_stride,
p_src_by, i_stride ); p_src_by, i_stride )
+ a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);
i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix[i_mode] ? 1 : 4);
if( i_best > i_sad ) if( i_best > i_sad )
{ {
res->i_predict8x8[x][y] = i_mode; a->i_predict8x8[x][y] = i_mode;
i_best = i_sad; i_best = i_sad;
} }
} }
res->i_sad_i8x8 += i_best; a->i_sad_i8x8 += i_best;
/* we need to encode this block now (for next ones) */ /* we need to encode this block now (for next ones) */
h->predict_8x8[res->i_predict8x8[x][y]]( p_dst_by, i_stride, h->mb.i_neighbour ); h->predict_8x8[a->i_predict8x8[x][y]]( p_dst_by, i_stride, h->mb.i_neighbour );
x264_mb_encode_i8x8( h, idx, res->i_qp ); x264_mb_encode_i8x8( h, idx, a->i_qp );
x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, res->i_predict4x4[x][y] ); x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[x][y] );
} }
// FIXME some bias like in i4x4?
if( h->sh.i_type == SLICE_TYPE_B )
res->i_sad_i8x8 += res->i_lambda * i_mb_b_cost_table[I_8x8];
}
}
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
{
int i;
int i_max;
int predict_mode[9];
uint8_t *p_dstc[2], *p_srcc[2];
int i_stride[2];
if( res->i_sad_i8x8chroma < COST_MAX ) if( a->b_mbrd )
return; {
if( h->mb.b_chroma_me )
/* 8x8 prediction selection for chroma */ a->i_sad_i8x8 += a->i_sad_i8x8chroma;
p_dstc[0] = h->mb.pic.p_fdec[1]; if( a->i_sad_i8x8 < i_satd_thresh )
p_dstc[1] = h->mb.pic.p_fdec[2]; {
p_srcc[0] = h->mb.pic.p_fenc[1]; h->mb.i_type = I_8x8;
p_srcc[1] = h->mb.pic.p_fenc[2]; a->i_sad_i8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
}
i_stride[0] = h->mb.pic.i_stride[1]; else
i_stride[1] = h->mb.pic.i_stride[2]; a->i_sad_i8x8 = a->i_sad_i8x8 * f8_satd_rd_ratio >> 8;
}
predict_8x8chroma_mode_available( h->mb.i_neighbour, predict_mode, &i_max ); else
res->i_sad_i8x8chroma = COST_MAX;
for( i = 0; i < i_max; i++ )
{
int i_sad;
int i_mode;
i_mode = predict_mode[i];
/* we do the prediction */
h->predict_8x8c[i_mode]( p_dstc[0], i_stride[0] );
h->predict_8x8c[i_mode]( p_dstc[1], i_stride[1] );
/* we calculate the cost */
i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
p_srcc[0], i_stride[0] ) +
h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
p_srcc[1], i_stride[1] ) +
res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[i_mode] );
/* if i_score is lower it is better */
if( res->i_sad_i8x8chroma > i_sad )
{ {
res->i_predict8x8chroma = i_mode; // FIXME some bias like in i4x4?
res->i_sad_i8x8chroma = i_sad; if( h->sh.i_type == SLICE_TYPE_B )
a->i_sad_i8x8 += a->i_lambda * i_mb_b_cost_table[I_8x8];
} }
} }
h->mb.i_chroma_pred_mode = res->i_predict8x8chroma;
} }
#define LOAD_FENC( m, src, xoff, yoff) \ #define LOAD_FENC( m, src, xoff, yoff) \
...@@ -646,11 +712,22 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a ) ...@@ -646,11 +712,22 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1]; h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
} }
/* subtract ref cost, so we don't have to add it for the other P types */
a->l0.me16x16.cost -= a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
/* Set global ref, needed for all others modes */ /* Set global ref, needed for all others modes */
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref ); x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
if( a->b_mbrd )
{
a->i_best_satd = a->l0.me16x16.cost;
h->mb.i_type = P_L0;
h->mb.i_partition = D_16x16;
x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
a->l0.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 );
}
else
{
/* subtract ref cost, so we don't have to add it for the other P types */
a->l0.me16x16.cost -= a->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
}
} }
static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a ) static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
...@@ -693,7 +770,16 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a ) ...@@ -693,7 +770,16 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
} }
a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost + a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
a->l0.me8x8[2].cost + a->l0.me8x8[3].cost; a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;
if( a->b_mbrd )
{
if( a->i_best_satd > a->l0.i_cost8x8 )
a->i_best_satd = a->l0.i_cost8x8;
h->mb.i_type = P_8x8;