Commit 3b66f690 authored by Loren Merritt

RD subpel motion estimation (--subme 7)



git-svn-id: svn://svn.videolan.org/x264/trunk@476 df754926-b1dd-0310-bc7b-ec298dee348c
parent 48633d2a
......@@ -687,6 +687,69 @@ static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
}
}
/* Motion-compensate a single 8x8 block of the current macroblock.
 *
 * i8 selects the block: bit 0 gives the column and bit 1 the row (the caller
 * visible in this file passes 0..3).  The sub-partition type stored in
 * h->mb.i_sub_partition[i8] picks the helper (x264_mb_mc_0xywh for D_L0_*,
 * x264_mb_mc_1xywh for D_L1_*, x264_mb_mc_01xywh for D_BI_*, or
 * x264_mb_mc_direct8x8) and how the block is split.  Offsets and sizes are
 * handed to the helpers with a full 8x8 block expressed as 2x2.
 * Sub-partition types not listed below are ignored. */
void x264_mb_mc_8x8( x264_t *h, int i8 )
{
    const int bx = ( i8 & 1 ) * 2;
    const int by = ( i8 >> 1 ) * 2;
    int dx, dy;

    switch( h->mb.i_sub_partition[i8] )
    {
        /* D_L0_* sub-partitions */
        case D_L0_8x8:
            x264_mb_mc_0xywh( h, bx, by, 2, 2 );
            break;
        case D_L0_8x4:
            /* two stacked halves, top first */
            for( dy = 0; dy < 2; dy++ )
                x264_mb_mc_0xywh( h, bx, by + dy, 2, 1 );
            break;
        case D_L0_4x8:
            /* two side-by-side halves, left first */
            for( dx = 0; dx < 2; dx++ )
                x264_mb_mc_0xywh( h, bx + dx, by, 1, 2 );
            break;
        case D_L0_4x4:
            /* four quarters in raster order */
            for( dy = 0; dy < 2; dy++ )
                for( dx = 0; dx < 2; dx++ )
                    x264_mb_mc_0xywh( h, bx + dx, by + dy, 1, 1 );
            break;

        /* D_L1_* sub-partitions */
        case D_L1_8x8:
            x264_mb_mc_1xywh( h, bx, by, 2, 2 );
            break;
        case D_L1_8x4:
            for( dy = 0; dy < 2; dy++ )
                x264_mb_mc_1xywh( h, bx, by + dy, 2, 1 );
            break;
        case D_L1_4x8:
            for( dx = 0; dx < 2; dx++ )
                x264_mb_mc_1xywh( h, bx + dx, by, 1, 2 );
            break;
        case D_L1_4x4:
            for( dy = 0; dy < 2; dy++ )
                for( dx = 0; dx < 2; dx++ )
                    x264_mb_mc_1xywh( h, bx + dx, by + dy, 1, 1 );
            break;

        /* D_BI_* sub-partitions */
        case D_BI_8x8:
            x264_mb_mc_01xywh( h, bx, by, 2, 2 );
            break;
        case D_BI_8x4:
            for( dy = 0; dy < 2; dy++ )
                x264_mb_mc_01xywh( h, bx, by + dy, 2, 1 );
            break;
        case D_BI_4x8:
            for( dx = 0; dx < 2; dx++ )
                x264_mb_mc_01xywh( h, bx + dx, by, 1, 2 );
            break;
        case D_BI_4x4:
            for( dy = 0; dy < 2; dy++ )
                for( dx = 0; dx < 2; dx++ )
                    x264_mb_mc_01xywh( h, bx + dx, by + dy, 1, 1 );
            break;

        case D_DIRECT_8x8:
            x264_mb_mc_direct8x8( h, bx, by );
            break;
    }
}
void x264_mb_mc( x264_t *h )
{
if( h->mb.i_type == P_L0 )
......@@ -710,67 +773,7 @@ void x264_mb_mc( x264_t *h )
{
int i;
for( i = 0; i < 4; i++ )
{
const int x = 2*(i%2);
const int y = 2*(i/2);
switch( h->mb.i_sub_partition[i] )
{
case D_L0_8x8:
x264_mb_mc_0xywh( h, x, y, 2, 2 );
break;
case D_L0_8x4:
x264_mb_mc_0xywh( h, x, y+0, 2, 1 );
x264_mb_mc_0xywh( h, x, y+1, 2, 1 );
break;
case D_L0_4x8:
x264_mb_mc_0xywh( h, x+0, y, 1, 2 );
x264_mb_mc_0xywh( h, x+1, y, 1, 2 );
break;
case D_L0_4x4:
x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
break;
case D_L1_8x8:
x264_mb_mc_1xywh( h, x, y, 2, 2 );
break;
case D_L1_8x4:
x264_mb_mc_1xywh( h, x, y+0, 2, 1 );
x264_mb_mc_1xywh( h, x, y+1, 2, 1 );
break;
case D_L1_4x8:
x264_mb_mc_1xywh( h, x+0, y, 1, 2 );
x264_mb_mc_1xywh( h, x+1, y, 1, 2 );
break;
case D_L1_4x4:
x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 );
break;
case D_BI_8x8:
x264_mb_mc_01xywh( h, x, y, 2, 2 );
break;
case D_BI_8x4:
x264_mb_mc_01xywh( h, x, y+0, 2, 1 );
x264_mb_mc_01xywh( h, x, y+1, 2, 1 );
break;
case D_BI_4x8:
x264_mb_mc_01xywh( h, x+0, y, 1, 2 );
x264_mb_mc_01xywh( h, x+1, y, 1, 2 );
break;
case D_BI_4x4:
x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 );
break;
case D_DIRECT_8x8:
x264_mb_mc_direct8x8( h, x, y );
break;
}
}
x264_mb_mc_8x8( h, i );
}
else if( h->mb.i_type == B_SKIP || h->mb.i_type == B_DIRECT )
{
......
......@@ -263,6 +263,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale );
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale );
void x264_mb_mc( x264_t *h );
void x264_mb_mc_8x8( x264_t *h, int i8 );
static inline void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, int ref )
......
......@@ -660,6 +660,136 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_cost_
}
}
/* Re-select the intra prediction mode(s) of the current macroblock using the
 * RD cost functions (x264_rd_cost_mb / x264_rd_cost_i4x4 / x264_rd_cost_i8x8)
 * evaluated with a->i_lambda2, and record the winners in the analysis struct:
 *   I_16x16 -> a->i_predict16x16
 *   I_4x4   -> a->i_predict4x4[x][y]  (also copied to the pred-mode cache)
 *   I_8x8   -> a->i_predict8x8[x][y]  (also copied to the pred-mode cache)
 * Any other h->mb.i_type leaves everything untouched.
 * Note: the variables named *_sad hold RD costs here, not SADs. */
static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
{
uint8_t *p_src = h->mb.pic.p_fenc[0];
uint8_t *p_dst = h->mb.pic.p_fdec[0];
int i, idx, x, y;
int i_max, i_sad, i_best, i_mode;
/* i_pred_mode is assigned below but never read in this function */
int i_pred_mode;
int predict_mode[9];
if( h->mb.i_type == I_16x16 )
{
/* The mode chosen earlier is the one to beat; its cost is taken from
 * a->i_sad_i16x16 and it is not re-evaluated. */
int old_pred_mode = a->i_predict16x16;
i_best = a->i_sad_i16x16;
predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
for( i = 0; i < i_max; i++ )
{
if( predict_mode[i] == old_pred_mode )
continue;
/* x264_rd_cost_mb is given no mode argument, so the candidate is
 * passed through h->mb.i_intra16x16_pred_mode. */
h->mb.i_intra16x16_pred_mode = predict_mode[i];
i_sad = x264_rd_cost_mb( h, a->i_lambda2 );
if( i_best > i_sad )
{
a->i_predict16x16 = predict_mode[i];
i_best = i_sad;
}
}
/* NOTE(review): h->mb.i_intra16x16_pred_mode is left at the last candidate
 * tried, not at a->i_predict16x16 -- presumably the caller copies the
 * winner back afterwards; confirm. */
}
else if( h->mb.i_type == I_4x4 )
{
/* Each of the 16 4x4 blocks is refined independently, in idx order. */
for( idx = 0; idx < 16; idx++ )
{
/* Snapshot of the best candidate: four rows of 4 pixels plus its nnz */
uint32_t pels[4];
int i_nnz = 0;
/* p_src_by is computed below but not used afterwards in this function */
uint8_t *p_src_by;
uint8_t *p_dst_by;
i_best = COST_MAX;
i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx );
x = block_idx_x[idx];
y = block_idx_y[idx];
p_src_by = p_src + 4*x + 4*y*FENC_STRIDE;
p_dst_by = p_dst + 4*x + 4*y*FDEC_STRIDE;
predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max );
if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
/* emulate missing topright samples */
*(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
for( i = 0; i < i_max; i++ )
{
i_mode = predict_mode[i];
h->predict_4x4[i_mode]( p_dst_by );
i_sad = x264_rd_cost_i4x4( h, a->i_lambda2, idx, i_mode );
if( i_best > i_sad )
{
a->i_predict4x4[x][y] = i_mode;
i_best = i_sad;
/* Keep the 4x4 block at p_dst_by and the nnz count as they are right
 * after the cost call -- presumably x264_rd_cost_i4x4 leaves the
 * candidate's reconstruction and nnz there; confirm. */
pels[0] = *(uint32_t*)(p_dst_by+0*FDEC_STRIDE);
pels[1] = *(uint32_t*)(p_dst_by+1*FDEC_STRIDE);
pels[2] = *(uint32_t*)(p_dst_by+2*FDEC_STRIDE);
pels[3] = *(uint32_t*)(p_dst_by+3*FDEC_STRIDE);
i_nnz = h->mb.cache.non_zero_count[x264_scan8[idx]];
}
}
/* Put back the winner's pixels, nnz and mode, since later candidates have
 * overwritten them.  pels[] is only defined if at least one candidate
 * was accepted (i_max > 0 with a cost below COST_MAX). */
*(uint32_t*)(p_dst_by+0*FDEC_STRIDE) = pels[0];
*(uint32_t*)(p_dst_by+1*FDEC_STRIDE) = pels[1];
*(uint32_t*)(p_dst_by+2*FDEC_STRIDE) = pels[2];
*(uint32_t*)(p_dst_by+3*FDEC_STRIDE) = pels[3];
h->mb.cache.non_zero_count[x264_scan8[idx]] = i_nnz;
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[x][y];
}
}
else if( h->mb.i_type == I_8x8 )
{
/* Each of the four 8x8 blocks is refined independently, in idx order. */
for( idx = 0; idx < 4; idx++ )
{
/* Snapshot of the best candidate: only the bottom row (pels_h), the right
 * column rows 0..6 (pels_v, left-hand blocks only) and the nnz of 4x4
 * blocks 1..3 of this 8x8 are kept -- presumably just what later blocks
 * use as neighbours; confirm. */
uint64_t pels_h = 0;
uint8_t pels_v[7];
int i_nnz[3];
/* p_src_by is computed below but not used afterwards in this function */
uint8_t *p_src_by;
uint8_t *p_dst_by;
int j;
i_best = COST_MAX;
i_pred_mode= x264_mb_predict_intra4x4_mode( h, 4*idx );
x = idx&1;
y = idx>>1;
p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
/* The 4x4 availability helper is called with the 8x8 neighbour flags */
predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max );
for( i = 0; i < i_max; i++ )
{
i_mode = predict_mode[i];
h->predict_8x8[i_mode]( p_dst_by, h->mb.i_neighbour8[idx] );
i_sad = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode );
if( i_best > i_sad )
{
a->i_predict8x8[x][y] = i_mode;
i_best = i_sad;
/* bottom row of the block: 8 pixels read as one 64-bit word */
pels_h = *(uint64_t*)(p_dst_by+7*FDEC_STRIDE);
/* right column, saved only for blocks in the left half (idx even) */
if( !(idx&1) )
for( j=0; j<7; j++ )
pels_v[j] = p_dst_by[7+j*FDEC_STRIDE];
for( j=0; j<3; j++ )
i_nnz[j] = h->mb.cache.non_zero_count[x264_scan8[4*idx+j+1]];
}
}
/* Put back the winner's border pixels, nnz counts and mode.  pels_v[] and
 * i_nnz[] are only defined if at least one candidate was accepted. */
*(uint64_t*)(p_dst_by+7*FDEC_STRIDE) = pels_h;
if( !(idx&1) )
for( j=0; j<7; j++ )
p_dst_by[7+j*FDEC_STRIDE] = pels_v[j];
for( j=0; j<3; j++ )
h->mb.cache.non_zero_count[x264_scan8[4*idx+j+1]] = i_nnz[j];
x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[x][y] );
}
}
}
#define LOAD_FENC( m, src, xoff, yoff) \
(m)->i_stride[0] = h->mb.pic.i_stride[0]; \
(m)->i_stride[1] = h->mb.pic.i_stride[1]; \
......@@ -1805,6 +1935,9 @@ void x264_macroblock_analyse( x264_t *h )
}
if( analysis.i_sad_i8x8 < i_cost )
h->mb.i_type = I_8x8;
if( h->mb.i_subpel_refine >= 7 )
x264_intra_rd_refine( h, &analysis );
}
else if( h->sh.i_type == SLICE_TYPE_P )
{
......@@ -1859,8 +1992,6 @@ void x264_macroblock_analyse( x264_t *h )
if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
{
int i;
i_type = P_8x8;
i_partition = D_8x8;
h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
......@@ -2041,6 +2172,41 @@ void x264_macroblock_analyse( x264_t *h )
h->mb.i_type = i_type;
h->stat.frame.i_intra_cost += i_intra_cost;
h->stat.frame.i_inter_cost += i_cost;
if( h->mb.i_subpel_refine >= 7 )
{
if( IS_INTRA( h->mb.i_type ) )
{
x264_intra_rd_refine( h, &analysis );
}
else if( i_partition == D_16x16 )
{
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref );
x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0 );
}
else if( i_partition == D_16x8 )
{
x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].i_ref );
x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].i_ref );
x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], analysis.i_lambda2, 0 );
x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 2 );
}
else if( i_partition == D_8x16 )
{
x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].i_ref );
x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].i_ref );
x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0 );
x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 1 );
}
else if( i_partition == D_8x8 )
{
int i8x8;
x264_analyse_update_cache( h, &analysis );
for( i8x8 = 0; i8x8 < 4; i8x8++ )
if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )
x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8 );
}
}
}
}
else if( h->sh.i_type == SLICE_TYPE_B )
......
......@@ -500,7 +500,7 @@ static inline void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, i
static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd )
static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd )
{
const int amvd = abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 1][l] ) +
abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 8][l] );
......@@ -556,44 +556,38 @@ static inline void x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, i
x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mdx, mdy );
}
static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i_list )
static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int i )
{
int i;
for( i = 0; i < 4; i++ )
{
if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] )
{
continue;
}
if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] )
return;
switch( h->mb.i_sub_partition[i] )
{
case D_L0_8x8:
case D_L1_8x8:
case D_BI_8x8:
x264_cabac_mb_mvd( h, cb, i_list, 4*i, 2, 2 );
break;
case D_L0_8x4:
case D_L1_8x4:
case D_BI_8x4:
x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 2, 1 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 2, 1 );
break;
case D_L0_4x8:
case D_L1_4x8:
case D_BI_4x8:
x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 2 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 2 );
break;
case D_L0_4x4:
case D_L1_4x4:
case D_BI_4x4:
x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 1 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 1 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 1, 1 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+3, 1, 1 );
break;
}
switch( h->mb.i_sub_partition[i] )
{
case D_L0_8x8:
case D_L1_8x8:
case D_BI_8x8:
x264_cabac_mb_mvd( h, cb, i_list, 4*i, 2, 2 );
break;
case D_L0_8x4:
case D_L1_8x4:
case D_BI_8x4:
x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 2, 1 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 2, 1 );
break;
case D_L0_4x8:
case D_L1_4x8:
case D_BI_4x8:
x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 2 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 2 );
break;
case D_L0_4x4:
case D_L1_4x4:
case D_BI_4x4:
x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 1 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 1 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 1, 1 );
x264_cabac_mb_mvd( h, cb, i_list, 4*i+3, 1, 1 );
break;
}
}
......@@ -912,7 +906,8 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
x264_cabac_mb_ref( h, cb, 0, 12 );
}
x264_cabac_mb8x8_mvd( h, cb, 0 );
for( i = 0; i < 4; i++ )
x264_cabac_mb8x8_mvd( h, cb, 0, i );
}
else if( i_mb_type == B_8x8 )
{
......@@ -932,8 +927,10 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
x264_cabac_mb_ref( h, cb, i_list, 4*i );
}
x264_cabac_mb8x8_mvd( h, cb, 0 );
x264_cabac_mb8x8_mvd( h, cb, 1 );
for( i = 0; i < 4; i++ )
x264_cabac_mb8x8_mvd( h, cb, 0, i );
for( i = 0; i < 4; i++ )
x264_cabac_mb8x8_mvd( h, cb, 1, i );
}
else if( i_mb_type != B_DIRECT )
{
......@@ -1052,3 +1049,88 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
#endif
}
#ifdef RDO_SKIP_BS
/*****************************************************************************
 * x264_partition_size_cabac:
 *  For RD use only: what is written to cb is not a valid bitstream.
 *  Neither cbp nor chroma dc is written (it is unknown how much that matters).
 *  Handles every partition size except 16x16.
 *  For sub8x8, call it once for each 8x8 block.
 *
 *  i8      index of the (first) 8x8 block of the partition
 *  i_pixel PIXEL_* size of the partition; values below PIXEL_8x8 make the
 *          residual loop cover two 8x8 blocks instead of one
 *
 *  Only P_8x8, P_L0 and B_8x8 macroblocks are supported; for any other type
 *  an error is logged and nothing is written.
 *****************************************************************************/
void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel )
{
    const int mb_type = h->mb.i_type;
    int n_blocks, n;

    /* Reject unsupported macroblock types up front */
    if( mb_type != P_8x8 && mb_type != P_L0 && mb_type != B_8x8 )
    {
        x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" );
        return;
    }

    /* --- partition type, reference indices and motion vector deltas --- */
    if( mb_type == P_8x8 )
    {
        x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
        if( h->sh.i_num_ref_idx_l0_active > 1 )
        {
            x264_cabac_mb_ref( h, cb, 0, 4*i8 );
        }
        x264_cabac_mb8x8_mvd( h, cb, 0, i8 );
    }
    else if( mb_type == P_L0 )
    {
        if( h->sh.i_num_ref_idx_l0_active > 1 )
        {
            x264_cabac_mb_ref( h, cb, 0, 4*i8 );
        }
        if( h->mb.i_partition == D_16x8 )
        {
            x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4, 2 );
        }
        else
        {
            /* anything else is treated as 8x16 */
            x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 4 );
        }
    }
    else /* B_8x8 */
    {
        x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i8] );
        if( h->sh.i_num_ref_idx_l0_active > 1
            && x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
        {
            x264_cabac_mb_ref( h, cb, 0, 4*i8 );
        }
        if( h->sh.i_num_ref_idx_l1_active > 1
            && x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
        {
            x264_cabac_mb_ref( h, cb, 1, 4*i8 );
        }
        x264_cabac_mb8x8_mvd( h, cb, 0, i8 );
        x264_cabac_mb8x8_mvd( h, cb, 1, i8 );
    }

    /* --- residual: luma (if flagged in cbp) and chroma AC per 8x8 block --- */
    n_blocks = ( i_pixel < PIXEL_8x8 ) ? 2 : 1;
    for( n = 0; n < n_blocks; n++ )
    {
        if( h->mb.i_cbp_luma & (1 << i8) )
        {
            if( h->mb.b_transform_8x8 )
            {
                block_residual_write_cabac( h, cb, DCT_LUMA_8x8, i8, h->dct.luma8x8[i8], 64 );
            }
            else
            {
                const int i4_first = i8*4;
                int i4;
                for( i4 = i4_first; i4 < i4_first + 4; i4++ )
                {
                    block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.block[i4].luma4x4, 16 );
                }
            }
        }

        block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i8,   h->dct.block[16+i8  ].residual_ac, 15 );
        block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i8+4, h->dct.block[16+i8+4].residual_ac, 15 );

        /* step to the partition's second 8x8 block: the stride is the
         * partition height in 8-pixel units */
        i8 += x264_pixel_size[i_pixel].h >> 3;
    }
}
/* Write to cb the prediction mode and the 8x8 luma residual of intra 8x8
 * block i8.  The mode is coded against the predicted mode of the block's
 * first 4x4 (index 4*i8), after being mapped by x264_mb_pred_mode4x4_fix. */
static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode )
{
    const int i_predicted = x264_mb_predict_intra4x4_mode( h, 4*i8 );
    const int i_fixed = x264_mb_pred_mode4x4_fix( i_mode );

    x264_cabac_mb_intra4x4_pred_mode( cb, i_predicted, i_fixed );
    block_residual_write_cabac( h, cb, DCT_LUMA_8x8, 4*i8, h->dct.luma8x8[i8], 64 );
}
/* Write to cb the prediction mode and the 4x4 luma residual of intra 4x4
 * block i4.  The mode is coded against the block's predicted mode, after
 * being mapped by x264_mb_pred_mode4x4_fix. */
static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
{
    const int i_predicted = x264_mb_predict_intra4x4_mode( h, i4 );
    const int i_fixed = x264_mb_pred_mode4x4_fix( i_mode );

    x264_cabac_mb_intra4x4_pred_mode( cb, i_predicted, i_fixed );
    block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.block[i4].luma4x4, 16 );
}
#endif
......@@ -267,79 +267,56 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s )
bs_write_se( s, i_dqp );
}
static void x264_sub_mb_mv_write_cavlc( x264_t *h, bs_t *s, int i_list )
static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width )
{
int i;
for( i = 0; i < 4; i++ )
{
int mvp[2];
int mvp[2];
x264_mb_predict_mv( h, i_list, idx, width, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
}
if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] )
{
continue;
}
switch( h->mb.i_sub_partition[i] )
{
case D_L0_8x8:
case D_L1_8x8:
case D_BI_8x8:
x264_mb_predict_mv( h, i_list, 4*i, 2, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] );
break;
case D_L0_8x4:
case D_L1_8x4:
case D_BI_8x4:
x264_mb_predict_mv( h, i_list, 4*i+0, 2, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] );
x264_mb_predict_mv( h, i_list, 4*i+2, 2, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][1] - mvp[1] );
break;
case D_L0_4x8:
case D_L1_4x8:
case D_BI_4x8:
x264_mb_predict_mv( h, i_list, 4*i+0, 1, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] );
x264_mb_predict_mv( h, i_list, 4*i+1, 1, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][1] - mvp[1] );
break;
case D_L0_4x4:
case D_L1_4x4:
case D_BI_4x4:
x264_mb_predict_mv( h, i_list, 4*i+0, 1, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] );
x264_mb_predict_mv( h, i_list, 4*i+1, 1, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][1] - mvp[1] );
x264_mb_predict_mv( h, i_list, 4*i+2, 1, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][1] - mvp[1] );
x264_mb_predict_mv( h, i_list, 4*i+3, 1, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+3]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+3]][1] - mvp[1] );
break;
}
/* Write the motion vector deltas of 8x8 block i for list i_list.
 * Nothing is written when x264_mb_partition_listX_table says the block's
 * sub-partition type does not use that list.  One cavlc_mb_mvd call is made
 * per sub-block, addressed by 4x4 index 4*i + k. */
static void cavlc_mb8x8_mvd( x264_t *h, bs_t *s, int i_list, int i )
{
    const int i_base = 4*i;
    int k;

    if( x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] )
    {
        switch( h->mb.i_sub_partition[i] )
        {
            case D_L0_8x8:
            case D_L1_8x8:
            case D_BI_8x8:
                /* single vector for the whole block */
                cavlc_mb_mvd( h, s, i_list, i_base, 2 );
                break;

            case D_L0_8x4:
            case D_L1_8x4:
            case D_BI_8x4:
                /* 4x4 indices +0 and +2 */
                for( k = 0; k < 4; k += 2 )
                    cavlc_mb_mvd( h, s, i_list, i_base + k, 2 );
                break;

            case D_L0_4x8:
            case D_L1_4x8:
            case D_BI_4x8:
                /* 4x4 indices +0 and +1 */
                for( k = 0; k < 2; k++ )
                    cavlc_mb_mvd( h, s, i_list, i_base + k, 1 );
                break;

            case D_L0_4x4:
            case D_L1_4x4:
            case D_BI_4x4:
                /* all four 4x4 blocks in index order */
                for( k = 0; k < 4; k++ )
                    cavlc_mb_mvd( h, s, i_list, i_base + k, 1 );
                break;
        }
    }
}
static void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s )
static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8start, int i8end )
{
int i8, i4, i;
if( h->mb.b_transform_8x8 )
{
/* shuffle 8x8 dct coeffs into 4x4 lists */
for( i8 = 0; i8 < 4; i8++ )
for( i8 = i8start; i8 <= i8end; i8++ )
if( h->mb.i_cbp_luma & (1 << i8) )
for( i4 = 0; i4 < 4; i4++ )
{
......@@ -350,7 +327,7 @@ static void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s )
}
}
for( i8 = 0; i8 < 4; i8++ )
for( i8 = i8start; i8 <= i8end; i8++ )
if( h->mb.i_cbp_luma & (1 << i8) )
for( i4 = 0; i4 < 4; i4++ )
block_residual_write_cavlc( h, s, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 );
......@@ -541,7 +518,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[12]] );
}
x264_sub_mb_mv_write_cavlc( h, s, 0 );
for( i = 0; i < 4; i++ )
cavlc_mb8x8_mvd( h, s, 0, i );
}
else if( i_mb_type == B_8x8 )
{
......@@ -568,8 +546,10 @@ vo