Commit 56967517 authored by Fiona Glaser

Optimize neighbor CBP calculation and fix related regression

r1105 introduced an array overflow in CBP handling.
parent ce4de643
...@@ -529,6 +529,10 @@ struct x264_t ...@@ -529,6 +529,10 @@ struct x264_t
/* number of neighbors (top and left) that used 8x8 dct */ /* number of neighbors (top and left) that used 8x8 dct */
int i_neighbour_transform_size; int i_neighbour_transform_size;
int i_neighbour_interlaced; int i_neighbour_interlaced;
/* neighbor CBPs */
int i_cbp_top;
int i_cbp_left;
} cache; } cache;
/* */ /* */
......
...@@ -897,6 +897,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -897,6 +897,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
{ {
h->mb.i_mb_type_top = h->mb.i_mb_type_top =
i_top_type= h->mb.type[i_top_xy]; i_top_type= h->mb.type[i_top_xy];
h->mb.cache.i_cbp_top = h->mb.cbp[i_top_xy];
h->mb.i_neighbour |= MB_TOP; h->mb.i_neighbour |= MB_TOP;
...@@ -912,6 +913,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -912,6 +913,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
else else
{ {
h->mb.i_mb_type_top = -1; h->mb.i_mb_type_top = -1;
h->mb.cache.i_cbp_top = -1;
/* load intra4x4 */ /* load intra4x4 */
h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] = h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] =
...@@ -935,6 +937,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -935,6 +937,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
i_left_xy = i_mb_xy - 1; i_left_xy = i_mb_xy - 1;
h->mb.i_mb_type_left = h->mb.i_mb_type_left =
i_left_type = h->mb.type[i_left_xy]; i_left_type = h->mb.type[i_left_xy];
h->mb.cache.i_cbp_left = h->mb.cbp[h->mb.i_mb_xy - 1];
h->mb.i_neighbour |= MB_LEFT; h->mb.i_neighbour |= MB_LEFT;
...@@ -959,6 +962,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -959,6 +962,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
else else
{ {
h->mb.i_mb_type_left = -1; h->mb.i_mb_type_left = -1;
h->mb.cache.i_cbp_left = -1;
h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] =
......
...@@ -233,8 +233,8 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb ) ...@@ -233,8 +233,8 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb ) static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb )
{ {
int cbp = h->mb.i_cbp_luma; int cbp = h->mb.i_cbp_luma;
int cbp_l = h->mb.i_neighbour & MB_LEFT ? h->mb.cbp[h->mb.i_mb_xy - 1] : -1; int cbp_l = h->mb.cache.i_cbp_left;
int cbp_t = h->mb.i_neighbour & MB_TOP ? h->mb.cbp[h->mb.i_mb_top_xy] : -1; int cbp_t = h->mb.cache.i_cbp_top;
x264_cabac_encode_decision( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (h->mb.i_cbp_luma >> 0) & 1 ); x264_cabac_encode_decision( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (h->mb.i_cbp_luma >> 0) & 1 );
x264_cabac_encode_decision( cb, 76 - ((cbp >> 0) & 1) - ((cbp_t >> 2) & 2), (h->mb.i_cbp_luma >> 1) & 1 ); x264_cabac_encode_decision( cb, 76 - ((cbp >> 0) & 1) - ((cbp_t >> 2) & 2), (h->mb.i_cbp_luma >> 1) & 1 );
x264_cabac_encode_decision( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp << 1) & 2), (h->mb.i_cbp_luma >> 2) & 1 ); x264_cabac_encode_decision( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp << 1) & 2), (h->mb.i_cbp_luma >> 2) & 1 );
...@@ -243,20 +243,12 @@ static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb ) ...@@ -243,20 +243,12 @@ static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb )
static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb ) static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
{ {
int cbp_a = -1; int cbp_a = h->mb.cache.i_cbp_left & 0x30;
int cbp_b = -1; int cbp_b = h->mb.cache.i_cbp_top & 0x30;
int ctx; int ctx = 0;
/* No need to test for SKIP/PCM */
if( h->mb.i_neighbour & MB_LEFT )
cbp_a = (h->mb.cbp[h->mb.i_mb_xy - 1] >> 4)&0x3;
if( h->mb.i_neighbour & MB_TOP ) if( cbp_a && h->mb.cache.i_cbp_left != -1 ) ctx++;
cbp_b = (h->mb.cbp[h->mb.i_mb_top_xy] >> 4)&0x3; if( cbp_b && h->mb.cache.i_cbp_top != -1 ) ctx+=2;
ctx = 0;
if( cbp_a > 0 ) ctx++;
if( cbp_b > 0 ) ctx += 2;
if( h->mb.i_cbp_chroma == 0 ) if( h->mb.i_cbp_chroma == 0 )
x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 ); x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 );
else else
...@@ -264,8 +256,8 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb ) ...@@ -264,8 +256,8 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 ); x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 );
ctx = 4; ctx = 4;
if( cbp_a == 2 ) ctx++; if( cbp_a == 0x20 ) ctx++;
if( cbp_b == 2 ) ctx += 2; if( cbp_b == 0x20 ) ctx += 2;
x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma > 1 ); x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma > 1 );
} }
} }
...@@ -531,15 +523,14 @@ static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int ...@@ -531,15 +523,14 @@ static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int
i_nzb &= 0x7f + (b_intra << 7); i_nzb &= 0x7f + (b_intra << 7);
return 4*i_cat + 2*!!i_nzb + !!i_nza; return 4*i_cat + 2*!!i_nzb + !!i_nza;
case DCT_LUMA_DC: case DCT_LUMA_DC:
/* Note: this depends on the exact values of MB_LEFT and MB_TOP enums */ i_nza = (h->mb.cache.i_cbp_left >> 8) & 1;
i_nza = ((h->mb.cbp[h->mb.i_mb_xy - 1] >> 8) | ~h->mb.i_neighbour) & 1; i_nzb = (h->mb.cache.i_cbp_top >> 8) & 1;
i_nzb = ((h->mb.cbp[h->mb.i_mb_top_xy] >> 7) | ~h->mb.i_neighbour) & 2; return 4*i_cat + 2*i_nzb + i_nza;
return 4*i_cat + i_nzb + i_nza;
case DCT_CHROMA_DC: case DCT_CHROMA_DC:
/* no need to test skip/pcm */ /* no need to test skip/pcm */
i_idx -= 25; i_idx -= 25;
i_nza = h->mb.i_neighbour & MB_LEFT ? (h->mb.cbp[h->mb.i_mb_xy - 1] >> (9 + i_idx)) & 1 : b_intra; i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (9 + i_idx)) & 1 : b_intra;
i_nzb = h->mb.i_neighbour & MB_TOP ? (h->mb.cbp[h->mb.i_mb_top_xy] >> (9 + i_idx)) & 1 : b_intra; i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (9 + i_idx)) & 1 : b_intra;
return 4*i_cat + 2*i_nzb + i_nza; return 4*i_cat + 2*i_nzb + i_nza;
default: default:
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment