Commit 4947b0fb authored by Fiona Glaser

Overhaul deblocking again

Move the deblock strength calculation to immediately after each macroblock is encoded, to take advantage of data that is already in cache.
Keep the deblocking itself per-row.
parent 57729402
......@@ -709,6 +709,8 @@ struct x264_t
#define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
int8_t map_col_to_list0[18];
int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
int8_t deblock_ref_table[32+2];
#define deblock_ref_table(x) h->mb.deblock_ref_table[(x)+2]
} mb;
/* rate control encoding only */
......@@ -779,6 +781,7 @@ struct x264_t
/* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
uint8_t *intra_border_backup[2][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
uint8_t (*deblock_strength[2])[2][4][4];
/* CPU functions dependents */
x264_predict_t predict_16x16[4+3];
......
......@@ -274,13 +274,15 @@ static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int b
deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
}
static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit, int bframe, int step, int first_edge_only )
static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit,
int bframe )
{
for( int dir = 0; dir < 2; dir++ )
{
int s1 = dir ? 1 : 8;
int s2 = dir ? 8 : 1;
for( int edge = 0; edge < (first_edge_only ? 1 : 4); edge += step )
for( int edge = 0; edge < 4; edge++ )
for( int i = 0, loc = X264_SCAN8_0+edge*s2; i < 4; i++, loc += s1 )
{
int locn = loc - s2;
......@@ -337,46 +339,25 @@ static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2,
void x264_frame_deblock_row( x264_t *h, int mb_y )
{
int b_interlaced = h->sh.b_mbaff;
int mvy_limit = 4 >> b_interlaced;
int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
int stridey = h->fdec->i_stride[0];
int stride2y = stridey << b_interlaced;
int strideuv = h->fdec->i_stride[1];
int stride2uv = strideuv << b_interlaced;
int deblock_ref_table[2][32+2];
uint8_t (*nnz_backup)[16] = h->scratch_buffer;
for( int l = 0; l < 2; l++ )
{
int refs = (l ? h->i_ref1 : h->i_ref0) << h->sh.b_mbaff;
x264_frame_t **fref = l ? h->fref1 : h->fref0;
deblock_ref_table(l,-2) = -2;
deblock_ref_table(l,-1) = -1;
for( int i = 0; i < refs; i++ )
{
/* Mask off high bits to avoid frame num collisions with -1/-2.
* frame num values don't actually have to be correct, just unique.
* frame num values can't cover a range of more than 32. */
if( !h->mb.b_interlaced )
deblock_ref_table(l,i) = fref[i]->i_frame_num&63;
else
deblock_ref_table(l,i) = ((fref[i>>1]->i_frame_num&63)<<1) + (i&1);
}
}
if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
munge_cavlc_nnz( h, mb_y, nnz_backup, munge_cavlc_nnz_row );
for( int mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
{
ALIGNED_ARRAY_16( uint8_t, bs, [2][4][4] );
x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
x264_macroblock_cache_load_deblock( h, mb_x, mb_y, deblock_ref_table );
x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
int mb_xy = h->mb.i_mb_xy;
int transform_8x8 = h->mb.mb_transform_size[mb_xy];
int transform_8x8 = h->mb.mb_transform_size[h->mb.i_mb_xy];
int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&b_interlaced][mb_x];
uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x;
......@@ -404,11 +385,6 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
h->loopf.deblock_chroma##intra[dir] );\
} while(0)
if( intra_cur )
memset( bs, 3, sizeof(bs) );
else
h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv, bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B, transform_8x8 + 1, first_edge_only );
if( h->mb.i_neighbour & MB_LEFT )
{
int qpl = h->mb.qp[h->mb.i_mb_left_xy];
......@@ -468,13 +444,13 @@ void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int be
void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_strength_mmxext( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
int mvy_limit, int bframe, int step, int first_edge_only );
int mvy_limit, int bframe );
void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
int mvy_limit, int bframe, int step, int first_edge_only );
int mvy_limit, int bframe );
void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
int mvy_limit, int bframe, int step, int first_edge_only );
int mvy_limit, int bframe );
#ifdef ARCH_X86
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
......
......@@ -166,7 +166,7 @@ typedef struct
x264_deblock_intra_t deblock_chroma_intra[2];
void (*deblock_strength) ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit,
int bframe, int step, int first_edge_only );
int bframe );
} x264_deblock_function_t;
x264_frame_t *x264_frame_new( x264_t *h, int b_fdec );
......
......@@ -325,12 +325,15 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
{
if( !b_lookahead )
for( int i = 0; i <= h->param.b_interlaced; i++ )
{
for( int j = 0; j < 3; j++ )
{
/* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32)>>!!j );
h->intra_border_backup[i][j] += 8;
}
CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->sps->i_mb_width );
}
/* Allocate scratch buffer */
int scratch_size = 0;
......@@ -357,8 +360,11 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
{
/* Releases the buffers freed by this function: the deblock strength and
 * intra border backup buffers for each index 0..h->param.b_interlaced,
 * followed by the scratch buffer. When b_lookahead is nonzero only the
 * scratch buffer is released. */
if( !b_lookahead )
for( int i = 0; i <= h->param.b_interlaced; i++ )
{
/* deblock strength storage; sized by h->sps->i_mb_width at allocation */
x264_free( h->deblock_strength[i] );
for( int j = 0; j < 3; j++ )
/* the stored pointer was advanced by 8 after allocation in
 * x264_macroblock_thread_allocate, so step back to the address that
 * was actually allocated before freeing it */
x264_free( h->intra_border_backup[i][j] - 8 );
}
x264_free( h->scratch_buffer );
}
......@@ -413,6 +419,19 @@ void x264_macroblock_slice_init( x264_t *h )
h->fdec->inv_ref_poc[field] = (256 + delta/2) / delta;
}
deblock_ref_table(-2) = -2;
deblock_ref_table(-1) = -1;
for( int i = 0; i < h->i_ref0 << h->sh.b_mbaff; i++ )
{
/* Mask off high bits to avoid frame num collisions with -1/-2.
* In current x264 frame num values don't cover a range of more
* than 32, so 6 bits is enough for uniqueness. */
if( !h->mb.b_interlaced )
deblock_ref_table(i) = h->fref0[i]->i_frame_num&63;
else
deblock_ref_table(i) = ((h->fref0[i>>1]->i_frame_num&63)<<1) + (i&1);
}
h->mb.i_neighbour4[6] =
h->mb.i_neighbour4[9] =
h->mb.i_neighbour4[12] =
......@@ -873,15 +892,13 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
| ((h->mb.i_neighbour_intra & MB_TOP) ? MB_TOP|MB_TOPLEFT : 0);
}
static void inline x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y )
void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y )
{
int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2;
int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
h->mb.i_neighbour = 0;
h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
h->mb.i_b8_xy = 2*(mb_y * h->mb.i_b8_stride + mb_x);
h->mb.i_b4_xy = 4*(mb_y * h->mb.i_b4_stride + mb_x);
if( mb_x > 0 )
{
......@@ -898,86 +915,103 @@ static void inline x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int
}
}
void x264_macroblock_cache_load_deblock( x264_t *h, int mb_x, int mb_y, int deblock_ref_table[2][34] )
void x264_macroblock_cache_load_deblock( x264_t *h )
{
x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
return;
int cur = h->mb.i_mb_xy;
int left = h->mb.i_mb_left_xy;
int top = h->mb.i_mb_top_xy;
int top_y = mb_y - (1 << h->mb.b_interlaced);
int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*mb_x;
int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*mb_x;
int s8x8 = h->mb.i_b8_stride;
int s4x4 = h->mb.i_b4_stride;
/* If we have multiple slices and we're deblocking on slice edges, we
* have to reload neighbour data. */
if( h->sh.i_first_mb && h->sh.i_disable_deblocking_filter_idc != 2 )
{
int old_neighbour = h->mb.i_neighbour;
int mb_x = h->mb.i_mb_x;
int mb_y = h->mb.i_mb_y;
x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
h->mb.i_neighbour &= ~old_neighbour;
if( h->mb.i_neighbour )
{
int left = h->mb.i_mb_left_xy;
int top = h->mb.i_mb_top_xy;
int top_y = mb_y - (1 << h->mb.b_interlaced);
int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*mb_x;
int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*mb_x;
int s8x8 = h->mb.i_b8_stride;
int s4x4 = h->mb.i_b4_stride;
uint8_t (*nnz)[24] = h->mb.non_zero_count;
uint8_t (*nnz)[24] = h->mb.non_zero_count;
if( h->mb.i_neighbour & MB_TOP )
CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[top][12] );
if( h->mb.i_neighbour & MB_TOP )
CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[top][12] );
if( h->mb.i_neighbour & MB_LEFT )
{
h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
}
if( h->mb.i_neighbour & MB_LEFT )
{
h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
}
CP32( &h->mb.cache.non_zero_count[x264_scan8[0]+0*8], &nnz[cur][0*4] );
CP32( &h->mb.cache.non_zero_count[x264_scan8[0]+1*8], &nnz[cur][1*4] );
CP32( &h->mb.cache.non_zero_count[x264_scan8[0]+2*8], &nnz[cur][2*4] );
CP32( &h->mb.cache.non_zero_count[x264_scan8[0]+3*8], &nnz[cur][3*4] );
for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
{
int16_t (*mv)[2] = h->mb.mv[l];
int8_t *ref = h->mb.ref[l];
for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
{
int16_t (*mv)[2] = h->mb.mv[l];
int8_t *ref = h->mb.ref[l];
int i8 = x264_scan8[0] - 8;
if( h->mb.i_neighbour & MB_TOP )
{
h->mb.cache.ref[l][i8+0] =
h->mb.cache.ref[l][i8+1] = ref[top_8x8 + 0];
h->mb.cache.ref[l][i8+2] =
h->mb.cache.ref[l][i8+3] = ref[top_8x8 + 1];
CP128( h->mb.cache.mv[l][i8], mv[top_4x4] );
}
int i8 = x264_scan8[0] - 8;
if( h->mb.i_neighbour & MB_TOP )
{
h->mb.cache.ref[l][i8+0] =
h->mb.cache.ref[l][i8+1] = deblock_ref_table(l,ref[top_8x8 + 0]);
h->mb.cache.ref[l][i8+2] =
h->mb.cache.ref[l][i8+3] = deblock_ref_table(l,ref[top_8x8 + 1]);
CP128( h->mb.cache.mv[l][i8], mv[top_4x4] );
i8 = x264_scan8[0] - 1;
if( h->mb.i_neighbour & MB_LEFT )
{
int ir = h->mb.i_b8_xy - 1;
int iv = h->mb.i_b4_xy - 1;
h->mb.cache.ref[l][i8+0*8] =
h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
h->mb.cache.ref[l][i8+2*8] =
h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
}
}
}
}
i8 = x264_scan8[0] - 1;
if( h->mb.i_neighbour & MB_LEFT )
{
int ir = h->mb.i_b8_xy - 1;
int iv = h->mb.i_b4_xy - 1;
h->mb.cache.ref[l][i8+0*8] =
h->mb.cache.ref[l][i8+1*8] = deblock_ref_table(l,ref[ir + 0*s8x8]);
h->mb.cache.ref[l][i8+2*8] =
h->mb.cache.ref[l][i8+3*8] = deblock_ref_table(l,ref[ir + 1*s8x8]);
CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
}
if( h->param.analyse.i_weighted_pred && h->sh.i_type == SLICE_TYPE_P )
{
/* Handle reference frame duplicates */
int i8 = x264_scan8[0] - 8;
h->mb.cache.ref[0][i8+0] =
h->mb.cache.ref[0][i8+1] = deblock_ref_table(h->mb.cache.ref[0][i8+0]);
h->mb.cache.ref[0][i8+2] =
h->mb.cache.ref[0][i8+3] = deblock_ref_table(h->mb.cache.ref[0][i8+2]);
int ref0 = deblock_ref_table(l,ref[h->mb.i_b8_xy+0+0*s8x8]);
int ref1 = deblock_ref_table(l,ref[h->mb.i_b8_xy+1+0*s8x8]);
int ref2 = deblock_ref_table(l,ref[h->mb.i_b8_xy+0+1*s8x8]);
int ref3 = deblock_ref_table(l,ref[h->mb.i_b8_xy+1+1*s8x8]);
i8 = x264_scan8[0] - 1;
h->mb.cache.ref[0][i8+0*8] =
h->mb.cache.ref[0][i8+1*8] = deblock_ref_table(h->mb.cache.ref[0][i8+0*8]);
h->mb.cache.ref[0][i8+2*8] =
h->mb.cache.ref[0][i8+3*8] = deblock_ref_table(h->mb.cache.ref[0][i8+2*8]);
int ref0 = deblock_ref_table(h->mb.cache.ref[0][x264_scan8[ 0]]);
int ref1 = deblock_ref_table(h->mb.cache.ref[0][x264_scan8[ 4]]);
int ref2 = deblock_ref_table(h->mb.cache.ref[0][x264_scan8[ 8]]);
int ref3 = deblock_ref_table(h->mb.cache.ref[0][x264_scan8[12]]);
uint32_t reftop = pack16to32( (uint8_t)ref0, (uint8_t)ref1 ) * 0x0101;
uint32_t refbot = pack16to32( (uint8_t)ref2, (uint8_t)ref3 ) * 0x0101;
M32( &h->mb.cache.ref[l][x264_scan8[0]+8*0] ) = reftop;
M32( &h->mb.cache.ref[l][x264_scan8[0]+8*1] ) = reftop;
M32( &h->mb.cache.ref[l][x264_scan8[0]+8*2] ) = refbot;
M32( &h->mb.cache.ref[l][x264_scan8[0]+8*3] ) = refbot;
CP128( h->mb.cache.mv[l][x264_scan8[0]+8*0], mv[h->mb.i_b4_xy+0*s4x4] );
CP128( h->mb.cache.mv[l][x264_scan8[0]+8*1], mv[h->mb.i_b4_xy+1*s4x4] );
CP128( h->mb.cache.mv[l][x264_scan8[0]+8*2], mv[h->mb.i_b4_xy+2*s4x4] );
CP128( h->mb.cache.mv[l][x264_scan8[0]+8*3], mv[h->mb.i_b4_xy+3*s4x4] );
M32( &h->mb.cache.ref[0][x264_scan8[0]+8*0] ) = reftop;
M32( &h->mb.cache.ref[0][x264_scan8[0]+8*1] ) = reftop;
M32( &h->mb.cache.ref[0][x264_scan8[0]+8*2] ) = refbot;
M32( &h->mb.cache.ref[0][x264_scan8[0]+8*3] ) = refbot;
}
}
......@@ -1041,6 +1075,8 @@ void x264_macroblock_cache_save( x264_t *h )
h->mb.cbp[i_mb_xy] = 0x72f; /* all set */
h->mb.b_transform_8x8 = 0;
memset( nnz, 16, sizeof( *h->mb.non_zero_count ) );
for( int i = 0; i < 24; i++ )
h->mb.cache.non_zero_count[x264_scan8[i]] = 16;
}
else
{
......
......@@ -271,8 +271,8 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead );
void x264_macroblock_slice_init( x264_t *h );
void x264_macroblock_thread_init( x264_t *h );
void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y );
void x264_macroblock_cache_load_deblock( x264_t *h, int mb_x, int mb_y, int deblock_ref_table[2][34] );
#define deblock_ref_table(l,x) deblock_ref_table[l][x+2]
void x264_macroblock_cache_load_deblock( x264_t *h );
void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y );
void x264_macroblock_cache_save( x264_t *h );
void x264_macroblock_bipred_init( x264_t *h );
......
......@@ -889,8 +889,7 @@ chroma_intra_body_mmxext:
;-----------------------------------------------------------------------------
; static void deblock_strength( uint8_t nnz[48], int8_t ref[2][40], int16_t mv[2][40][2],
; uint8_t bs[2][4][4], int mvy_limit, int bframe, int step,
; int first_edge_only )
; uint8_t bs[2][4][4], int mvy_limit, int bframe )
;-----------------------------------------------------------------------------
%define scan8start (4+1*8)
......
......@@ -1752,6 +1752,9 @@ static int x264_slice_write( x264_t *h )
int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 3;
int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : INT_MAX;
int starting_bits = bs_pos(&h->out.bs);
int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
int b_hpel = h->fdec->b_kept_as_ref;
b_deblock &= b_hpel || h->param.psz_dump_yuv;
bs_realign( &h->out.bs );
/* Slice */
......@@ -1890,6 +1893,19 @@ static int x264_slice_write( x264_t *h )
/* save cache */
x264_macroblock_cache_save( h );
/* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
if( b_deblock )
{
int mvy_limit = 4 >> h->sh.b_mbaff;
uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&h->sh.b_mbaff][h->mb.i_mb_x];
x264_macroblock_cache_load_deblock( h );
if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
memset( bs, 3, 2*4*4*sizeof(uint8_t) );
else
h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B );
}
/* accumulate mb stats */
h->stat.frame.i_mb_count[h->mb.i_type]++;
......
......@@ -459,8 +459,12 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
static void x264_macroblock_encode_skip( x264_t *h )
{
for( int i = 0; i < sizeof( h->mb.cache.non_zero_count ); i += 16 )
M128( &h->mb.cache.non_zero_count[i] ) = M128_ZERO;
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+0*8] ) = 0;
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+1*8] ) = 0;
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+2*8] ) = 0;
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+3*8] ) = 0;
for( int i = 16; i < 24; i++ )
h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
h->mb.i_cbp_luma = 0;
h->mb.i_cbp_chroma = 0;
h->mb.cbp[h->mb.i_mb_xy] = 0;
......
......@@ -1166,8 +1166,8 @@ static int check_deblock( int cpu_ref, int cpu_new )
mv[j][k][l] = ((rand()&7) != 7) ? (rand()&7) - 3 : (rand()&1023) - 512;
}
set_func_name( "deblock_strength" );
call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1), 1, 0 );
call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1), 1, 0 );
call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1) );
call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1) );
if( memcmp( bs[0], bs[1], sizeof(bs[0]) ) )
{
ok = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment