Commit b019515e authored by Henrik Gramner
Browse files

Merge zero buffers

Improves cache efficiency.
parent d75b93b0
......@@ -155,7 +155,7 @@ static weight_fn_t mc_weight_wtab[6] =
mc_weight_w16,
mc_weight_w20,
};
const x264_weight_t x264_weight_none[3] = { {{0}} };
static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height )
{
for( int y = 0; y < i_height; y++ )
......
......@@ -244,8 +244,7 @@ typedef struct x264_weight_t
weight_fn_t *weightfn;
} ALIGNED_16( x264_weight_t );
#define x264_weight_none x264_template(weight_none)
extern const x264_weight_t x264_weight_none[3];
#define x264_weight_none ((const x264_weight_t*)x264_zero)
#define SET_WEIGHT( w, b, s, d, o )\
{\
......
......@@ -2534,3 +2534,6 @@ const vlc_t x264_run_before_init[7][16] =
{ 0x1, 11 }, /* str=00000000001 */
},
};
/* psy_trellis_init() has the largest size requirement of 16*FDEC_STRIDE*sizeof(pixel) */
ALIGNED_64( uint8_t x264_zero[1024] ) = { 0 };
......@@ -94,4 +94,6 @@ extern const vlc_t x264_total_zeros_2x2_dc[3][4];
extern const vlc_t x264_total_zeros_2x4_dc[7][8];
extern const vlc_t x264_run_before_init[7][16];
extern uint8_t x264_zero[1024];
#endif
......@@ -558,12 +558,10 @@ static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra,
/* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
/* Fills the cached DCT coefficients of the source block used by psy-trellis:
 * h->mb.pic.fenc_dct8 via h->dctf.sub16x16_dct8 and/or h->mb.pic.fenc_dct4
 * via h->dctf.sub16x16_dct, each called on h->mb.pic.p_fenc[0] with an
 * all-zero pixel buffer as the second pixel input.
 *
 *   h           - encoder context; reads mb.b_transform_8x8 and mb.pic.p_fenc[0],
 *                 writes mb.pic.fenc_dct8 / mb.pic.fenc_dct4.
 *   do_both_dct - nonzero: compute both transform sizes; zero: compute only
 *                 the size selected by h->mb.b_transform_8x8.
 *
 * Presumably the sub16x16_dct* functions transform the difference of their two
 * pixel inputs, so a zero reference yields the DCT of the source itself --
 * TODO confirm against the dctf implementations.
 *
 * NOTE(review): this listing comes from a diff view with the +/- markers
 * stripped. The lines using the local `zero` buffer are the pre-change
 * versions and the lines using `(pixel*)x264_zero` are their post-change
 * replacements; only one of each pair is expected to exist in the real file. */
static void inline psy_trellis_init( x264_t *h, int do_both_dct )
{
/* Pre-change: function-local static zero buffer of 16*FDEC_STRIDE pixels. */
ALIGNED_64( static pixel zero[16*FDEC_STRIDE] ) = {0};
/* 8x8 transform path: needed when both sizes are requested or 8x8 is the active transform. */
if( do_both_dct || h->mb.b_transform_8x8 )
h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
/* Post-change form of the call above: uses the shared x264_zero byte buffer, cast to pixel*. */
h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], (pixel*)x264_zero );
/* 4x4 transform path: needed when both sizes are requested or 8x8 is not the active transform. */
if( do_both_dct || !h->mb.b_transform_8x8 )
h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
/* Post-change form of the call above: uses the shared x264_zero byte buffer, cast to pixel*. */
h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], (pixel*)x264_zero );
}
/* Reset fenc satd scores cache for psy RD */
......
......@@ -633,7 +633,6 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
/* successive elimination by comparing DC before a full SAD,
* because sum(abs(diff)) >= abs(diff(sum)). */
uint16_t *sums_base = m->integral;
ALIGNED_16( static pixel zero[8*FENC_STRIDE] ) = {0};
ALIGNED_ARRAY_16( int, enc_dc,[4] );
int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
int delta = x264_pixel_size[sad_size].w;
......@@ -641,7 +640,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
int xn;
uint16_t *cost_fpel_mvx = h->cost_mv_fpel[h->mb.i_qp][-m->mvp[0]&3] + (-m->mvp[0]>>2);
h->pixf.sad_x4[sad_size]( zero, p_fenc, p_fenc+delta,
h->pixf.sad_x4[sad_size]( (pixel*)x264_zero, p_fenc, p_fenc+delta,
p_fenc+delta*FENC_STRIDE, p_fenc+delta+delta*FENC_STRIDE,
FENC_STRIDE, enc_dc );
if( delta == 4 )
......
......@@ -96,7 +96,6 @@ static ALWAYS_INLINE int cached_satd( x264_t *h, int size, int x, int y )
static const uint8_t satd_shift_x[3] = {3, 2, 2};
static const uint8_t satd_shift_y[3] = {2-1, 3-2, 2-2};
static const uint8_t satd_offset[3] = {0, 8, 16};
ALIGNED_16( static pixel zero[16] ) = {0};
int cache_index = (x >> satd_shift_x[size - PIXEL_8x4]) + (y >> satd_shift_y[size - PIXEL_8x4])
+ satd_offset[size - PIXEL_8x4];
int res = h->mb.pic.fenc_satd_cache[cache_index];
......@@ -105,8 +104,8 @@ static ALWAYS_INLINE int cached_satd( x264_t *h, int size, int x, int y )
else
{
pixel *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, zero, 0 ) >> 1;
res = h->pixf.satd[size]( fenc, FENC_STRIDE, zero, 0 ) - dc;
int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, (pixel*)x264_zero, 0 ) >> 1;
res = h->pixf.satd[size]( fenc, FENC_STRIDE, (pixel*)x264_zero, 0 ) - dc;
h->mb.pic.fenc_satd_cache[cache_index] = res + 1;
return res;
}
......@@ -123,7 +122,6 @@ static ALWAYS_INLINE int cached_satd( x264_t *h, int size, int x, int y )
static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
{
ALIGNED_16( static pixel zero[16] ) = {0};
int satd = 0;
pixel *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
pixel *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
......@@ -140,8 +138,8 @@ static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
}
else
{
int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, zero, 0 ) >> 1;
satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, zero, 0 ) - dc - cached_satd( h, size, x, y ));
int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, (pixel*)x264_zero, 0 ) >> 1;
satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, (pixel*)x264_zero, 0 ) - dc - cached_satd( h, size, x, y ));
}
satd = (satd * h->mb.i_psy_rd * h->mb.i_psy_rd_lambda + 128) >> 8;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment