Commit 0cbf0fc2 authored by Loren Merritt

fix some strides that weren't a multiple of 16.


git-svn-id: svn://svn.videolan.org/x264/trunk@576 df754926-b1dd-0310-bc7b-ec298dee348c
parent 58e12b0e
......@@ -1162,15 +1162,15 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
{
DECLARE_ALIGNED( uint8_t, pix1[8*8], 8 );
DECLARE_ALIGNED( uint8_t, pix2[8*8], 8 );
DECLARE_ALIGNED( uint8_t, pix1[16*8], 8 );
uint8_t *pix2 = pix1+8;
const int i_stride = h->mb.pic.i_stride[1];
const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
const int oe = 4*(i8x8&1) + 2*(i8x8&2)*FENC_STRIDE;
#define CHROMA4x4MC( width, height, me, x, y ) \
h->mc.mc_chroma( &p_fref[4][or+x+y*i_stride], i_stride, &pix1[x+y*8], 8, (me).mv[0], (me).mv[1], width, height ); \
h->mc.mc_chroma( &p_fref[5][or+x+y*i_stride], i_stride, &pix2[x+y*8], 8, (me).mv[0], (me).mv[1], width, height );
h->mc.mc_chroma( &p_fref[4][or+x+y*i_stride], i_stride, &pix1[x+y*16], 16, (me).mv[0], (me).mv[1], width, height ); \
h->mc.mc_chroma( &p_fref[5][or+x+y*i_stride], i_stride, &pix2[x+y*16], 16, (me).mv[0], (me).mv[1], width, height );
if( pixel == PIXEL_4x4 )
{
......@@ -1190,8 +1190,8 @@ static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a,
CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][1], 2,0 );
}
return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 8 )
+ h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 8 );
return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 16 )
+ h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 16 );
}
static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
......
......@@ -55,7 +55,8 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
const int i_stride = fenc->i_stride_lowres;
const int i_pel_offset = 8 * ( i_mb_x + i_mb_y * i_stride );
uint8_t pix1[9*9], pix2[8*8];
DECLARE_ALIGNED( uint8_t, pix1[9*FDEC_STRIDE], 8 );
uint8_t *pix2 = pix1+8;
x264_me_t m[2];
int i_bcost = COST_MAX;
int i_cost_bak;
......@@ -104,16 +105,16 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
}
#define TRY_BIDIR( mv0, mv1, penalty ) \
{ \
int stride2 = 8; \
int stride2 = 16; \
uint8_t *src2; \
int i_cost; \
h->mc.mc_luma( m[0].p_fref, m[0].i_stride[0], pix1, 8, \
h->mc.mc_luma( m[0].p_fref, m[0].i_stride[0], pix1, 16, \
(mv0)[0], (mv0)[1], 8, 8 ); \
src2 = h->mc.get_ref( m[1].p_fref, m[1].i_stride[0], pix2, &stride2, \
(mv1)[0], (mv1)[1], 8, 8 ); \
h->mc.avg[PIXEL_8x8]( pix1, 8, src2, stride2 ); \
h->mc.avg[PIXEL_8x8]( pix1, 16, src2, stride2 ); \
i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
m[0].p_fenc[0], FENC_STRIDE, pix1, 8 ); \
m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
if( i_bcost > i_cost ) \
{ \
i_bcost = i_cost; \
......@@ -196,8 +197,7 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
lowres_intra_mb:
{
DECLARE_ALIGNED( uint8_t, pix_buf[9*FDEC_STRIDE], 8 );
uint8_t *pix = &pix_buf[8+FDEC_STRIDE - 1];
uint8_t *pix = &pix1[8+FDEC_STRIDE - 1];
uint8_t *src = &fenc->lowres[0][i_pel_offset - 1];
int intra_penalty = 5 + 10 * b_bidir;
int satds[4], i_icost;
......
......@@ -56,8 +56,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
if( pixel_asm.name[i] != pixel_ref.name[i] ) \
{ \
used_asm = 1; \
res_c = pixel_c.name[i]( buf1, 32, buf2, 24 ); \
res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \
res_c = pixel_c.name[i]( buf1, 32, buf2, 16 ); \
res_asm = pixel_asm.name[i]( buf1, 32, buf2, 16 ); \
if( res_c != res_asm ) \
{ \
ok = 0; \
......@@ -79,16 +79,16 @@ static int check_pixel( int cpu_ref, int cpu_new )
if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
{ \
used_asm = 1; \
res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 24 ); \
res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 24 ); \
res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 24 ); \
res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 32 ); \
res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 32 ); \
res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 32 ); \
if(N==4) \
{ \
res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 24 ); \
pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 24, res_asm ); \
res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 32 ); \
pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 32, res_asm ); \
} \
else \
pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 24, res_asm ); \
pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 32, res_asm ); \
if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
{ \
ok = 0; \
......@@ -387,8 +387,8 @@ static int check_mc( int cpu_ref, int cpu_new )
if( mc_a.name[i] != mc_ref.name[i] ) \
{ \
used_asm = 1; \
mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \
mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \
mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
if( memcmp( buf3, buf4, 1024 ) ) \
{ \
ok = 0; \
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment