Commit 42e179e8 authored by Fiona Glaser

MMX CABAC mvd sum calculation

Faster CABAC mvd coding.
parent 46b10798
......@@ -143,6 +143,15 @@ static inline int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc )
return sum;
}
/* Classify the summed absolute mvd of two neighbouring blocks, for both
 * vector components at once.
 *
 * For each component c (0 and 1) the sum |mvdleft[c]| + |mvdtop[c]| is mapped
 * to 0 (sum <= 2), 1 (2 < sum <= 32) or 2 (sum > 32).
 *
 * Returns the class for component 0 in bits 0..15 and the class for
 * component 1 in bits 16..31. */
static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
{
    const int sum0 = abs( mvdleft[0] ) + abs( mvdtop[0] );
    const int sum1 = abs( mvdleft[1] ) + abs( mvdtop[1] );
    const uint32_t class0 = sum0 > 32 ? 2 : ( sum0 > 2 ? 1 : 0 );
    const uint32_t class1 = sum1 > 32 ? 2 : ( sum1 > 2 ? 1 : 0 );
    return class0 | ( class1 << 16 );
}
/****************************************************************************
*
****************************************************************************/
......
......@@ -107,6 +107,35 @@ static ALWAYS_INLINE int array_non_zero_int_mmx( void *v, int i_count )
}
else return array_non_zero_int_c( v, i_count );
}
#define x264_cabac_amvd_sum x264_cabac_amvd_sum_mmxext
/* MMX-register implementation of x264_cabac_amvd_sum().
 *
 * mvdleft and mvdtop each point at two consecutive int16_t values, which are
 * loaded as a single 32-bit word. Per 16-bit lane the code computes
 * x = |left| + |top| and classifies it as (x>2) + (x>32).
 *
 * Returns the low 32 bits of the result register: lane 0 in bits 0..15 and
 * lane 1 in bits 16..31.
 *
 * The asm overwrites mm0..mm3, so they are listed as clobbers to keep the
 * compiler from assuming their contents survive the statement.
 * NOTE(review): no emms is issued here; presumably the caller's convention
 * handles the MMX/x87 state switch — confirm. */
static ALWAYS_INLINE uint32_t x264_cabac_amvd_sum_mmxext(int16_t *mvdleft, int16_t *mvdtop)
{
    static const uint64_t pw_2    = 0x0002000200020002ULL;
    static const uint64_t pw_28   = 0x001C001C001C001CULL;
    static const uint64_t pw_2184 = 0x0888088808880888ULL;
    /* MIN(((x+28)*2184)>>16,2) = (x>2) + (x>32) */
    /* 2184 = fix16(1/30) */
    uint32_t amvd;
    asm(
        "movd      %1, %%mm0 \n" /* mm0 = left mvd pair */
        "movd      %2, %%mm1 \n" /* mm1 = top mvd pair */
        "pxor   %%mm2, %%mm2 \n"
        "pxor   %%mm3, %%mm3 \n"
        "psubw  %%mm0, %%mm2 \n" /* mm2 = -left */
        "psubw  %%mm1, %%mm3 \n" /* mm3 = -top */
        "pmaxsw %%mm2, %%mm0 \n" /* mm0 = max(left,-left) = |left| */
        "pmaxsw %%mm3, %%mm1 \n" /* mm1 = max(top,-top) = |top| */
        "paddw     %3, %%mm0 \n" /* + 28 */
        "paddw  %%mm1, %%mm0 \n" /* mm0 = |left| + |top| + 28 */
        "pmulhuw   %4, %%mm0 \n" /* (x+28)*2184 >> 16 */
        "pminsw    %5, %%mm0 \n" /* clamp to 2 */
        "movd   %%mm0, %0    \n"
        :"=r"(amvd)
        :"m"(*(uint32_t*)mvdleft),"m"(*(uint32_t*)mvdtop),
         "m"(pw_28),"m"(pw_2184),"m"(pw_2)
        :"mm0", "mm1", "mm2", "mm3"
    );
    return amvd;
}
#endif
#endif
......@@ -390,14 +390,11 @@ static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx
x264_cabac_encode_decision( cb, 54 + ctx, 0 );
}
static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd )
static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
{
static const uint8_t ctxes[9] = { 0,3,4,5,6,6,6,6,6 };
const int amvd = abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 1][l] ) +
abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 8][l] );
const int i_abs = abs( mvd );
const int ctxbase = l ? 47 : 40;
int ctx = (amvd>2) + (amvd>32);
int i;
if( i_abs == 0 )
......@@ -443,16 +440,19 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
/* Encode the motion vector difference of block idx in list i_list.
 *
 * The predicted vector is obtained from x264_mb_predict_mv() and subtracted
 * from the cached motion vector to give (mdx, mdy). Both components are then
 * coded with x264_cabac_mb_mvd_cpn().
 *
 * Returns pack16to32_mask(mdx,mdy).
 *
 * NOTE(review): height is not referenced in the lines shown here. */
static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width, int height )
{
DECLARE_ALIGNED_4( int16_t mvp[2] );
uint32_t amvd;
int mdx, mdy;
/* Calculate mvd: cached mv minus the predicted mv */
x264_mb_predict_mv( h, i_list, idx, width, mvp );
mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
/* Context classes for both components, computed once from the cached mvd at
 * scan8 offsets -1 (passed as mvdleft) and -8 (passed as mvdtop):
 * component 0 in the low 16 bits, component 1 in the high 16 bits. */
amvd = x264_cabac_amvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);
/* encode */
/* NOTE(review): the two 6-argument calls below look like the pre-change lines
 * of this diff hunk, shown next to their 7-argument replacements — confirm
 * against the actual file. */
x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx );
x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy );
x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFFFF );
x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>16 );
return pack16to32_mask(mdx,mdy);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment