Commit 52041128 authored by Fiona Glaser

More tweaks to me.c

Added inline MMX version of UMH's predictor difference test
Various cosmetics throughout me.c
Removed a C99-ism introduced in r878.
parent d4e07786
......@@ -130,6 +130,17 @@ static inline void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t
dst[1] = x264_median( a[1], b[1], c[1] );
}
/* Plain C predictor difference test.
 * Walks the i_mvc motion vector candidates in mvc and, for every pair of
 * neighbouring entries, adds up the absolute difference of their two
 * components. Returns the total; with fewer than two candidates there are
 * no pairs and the result is 0. */
static inline int x264_predictor_difference( int16_t (*mvc)[2], int i_mvc )
{
    int total = 0;
    int next = 1;
    while( next < i_mvc )
    {
        const int16_t *lo = mvc[next-1];
        const int16_t *hi = mvc[next];
        total += abs( lo[0] - hi[0] );
        total += abs( lo[1] - hi[1] );
        next++;
    }
    return total;
}
#ifdef HAVE_MMX
#include "x86/util.h"
#endif
......
......@@ -39,6 +39,37 @@ static inline void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16_t *b,
:"m"(*(uint32_t*)a), "m"(*(uint32_t*)b), "m"(*(uint32_t*)c)
);
}
#define x264_predictor_difference x264_predictor_difference_mmxext
/* MMXEXT version of the predictor difference test.
 *
 * Returns the sum, over every pair of neighbouring entries in mvc[0..i_mvc-1],
 * of the absolute differences of both 16-bit components.
 *
 * mvc:   array of i_mvc motion vector candidates (two int16_t each).
 * i_mvc: number of candidates. With fewer than two there is no pair to
 *        compare, so 0 is returned without touching mvc.
 *
 * The asm works from the end of the array towards the start. An even count
 * first handles the single trailing pair with two 32-bit loads; the remaining
 * (or, for an odd count, all) pairs are handled two at a time with 64-bit
 * loads. Arithmetic is done in 16-bit lanes: differences wrap at 16 bits and
 * the four running totals saturate at 65535 (paddusw).
 *
 * NOTE(review): no emms is issued here; presumably the caller clears the MMX
 * state before any x87 floating point code runs -- confirm. */
static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], int i_mvc )
{
    uint16_t output[4];
    /* The count is used as an index register next to the mvc pointer, so it
     * must have pointer width for the addressing mode to assemble on x86-64. */
    intptr_t i = i_mvc;

    /* Without this guard the loads below would reach in front of mvc[0]. */
    if( i_mvc < 2 )
        return 0;

    __asm__(
        "pxor    %%mm4, %%mm4       \n" /* mm4 = four 16-bit accumulators */
        "test    $1, %1             \n"
        "jnz     3f                 \n"
        /* even count: difference of the last two candidates only */
        "movd    -8(%2,%1,4), %%mm0 \n"
        "movd    -4(%2,%1,4), %%mm3 \n"
        "psubw   %%mm3, %%mm0       \n"
        "jmp     2f                 \n"
        "3:                         \n"
        "sub     $1, %1             \n"
        "1:                         \n"
        /* two neighbouring pairs at once: {a,b} - {b,c} */
        "movq    -8(%2,%1,4), %%mm0 \n"
        "psubw   -4(%2,%1,4), %%mm0 \n"
        "2:                         \n"
        "sub     $2, %1             \n" /* sets the flags consumed by jg below */
        "pxor    %%mm2, %%mm2       \n"
        "psubw   %%mm0, %%mm2       \n"
        "pmaxsw  %%mm2, %%mm0       \n" /* abs(x) = max(x, -x) */
        "paddusw %%mm0, %%mm4       \n"
        "jg      1b                 \n"
        "movq    %%mm4, %0          \n"
        :"=m"(output), "+r"(i)
        :"r"(mvc)
        /* MMX registers and flags are overwritten; "memory" makes sure the
         * contents of mvc are actually in memory when the asm reads them. */
        :"mm0", "mm2", "mm3", "mm4", "cc", "memory"
    );
    return output[0] + output[1] + output[2] + output[3];
}
#endif
#endif
......@@ -186,8 +186,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
/* try extra predictors if provided */
if( h->mb.i_subpel_refine >= 3 )
{
COST_MV_HPEL( bmx, bmy );
uint32_t bmv = pack16to32_mask(bmx,bmy);
COST_MV_HPEL( bmx, bmy );
do
{
if( *(uint32_t*)mvc[i] && (bmv - *(uint32_t*)mvc[i]) )
......@@ -235,7 +235,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
for( i = 0; i < i_me_range; i++ )
{
DIA1_ITER( bmx, bmy );
if( bmx == omx && bmy == omy )
if( (bmx == omx) & (bmy == omy) )
break;
if( !CHECK_MVRANGE(bmx, bmy) )
break;
......@@ -389,9 +389,7 @@ me_hex2:
+ abs( m->mvp[1] - mvc[0][1] );
denom++;
}
for( i = 0; i < i_mvc-1; i++ )
mvd += abs( mvc[i][0] - mvc[i+1][0] )
+ abs( mvc[i][1] - mvc[i+1][1] );
mvd += x264_predictor_difference( mvc, i_mvc );
}
sad_ctx = SAD_THRESH(1000) ? 0
......@@ -689,13 +687,12 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
int bcost = m->cost;
int odir = -1, bdir;
/* try the subpel component of the predicted mv */
if( hpel_iters && h->mb.i_subpel_refine < 3 )
{
int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
if( mx != bmx || my != bmy )
if( (mx-bmx)|(my-bmy) )
COST_MV_SAD( mx, my );
}
......@@ -715,7 +712,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx ] + p_cost_mvy[omy+2], bmy, omy+2 );
COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy ], bmx, omx-2, bmy, omy );
COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy ], bmx, omx+2, bmy, omy );
if( bmx == omx && bmy == omy )
if( (bmx == omx) & (bmy == omy) )
break;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment