Commit 7b8c02d5 authored by Loren Merritt's avatar Loren Merritt

move hpel_filter cpu detection to a function pointer like everything else


git-svn-id: svn://svn.videolan.org/x264/trunk@687 df754926-b1dd-0310-bc7b-ec298dee348c
parent a79aac14
......@@ -43,6 +43,8 @@ extern void x264_mc_copy_w16_sse2( uint8_t *, int, uint8_t *, int, int );
extern void x264_plane_copy_mmxext( uint8_t *, int, uint8_t *, int, int w, int h);
extern void x264_prefetch_fenc_mmxext( uint8_t *, int, uint8_t *, int, int );
extern void x264_prefetch_ref_mmxext( uint8_t *, int, int );
extern void x264_hpel_filter_mmxext( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
int i_stride, int i_width, int i_height );
#define AVG(W,H) \
static void x264_pixel_avg_ ## W ## x ## H ## _mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src ) \
......@@ -164,6 +166,7 @@ void x264_mc_mmxext_init( x264_mc_functions_t *pf )
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
pf->plane_copy = x264_plane_copy_mmxext;
pf->hpel_filter = x264_hpel_filter_mmxext;
pf->prefetch_fenc = x264_prefetch_fenc_mmxext;
pf->prefetch_ref = x264_prefetch_ref_mmxext;
......
......@@ -220,6 +220,14 @@ static inline void mc_hc( uint8_t *src, int i_src_stride, uint8_t *dst, int i_ds
}
}
static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
int stride, int width, int height )
{
mc_hh( src, stride, dsth, stride, width, height );
mc_hv( src, stride, dstv, stride, width, height );
mc_hc( src, stride, dstc, stride, width, height );
}
static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
......@@ -361,6 +369,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
pf->copy[PIXEL_4x4] = mc_copy_w4;
pf->plane_copy = plane_copy;
pf->hpel_filter = hpel_filter;
pf->prefetch_fenc = prefetch_fenc_null;
pf->prefetch_ref = prefetch_ref_null;
......@@ -379,57 +388,28 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
#endif
}
extern void x264_hpel_filter_mmxext( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
int i_stride, int i_width, int i_height );
void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
const int b_interlaced = h->sh.b_mbaff;
const int x_inc = 16, y_inc = 16;
const int stride = frame->i_stride[0] << b_interlaced;
const int width = stride + frame->i_width[0] - frame->i_stride[0];
const int width = frame->i_width[0];
int start = (mb_y*16 >> b_interlaced) - 8;
int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8;
int offs = start*stride - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
int x, y;
if( mb_y & b_interlaced )
return;
mb_y >>= b_interlaced;
#ifdef HAVE_MMX
if( h->param.cpu & X264_CPU_MMXEXT )
for( y=0; y<=b_interlaced; y++, offs+=frame->i_stride[0] )
{
// buffer = 4 for deblock + 3 for 6tap, rounded to 8
int offs = start*stride - 8;
x264_hpel_filter_mmxext(
h->mc.hpel_filter(
frame->filtered[1] + offs,
frame->filtered[2] + offs,
frame->filtered[3] + offs,
frame->plane[0] + offs,
stride, width + 16, height - start );
}
else
#endif
{
for( y = start; y < height; y += y_inc )
{
uint8_t *p_in = frame->plane[0] + y * stride - 8;
uint8_t *p_h = frame->filtered[1] + y * stride - 8;
uint8_t *p_v = frame->filtered[2] + y * stride - 8;
uint8_t *p_c = frame->filtered[3] + y * stride - 8;
for( x = -8; x < width + 8; x += x_inc )
{
mc_hh( p_in, stride, p_h, stride, x_inc, y_inc );
mc_hv( p_in, stride, p_v, stride, x_inc, y_inc );
mc_hc( p_in, stride, p_c, stride, x_inc, y_inc );
p_h += x_inc;
p_v += x_inc;
p_c += x_inc;
p_in += x_inc;
}
}
}
/* generate integral image:
* frame->integral contains 2 planes. in the upper plane, each element is
......
......@@ -57,6 +57,9 @@ typedef struct
void (*plane_copy)( uint8_t *dst, int i_dst,
uint8_t *src, int i_src, int w, int h);
void (*hpel_filter)( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
int i_stride, int i_width, int i_height );
/* prefetch the next few macroblocks of fenc or fdec */
void (*prefetch_fenc)( uint8_t *pix_y, int stride_y,
uint8_t *pix_uv, int stride_uv, int mb_x );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment