Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
VideoLAN
x264
Commits
a8703c89
Commit
a8703c89
authored
Jun 29, 2004
by
Laurent Aimar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
* all: fixed ss2 runtime selection.
git-svn-id:
svn://svn.videolan.org/x264/trunk@11
df754926-b1dd-0310-bc7b-ec298dee348c
parent
02713c24
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
333 additions
and
203 deletions
+333
-203
Jamfile
Jamfile
+1
-1
core/i386/mc-c.c
core/i386/mc-c.c
+232
-167
core/i386/mc.asm
core/i386/mc.asm
+94
-31
core/i386/mc.h
core/i386/mc.h
+1
-0
core/mc.c
core/mc.c
+5
-4
No files found.
Jamfile
View file @
a8703c89
...
...
@@ -35,7 +35,7 @@ SOURCES_ALTIVEC = core/ppc/mc.c core/ppc/pixel.c ;
SOURCES_X264 = $(SOURCES_C) ;
if $(OS) = LINUX
{
DEFINES += ARCH_X86 HAVE_MMXEXT HAVE_MALLOC_H ;
DEFINES += ARCH_X86 HAVE_MMXEXT
HAVE_SSE2
HAVE_MALLOC_H ;
SOURCES_X264 += $(SOURCES_MMX) ;
SOURCES_X264 += $(SOURCES_X86) ;
ASFLAGS = -f elf ;
...
...
core/i386/mc-c.c
View file @
a8703c89
...
...
@@ -181,107 +181,87 @@ static inline int x264_tapfilter1( uint8_t *pix )
return
pix
[
-
2
]
-
5
*
pix
[
-
1
]
+
20
*
(
pix
[
0
]
+
pix
[
1
])
-
5
*
pix
[
2
]
+
pix
[
3
];
}
#if 0
static inline void pixel_avg_w4( uint8_t *dst, int i_dst_stride,
uint8_t *src1, int i_src1_stride,
uint8_t *src2, int i_src2_stride,
int i_height )
{
int x, y;
for( y = 0; y < i_height; y++ )
{
for( x = 0; x < 4; x++ )
{
dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
}
dst += i_dst_stride;
src1 += i_src1_stride;
src2 += i_src2_stride;
}
typedef
void
(
*
pf_mc_t
)(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
i_height
);
/* NASM functions */
extern
void
x264_pixel_avg_w4_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
extern
void
x264_pixel_avg_w8_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
extern
void
x264_pixel_avg_w16_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
extern
void
x264_pixel_avg_w16_sse2
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
/* Macro to define NxM functions */
/* mc I+H */
#define MC_IH( name, cpu, width, height, off ) \
static void name##_w##width##_##cpu( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ) \
{ \
DECLARE_ALIGNED( uint8_t, tmp[width*height], width ); \
\
mc_hh_w##width( src, i_src_stride, tmp, width, i_height ); \
x264_pixel_avg_w##width##_##cpu( dst, i_dst_stride, \
src+(off), i_src_stride, \
tmp, width, i_height ); \
}
static inline void pixel_avg_w8( uint8_t *dst, int i_dst_stride,
uint8_t *src1, int i_src1_stride,
uint8_t *src2, int i_src2_stride,
int i_height )
{
int y;
for( y = 0; y < i_height; y++ )
{
asm volatile(
"movq (%1), %%mm0\n"
"movq (%2), %%mm1\n"
"pavgb %%mm1, %%mm0\n"
"movq %%mm0, (%0)\n"
: : "r"(dst), "r"(src1), "r"(src2)
);
dst += i_dst_stride;
src1 += i_src1_stride;
src2 += i_src2_stride;
}
/* mc I+V */
#define MC_IV( name, cpu, width, height, off ) \
static void name##_w##width##_##cpu( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ) \
{ \
DECLARE_ALIGNED( uint8_t, tmp[width*height], width ); \
\
mc_hv_w##width( src, i_src_stride, tmp, width, i_height ); \
x264_pixel_avg_w##width##_##cpu( dst, i_dst_stride, \
src+(off), i_src_stride, \
tmp, width, i_height ); \
}
static inline void pixel_avg_w16( uint8_t *dst, int i_dst_stride,
uint8_t *src1, int i_src1_stride,
uint8_t *src2, int i_src2_stride,
int i_height )
{
int y;
for( y = 0; y < i_height; y++ )
{
asm volatile(
"movq (%1), %%mm0\n"
"movq 8(%1), %%mm2\n"
"movq (%2), %%mm1\n"
"movq 8(%2), %%mm3\n"
"pavgb %%mm1, %%mm0\n"
"movq %%mm0, (%0)\n"
"pavgb %%mm3, %%mm2\n"
"movq %%mm2, 8(%0)\n"
: : "r"(dst), "r"(src1), "r"(src2)
);
dst += i_dst_stride;
src1 += i_src1_stride;
src2 += i_src2_stride;
}
/* mc H+V */
#define MC_HV( name, cpu, width, height, off1, off2 ) \
static void name##_w##width##_##cpu( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ) \
{ \
DECLARE_ALIGNED( uint8_t, tmp1[width*height], width ); \
DECLARE_ALIGNED( uint8_t, tmp2[width*height], width ); \
\
mc_hv_w##width( src+(off1), i_src_stride, tmp1, width, i_height ); \
mc_hh_w##width( src+(off2), i_src_stride, tmp2, width, i_height ); \
x264_pixel_avg_w##width##_##cpu( dst, i_dst_stride, \
tmp1, width, tmp2, width, \
i_height ); \
}
/* mc C+H */
#define MC_CH( name, cpu, width, height, off ) \
static void name##_w##width##_##cpu( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ) \
{ \
DECLARE_ALIGNED( uint8_t, tmp1[width*height], width ); \
DECLARE_ALIGNED( uint8_t, tmp2[width*height], width ); \
\
mc_hc_w##width( src, i_src_stride, tmp1, width, i_height ); \
mc_hh_w##width( src+(off), i_src_stride, tmp2, width, i_height ); \
x264_pixel_avg_w##width##_##cpu( dst, i_dst_stride, \
tmp1, width, tmp2, width, \
i_height ); \
}
/* mc C+V */
#define MC_CV( name, cpu, width, height, off ) \
static void name##_w##width##_##cpu( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ) \
{ \
DECLARE_ALIGNED( uint8_t, tmp1[width*height], width ); \
DECLARE_ALIGNED( uint8_t, tmp2[width*height], width ); \
\
mc_hc_w##width( src, i_src_stride, tmp1, width, i_height ); \
mc_hv_w##width( src+(off), i_src_stride, tmp2, width, i_height ); \
x264_pixel_avg_w##width##_##cpu( dst, i_dst_stride, \
tmp1, width, tmp2, width, \
i_height ); \
}
#else
extern
void
pixel_avg_w4
(
uint8_t
*
dst
,
int
i_dst_stride
,
uint8_t
*
src1
,
int
i_src1_stride
,
uint8_t
*
src2
,
int
i_src2_stride
,
int
i_height
);
extern
void
pixel_avg_w8
(
uint8_t
*
dst
,
int
i_dst_stride
,
uint8_t
*
src1
,
int
i_src1_stride
,
uint8_t
*
src2
,
int
i_src2_stride
,
int
i_height
);
extern
void
pixel_avg_w16
(
uint8_t
*
dst
,
int
i_dst_stride
,
uint8_t
*
src1
,
int
i_src1_stride
,
uint8_t
*
src2
,
int
i_src2_stride
,
int
i_height
);
#endif
typedef
void
(
*
pf_mc_t
)(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
i_height
);
/*****************************************************************************
* MC with width == 4 (height <= 8)
*****************************************************************************/
#if 0
static void mc_copy_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
int y;
for( y = 0; y < i_height; y++ )
{
memcpy( dst, src, 4 );
src += i_src_stride;
dst += i_dst_stride;
}
}
#else
extern
void
mc_copy_w4
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
i_height
);
#endif
extern
void
x264_mc_copy_w4_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
static
inline
void
mc_hh_w4
(
uint8_t
*
src
,
int
i_src
,
uint8_t
*
dst
,
int
i_dst
,
int
i_height
)
{
...
...
@@ -384,7 +364,24 @@ static inline void mc_hc_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i
}
}
/* mc I+H */
MC_IH
(
mc_xy10
,
mmxext
,
4
,
8
,
0
)
MC_IH
(
mc_xy30
,
mmxext
,
4
,
8
,
1
)
MC_IV
(
mc_xy01
,
mmxext
,
4
,
8
,
0
)
MC_IV
(
mc_xy03
,
mmxext
,
4
,
8
,
i_src_stride
)
MC_HV
(
mc_xy11
,
mmxext
,
4
,
8
,
0
,
0
)
MC_HV
(
mc_xy31
,
mmxext
,
4
,
8
,
1
,
0
)
MC_HV
(
mc_xy13
,
mmxext
,
4
,
8
,
0
,
i_src_stride
)
MC_HV
(
mc_xy33
,
mmxext
,
4
,
8
,
1
,
i_src_stride
)
MC_CH
(
mc_xy21
,
mmxext
,
4
,
8
,
0
)
MC_CH
(
mc_xy23
,
mmxext
,
4
,
8
,
i_src_stride
)
MC_CV
(
mc_xy12
,
mmxext
,
4
,
8
,
0
)
MC_CV
(
mc_xy32
,
mmxext
,
4
,
8
,
1
)
#if 0
static void mc_xy10_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
uint8_t tmp[4*8];
...
...
@@ -397,7 +394,7 @@ static void mc_xy30_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_
mc_hh_w4( src, i_src_stride, tmp, 4, i_height );
pixel_avg_w4( dst, i_dst_stride, src+1, i_src_stride, tmp, 4, i_height );
}
/* mc I+V */
static void mc_xy01_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
uint8_t tmp[4*8];
...
...
@@ -410,7 +407,7 @@ static void mc_xy03_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_
mc_hv_w4( src, i_src_stride, tmp, 4, i_height );
pixel_avg_w4( dst, i_dst_stride, src+i_src_stride, i_src_stride, tmp, 4, i_height );
}
/* H+V */
static void mc_xy11_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
uint8_t tmp1[4*8];
...
...
@@ -447,6 +444,7 @@ static void mc_xy33_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_
mc_hh_w4( src+i_src_stride, i_src_stride, tmp2, 4, i_height );
pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );
}
static void mc_xy21_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
uint8_t tmp1[4*8];
...
...
@@ -456,54 +454,40 @@ static void mc_xy21_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_
mc_hh_w4( src, i_src_stride, tmp2, 4, i_height );
pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );
}
static
void
mc_xy
1
2_w4
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
i_height
)
static void mc_xy2
3
_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
uint8_t tmp1[4*8];
uint8_t tmp2[4*8];
mc_hc_w4
(
src
,
i_src_stride
,
tmp1
,
4
,
i_height
);
mc_h
v
_w4
(
src
,
i_src_stride
,
tmp2
,
4
,
i_height
);
mc_hc_w4( src,
i_src_stride, tmp1, 4, i_height );
mc_h
h
_w4( src
+i_src_stride
, i_src_stride, tmp2, 4, i_height );
pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );
}
static
void
mc_xy32_w4
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
i_height
)
static void mc_xy12_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
uint8_t tmp1[4*8];
uint8_t tmp2[4*8];
mc_hc_w4
(
src
,
i_src_stride
,
tmp1
,
4
,
i_height
);
mc_hv_w4
(
src
+
1
,
i_src_stride
,
tmp2
,
4
,
i_height
);
mc_hc_w4( src, i_src_stride, tmp1, 4, i_height );
mc_hv_w4( src, i_src_stride, tmp2, 4, i_height );
pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );
}
static
void
mc_xy
2
3_w4
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
i_height
)
static void mc_xy3
2
_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
uint8_t tmp1[4*8];
uint8_t tmp2[4*8];
mc_hc_w4
(
src
,
i_src_stride
,
tmp1
,
4
,
i_height
);
mc_h
h
_w4
(
src
+
i_src_stride
,
i_src_stride
,
tmp2
,
4
,
i_height
);
mc_hc_w4( src, i_src_stride, tmp1, 4, i_height );
mc_h
v
_w4( src+
1
, i_src_stride, tmp2, 4, i_height );
pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );
}
#endif
/*****************************************************************************
* MC with width == 8 (height <= 16)
*****************************************************************************/
#if 0
static void mc_copy_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
int y;
for( y = 0; y < i_height; y++ )
{
memcpy( dst, src, 8 );
src += i_src_stride;
dst += i_dst_stride;
}
}
#else
extern
void
mc_copy_w8
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
i_height
);
#endif
extern
void
x264_mc_copy_w8_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
static
inline
void
mc_hh_w8
(
uint8_t
*
src
,
int
i_src
,
uint8_t
*
dst
,
int
i_dst
,
int
i_height
)
{
...
...
@@ -670,6 +654,24 @@ static inline void mc_hc_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i
}
}
MC_IH
(
mc_xy10
,
mmxext
,
8
,
16
,
0
)
MC_IH
(
mc_xy30
,
mmxext
,
8
,
16
,
1
)
MC_IV
(
mc_xy01
,
mmxext
,
8
,
16
,
0
)
MC_IV
(
mc_xy03
,
mmxext
,
8
,
16
,
i_src_stride
)
MC_HV
(
mc_xy11
,
mmxext
,
8
,
16
,
0
,
0
)
MC_HV
(
mc_xy31
,
mmxext
,
8
,
16
,
1
,
0
)
MC_HV
(
mc_xy13
,
mmxext
,
8
,
16
,
0
,
i_src_stride
)
MC_HV
(
mc_xy33
,
mmxext
,
8
,
16
,
1
,
i_src_stride
)
MC_CH
(
mc_xy21
,
mmxext
,
8
,
16
,
0
)
MC_CH
(
mc_xy23
,
mmxext
,
8
,
16
,
i_src_stride
)
MC_CV
(
mc_xy12
,
mmxext
,
8
,
16
,
0
)
MC_CV
(
mc_xy32
,
mmxext
,
8
,
16
,
1
)
#if 0
/* mc I+H */
static void mc_xy10_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
...
...
@@ -769,27 +771,15 @@ static void mc_xy23_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_
mc_hh_w8( src+i_src_stride, i_src_stride, tmp2, 8, i_height );
pixel_avg_w8( dst, i_dst_stride, tmp1, 8, tmp2, 8, i_height );
}
#endif
/*****************************************************************************
* MC with width == 16 (height <= 16)
*****************************************************************************/
#if 0
static void mc_copy_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
int y;
for( y = 0; y < i_height; y++ )
{
memcpy( dst, src, 16 );
extern
void
x264_mc_copy_w16_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
extern
void
x264_mc_copy_w16_sse2
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
src += i_src_stride;
dst += i_dst_stride;
}
}
#else
extern
void
mc_copy_w16
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
i_height
);
#endif
static
inline
void
mc_hh_w16
(
uint8_t
*
src
,
int
i_src
,
uint8_t
*
dst
,
int
i_dst
,
int
i_height
)
{
mc_hh_w4
(
&
src
[
0
],
i_src
,
&
dst
[
0
],
i_dst
,
i_height
);
...
...
@@ -809,6 +799,44 @@ static inline void mc_hc_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int
mc_hc_w8
(
&
src
[
8
],
i_src_stride
,
&
dst
[
8
],
i_dst_stride
,
i_height
);
}
/* MMX avg/copy */
MC_IH
(
mc_xy10
,
mmxext
,
16
,
16
,
0
)
MC_IH
(
mc_xy30
,
mmxext
,
16
,
16
,
1
)
MC_IV
(
mc_xy01
,
mmxext
,
16
,
16
,
0
)
MC_IV
(
mc_xy03
,
mmxext
,
16
,
16
,
i_src_stride
)
MC_HV
(
mc_xy11
,
mmxext
,
16
,
16
,
0
,
0
)
MC_HV
(
mc_xy31
,
mmxext
,
16
,
16
,
1
,
0
)
MC_HV
(
mc_xy13
,
mmxext
,
16
,
16
,
0
,
i_src_stride
)
MC_HV
(
mc_xy33
,
mmxext
,
16
,
16
,
1
,
i_src_stride
)
MC_CH
(
mc_xy21
,
mmxext
,
16
,
16
,
0
)
MC_CH
(
mc_xy23
,
mmxext
,
16
,
16
,
i_src_stride
)
MC_CV
(
mc_xy12
,
mmxext
,
16
,
16
,
0
)
MC_CV
(
mc_xy32
,
mmxext
,
16
,
16
,
1
)
/* SSE2 avg/copy */
MC_IH
(
mc_xy10
,
sse2
,
16
,
16
,
0
)
MC_IH
(
mc_xy30
,
sse2
,
16
,
16
,
1
)
MC_IV
(
mc_xy01
,
sse2
,
16
,
16
,
0
)
MC_IV
(
mc_xy03
,
sse2
,
16
,
16
,
i_src_stride
)
MC_HV
(
mc_xy11
,
sse2
,
16
,
16
,
0
,
0
)
MC_HV
(
mc_xy31
,
sse2
,
16
,
16
,
1
,
0
)
MC_HV
(
mc_xy13
,
sse2
,
16
,
16
,
0
,
i_src_stride
)
MC_HV
(
mc_xy33
,
sse2
,
16
,
16
,
1
,
i_src_stride
)
MC_CH
(
mc_xy21
,
sse2
,
16
,
16
,
0
)
MC_CH
(
mc_xy23
,
sse2
,
16
,
16
,
i_src_stride
)
MC_CV
(
mc_xy12
,
sse2
,
16
,
16
,
0
)
MC_CV
(
mc_xy32
,
sse2
,
16
,
16
,
1
)
#if 0
/* mc I+H */
static void mc_xy10_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
{
...
...
@@ -908,55 +936,92 @@ static void mc_xy23_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst
mc_hh_w16( src+i_src_stride, i_src_stride, tmp2, 16, i_height );
pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );
}
#endif
static
void
motion_compensation_luma
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
mvx
,
int
mvy
,
int
i_width
,
int
i_height
)
#define MOTION_COMPENSATION_LUMA \
src += (mvy >> 2) * i_src_stride + (mvx >> 2); \
if( i_width == 4 ) \
{ \
pf_mc[0][mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_height ); \
} \
else if( i_width == 8 ) \
{ \
pf_mc[1][mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_height ); \
} \
else if( i_width == 16 ) \
{ \
pf_mc[2][mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_height ); \
} \
else \
{ \
fprintf( stderr, "Error: motion_compensation_luma called with invalid width" ); \
}
static
void
motion_compensation_luma_mmxext
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
mvx
,
int
mvy
,
int
i_width
,
int
i_height
)
{
static
const
pf_mc_t
pf_mc
[
3
][
4
][
4
]
=
/*XXX [dqy][dqx] */
{
{
{
mc_copy_w4
,
mc_xy10_w4
,
mc_hh_w4
,
mc_xy30_w4
},
{
mc_xy01_w4
,
mc_xy11_w4
,
mc_xy21_w4
,
mc_xy31_w4
},
{
mc_hv_w4
,
mc_xy12_w4
,
mc_hc_w4
,
mc_xy32_w4
},
{
mc_xy03_w4
,
mc_xy13_w4
,
mc_xy23_w4
,
mc_xy33_w4
},
{
x264_
mc_copy_w4
_mmxext
,
mc_xy10_w4
_mmxext
,
mc_hh_w4
,
mc_xy30_w4
_mmxext
},
{
mc_xy01_w4
_mmxext
,
mc_xy11_w4
_mmxext
,
mc_xy21_w4
_mmxext
,
mc_xy31_w4
_mmxext
},
{
mc_hv_w4
,
mc_xy12_w4
_mmxext
,
mc_hc_w4
,
mc_xy32_w4
_mmxext
},
{
mc_xy03_w4
_mmxext
,
mc_xy13_w4
_mmxext
,
mc_xy23_w4
_mmxext
,
mc_xy33_w4
_mmxext
},
},
{
{
mc_copy_w8
,
mc_xy10_w8
,
mc_hh_w8
,
mc_xy30_w8
},
{
mc_xy01_w8
,
mc_xy11_w8
,
mc_xy21_w8
,
mc_xy31_w8
},
{
mc_hv_w8
,
mc_xy12_w8
,
mc_hc_w8
,
mc_xy32_w8
},
{
mc_xy03_w8
,
mc_xy13_w8
,
mc_xy23_w8
,
mc_xy33_w8
},
{
x264_
mc_copy_w8
_mmxext
,
mc_xy10_w8
_mmxext
,
mc_hh_w8
,
mc_xy30_w8
_mmxext
},
{
mc_xy01_w8
_mmxext
,
mc_xy11_w8
_mmxext
,
mc_xy21_w8
_mmxext
,
mc_xy31_w8
_mmxext
},
{
mc_hv_w8
,
mc_xy12_w8
_mmxext
,
mc_hc_w8
,
mc_xy32_w8
_mmxext
},
{
mc_xy03_w8
_mmxext
,
mc_xy13_w8
_mmxext
,
mc_xy23_w8
_mmxext
,
mc_xy33_w8
_mmxext
},
},
{
{
mc_copy_w16
,
mc_xy10_w16
,
mc_hh_w16
,
mc_xy30_w16
},
{
mc_xy01_w16
,
mc_xy11_w16
,
mc_xy21_w16
,
mc_xy31_w16
},
{
mc_hv_w16
,
mc_xy12_w16
,
mc_hc_w16
,
mc_xy32_w16
},
{
mc_xy03_w16
,
mc_xy13_w16
,
mc_xy23_w16
,
mc_xy33_w16
},
{
x264_
mc_copy_w16
_mmxext
,
mc_xy10_w16
_mmxext
,
mc_hh_w16
,
mc_xy30_w16
_mmxext
},
{
mc_xy01_w16
_mmxext
,
mc_xy11_w16
_mmxext
,
mc_xy21_w16
_mmxext
,
mc_xy31_w16
_mmxext
},
{
mc_hv_w16
,
mc_xy12_w16
_mmxext
,
mc_hc_w16
,
mc_xy32_w16
_mmxext
},
{
mc_xy03_w16
_mmxext
,
mc_xy13_w16
_mmxext
,
mc_xy23_w16
_mmxext
,
mc_xy33_w16
_mmxext
},
}
};
src
+=
(
mvy
>>
2
)
*
i_src_stride
+
(
mvx
>>
2
);
if
(
i_width
==
4
)
{
pf_mc
[
0
][
mvy
&
0x03
][
mvx
&
0x03
](
src
,
i_src_stride
,
dst
,
i_dst_stride
,
i_height
);
}
else
if
(
i_width
==
8
)
{
pf_mc
[
1
][
mvy
&
0x03
][
mvx
&
0x03
](
src
,
i_src_stride
,
dst
,
i_dst_stride
,
i_height
);
}
else
if
(
i_width
==
16
)
{
pf_mc
[
2
][
mvy
&
0x03
][
mvx
&
0x03
](
src
,
i_src_stride
,
dst
,
i_dst_stride
,
i_height
);
}
else
MOTION_COMPENSATION_LUMA
}
static
void
motion_compensation_luma_sse2
(
uint8_t
*
src
,
int
i_src_stride
,
uint8_t
*
dst
,
int
i_dst_stride
,
int
mvx
,
int
mvy
,
int
i_width
,
int
i_height
)
{
static
const
pf_mc_t
pf_mc
[
3
][
4
][
4
]
=
/*XXX [dqy][dqx] */
{
fprintf
(
stderr
,
"Error: motion_compensation_luma called with invalid width"
);
}
{
{
x264_mc_copy_w4_mmxext
,
mc_xy10_w4_mmxext
,
mc_hh_w4
,
mc_xy30_w4_mmxext
},
{
mc_xy01_w4_mmxext
,
mc_xy11_w4_mmxext
,
mc_xy21_w4_mmxext
,
mc_xy31_w4_mmxext
},
{
mc_hv_w4
,
mc_xy12_w4_mmxext
,
mc_hc_w4
,
mc_xy32_w4_mmxext
},
{
mc_xy03_w4_mmxext
,
mc_xy13_w4_mmxext
,
mc_xy23_w4_mmxext
,
mc_xy33_w4_mmxext
},
},
{
{
x264_mc_copy_w8_mmxext
,
mc_xy10_w8_mmxext
,
mc_hh_w8
,
mc_xy30_w8_mmxext
},
{
mc_xy01_w8_mmxext
,
mc_xy11_w8_mmxext
,
mc_xy21_w8_mmxext
,
mc_xy31_w8_mmxext
},
{
mc_hv_w8
,
mc_xy12_w8_mmxext
,
mc_hc_w8
,
mc_xy32_w8_mmxext
},
{
mc_xy03_w8_mmxext
,
mc_xy13_w8_mmxext
,
mc_xy23_w8_mmxext
,
mc_xy33_w8_mmxext
},
},
{
{
x264_mc_copy_w16_sse2
,
mc_xy10_w16_sse2
,
mc_hh_w16
,
mc_xy30_w16_sse2
},
{
mc_xy01_w16_sse2
,
mc_xy11_w16_sse2
,
mc_xy21_w16_sse2
,
mc_xy31_w16_sse2
},
{
mc_hv_w16
,
mc_xy12_w16_sse2
,
mc_hc_w16
,
mc_xy32_w16_sse2
},
{
mc_xy03_w16_sse2
,
mc_xy13_w16_sse2
,
mc_xy23_w16_sse2
,
mc_xy33_w16_sse2
},
}
};
MOTION_COMPENSATION_LUMA
}
void
x264_mc_mmxext_init
(
x264_mc_function_t
pf
[
2
]
)
{
pf
[
MC_LUMA
]
=
motion_compensation_luma
;
pf
[
MC_LUMA
]
=
motion_compensation_luma_mmxext
;
}
void
x264_mc_sse2_init
(
x264_mc_function_t
pf
[
2
]
)
{
pf
[
MC_LUMA
]
=
motion_compensation_luma_sse2
;
}
core/i386/mc.asm
View file @
a8703c89
...
...
@@ -67,16 +67,25 @@ ALIGN 16
SECTION
.text
cglobal
pixel_avg_w4
cglobal
x264_pixel_avg_w4_mmxext
cglobal
x264_pixel_avg_w8_mmxext
cglobal
x264_pixel_avg_w16_mmxext
cglobal
x264_pixel_avg_w16_sse2
cglobal
x264_mc_copy_w4_mmxext
cglobal
x264_mc_copy_w8_mmxext
cglobal
x264_mc_copy_w16_mmxext
cglobal
x264_mc_copy_w16_sse2
ALIGN
16
;-----------------------------------------------------------------------------
; void pixel_avg_w4( uint8_t *dst, int i_dst_stride,
; uint8_t *src1, int i_src1_stride,
; uint8_t *src2, int i_src2_stride,
; int i_height );
; void
x264_
pixel_avg_w4
_mmxext
( uint8_t *dst, int i_dst_stride,
;
uint8_t *src1, int i_src1_stride,
;
uint8_t *src2, int i_src2_stride,
;
int i_height );
;-----------------------------------------------------------------------------
pixel_avg_w4:
x264_
pixel_avg_w4
_mmxext
:
push
ebp
push
ebx
push
esi
...
...
@@ -111,16 +120,15 @@ ALIGN 4
ret
cglobal
pixel_avg_w8
ALIGN
16
;-----------------------------------------------------------------------------
; void pixel_avg_w8( uint8_t *dst, int i_dst_stride,
; uint8_t *src1, int i_src1_stride,
; uint8_t *src2, int i_src2_stride,
; int i_height );
; void
x264_
pixel_avg_w8
_mmxext
( uint8_t *dst, int i_dst_stride,
;
uint8_t *src1, int i_src1_stride,
;
uint8_t *src2, int i_src2_stride,
;
int i_height );
;-----------------------------------------------------------------------------
pixel_avg_w8:
x264_
pixel_avg_w8
_mmxext
:
push
ebp
push
ebx
push
esi
...
...
@@ -151,16 +159,15 @@ ALIGN 4
ret
cglobal
pixel_avg_w16
ALIGN
16
;-----------------------------------------------------------------------------
; void pixel_avg_w16( uint8_t *dst, int i_dst_stride,
; uint8_t *src1, int i_src1_stride,
; uint8_t *src2, int i_src2_stride,
; int i_height );
; void
x264_
pixel_avg_w16
_mmxext
( uint8_t *dst, int i_dst_stride,
;
uint8_t *src1, int i_src1_stride,
;
uint8_t *src2, int i_src2_stride,
;
int i_height );
;-----------------------------------------------------------------------------
pixel_avg_w16:
x264_
pixel_avg_w16
_mmxext
:
push
ebp
push
ebx
push
esi
...
...
@@ -175,18 +182,50 @@ pixel_avg_w16:
mov
ebp
,
[
esp
+
44
]
; i_height
ALIGN
4
.height_loop
%ifndef HAVE_SSE2
movq
mm0
,
[
ebx
]
movq
mm1
,
[
ebx
+
8
]
pavgb
mm0
,
[
ecx
]
pavgb
mm1
,
[
ecx
+
8
]
movq
[
edi
],
mm0
movq
[
edi
+
8
],
mm1
%else
dec
ebp
lea
ebx
,
[
ebx
+
eax
]
lea
ecx
,
[
ecx
+
edx
]
lea
edi
,
[
edi
+
esi
]
jne
.height_loop
pop
edi
pop
esi
pop
ebx
pop
ebp
ret
ALIGN
16
;-----------------------------------------------------------------------------