Commit 615a016b authored by Rémi Denis-Courmont

Check for MMXEXT and SSE at build-time if possible

parent 5288c382
......@@ -32,8 +32,8 @@ VLC_API unsigned vlc_CPU(void);
# define HAVE_FPU 1
# define VLC_CPU_MMX 8
# define CPU_CAPABILITY_3DNOW (1<<4)
# define CPU_CAPABILITY_MMXEXT (1<<5)
# define CPU_CAPABILITY_SSE (1<<6)
# define VLC_CPU_MMXEXT 32
# define VLC_CPU_SSE 64
# define CPU_CAPABILITY_SSE2 (1<<7)
# define CPU_CAPABILITY_SSE3 (1<<8)
# define CPU_CAPABILITY_SSSE3 (1<<9)
......@@ -54,11 +54,17 @@ VLC_API unsigned vlc_CPU(void);
# endif
# if defined (__SSE__)
# define vlc_CPU_MMXEXT() (1)
# define vlc_CPU_SSE() (1)
# define VLC_SSE
# elif VLC_GCC_VERSION(4, 4)
# define VLC_SSE __attribute__ ((__target__ ("sse")))
# else
# define VLC_SSE VLC_SSE_is_not_implemented_on_this_compiler
# define vlc_CPU_MMXEXT() ((vlc_CPU() & VLC_CPU_MMXEXT) != 0)
# define vlc_CPU_SSE() ((vlc_CPU() & VLC_CPU_SSE) != 0)
# if VLC_GCC_VERSION(4, 4)
# define VLC_SSE __attribute__ ((__target__ ("sse")))
# else
# define VLC_SSE VLC_SSE_is_not_implemented_on_this_compiler
# endif
# endif
# elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
......
......@@ -334,11 +334,11 @@ static int OpenDecoder( vlc_object_t *p_this )
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX;
if( !(i_cpu & CPU_CAPABILITY_MMXEXT) )
if( !vlc_CPU_MMXEXT() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !(i_cpu & CPU_CAPABILITY_SSE) )
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
......
......@@ -328,11 +328,11 @@ int OpenEncoder( vlc_object_t *p_this )
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX;
if( !(i_cpu & CPU_CAPABILITY_MMXEXT) )
if( !vlc_CPU_MMXEXT() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !(i_cpu & CPU_CAPABILITY_SSE) )
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
......
......@@ -197,7 +197,7 @@ static int OpenDecoder( vlc_object_t *p_this )
i_accel |= MPEG2_ACCEL_X86_MMX;
if( cpu & CPU_CAPABILITY_3DNOW )
i_accel |= MPEG2_ACCEL_X86_3DNOW;
if( cpu & CPU_CAPABILITY_MMXEXT )
if( vlc_CPU_MMXEXT() )
i_accel |= MPEG2_ACCEL_X86_MMXEXT;
#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __ppc64__ )
if( vlc_CPU_ALTIVEC() )
......
......@@ -1260,14 +1260,13 @@ static int Open ( vlc_object_t *p_this )
free( psz_val );
#if defined (__i386__) || defined (__x86_64__)
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
p_sys->param.cpu &= ~X264_CPU_MMX;
if( !(i_cpu & CPU_CAPABILITY_MMXEXT) )
if( !vlc_CPU_MMXEXT() )
p_sys->param.cpu &= ~X264_CPU_MMXEXT;
if( !(i_cpu & CPU_CAPABILITY_SSE) )
if( !vlc_CPU_SSE() )
p_sys->param.cpu &= ~X264_CPU_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
if( !(vlc_CPU() & CPU_CAPABILITY_SSE2) )
p_sys->param.cpu &= ~X264_CPU_SSE2;
#endif
......
......@@ -383,11 +383,11 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt )
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_MMX;
if( !(i_cpu & CPU_CAPABILITY_MMXEXT) )
if( !vlc_CPU_MMXEXT() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_MMX2;
if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !(i_cpu & CPU_CAPABILITY_SSE) )
if( !vlc_CPU_SSE() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
......@@ -804,11 +804,11 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id )
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_MMX;
if( !(i_cpu & CPU_CAPABILITY_MMXEXT) )
if( !vlc_CPU_MMXEXT() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_MMX2;
if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !(i_cpu & CPU_CAPABILITY_SSE) )
if( !vlc_CPU_SSE() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
......
......@@ -79,7 +79,9 @@ static void DarkenField( picture_t *p_dst,
/* Bitwise ANDing with this clears the i_strength highest bits
of each byte */
#ifdef CAN_COMPILE_MMXEXT
unsigned u_cpu = vlc_CPU();
# ifndef __SSE__
const unsigned u_cpu = vlc_CPU();
# endif
uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know
number of bits) */
#endif
......@@ -113,7 +115,9 @@ static void DarkenField( picture_t *p_dst,
int x = 0;
#ifdef CAN_COMPILE_MMXEXT
if( u_cpu & CPU_CAPABILITY_MMXEXT )
# ifndef __SSE__
if( u_cpu & VLC_CPU_MMXEXT )
# endif
{
movq_m2r( i_strength_u64, mm1 );
movq_m2r( remove_high_u64, mm2 );
......@@ -128,11 +132,11 @@ static void DarkenField( picture_t *p_dst,
}
}
else
{
#endif
#if !defined (CAN_COMPILE_MMXEXT) || !defined (__SSE__)
{
for( ; x < w8; x += 8, ++po )
(*po) = ( ((*po) >> i_strength) & remove_high_u64 );
#ifdef CAN_COMPILE_MMXEXT
}
#endif
......@@ -174,7 +178,9 @@ static void DarkenField( picture_t *p_dst,
#ifdef CAN_COMPILE_MMXEXT
/* See also easy-to-read C version below. */
if( u_cpu & CPU_CAPABILITY_MMXEXT )
# ifndef __SSE__
if( u_cpu & VLC_CPU_MMXEXT )
# endif
{
static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
movq_m2r( b128, mm5 );
......@@ -216,7 +222,9 @@ static void DarkenField( picture_t *p_dst,
} /* if process_chroma */
#ifdef CAN_COMPILE_MMXEXT
if( u_cpu & CPU_CAPABILITY_MMXEXT )
# ifndef __SSE__
if( u_cpu & VLC_CPU_MMXEXT )
# endif
emms();
#endif
}
......
......@@ -535,8 +535,8 @@ static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
void RenderX( picture_t *p_outpic, picture_t *p_pic )
{
int i_plane;
#ifdef CAN_COMPILE_MMXEXT
unsigned u_cpu = vlc_CPU();
#if defined (CAN_COMPILE_MMXEXT) && !defined(__SSE__)
const unsigned u_cpu = vlc_CPU();
#endif
/* Copy image and skip lines */
......@@ -559,11 +559,15 @@ void RenderX( picture_t *p_outpic, picture_t *p_pic )
uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
#ifdef CAN_COMPILE_MMXEXT
if( u_cpu & CPU_CAPABILITY_MMXEXT )
# ifndef __SSE__
if( u_cpu & VLC_CPU_MMXEXT )
# endif
XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
else
#endif
#ifndef __SSE__
XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
#endif
}
/* Last line (C only)*/
......@@ -586,7 +590,9 @@ void RenderX( picture_t *p_outpic, picture_t *p_pic )
}
#ifdef CAN_COMPILE_MMXEXT
if( u_cpu & CPU_CAPABILITY_MMXEXT )
# ifndef __SSE__
if( u_cpu & VLC_CPU_MMXEXT )
# endif
emms();
#endif
}
......@@ -641,7 +641,7 @@ int Open( vlc_object_t *p_this )
else
#endif
#if defined(CAN_COMPILE_MMXEXT)
if( chroma->pixel_size == 1 && (vlc_CPU() & CPU_CAPABILITY_MMXEXT) )
if( chroma->pixel_size == 1 && vlc_CPU_MMXEXT() )
{
p_sys->pf_merge = MergeMMXEXT;
p_sys->pf_end_merge = EndMMX;
......
......@@ -113,7 +113,7 @@ static void FieldFromPlane( plane_t *p_dst, const plane_t *p_src, int i_field )
* @param[in] p_pix_c Base pointer to the same block in current picture
* @param i_pitch_prev i_pitch of previous picture
* @param i_pitch_curr i_pitch of current picture
* @param b_mmx (vlc_CPU() & CPU_CAPABILITY_MMXEXT) or false.
* @param b_mmx (vlc_CPU() & VLC_CPU_MMXEXT) or false.
* @param[out] pi_top 1 if top field of the block had motion, 0 if no
* @param[out] pi_bot 1 if bottom field of the block had motion, 0 if no
* @return 1 if the block had motion, 0 if no
......@@ -388,9 +388,9 @@ int EstimateNumBlocksWithMotion( const picture_t* p_prev,
/* We must tell our inline helper whether to use MMX acceleration. */
#ifdef CAN_COMPILE_MMXEXT
bool b_mmx = ( vlc_CPU() & CPU_CAPABILITY_MMXEXT );
const bool b_mmx = vlc_CPU_MMXEXT();
#else
bool b_mmx = false;
const bool b_mmx = false;
#endif
int i_score = 0;
......@@ -466,9 +466,11 @@ int CalculateInterlaceScore( const picture_t* p_pic_top,
int32_t i_score_c = 0; /* this counts as-is (used for non-MMX parts) */
#ifdef CAN_COMPILE_MMXEXT
unsigned u_cpu = vlc_CPU();
# ifndef __SSE__
const unsigned u_cpu = vlc_CPU();
if( u_cpu & CPU_CAPABILITY_MMXEXT )
if( u_cpu & VLC_CPU_MMXEXT )
# endif
pxor_r2r( mm7, mm7 ); /* we will keep score in mm7 */
#endif
......@@ -512,7 +514,9 @@ int CalculateInterlaceScore( const picture_t* p_pic_top,
# of pixels < (2^32)/255
Note: calculates score * 255
*/
if( u_cpu & CPU_CAPABILITY_MMXEXT )
# ifndef __SSE__
if( u_cpu & VLC_CPU_MMXEXT )
# endif
{
static const mmx_t b0 = { .uq = 0x0000000000000000ULL };
static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
......@@ -591,7 +595,9 @@ int CalculateInterlaceScore( const picture_t* p_pic_top,
}
#ifdef CAN_COMPILE_MMXEXT
if( u_cpu & CPU_CAPABILITY_MMXEXT )
# ifndef __SSE__
if( u_cpu & VLC_CPU_MMXEXT )
# endif
{
movd_r2m( mm7, i_score_mmx );
emms();
......
......@@ -133,21 +133,24 @@ static int Open(vlc_object_t *object)
cfg->thresh = 0.0;
cfg->radius = 0;
cfg->buf = NULL;
cfg->filter_line = filter_line_c;
cfg->blur_line = blur_line_c;
#if HAVE_SSE2 && HAVE_6REGS
if (vlc_CPU() & CPU_CAPABILITY_SSE2)
cfg->blur_line = blur_line_sse2;
else
#endif
#if HAVE_MMX2
if (vlc_CPU() & CPU_CAPABILITY_MMXEXT)
cfg->filter_line = filter_line_mmx2;
#endif
cfg->blur_line = blur_line_c;
#if HAVE_SSSE3
if (vlc_CPU() & CPU_CAPABILITY_SSSE3)
cfg->filter_line = filter_line_ssse3;
else
#endif
#if HAVE_MMX2
if (vlc_CPU_MMXEXT())
cfg->filter_line = filter_line_mmx2;
else
#endif
cfg->filter_line = filter_line_c;
filter->p_sys = sys;
filter->pf_video_filter = Filter;
......
......@@ -134,12 +134,11 @@ static int OpenPostproc( vlc_object_t *p_this )
/* Set CPU capabilities */
#if defined(__i386__) || defined(__x86_64__)
unsigned i_cpu = vlc_CPU();
if( vlc_CPU_MMX() )
i_flags |= PP_CPU_CAPS_MMX;
if( i_cpu & CPU_CAPABILITY_MMXEXT )
if( vlc_CPU_MMXEXT() )
i_flags |= PP_CPU_CAPS_MMX2;
if( i_cpu & CPU_CAPABILITY_3DNOW )
if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
i_flags |= PP_CPU_CAPS_3DNOW;
#elif defined(__ppc__) || defined(__ppc64__) || defined(__powerpc__)
if( vlc_CPU_ALTIVEC() )
......
......@@ -232,14 +232,13 @@ static int GetSwsCpuMask(void)
int i_sws_cpu = 0;
#if defined(__i386__) || defined(__x86_64__)
const unsigned int i_cpu = vlc_CPU();
if( vlc_CPU_MMX() )
i_sws_cpu |= SWS_CPU_CAPS_MMX;
#if (LIBSWSCALE_VERSION_INT >= ((0<<16)+(5<<8)+0))
if( i_cpu & CPU_CAPABILITY_MMXEXT )
if( vlc_CPU_MMXEXT() )
i_sws_cpu |= SWS_CPU_CAPS_MMX2;
#endif
if( i_cpu & CPU_CAPABILITY_3DNOW )
if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
i_sws_cpu |= SWS_CPU_CAPS_3DNOW;
#elif defined(__ppc__) || defined(__ppc64__) || defined(__powerpc__)
if( vlc_CPU_ALTIVEC() )
......
......@@ -223,19 +223,14 @@ void vlc_CPU_init (void)
# endif
i_capabilities |= VLC_CPU_MMX;
# if defined (__SSE__)
i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
# else
if( i_edx & 0x02000000 )
{
i_capabilities |= CPU_CAPABILITY_MMXEXT;
i_capabilities |= VLC_CPU_MMXEXT;
# ifdef CAN_COMPILE_SSE
if (vlc_CPU_check ("SSE", SSE_test))
i_capabilities |= CPU_CAPABILITY_SSE;
i_capabilities |= VLC_CPU_SSE;
# endif
}
# endif
# if defined (__SSE2__)
i_capabilities |= CPU_CAPABILITY_SSE2;
......@@ -289,9 +284,7 @@ void vlc_CPU_init (void)
# endif
if( b_amd && ( i_edx & 0x00400000 ) )
{
i_capabilities |= CPU_CAPABILITY_MMXEXT;
}
i_capabilities |= VLC_CPU_MMXEXT;
out:
#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
......@@ -353,15 +346,15 @@ void vlc_CPU_dump (vlc_object_t *obj)
p += sprintf (p, "%s ", (string) )
if (vlc_CPU_MMX()) p += sprintf (p, "MMX ");
PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
PRINT_CAPABILITY(CPU_CAPABILITY_MMXEXT, "MMXEXT");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE, "SSE");
if (vlc_CPU_MMXEXT()) p += sprintf (p, "MMXEXT ");
if (vlc_CPU_SSE()) p += sprintf (p, "SSE ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A");
PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
#elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
if (vlc_CPU_ALTIVEC()) p += sprintf (p, "AltiVec");
......
......@@ -69,12 +69,10 @@ static void vlc_CPU_init (void)
#elif defined (__i386__) || defined (__x86_64__)
if (!strcmp (cap, "mmx"))
core_caps |= VLC_CPU_MMX;
# ifndef __SSE__
if (!strcmp (cap, "sse"))
core_caps |= CPU_CAPABILITY_SSE | CPU_CAPABILITY_MMXEXT;
core_caps |= VLC_CPU_SSE | VLC_CPU_MMXEXT;
if (!strcmp (cap, "mmxext"))
core_caps |= CPU_CAPABILITY_MMXEXT;
# endif
core_caps |= VLC_CPU_MMXEXT;
# ifndef __SSE2__
if (!strcmp (cap, "sse2"))
core_caps |= CPU_CAPABILITY_SSE2;
......@@ -119,9 +117,6 @@ static void vlc_CPU_init (void)
/* Always enable capabilities that were forced during compilation */
#if defined (__i386__) || defined (__x86_64__)
# ifdef __SSE__
all_caps |= CPU_CAPABILITY_SSE | CPU_CAPABILITY_MMXEXT;
# endif
# ifdef __SSE2__
all_caps |= CPU_CAPABILITY_SSE2;
# endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment