Commit 0ed12fd6 authored by Rémi Denis-Courmont

Check for SSE4 at build time where possible

parent 58dd2253
......@@ -37,9 +37,9 @@ VLC_API unsigned vlc_CPU(void);
# define VLC_CPU_SSE2 128
# define VLC_CPU_SSE3 256
# define VLC_CPU_SSSE3 512
# define CPU_CAPABILITY_SSE4_1 (1<<10)
# define CPU_CAPABILITY_SSE4_2 (1<<11)
# define CPU_CAPABILITY_SSE4A (1<<12)
# define VLC_CPU_SSE4_1 1024
# define VLC_CPU_SSE4_2 2048
# define VLC_CPU_SSE4A 4096
# if defined (__MMX__)
# define vlc_CPU_MMX() (1)
......@@ -85,6 +85,24 @@ VLC_API unsigned vlc_CPU(void);
# define vlc_CPU_SSSE3() ((vlc_CPU() & VLC_CPU_SSSE3) != 0)
# endif
/* SSE4.1: constant 1 when __SSE4_1__ is defined at build time; otherwise
 * test the VLC_CPU_SSE4_1 bit of the value returned by vlc_CPU(). */
# if !defined (__SSE4_1__)
# define vlc_CPU_SSE4_1() (0 != (vlc_CPU() & VLC_CPU_SSE4_1))
# else
# define vlc_CPU_SSE4_1() (1)
# endif
/* SSE4.2: same scheme, keyed on __SSE4_2__ and VLC_CPU_SSE4_2. */
# if !defined (__SSE4_2__)
# define vlc_CPU_SSE4_2() (0 != (vlc_CPU() & VLC_CPU_SSE4_2))
# else
# define vlc_CPU_SSE4_2() (1)
# endif
/* SSE4A: same scheme, keyed on __SSE4A__ and VLC_CPU_SSE4A. */
# if !defined (__SSE4A__)
# define vlc_CPU_SSE4A() (0 != (vlc_CPU() & VLC_CPU_SSE4A))
# else
# define vlc_CPU_SSE4A() (1)
# endif
# elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
# define HAVE_FPU 1
# define VLC_CPU_ALTIVEC 2
......
......@@ -331,12 +331,11 @@ static int OpenDecoder( vlc_object_t *p_this )
/* Set CPU capabilities */
p_context->dsp_mask = 0;
#if defined (__i386__) || defined (__x86_64__)
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX;
if( !vlc_CPU_MMXEXT() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
......@@ -351,11 +350,11 @@ static int OpenDecoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif
# ifdef AV_CPU_FLAG_SSE4
if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
if( !vlc_CPU_SSE4_1() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif
# ifdef AV_CPU_FLAG_SSE42
if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
if( !vlc_CPU_SSE4_2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif
#endif
......
......@@ -47,6 +47,11 @@
store " %%xmm4, 48(%[dst])\n" \
: : [dst]"r"(dstp), [src]"r"(srcp) : "memory")
/* When __SSE4A__ is not defined at build time, replace any earlier
 * vlc_CPU_SSE4A() definition with one that tests the VLC_CPU_SSE4A bit of
 * the `cpu` identifier in scope where the macro is expanded. */
#if !defined (__SSE4A__)
# undef vlc_CPU_SSE4A
# define vlc_CPU_SSE4A() (0 != (cpu & VLC_CPU_SSE4A))
#endif
#ifndef __SSSE3__
# undef vlc_CPU_SSSE3
# define vlc_CPU_SSSE3() ((cpu & VLC_CPU_SSSE3) != 0)
......@@ -88,7 +93,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
dst[x] = src[x];
#ifdef CAN_COMPILE_SSE4_1
if (cpu & CPU_CAPABILITY_SSE4_1) {
if (vlc_CPU_SSE4_1()) {
if (!unaligned) {
for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
......
......@@ -325,12 +325,11 @@ int OpenEncoder( vlc_object_t *p_this )
/* Set CPU capabilities */
p_context->dsp_mask = 0;
#if defined (__i386__) || defined (__x86_64__)
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX;
if( !vlc_CPU_MMXEXT() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
......@@ -345,11 +344,11 @@ int OpenEncoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif
# ifdef AV_CPU_FLAG_SSE4
if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
if( !vlc_CPU_SSE4_1() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif
# ifdef AV_CPU_FLAG_SSE42
if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
if( !vlc_CPU_SSE4_2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif
#endif
......
......@@ -400,11 +400,11 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif
# ifdef AV_CPU_FLAG_SSE4
if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
if( !vlc_CPU_SSE4_1() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif
# ifdef AV_CPU_FLAG_SSE42
if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
if( !vlc_CPU_SSE4_2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif
#endif
......@@ -821,11 +821,11 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif
# ifdef AV_CPU_FLAG_SSE4
if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
if( !vlc_CPU_SSE4_1() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif
# ifdef AV_CPU_FLAG_SSE42
if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
if( !vlc_CPU_SSE4_2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif
#endif
......
......@@ -247,18 +247,14 @@ void vlc_CPU_init (void)
i_capabilities |= VLC_CPU_SSSE3;
# endif
# if defined (__SSE4_1__)
i_capabilities |= CPU_CAPABILITY_SSE4_1;
# elif defined (CAN_COMPILE_SSE4_1)
# if defined (CAN_COMPILE_SSE4_1)
if ((i_ecx & 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test))
i_capabilities |= CPU_CAPABILITY_SSE4_1;
i_capabilities |= VLC_CPU_SSE4_1;
# endif
# if defined (__SSE4_2__)
i_capabilities |= CPU_CAPABILITY_SSE4_2;
# elif defined (CAN_COMPILE_SSE4_2)
# if defined (CAN_COMPILE_SSE4_2)
if ((i_ecx & 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test))
i_capabilities |= CPU_CAPABILITY_SSE4_2;
i_capabilities |= VLC_CPU_SSE4_2;
# endif
/* test for additional capabilities */
......@@ -345,9 +341,9 @@ void vlc_CPU_dump (vlc_object_t *obj)
if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");;
if (vlc_CPU_SSE3()) p += sprintf (p, "SSE2 ");;
if (vlc_CPU_SSSE3()) p += sprintf (p, "SSSE3 ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A");
if (vlc_CPU_SSE4_1()) p += sprintf (p, "SSE4.1 ");;
if (vlc_CPU_SSE4_2()) p += sprintf (p, "SSE4.2 ");;
if (vlc_CPU_SSE4A()) p += sprintf (p, "SSE4A ");;
PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
#elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
......
......@@ -79,16 +79,12 @@ static void vlc_CPU_init (void)
core_caps |= VLC_CPU_SSE3;
if (!strcmp (cap, "ssse3"))
core_caps |= VLC_CPU_SSSE3;
# ifndef __SSE4_1__
if (!strcmp (cap, "sse4_1"))
core_caps |= CPU_CAPABILITY_SSE4_1;
# endif
# ifndef __SSE4_2__
core_caps |= VLC_CPU_SSE4_1;
if (!strcmp (cap, "sse4_2"))
core_caps |= CPU_CAPABILITY_SSE4_1;
# endif
core_caps |= VLC_CPU_SSE4_1;
if (!strcmp (cap, "sse4a"))
core_caps |= CPU_CAPABILITY_SSE4A;
core_caps |= VLC_CPU_SSE4A;
# ifndef __3dNOW__
if (!strcmp (cap, "3dnow"))
core_caps |= CPU_CAPABILITY_3DNOW;
......@@ -111,12 +107,6 @@ static void vlc_CPU_init (void)
/* Always enable capabilities that were forced during compilation */
#if defined (__i386__) || defined (__x86_64__)
# ifdef __SSE4_1__
all_caps |= CPU_CAPABILITY_SSE4_1;
# endif
# ifdef __SSE4_2__
all_caps |= CPU_CAPABILITY_SSE4_2;
# endif
# ifdef __3dNOW__
all_caps |= CPU_CAPABILITY_3DNOW;
# endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment