Commit ed9a9f31, authored by Gabriel Bouvigne, committed by Loren Merritt

Some asm functions require an aligned stack. Disable these when compiling with MSVC/ICC.

parent e9369576
...@@ -51,6 +51,7 @@ const x264_cpu_name_t x264_cpu_names[] = { ...@@ -51,6 +51,7 @@ const x264_cpu_name_t x264_cpu_names[] = {
{"SSE4", X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE|X264_CPU_SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4}, {"SSE4", X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE|X264_CPU_SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4},
{"Cache32", X264_CPU_CACHELINE_32}, {"Cache32", X264_CPU_CACHELINE_32},
{"Cache64", X264_CPU_CACHELINE_64}, {"Cache64", X264_CPU_CACHELINE_64},
{"Slow_mod4_stack", X264_CPU_STACK_MOD4},
{"", 0}, {"", 0},
}; };
...@@ -172,6 +173,10 @@ uint32_t x264_cpu_detect( void ) ...@@ -172,6 +173,10 @@ uint32_t x264_cpu_detect( void )
fprintf( stderr, "x264 [warning]: unable to determine cacheline size\n" ); fprintf( stderr, "x264 [warning]: unable to determine cacheline size\n" );
} }
#ifdef BROKEN_STACK_ALIGNMENT
cpu |= X264_CPU_STACK_MOD4;
#endif
return cpu; return cpu;
} }
......
...@@ -43,7 +43,7 @@ void x264_stack_align( void (*func)(x264_t*), x264_t *arg ); ...@@ -43,7 +43,7 @@ void x264_stack_align( void (*func)(x264_t*), x264_t *arg );
#endif #endif
typedef struct { typedef struct {
const char name[12]; const char name[16];
int flags; int flags;
} x264_cpu_name_t; } x264_cpu_name_t;
extern const x264_cpu_name_t x264_cpu_names[]; extern const x264_cpu_name_t x264_cpu_names[];
......
...@@ -795,7 +795,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf ) ...@@ -795,7 +795,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext; pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext; pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
#endif #endif
if( cpu&X264_CPU_SSE2 ) if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
{ {
pf->deblock_v_luma = x264_deblock_v_luma_sse2; pf->deblock_v_luma = x264_deblock_v_luma_sse2;
pf->deblock_h_luma = x264_deblock_h_luma_sse2; pf->deblock_h_luma = x264_deblock_h_luma_sse2;
......
...@@ -133,4 +133,10 @@ ...@@ -133,4 +133,10 @@
/* FIXME: long isn't always the native register size (e.g. win64). */ /* FIXME: long isn't always the native register size (e.g. win64). */
#define WORD_SIZE sizeof(long) #define WORD_SIZE sizeof(long)
#if !defined(_WIN64) && !defined(__LP64__)
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
#define BROKEN_STACK_ALIGNMENT /* define it if stack is not mod16 */
#endif
#endif
#endif /* X264_OSDEP_H */ #endif /* X264_OSDEP_H */
...@@ -299,11 +299,14 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ) ...@@ -299,11 +299,14 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2; pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2;
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2; pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_sse2; pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_sse2;
pf->avg_weight[PIXEL_16x16] = x264_pixel_avg_weight_16x16_sse2; if( !(cpu&X264_CPU_STACK_MOD4) )
pf->avg_weight[PIXEL_16x8] = x264_pixel_avg_weight_16x8_sse2; {
pf->avg_weight[PIXEL_8x16] = x264_pixel_avg_weight_8x16_sse2; pf->avg_weight[PIXEL_16x16] = x264_pixel_avg_weight_16x16_sse2;
pf->avg_weight[PIXEL_8x8] = x264_pixel_avg_weight_8x8_sse2; pf->avg_weight[PIXEL_16x8] = x264_pixel_avg_weight_16x8_sse2;
pf->avg_weight[PIXEL_8x4] = x264_pixel_avg_weight_8x4_sse2; pf->avg_weight[PIXEL_8x16] = x264_pixel_avg_weight_8x16_sse2;
pf->avg_weight[PIXEL_8x8] = x264_pixel_avg_weight_8x8_sse2;
pf->avg_weight[PIXEL_8x4] = x264_pixel_avg_weight_8x4_sse2;
}
pf->hpel_filter = x264_hpel_filter_sse2; pf->hpel_filter = x264_hpel_filter_sse2;
pf->mc_chroma = x264_mc_chroma_sse2; pf->mc_chroma = x264_mc_chroma_sse2;
......
...@@ -59,6 +59,7 @@ typedef struct x264_t x264_t; ...@@ -59,6 +59,7 @@ typedef struct x264_t x264_t;
#define X264_CPU_SSSE3 0x000400 #define X264_CPU_SSSE3 0x000400
#define X264_CPU_PHADD_IS_FAST 0x000800 /* pre-Penryn Core2 have a uselessly slow PHADD instruction */ #define X264_CPU_PHADD_IS_FAST 0x000800 /* pre-Penryn Core2 have a uselessly slow PHADD instruction */
#define X264_CPU_SSE4 0x001000 /* SSE4.1 */ #define X264_CPU_SSE4 0x001000 /* SSE4.1 */
#define X264_CPU_STACK_MOD4 0x002000 /* if stack is only mod4 and not mod16 */
/* Analyse flags /* Analyse flags
*/ */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment