Compare revisions

Henrik Gramner · Anton Mitrofanov · Anton Mitrofanov · Anton Mitrofanov · Anton Mitrofanov · Anton Mitrofanov
--- a/Makefile
+++ b/Makefile
@@ -8,6 +8,9 @@ vpath %.S $(SRCPATH)
 vpath %.asm $(SRCPATH)
 vpath %.rc $(SRCPATH)
+CFLAGS += $(CFLAGSPROF)
+LDFLAGS += $(LDFLAGSPROF)
 GENERATED =
 all: default
@@ -246,6 +249,8 @@ $(LIBX264): $(GENERATED) .depend $(OBJS) $(OBJASM)
 $(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO)
 	$(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
+$(IMPLIBNAME): $(SONAME)
 ifneq ($(EXE),)
 .PHONY: x264 checkasm8 checkasm10 example
 x264: x264$(EXE)
@@ -266,6 +271,9 @@ checkasm10$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_10) $(LIBX264)
 example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264)
 	$(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS)
+$(OBJS) $(OBJSO): CFLAGS += $(CFLAGSSO)
+$(OBJCLI): CFLAGS += $(CFLAGSCLI)
 $(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) $(OBJEXAMPLE): .depend
 %.o: %.c
@@ -336,7 +344,7 @@ ifneq ($(wildcard .depend),)
 include .depend
 endif
-OBJPROF = $(OBJS) $(OBJCLI)
+OBJPROF = $(OBJS) $(OBJSO) $(OBJCLI)
 # These should cover most of the important codepaths
 OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb
 OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0  --slice-max-mbs 50
@@ -354,7 +362,7 @@ fprofiled:
 	@echo 'i.e. YUV with resolution in the filename, y4m, or avisynth.'
 else
 fprofiled: clean
-	$(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)"
+	$(MAKE) x264$(EXE) CFLAGSPROF="$(PROF_GEN_CC)" LDFLAGSPROF="$(PROF_GEN_LD)"
 	$(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;))
 ifeq ($(COMPILER),CL)
 # Because Visual Studio timestamps the object files within the PGD, it fails to build if they change - only the executable should be deleted
@@ -362,7 +370,7 @@ ifeq ($(COMPILER),CL)
 else
 	rm -f $(OBJPROF)
 endif
-	$(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)"
+	$(MAKE) CFLAGSPROF="$(PROF_USE_CC)" LDFLAGSPROF="$(PROF_USE_LD)"
 	rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
 endif

--- a/common/base.c
+++ b/common/base.c
@@ -99,13 +99,18 @@ void x264_log_internal( int i_level, const char *psz_fmt, ... )
 /****************************************************************************
 * x264_malloc:
 ****************************************************************************/
-void *x264_malloc( int i_size )
+void *x264_malloc( int64_t i_size )
 {
+#define HUGE_PAGE_SIZE 2*1024*1024
+#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
+    if( i_size < 0 || i_size > (SIZE_MAX - HUGE_PAGE_SIZE) /*|| i_size > (SIZE_MAX - NATIVE_ALIGN - sizeof(void **))*/ )
+    {
+        x264_log_internal( X264_LOG_ERROR, "invalid size of malloc: %"PRId64"\n", i_size );
+        return NULL;
+    }
    uint8_t *align_buf = NULL;
 #if HAVE_MALLOC_H
 #if HAVE_THP
-#define HUGE_PAGE_SIZE 2*1024*1024
-#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
    /* Attempt to allocate huge pages to reduce TLB misses. */
    if( i_size >= HUGE_PAGE_THRESHOLD )
    {
@@ -118,8 +123,6 @@ void *x264_malloc( int i_size )
        }
    }
    else
-#undef HUGE_PAGE_SIZE
-#undef HUGE_PAGE_THRESHOLD
 #endif
        align_buf = memalign( NATIVE_ALIGN, i_size );
 #else
@@ -132,8 +135,10 @@ void *x264_malloc( int i_size )
    }
 #endif
    if( !align_buf )
-        x264_log_internal( X264_LOG_ERROR, "malloc of size %d failed\n", i_size );
+        x264_log_internal( X264_LOG_ERROR, "malloc of size %"PRId64" failed\n", i_size );
    return align_buf;
+#undef HUGE_PAGE_SIZE
+#undef HUGE_PAGE_THRESHOLD
 }
 /****************************************************************************
@@ -196,7 +201,7 @@ error:
 /****************************************************************************
 * x264_picture_init:
 ****************************************************************************/
-static void picture_init( x264_picture_t *pic )
+REALIGN_STACK void x264_picture_init( x264_picture_t *pic )
 {
    memset( pic, 0, sizeof( x264_picture_t ) );
    pic->i_type = X264_TYPE_AUTO;
@@ -204,15 +209,10 @@ static void picture_init( x264_picture_t *pic )
    pic->i_pic_struct = PIC_STRUCT_AUTO;
 }
-void x264_picture_init( x264_picture_t *pic )
-{
-    x264_stack_align( picture_init, pic );
-}
 /****************************************************************************
 * x264_picture_alloc:
 ****************************************************************************/
-static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
+REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
 {
    typedef struct
    {
@@ -243,16 +243,16 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
    int csp = i_csp & X264_CSP_MASK;
    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
        return -1;
-    picture_init( pic );
+    x264_picture_init( pic );
    pic->img.i_csp = i_csp;
    pic->img.i_plane = csp_tab[csp].planes;
    int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
-    int plane_offset[3] = {0};
+    int64_t plane_offset[3] = {0};
-    int frame_size = 0;
+    int64_t frame_size = 0;
    for( int i = 0; i < pic->img.i_plane; i++ )
    {
        int stride = (((int64_t)i_width * csp_tab[csp].width_fix8[i]) >> 8) * depth_factor;
-        int plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride;
+        int64_t plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride;
        pic->img.i_stride[i] = stride;
        plane_offset[i] = frame_size;
        frame_size += plane_size;
@@ -265,15 +265,10 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
    return 0;
 }
-int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
-{
-    return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
-}
 /****************************************************************************
 * x264_picture_clean:
 ****************************************************************************/
-static void picture_clean( x264_picture_t *pic )
+REALIGN_STACK void x264_picture_clean( x264_picture_t *pic )
 {
    x264_free( pic->img.plane[0] );
@@ -281,15 +276,10 @@ static void picture_clean( x264_picture_t *pic )
    memset( pic, 0, sizeof( x264_picture_t ) );
 }
-void x264_picture_clean( x264_picture_t *pic )
-{
-    x264_stack_align( picture_clean, pic );
-}
 /****************************************************************************
 * x264_param_default:
 ****************************************************************************/
-static void param_default( x264_param_t *param )
+REALIGN_STACK void x264_param_default( x264_param_t *param )
 {
    /* */
    memset( param, 0, sizeof( x264_param_t ) );
@@ -434,11 +424,6 @@ static void param_default( x264_param_t *param )
    param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC;
 }
-void x264_param_default( x264_param_t *param )
-{
-    x264_stack_align( param_default, param );
-}
 static int param_apply_preset( x264_param_t *param, const char *preset )
 {
    char *end;
@@ -656,9 +641,9 @@ static int param_apply_tune( x264_param_t *param, const char *tune )
    return 0;
 }
-static int param_default_preset( x264_param_t *param, const char *preset, const char *tune )
+REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
 {
-    param_default( param );
+    x264_param_default( param );
    if( preset && param_apply_preset( param, preset ) < 0 )
        return -1;
@@ -667,12 +652,7 @@ static int param_default_preset( x264_param_t *param, const char *preset, const
    return 0;
 }
-int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
+REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param )
-{
-    return x264_stack_align( param_default_preset, param, preset, tune );
-}
-static void param_apply_fastfirstpass( x264_param_t *param )
 {
    /* Set faster options in case of turbo firstpass. */
    if( param->rc.b_stat_write && !param->rc.b_stat_read )
@@ -687,11 +667,6 @@ static void param_apply_fastfirstpass( x264_param_t *param )
    }
 }
-void x264_param_apply_fastfirstpass( x264_param_t *param )
-{
-    x264_stack_align( param_apply_fastfirstpass, param );
-}
 static int profile_string_to_int( const char *str )
 {
    if( !strcasecmp( str, "baseline" ) )
@@ -709,7 +684,7 @@ static int profile_string_to_int( const char *str )
    return -1;
 }
-static int param_apply_profile( x264_param_t *param, const char *profile )
+REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile )
 {
    if( !profile )
        return 0;
@@ -776,11 +751,6 @@ static int param_apply_profile( x264_param_t *param, const char *profile )
    return 0;
 }
-int x264_param_apply_profile( x264_param_t *param, const char *profile )
-{
-    return x264_stack_align( param_apply_profile, param, profile );
-}
 static int parse_enum( const char *arg, const char * const *names, int *dst )
 {
    for( int i = 0; names[i]; i++ )
@@ -842,7 +812,7 @@ static double atof_internal( const char *str, int *b_error )
 #define atoi(str) atoi_internal( str, &b_error )
 #define atof(str) atof_internal( str, &b_error )
-static int param_parse( x264_param_t *p, const char *name, const char *value )
+REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value )
 {
    char *name_buf = NULL;
    int b_error = 0;
@@ -1343,11 +1313,6 @@ static int param_parse( x264_param_t *p, const char *name, const char *value )
    return b_error ? errortype : 0;
 }
-int x264_param_parse( x264_param_t *param, const char *name, const char *value )
-{
-    return x264_stack_align( param_parse, param, name, value );
-}
 /****************************************************************************
 * x264_param2string:
 ****************************************************************************/

--- a/common/base.h
+++ b/common/base.h
@@ -47,7 +47,6 @@
 #include <string.h>
 #include <assert.h>
 #include <limits.h>
-#include "x264.h"
 /****************************************************************************
 * Macros
@@ -256,23 +255,23 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
 /****************************************************************************
 * General functions
 ****************************************************************************/
-void x264_reduce_fraction( uint32_t *n, uint32_t *d );
+X264_API void x264_reduce_fraction( uint32_t *n, uint32_t *d );
-void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
+X264_API void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
-void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
+X264_API void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
-void x264_log_internal( int i_level, const char *psz_fmt, ... );
+X264_API void x264_log_internal( int i_level, const char *psz_fmt, ... );
 /* x264_malloc : will do or emulate a memalign
 * you have to use x264_free for buffers allocated with x264_malloc */
-void *x264_malloc( int );
+X264_API void *x264_malloc( int64_t );
-void  x264_free( void * );
+X264_API void  x264_free( void * );
 /* x264_slurp_file: malloc space for the whole file and read it */
-char *x264_slurp_file( const char *filename );
+X264_API char *x264_slurp_file( const char *filename );
 /* x264_param2string: return a (malloced) string containing most of
 * the encoding options */
-char *x264_param2string( x264_param_t *p, int b_res );
+X264_API char *x264_param2string( x264_param_t *p, int b_res );
 /****************************************************************************
 * Macros
@@ -297,12 +296,12 @@ do {\
 #define PREALLOC_INIT\
    int    prealloc_idx = 0;\
-    size_t prealloc_size = 0;\
+    int64_t prealloc_size = 0;\
    uint8_t **preallocs[PREALLOC_BUF_SIZE];
 #define PREALLOC( var, size )\
 do {\
-    var = (void*)prealloc_size;\
+    var = (void*)(intptr_t)prealloc_size;\
    preallocs[prealloc_idx++] = (uint8_t**)&var;\
    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
 } while( 0 )

--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -45,7 +45,7 @@ static uint8_t *nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
 #if HAVE_ARMV6
 #include "arm/bitstream.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #include "aarch64/bitstream.h"
 #endif
@@ -159,7 +159,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
    if( cpu&X264_CPU_NEON )
        pf->nal_escape = x264_nal_escape_neon;
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
        pf->nal_escape = x264_nal_escape_neon;
 #endif

--- a/common/cabac.h
+++ b/common/cabac.h
@@ -80,7 +80,7 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
 #define x264_cabac_encode_decision x264_cabac_encode_decision_asm
 #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
 #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
-#elif defined(ARCH_AARCH64)
+#elif HAVE_AARCH64
 #define x264_cabac_encode_decision x264_cabac_encode_decision_asm
 #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
 #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm

--- a/common/cpu.c
+++ b/common/cpu.c
@@ -45,7 +45,7 @@
 const x264_cpu_name_t x264_cpu_names[] =
 {
-#if HAVE_MMX
+#if ARCH_X86 || ARCH_X86_64
 //  {"MMX",         X264_CPU_MMX},  // we don't support asm on mmx1 cpus anymore
 #define MMX2 X264_CPU_MMX|X264_CPU_MMX2
    {"MMX2",        MMX2},
@@ -97,7 +97,7 @@ const x264_cpu_name_t x264_cpu_names[] =
    {"", 0},
 };
-#if (ARCH_PPC && SYS_LINUX) || (ARCH_ARM && !HAVE_NEON)
+#if (HAVE_ALTIVEC && SYS_LINUX) || (HAVE_ARMV6 && !HAVE_NEON)
 #include <signal.h>
 #include <setjmp.h>
 static sigjmp_buf jmpbuf;
@@ -298,7 +298,7 @@ uint32_t x264_cpu_detect( void )
    return cpu;
 }
-#elif ARCH_PPC && HAVE_ALTIVEC
+#elif HAVE_ALTIVEC
 #if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD
 #include <sys/sysctl.h>
@@ -355,7 +355,7 @@ uint32_t x264_cpu_detect( void )
 }
 #endif
-#elif ARCH_ARM
+#elif HAVE_ARMV6
 void x264_cpu_neon_test( void );
 int x264_cpu_fast_neon_mrc_test( void );
@@ -363,7 +363,6 @@ int x264_cpu_fast_neon_mrc_test( void );
 uint32_t x264_cpu_detect( void )
 {
    int flags = 0;
-#if HAVE_ARMV6
    flags |= X264_CPU_ARMV6;
    // don't do this hack if compiled with -mfpu=neon
@@ -396,26 +395,25 @@ uint32_t x264_cpu_detect( void )
    flags |= x264_cpu_fast_neon_mrc_test() ? X264_CPU_FAST_NEON_MRC : 0;
 #endif
    // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
-#endif
    return flags;
 }
-#elif ARCH_AARCH64
+#elif HAVE_AARCH64
 uint32_t x264_cpu_detect( void )
 {
+#if HAVE_NEON /* the returned flags are fixed by the preprocessor; nothing is probed at run time in this variant */
    return X264_CPU_ARMV8 | X264_CPU_NEON;
+#else
+    return X264_CPU_ARMV8; /* build without NEON support: report the base ARMv8 flag only */
+#endif
 }
-#elif ARCH_MIPS
+#elif HAVE_MSA
 uint32_t x264_cpu_detect( void )
 {
-    uint32_t flags = 0;
+    return X264_CPU_MSA; /* this variant is only compiled under the enclosing #elif HAVE_MSA, so the flag is returned unconditionally */
-#if HAVE_MSA
-    flags |= X264_CPU_MSA;
-#endif
-    return flags;
 }
 #else

--- a/common/cpu.h
+++ b/common/cpu.h
@@ -26,8 +26,8 @@
 #ifndef X264_CPU_H
 #define X264_CPU_H
-uint32_t x264_cpu_detect( void );
+X264_API uint32_t x264_cpu_detect( void );
-int      x264_cpu_num_processors( void );
+X264_API int      x264_cpu_num_processors( void );
 void     x264_cpu_emms( void );
 void     x264_cpu_sfence( void );
 #if HAVE_MMX
@@ -46,28 +46,11 @@ void     x264_cpu_sfence( void );
 #endif
 #define x264_sfence x264_cpu_sfence
-/* kludge:
- * gcc can't give variables any greater alignment than the stack frame has.
- * We need 32 byte alignment for AVX2, so here we make sure that the stack is
- * aligned to 32 bytes.
- * gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
- * problem, but I don't want to require such a new version.
- * aligning to 32 bytes only works if the compiler supports keeping that
- * alignment between functions (osdep.h handles manual alignment of arrays
- * if it doesn't).
- */
-#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4))
-intptr_t x264_stack_align( void (*func)(), ... );
-#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
-#else
-#define x264_stack_align(func,...) func(__VA_ARGS__)
-#endif
 typedef struct
 {
    const char *name;
    uint32_t flags;
 } x264_cpu_name_t;
-extern const x264_cpu_name_t x264_cpu_names[];
+X264_API extern const x264_cpu_name_t x264_cpu_names[];
 #endif
--- a/common/dct.c
+++ b/common/dct.c
@@ -29,16 +29,16 @@
 #if HAVE_MMX
 #   include "x86/dct.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #   include "ppc/dct.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #   include "arm/dct.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #   include "aarch64/dct.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #   include "mips/dct.h"
 #endif
@@ -682,7 +682,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
    }
 #endif
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        dctf->sub4x4_dct    = x264_sub4x4_dct_neon;
@@ -996,11 +996,11 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_altivec;
    }
 #endif
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_neon;
-#if ARCH_AARCH64
+#if HAVE_AARCH64
        pf_interlaced->scan_4x4   = x264_zigzag_scan_4x4_field_neon;
        pf_interlaced->scan_8x8   = x264_zigzag_scan_8x8_field_neon;
        pf_interlaced->sub_4x4    = x264_zigzag_sub_4x4_field_neon;
@@ -1010,9 +1010,9 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
        pf_progressive->sub_4x4   = x264_zigzag_sub_4x4_frame_neon;
        pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
        pf_progressive->sub_8x8   = x264_zigzag_sub_8x8_frame_neon;
-#endif // ARCH_AARCH64
+#endif // HAVE_AARCH64
    }
-#endif // HAVE_ARMV6 || ARCH_AARCH64
+#endif // HAVE_ARMV6 || HAVE_AARCH64
 #endif // HIGH_BIT_DEPTH
    pf_interlaced->interleave_8x8_cavlc =
@@ -1065,13 +1065,13 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
 #endif // HIGH_BIT_DEPTH
 #endif
 #if !HIGH_BIT_DEPTH
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_neon;
    }
-#endif // ARCH_AARCH64
+#endif // HAVE_AARCH64
 #if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )

--- a/common/deblock.c
+++ b/common/deblock.c
@@ -667,13 +667,13 @@ void x264_macroblock_deblock( x264_t *h )
 #if HAVE_MMX
 #include "x86/deblock.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #include "ppc/deblock.h"
 #endif
 #if HAVE_ARMV6
 #include "arm/deblock.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #include "aarch64/deblock.h"
 #endif
 #if HAVE_MSA
@@ -782,7 +782,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
    }
 #endif // HAVE_ALTIVEC
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        pf->deblock_luma[1] = x264_deblock_v_luma_neon;

--- a/common/frame.c
+++ b/common/frame.c
@@ -162,7 +162,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
    for( int p = 0; p < luma_plane_count; p++ )
    {
-        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
+        int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
        if( h->param.analyse.i_subpel_refine && b_fdec )
            luma_plane_size *= 4;
@@ -205,7 +205,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
    {
        if( h->frames.b_have_lowres )
        {
-            int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
+            int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
            PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) );
@@ -244,7 +244,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
    for( int p = 0; p < luma_plane_count; p++ )
    {
-        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
+        int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
        if( h->param.analyse.i_subpel_refine && b_fdec )
        {
            for( int i = 0; i < 4; i++ )
@@ -274,7 +274,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
    {
        if( h->frames.b_have_lowres )
        {
-            int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
+            int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
            for( int i = 0; i < 4; i++ )
                frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;

--- a/common/frame.h
+++ b/common/frame.h
@@ -261,13 +261,14 @@ void          x264_threadslice_cond_broadcast( x264_t *h, int pass );
 void          x264_threadslice_cond_wait( x264_t *h, int pass );
 #define x264_frame_push x264_template(frame_push)
-void          x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
+X264_API void          x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
 #define x264_frame_pop x264_template(frame_pop)
-x264_frame_t *x264_frame_pop( x264_frame_t **list );
+X264_API x264_frame_t *x264_frame_pop( x264_frame_t **list );
 #define x264_frame_unshift x264_template(frame_unshift)
-void          x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
+X264_API void          x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
 #define x264_frame_shift x264_template(frame_shift)
-x264_frame_t *x264_frame_shift( x264_frame_t **list );
+X264_API x264_frame_t *x264_frame_shift( x264_frame_t **list );
 #define x264_frame_push_unused x264_template(frame_push_unused)
 void          x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
 #define x264_frame_push_blank_unused x264_template(frame_push_blank_unused)

--- a/common/mc.c
+++ b/common/mc.c
@@ -29,16 +29,16 @@
 #if HAVE_MMX
 #include "x86/mc.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #include "ppc/mc.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #include "arm/mc.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #include "aarch64/mc.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #include "mips/mc.h"
 #endif
@@ -680,7 +680,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
 #if HAVE_ARMV6
    x264_mc_init_arm( cpu, pf );
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    x264_mc_init_aarch64( cpu, pf );
 #endif
 #if HAVE_MSA

--- a/common/opencl.c
+++ b/common/opencl.c
@@ -131,8 +131,11 @@ static cl_program opencl_cache_load( x264_t *h, const char *dev_name, const char
    uint8_t *binary = NULL;
    fseek( fp, 0, SEEK_END );
-    size_t size = ftell( fp );
+    int64_t file_size = ftell( fp );
-    rewind( fp );
+    fseek( fp, 0, SEEK_SET );
+    if( file_size < 0 || file_size > SIZE_MAX )
+        goto fail;
+    size_t size = file_size;
    CHECKED_MALLOC( binary, size );
    if( fread( binary, 1, size, fp ) != size )

--- a/common/osdep.c
+++ b/common/osdep.c
@@ -27,11 +27,6 @@
 #include "osdep.h"
-#ifdef _WIN32
-#include <windows.h>
-#include <io.h>
-#endif
 #if SYS_WINDOWS
 #include <sys/types.h>
 #include <sys/timeb.h>
@@ -111,114 +106,3 @@ int x264_threading_init( void )
    return 0;
 }
 #endif
-#ifdef _WIN32
-/* Functions for dealing with Unicode on Windows. */
-FILE *x264_fopen( const char *filename, const char *mode )
-{
-    wchar_t filename_utf16[MAX_PATH];
-    wchar_t mode_utf16[16];
-    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
-        return _wfopen( filename_utf16, mode_utf16 );
-    return NULL;
-}
-int x264_rename( const char *oldname, const char *newname )
-{
-    wchar_t oldname_utf16[MAX_PATH];
-    wchar_t newname_utf16[MAX_PATH];
-    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
-    {
-        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
-        _wunlink( newname_utf16 );
-        return _wrename( oldname_utf16, newname_utf16 );
-    }
-    return -1;
-}
-int x264_stat( const char *path, x264_struct_stat *buf )
-{
-    wchar_t path_utf16[MAX_PATH];
-    if( utf8_to_utf16( path, path_utf16 ) )
-        return _wstati64( path_utf16, buf );
-    return -1;
-}
-#if !HAVE_WINRT
-int x264_vfprintf( FILE *stream, const char *format, va_list arg )
-{
-    HANDLE console = NULL;
-    DWORD mode;
-    if( stream == stdout )
-        console = GetStdHandle( STD_OUTPUT_HANDLE );
-    else if( stream == stderr )
-        console = GetStdHandle( STD_ERROR_HANDLE );
-    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
-    if( GetConsoleMode( console, &mode ) )
-    {
-        char buf[4096];
-        wchar_t buf_utf16[4096];
-        va_list arg2;
-        va_copy( arg2, arg );
-        int length = vsnprintf( buf, sizeof(buf), format, arg2 );
-        va_end( arg2 );
-        if( length > 0 && length < sizeof(buf) )
-        {
-            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
-            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
-            DWORD written;
-            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
-            return length;
-        }
-    }
-    return vfprintf( stream, format, arg );
-}
-int x264_is_pipe( const char *path )
-{
-    wchar_t path_utf16[MAX_PATH];
-    if( utf8_to_utf16( path, path_utf16 ) )
-        return WaitNamedPipeW( path_utf16, 0 );
-    return 0;
-}
-#endif
-#if defined(_MSC_VER) && _MSC_VER < 1900
-/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
-int x264_snprintf( char *s, size_t n, const char *fmt, ... )
-{
-    va_list arg;
-    va_start( arg, fmt );
-    int length = x264_vsnprintf( s, n, fmt, arg );
-    va_end( arg );
-    return length;
-}
-int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
-{
-    int length = -1;
-    if( n )
-    {
-        va_list arg2;
-        va_copy( arg2, arg );
-        length = _vsnprintf( s, n, fmt, arg2 );
-        va_end( arg2 );
-        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
-        if( length < 0 || length >= n )
-            s[n-1] = '\0';
-    }
-    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
-    if( length < 0 )
-        return _vscprintf( fmt, arg );
-    return length;
-}
-#endif
-#endif
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -43,6 +43,13 @@
 #include <math.h>
 #endif
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#endif
+#include "x264.h"
 #if !HAVE_LOG2F
 #define log2f(x) (logf(x)/0.693147180559945f)
 #define log2(x) (log(x)/0.693147180559945)
@@ -54,12 +61,6 @@
 #define strncasecmp _strnicmp
 #define strtok_r strtok_s
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
-#if _MSC_VER < 1900
-int x264_snprintf( char *s, size_t n, const char *fmt, ... );
-int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
-#define snprintf  x264_snprintf
-#define vsnprintf x264_vsnprintf
-#endif
 #else
 #include <strings.h>
 #endif
@@ -76,14 +77,81 @@ int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
 #define strtok_r(str,delim,save) strtok(str,delim)
 #endif
+#if defined(_MSC_VER) && _MSC_VER < 1900
+/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
+static inline int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg ) /* vsnprintf replacement layered on _vsnprintf: s is always terminated when n > 0 */
+{
+    int length = -1; /* remains negative when n == 0, which routes to the _vscprintf call below */
+    if( n )
+    {
+        va_list arg2;
+        va_copy( arg2, arg ); /* format from a copy so arg is still untouched if _vscprintf needs it */
+        length = _vsnprintf( s, n, fmt, arg2 );
+        va_end( arg2 );
+        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size, so terminate by hand on truncation. */
+        if( length < 0 || length >= n )
+            s[n-1] = '\0';
+    }
+    /* _(v)snprintf returns a negative number if the length is greater than the buffer size; the return value then comes from _vscprintf instead. */
+    if( length < 0 )
+        return _vscprintf( fmt, arg );
+    return length;
+}
+static inline int x264_snprintf( char *s, size_t n, const char *fmt, ... ) /* variadic front end: packs its arguments into a va_list and returns the x264_vsnprintf result */
+{
+    va_list arg;
+    va_start( arg, fmt );
+    int length = x264_vsnprintf( s, n, fmt, arg );
+    va_end( arg );
+    return length;
+}
+#define snprintf  x264_snprintf
+#define vsnprintf x264_vsnprintf
+#endif
 #ifdef _WIN32
 #define utf8_to_utf16( utf8, utf16 )\
    MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
-FILE *x264_fopen( const char *filename, const char *mode );
-int x264_rename( const char *oldname, const char *newname );
+/* Functions for dealing with Unicode on Windows. */
+static inline FILE *x264_fopen( const char *filename, const char *mode ) /* fopen for UTF-8 arguments: converts both strings with utf8_to_utf16 and calls _wfopen */
+{
+    wchar_t filename_utf16[MAX_PATH]; /* utf8_to_utf16 takes the output capacity from sizeof of this array */
+    wchar_t mode_utf16[16];
+    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
+        return _wfopen( filename_utf16, mode_utf16 );
+    return NULL; /* a conversion evaluated to zero; nothing is opened */
+}
+static inline int x264_rename( const char *oldname, const char *newname ) /* rename for UTF-8 paths: returns the _wrename result, or -1 if a conversion evaluates to zero */
+{
+    wchar_t oldname_utf16[MAX_PATH];
+    wchar_t newname_utf16[MAX_PATH];
+    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
+    {
+        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
+        _wunlink( newname_utf16 ); /* return value is not checked */
+        return _wrename( oldname_utf16, newname_utf16 );
+    }
+    return -1;
+}
 #define x264_struct_stat struct _stati64
 #define x264_fstat _fstati64
-int x264_stat( const char *path, x264_struct_stat *buf );
+static inline int x264_stat( const char *path, x264_struct_stat *buf ) /* stat for a UTF-8 path: forwards to _wstati64, or returns -1 if the conversion evaluates to zero */
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return _wstati64( path_utf16, buf );
+    return -1;
+}
 #else
 #define x264_fopen       fopen
 #define x264_rename      rename
@@ -93,11 +161,49 @@ int x264_stat( const char *path, x264_struct_stat *buf );
 #endif
 /* mdate: return the current date in microsecond */
-int64_t x264_mdate( void );
+X264_API int64_t x264_mdate( void );
 #if defined(_WIN32) && !HAVE_WINRT
-int x264_vfprintf( FILE *stream, const char *format, va_list arg );
+static inline int x264_vfprintf( FILE *stream, const char *format, va_list arg ) /* vfprintf that routes text through WriteConsoleW when stream is stdout/stderr and GetConsoleMode succeeds on its handle */
-int x264_is_pipe( const char *path );
+{
+    HANDLE console = NULL; /* stays NULL for any stream other than stdout/stderr */
+    DWORD mode;
+    if( stream == stdout )
+        console = GetStdHandle( STD_OUTPUT_HANDLE );
+    else if( stream == stderr )
+        console = GetStdHandle( STD_ERROR_HANDLE );
+    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
+    if( GetConsoleMode( console, &mode ) )
+    {
+        char buf[4096];
+        wchar_t buf_utf16[4096];
+        va_list arg2;
+        va_copy( arg2, arg ); /* keep arg unconsumed for the vfprintf fallback */
+        int length = vsnprintf( buf, sizeof(buf), format, arg2 );
+        va_end( arg2 );
+        if( length > 0 && length < sizeof(buf) ) /* empty, failed, or truncated output falls through to vfprintf */
+        {
+            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
+            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
+            DWORD written;
+            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
+            return length; /* the vsnprintf byte count, not the number of UTF-16 units written */
+        }
+    }
+    return vfprintf( stream, format, arg );
+}
+static inline int x264_is_pipe( const char *path ) /* returns the WaitNamedPipeW result for a UTF-8 path, or 0 if the conversion evaluates to zero */
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return WaitNamedPipeW( path_utf16, 0 ); /* NOTE(review): presumably nonzero only when path names an existing pipe - confirm against the WaitNamedPipe documentation */
+    return 0;
+}
 #else
 #define x264_vfprintf vfprintf
 #define x264_is_pipe(x) 0
@@ -163,6 +269,12 @@ int x264_is_pipe( const char *path );
 #define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16
 #endif
+/* REALIGN_STACK: function attribute macro.
+ * Expands to __attribute__((force_align_arg_pointer)) when STACK_ALIGNMENT > 16, or when ARCH_X86
+ * is set and STACK_ALIGNMENT > 4; otherwise it expands to nothing.
+ * NOTE(review): STACK_ALIGNMENT and ARCH_X86 are defined outside this hunk - presumably by the
+ * build configuration; confirm. */
+#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)
+#define REALIGN_STACK __attribute__((force_align_arg_pointer))
+#else
+#define REALIGN_STACK
+#endif
 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
 #define UNUSED __attribute__((unused))
 #define ALWAYS_INLINE __attribute__((always_inline)) inline
@@ -247,7 +359,7 @@ static inline int x264_pthread_create( x264_pthread_t *t, void *a, void *(*f)(vo
 #endif
 #if HAVE_WIN32THREAD || PTW32_STATIC_LIB
-int x264_threading_init( void );
+X264_API int x264_threading_init( void );
 #else
 #define x264_threading_init() 0
 #endif

--- a/common/pixel.c
+++ b/common/pixel.c
@@ -31,18 +31,18 @@
 #   include "x86/pixel.h"
 #   include "x86/predict.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #   include "ppc/pixel.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #   include "arm/pixel.h"
 #   include "arm/predict.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #   include "aarch64/pixel.h"
 #   include "aarch64/predict.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #   include "mips/pixel.h"
 #endif
@@ -508,7 +508,7 @@ SATD_X_DECL7( _avx512 )
 #endif
 #if !HIGH_BIT_DEPTH
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
 SATD_X_DECL7( _neon )
 #endif
 #endif // !HIGH_BIT_DEPTH
@@ -532,7 +532,7 @@ INTRA_MBCMP_8x8(sa8d,, _c )
 INTRA_MBCMP_8x8( sad, _mmx2,  _c )
 INTRA_MBCMP_8x8(sa8d, _sse2,  _sse2 )
 #endif
-#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || ARCH_AARCH64)
+#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || HAVE_AARCH64)
 INTRA_MBCMP_8x8( sad, _neon, _neon )
 INTRA_MBCMP_8x8(sa8d, _neon, _neon )
 #endif
@@ -602,7 +602,7 @@ INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _neon, _c )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _neon, _neon )
 INTRA_MBCMP(satd, 16x16,  v, h, dc,  , _neon, _neon )
 #endif
-#if !HIGH_BIT_DEPTH && ARCH_AARCH64
+#if !HIGH_BIT_DEPTH && HAVE_AARCH64
 INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _neon, _neon )
 INTRA_MBCMP(satd,  4x4,   v, h, dc,  , _neon, _neon )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _neon, _neon )
@@ -1434,7 +1434,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
    }
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        INIT8( sad, _neon );
@@ -1475,7 +1475,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
        pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
        pixf->ssim_end4         = x264_pixel_ssim_end4_neon;
    }
-#endif // ARCH_AARCH64
+#endif // HAVE_AARCH64
 #if HAVE_MSA
    if( cpu&X264_CPU_MSA )

--- a/common/predict.c
+++ b/common/predict.c
@@ -34,16 +34,16 @@
 #if HAVE_MMX
 #   include "x86/predict.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #   include "ppc/predict.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #   include "arm/predict.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #   include "aarch64/predict.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #   include "mips/predict.h"
 #endif
@@ -906,7 +906,7 @@ void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] )
    x264_predict_16x16_init_arm( cpu, pf );
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    x264_predict_16x16_init_aarch64( cpu, pf );
 #endif
@@ -949,7 +949,7 @@ void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] )
    x264_predict_8x8c_init_arm( cpu, pf );
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    x264_predict_8x8c_init_aarch64( cpu, pf );
 #endif
@@ -981,7 +981,7 @@ void x264_predict_8x16c_init( int cpu, x264_predict_t pf[7] )
    x264_predict_8x16c_init_arm( cpu, pf );
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    x264_predict_8x16c_init_aarch64( cpu, pf );
 #endif
 }
@@ -1010,7 +1010,7 @@ void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_
    x264_predict_8x8_init_arm( cpu, pf, predict_filter );
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    x264_predict_8x8_init_aarch64( cpu, pf, predict_filter );
 #endif
@@ -1047,7 +1047,7 @@ void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] )
    x264_predict_4x4_init_arm( cpu, pf );
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    x264_predict_4x4_init_aarch64( cpu, pf );
 #endif
 }

--- a/common/quant.c
+++ b/common/quant.c
@@ -31,16 +31,16 @@
 #if HAVE_MMX
 #include "x86/quant.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #   include "ppc/quant.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #   include "arm/quant.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #   include "aarch64/quant.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #   include "mips/quant.h"
 #endif
@@ -756,7 +756,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
        pf->coeff_last8 = x264_coeff_last8_arm;
    }
 #endif
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        pf->quant_2x2_dc   = x264_quant_2x2_dc_neon;
@@ -776,7 +776,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
        pf->decimate_score64 = x264_decimate_score64_neon;
    }
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
    if( cpu&X264_CPU_ARMV8 )
    {
        pf->coeff_last4 = x264_coeff_last4_aarch64;

--- a/common/tables.h
+++ b/common/tables.h
@@ -33,7 +33,7 @@ typedef struct
    uint8_t i_size;
 } vlc_t;
-extern const x264_level_t x264_levels[];
+X264_API extern const x264_level_t x264_levels[];
 extern const uint8_t x264_exp2_lut[64];
 extern const float   x264_log2_lut[128];

--- a/common/threadpool.c
+++ b/common/threadpool.c
@@ -47,7 +47,7 @@ struct x264_threadpool_t
    x264_sync_frame_list_t done;   /* list of jobs that have finished processing */
 };
-static void *threadpool_thread_internal( x264_threadpool_t *pool )
+REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool )
 {
    if( pool->init_func )
        pool->init_func( pool->init_arg );
@@ -72,11 +72,6 @@ static void *threadpool_thread_internal( x264_threadpool_t *pool )
    return NULL;
 }
-static void *threadpool_thread( x264_threadpool_t *pool )
-{
-    return (void*)x264_stack_align( threadpool_thread_internal, pool );
-}
 int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
                          void (*init_func)(void *), void *init_arg )
 {
No results found