...
 
Commits (11)
......@@ -8,6 +8,9 @@ vpath %.S $(SRCPATH)
vpath %.asm $(SRCPATH)
vpath %.rc $(SRCPATH)
CFLAGS += $(CFLAGSPROF)
LDFLAGS += $(LDFLAGSPROF)
GENERATED =
all: default
......@@ -246,6 +249,8 @@ $(LIBX264): $(GENERATED) .depend $(OBJS) $(OBJASM)
$(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO)
$(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
$(IMPLIBNAME): $(SONAME)
ifneq ($(EXE),)
.PHONY: x264 checkasm8 checkasm10 example
x264: x264$(EXE)
......@@ -266,6 +271,9 @@ checkasm10$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_10) $(LIBX264)
example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264)
$(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS)
$(OBJS) $(OBJSO): CFLAGS += $(CFLAGSSO)
$(OBJCLI): CFLAGS += $(CFLAGSCLI)
$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) $(OBJEXAMPLE): .depend
%.o: %.c
......@@ -336,7 +344,7 @@ ifneq ($(wildcard .depend),)
include .depend
endif
OBJPROF = $(OBJS) $(OBJCLI)
OBJPROF = $(OBJS) $(OBJSO) $(OBJCLI)
# These should cover most of the important codepaths
OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb
OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0 --slice-max-mbs 50
......@@ -354,7 +362,7 @@ fprofiled:
@echo 'i.e. YUV with resolution in the filename, y4m, or avisynth.'
else
fprofiled: clean
$(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)"
$(MAKE) x264$(EXE) CFLAGSPROF="$(PROF_GEN_CC)" LDFLAGSPROF="$(PROF_GEN_LD)"
$(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;))
ifeq ($(COMPILER),CL)
# Because Visual Studio timestamps the object files within the PGD, it fails to build if they change - only the executable should be deleted
......@@ -362,7 +370,7 @@ ifeq ($(COMPILER),CL)
else
rm -f $(OBJPROF)
endif
$(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)"
$(MAKE) CFLAGSPROF="$(PROF_USE_CC)" LDFLAGSPROF="$(PROF_USE_LD)"
rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
endif
......
......@@ -99,13 +99,18 @@ void x264_log_internal( int i_level, const char *psz_fmt, ... )
/****************************************************************************
* x264_malloc:
****************************************************************************/
void *x264_malloc( int i_size )
void *x264_malloc( int64_t i_size )
{
#define HUGE_PAGE_SIZE 2*1024*1024
#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
if( i_size < 0 || i_size > (SIZE_MAX - HUGE_PAGE_SIZE) /*|| i_size > (SIZE_MAX - NATIVE_ALIGN - sizeof(void **))*/ )
{
x264_log_internal( X264_LOG_ERROR, "invalid size of malloc: %"PRId64"\n", i_size );
return NULL;
}
uint8_t *align_buf = NULL;
#if HAVE_MALLOC_H
#if HAVE_THP
#define HUGE_PAGE_SIZE 2*1024*1024
#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
/* Attempt to allocate huge pages to reduce TLB misses. */
if( i_size >= HUGE_PAGE_THRESHOLD )
{
......@@ -118,8 +123,6 @@ void *x264_malloc( int i_size )
}
}
else
#undef HUGE_PAGE_SIZE
#undef HUGE_PAGE_THRESHOLD
#endif
align_buf = memalign( NATIVE_ALIGN, i_size );
#else
......@@ -132,8 +135,10 @@ void *x264_malloc( int i_size )
}
#endif
if( !align_buf )
x264_log_internal( X264_LOG_ERROR, "malloc of size %d failed\n", i_size );
x264_log_internal( X264_LOG_ERROR, "malloc of size %"PRId64" failed\n", i_size );
return align_buf;
#undef HUGE_PAGE_SIZE
#undef HUGE_PAGE_THRESHOLD
}
/****************************************************************************
......@@ -196,7 +201,7 @@ error:
/****************************************************************************
* x264_picture_init:
****************************************************************************/
static void picture_init( x264_picture_t *pic )
REALIGN_STACK void x264_picture_init( x264_picture_t *pic )
{
memset( pic, 0, sizeof( x264_picture_t ) );
pic->i_type = X264_TYPE_AUTO;
......@@ -204,15 +209,10 @@ static void picture_init( x264_picture_t *pic )
pic->i_pic_struct = PIC_STRUCT_AUTO;
}
/* Exported entry point: hands pic to the file-local picture_init() by way of
 * x264_stack_align() instead of calling it directly. */
void x264_picture_init( x264_picture_t *pic )
{
x264_stack_align( picture_init, pic );
}
/****************************************************************************
* x264_picture_alloc:
****************************************************************************/
static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
typedef struct
{
......@@ -243,16 +243,16 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
int csp = i_csp & X264_CSP_MASK;
if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
return -1;
picture_init( pic );
x264_picture_init( pic );
pic->img.i_csp = i_csp;
pic->img.i_plane = csp_tab[csp].planes;
int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
int plane_offset[3] = {0};
int frame_size = 0;
int64_t plane_offset[3] = {0};
int64_t frame_size = 0;
for( int i = 0; i < pic->img.i_plane; i++ )
{
int stride = (((int64_t)i_width * csp_tab[csp].width_fix8[i]) >> 8) * depth_factor;
int plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride;
int64_t plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride;
pic->img.i_stride[i] = stride;
plane_offset[i] = frame_size;
frame_size += plane_size;
......@@ -265,15 +265,10 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
return 0;
}
/* Exported entry point: forwards all four arguments to the file-local
 * picture_alloc() through x264_stack_align() and returns whatever it returns. */
int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
}
/****************************************************************************
* x264_picture_clean:
****************************************************************************/
static void picture_clean( x264_picture_t *pic )
REALIGN_STACK void x264_picture_clean( x264_picture_t *pic )
{
x264_free( pic->img.plane[0] );
......@@ -281,15 +276,10 @@ static void picture_clean( x264_picture_t *pic )
memset( pic, 0, sizeof( x264_picture_t ) );
}
/* Exported entry point: hands pic to the file-local picture_clean() by way of
 * x264_stack_align() instead of calling it directly. */
void x264_picture_clean( x264_picture_t *pic )
{
x264_stack_align( picture_clean, pic );
}
/****************************************************************************
* x264_param_default:
****************************************************************************/
static void param_default( x264_param_t *param )
REALIGN_STACK void x264_param_default( x264_param_t *param )
{
/* */
memset( param, 0, sizeof( x264_param_t ) );
......@@ -434,11 +424,6 @@ static void param_default( x264_param_t *param )
param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC;
}
/* Exported entry point: hands param to the file-local param_default() by way
 * of x264_stack_align() instead of calling it directly. */
void x264_param_default( x264_param_t *param )
{
x264_stack_align( param_default, param );
}
static int param_apply_preset( x264_param_t *param, const char *preset )
{
char *end;
......@@ -656,9 +641,9 @@ static int param_apply_tune( x264_param_t *param, const char *tune )
return 0;
}
static int param_default_preset( x264_param_t *param, const char *preset, const char *tune )
REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
param_default( param );
x264_param_default( param );
if( preset && param_apply_preset( param, preset ) < 0 )
return -1;
......@@ -667,12 +652,7 @@ static int param_default_preset( x264_param_t *param, const char *preset, const
return 0;
}
/* Exported entry point: forwards param, preset and tune to the file-local
 * param_default_preset() through x264_stack_align() and returns its result. */
int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
return x264_stack_align( param_default_preset, param, preset, tune );
}
static void param_apply_fastfirstpass( x264_param_t *param )
REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param )
{
/* Set faster options in case of turbo firstpass. */
if( param->rc.b_stat_write && !param->rc.b_stat_read )
......@@ -687,11 +667,6 @@ static void param_apply_fastfirstpass( x264_param_t *param )
}
}
/* Exported entry point: hands param to the file-local
 * param_apply_fastfirstpass() by way of x264_stack_align(). */
void x264_param_apply_fastfirstpass( x264_param_t *param )
{
x264_stack_align( param_apply_fastfirstpass, param );
}
static int profile_string_to_int( const char *str )
{
if( !strcasecmp( str, "baseline" ) )
......@@ -709,7 +684,7 @@ static int profile_string_to_int( const char *str )
return -1;
}
static int param_apply_profile( x264_param_t *param, const char *profile )
REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
if( !profile )
return 0;
......@@ -776,11 +751,6 @@ static int param_apply_profile( x264_param_t *param, const char *profile )
return 0;
}
/* Exported entry point: forwards param and profile to the file-local
 * param_apply_profile() through x264_stack_align() and returns its result. */
int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
return x264_stack_align( param_apply_profile, param, profile );
}
static int parse_enum( const char *arg, const char * const *names, int *dst )
{
for( int i = 0; names[i]; i++ )
......@@ -842,7 +812,7 @@ static double atof_internal( const char *str, int *b_error )
#define atoi(str) atoi_internal( str, &b_error )
#define atof(str) atof_internal( str, &b_error )
static int param_parse( x264_param_t *p, const char *name, const char *value )
REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value )
{
char *name_buf = NULL;
int b_error = 0;
......@@ -1343,11 +1313,6 @@ static int param_parse( x264_param_t *p, const char *name, const char *value )
return b_error ? errortype : 0;
}
/* Exported entry point: forwards param, name and value to the file-local
 * param_parse() through x264_stack_align() and returns its result. */
int x264_param_parse( x264_param_t *param, const char *name, const char *value )
{
return x264_stack_align( param_parse, param, name, value );
}
/****************************************************************************
* x264_param2string:
****************************************************************************/
......
......@@ -47,7 +47,6 @@
#include <string.h>
#include <assert.h>
#include <limits.h>
#include "x264.h"
/****************************************************************************
* Macros
......@@ -256,23 +255,23 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
/****************************************************************************
* General functions
****************************************************************************/
void x264_reduce_fraction( uint32_t *n, uint32_t *d );
void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
X264_API void x264_reduce_fraction( uint32_t *n, uint32_t *d );
X264_API void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
void x264_log_internal( int i_level, const char *psz_fmt, ... );
X264_API void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
X264_API void x264_log_internal( int i_level, const char *psz_fmt, ... );
/* x264_malloc : will do or emulate a memalign
* you have to use x264_free for buffers allocated with x264_malloc */
void *x264_malloc( int );
void x264_free( void * );
X264_API void *x264_malloc( int64_t );
X264_API void x264_free( void * );
/* x264_slurp_file: malloc space for the whole file and read it */
char *x264_slurp_file( const char *filename );
X264_API char *x264_slurp_file( const char *filename );
/* x264_param2string: return a (malloced) string containing most of
* the encoding options */
char *x264_param2string( x264_param_t *p, int b_res );
X264_API char *x264_param2string( x264_param_t *p, int b_res );
/****************************************************************************
* Macros
......@@ -297,12 +296,12 @@ do {\
#define PREALLOC_INIT\
int prealloc_idx = 0;\
size_t prealloc_size = 0;\
int64_t prealloc_size = 0;\
uint8_t **preallocs[PREALLOC_BUF_SIZE];
#define PREALLOC( var, size )\
do {\
var = (void*)prealloc_size;\
var = (void*)(intptr_t)prealloc_size;\
preallocs[prealloc_idx++] = (uint8_t**)&var;\
prealloc_size += ALIGN(size, NATIVE_ALIGN);\
} while( 0 )
......
......@@ -45,7 +45,7 @@ static uint8_t *nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
#if HAVE_ARMV6
#include "arm/bitstream.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/bitstream.h"
#endif
......@@ -159,7 +159,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
if( cpu&X264_CPU_NEON )
pf->nal_escape = x264_nal_escape_neon;
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
pf->nal_escape = x264_nal_escape_neon;
#endif
......
......@@ -80,7 +80,7 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
#elif defined(ARCH_AARCH64)
#elif HAVE_AARCH64
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
......
......@@ -45,7 +45,7 @@
const x264_cpu_name_t x264_cpu_names[] =
{
#if HAVE_MMX
#if ARCH_X86 || ARCH_X86_64
// {"MMX", X264_CPU_MMX}, // we don't support asm on mmx1 cpus anymore
#define MMX2 X264_CPU_MMX|X264_CPU_MMX2
{"MMX2", MMX2},
......@@ -97,7 +97,7 @@ const x264_cpu_name_t x264_cpu_names[] =
{"", 0},
};
#if (ARCH_PPC && SYS_LINUX) || (ARCH_ARM && !HAVE_NEON)
#if (HAVE_ALTIVEC && SYS_LINUX) || (HAVE_ARMV6 && !HAVE_NEON)
#include <signal.h>
#include <setjmp.h>
static sigjmp_buf jmpbuf;
......@@ -298,7 +298,7 @@ uint32_t x264_cpu_detect( void )
return cpu;
}
#elif ARCH_PPC && HAVE_ALTIVEC
#elif HAVE_ALTIVEC
#if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD
#include <sys/sysctl.h>
......@@ -355,7 +355,7 @@ uint32_t x264_cpu_detect( void )
}
#endif
#elif ARCH_ARM
#elif HAVE_ARMV6
void x264_cpu_neon_test( void );
int x264_cpu_fast_neon_mrc_test( void );
......@@ -363,7 +363,6 @@ int x264_cpu_fast_neon_mrc_test( void );
uint32_t x264_cpu_detect( void )
{
int flags = 0;
#if HAVE_ARMV6
flags |= X264_CPU_ARMV6;
// don't do this hack if compiled with -mfpu=neon
......@@ -396,26 +395,25 @@ uint32_t x264_cpu_detect( void )
flags |= x264_cpu_fast_neon_mrc_test() ? X264_CPU_FAST_NEON_MRC : 0;
#endif
// TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
#endif
return flags;
}
#elif ARCH_AARCH64
#elif HAVE_AARCH64
/* Report the CPU capability flags for this build: X264_CPU_ARMV8 is always
 * set, and X264_CPU_NEON is added only when the build has HAVE_NEON enabled.
 * The result is decided entirely at compile time. */
uint32_t x264_cpu_detect( void )
{
    uint32_t flags = X264_CPU_ARMV8;
#if HAVE_NEON
    flags |= X264_CPU_NEON;
#endif
    return flags;
}
#elif ARCH_MIPS
#elif HAVE_MSA
uint32_t x264_cpu_detect( void )
{
uint32_t flags = 0;
#if HAVE_MSA
flags |= X264_CPU_MSA;
#endif
return flags;
return X264_CPU_MSA;
}
#else
......
......@@ -26,8 +26,8 @@
#ifndef X264_CPU_H
#define X264_CPU_H
uint32_t x264_cpu_detect( void );
int x264_cpu_num_processors( void );
X264_API uint32_t x264_cpu_detect( void );
X264_API int x264_cpu_num_processors( void );
void x264_cpu_emms( void );
void x264_cpu_sfence( void );
#if HAVE_MMX
......@@ -46,28 +46,11 @@ void x264_cpu_sfence( void );
#endif
#define x264_sfence x264_cpu_sfence
/* kludge:
* gcc can't give variables any greater alignment than the stack frame has.
* We need 32 byte alignment for AVX2, so here we make sure that the stack is
* aligned to 32 bytes.
* gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
* problem, but I don't want to require such a new version.
* aligning to 32 bytes only works if the compiler supports keeping that
* alignment between functions (osdep.h handles manual alignment of arrays
* if it doesn't).
*/
#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4))
intptr_t x264_stack_align( void (*func)(), ... );
#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
#else
#define x264_stack_align(func,...) func(__VA_ARGS__)
#endif
typedef struct
{
const char *name;
uint32_t flags;
} x264_cpu_name_t;
extern const x264_cpu_name_t x264_cpu_names[];
X264_API extern const x264_cpu_name_t x264_cpu_names[];
#endif
......@@ -29,16 +29,16 @@
#if HAVE_MMX
# include "x86/dct.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/dct.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/dct.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/dct.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/dct.h"
#endif
......@@ -682,7 +682,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
dctf->sub4x4_dct = x264_sub4x4_dct_neon;
......@@ -996,11 +996,11 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_altivec;
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
#if ARCH_AARCH64
#if HAVE_AARCH64
pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_neon;
pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_neon;
pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_neon;
......@@ -1010,9 +1010,9 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_neon;
pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
pf_progressive->sub_8x8 = x264_zigzag_sub_8x8_frame_neon;
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
}
#endif // HAVE_ARMV6 || ARCH_AARCH64
#endif // HAVE_ARMV6 || HAVE_AARCH64
#endif // HIGH_BIT_DEPTH
pf_interlaced->interleave_8x8_cavlc =
......@@ -1065,13 +1065,13 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
#endif // HIGH_BIT_DEPTH
#endif
#if !HIGH_BIT_DEPTH
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf_interlaced->interleave_8x8_cavlc =
pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_neon;
}
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
......
......@@ -667,13 +667,13 @@ void x264_macroblock_deblock( x264_t *h )
#if HAVE_MMX
#include "x86/deblock.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
#include "ppc/deblock.h"
#endif
#if HAVE_ARMV6
#include "arm/deblock.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/deblock.h"
#endif
#if HAVE_MSA
......@@ -782,7 +782,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
}
#endif // HAVE_ALTIVEC
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf->deblock_luma[1] = x264_deblock_v_luma_neon;
......
......@@ -162,7 +162,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
for( int p = 0; p < luma_plane_count; p++ )
{
int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
if( h->param.analyse.i_subpel_refine && b_fdec )
luma_plane_size *= 4;
......@@ -205,7 +205,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
if( h->frames.b_have_lowres )
{
int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) );
......@@ -244,7 +244,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
for( int p = 0; p < luma_plane_count; p++ )
{
int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
if( h->param.analyse.i_subpel_refine && b_fdec )
{
for( int i = 0; i < 4; i++ )
......@@ -274,7 +274,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
if( h->frames.b_have_lowres )
{
int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
for( int i = 0; i < 4; i++ )
frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;
......
......@@ -261,13 +261,14 @@ void x264_threadslice_cond_broadcast( x264_t *h, int pass );
void x264_threadslice_cond_wait( x264_t *h, int pass );
#define x264_frame_push x264_template(frame_push)
void x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
X264_API void x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
#define x264_frame_pop x264_template(frame_pop)
x264_frame_t *x264_frame_pop( x264_frame_t **list );
X264_API x264_frame_t *x264_frame_pop( x264_frame_t **list );
#define x264_frame_unshift x264_template(frame_unshift)
void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
X264_API void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
#define x264_frame_shift x264_template(frame_shift)
x264_frame_t *x264_frame_shift( x264_frame_t **list );
X264_API x264_frame_t *x264_frame_shift( x264_frame_t **list );
#define x264_frame_push_unused x264_template(frame_push_unused)
void x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
#define x264_frame_push_blank_unused x264_template(frame_push_blank_unused)
......
......@@ -29,16 +29,16 @@
#if HAVE_MMX
#include "x86/mc.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
#include "ppc/mc.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
#include "arm/mc.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/mc.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
#include "mips/mc.h"
#endif
......@@ -680,7 +680,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
#if HAVE_ARMV6
x264_mc_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_mc_init_aarch64( cpu, pf );
#endif
#if HAVE_MSA
......
......@@ -131,8 +131,11 @@ static cl_program opencl_cache_load( x264_t *h, const char *dev_name, const char
uint8_t *binary = NULL;
fseek( fp, 0, SEEK_END );
size_t size = ftell( fp );
rewind( fp );
int64_t file_size = ftell( fp );
fseek( fp, 0, SEEK_SET );
if( file_size < 0 || file_size > SIZE_MAX )
goto fail;
size_t size = file_size;
CHECKED_MALLOC( binary, size );
if( fread( binary, 1, size, fp ) != size )
......
......@@ -27,11 +27,6 @@
#include "osdep.h"
#ifdef _WIN32
#include <windows.h>
#include <io.h>
#endif
#if SYS_WINDOWS
#include <sys/types.h>
#include <sys/timeb.h>
......@@ -111,114 +106,3 @@ int x264_threading_init( void )
return 0;
}
#endif
#ifdef _WIN32
/* Functions for dealing with Unicode on Windows. */
/* Open a file whose name and mode are given as UTF-8 strings.
 * Both strings are converted with utf8_to_utf16() into fixed-size stack
 * buffers (MAX_PATH wide chars for the name, 16 for the mode) and passed to
 * _wfopen(). Returns NULL when either conversion yields zero; otherwise
 * returns whatever _wfopen() returns. */
FILE *x264_fopen( const char *filename, const char *mode )
{
wchar_t filename_utf16[MAX_PATH];
wchar_t mode_utf16[16];
if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
return _wfopen( filename_utf16, mode_utf16 );
return NULL;
}
/* Rename a file given UTF-8 source and destination paths.
 * Returns -1 when either path fails utf8_to_utf16() conversion; otherwise
 * returns the result of _wrename(). */
int x264_rename( const char *oldname, const char *newname )
{
wchar_t oldname_utf16[MAX_PATH];
wchar_t newname_utf16[MAX_PATH];
if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
{
/* POSIX says that rename() removes the destination, but Win32 doesn't. */
/* The unlink result is deliberately not checked; the destination is removed
 * before the rename is attempted, so the two steps are not atomic. */
_wunlink( newname_utf16 );
return _wrename( oldname_utf16, newname_utf16 );
}
return -1;
}
/* stat() a UTF-8 path: converts it with utf8_to_utf16() into a MAX_PATH-sized
 * wide buffer and calls _wstati64() to fill *buf. Returns -1 when the
 * conversion yields zero, otherwise the result of _wstati64(). */
int x264_stat( const char *path, x264_struct_stat *buf )
{
wchar_t path_utf16[MAX_PATH];
if( utf8_to_utf16( path, path_utf16 ) )
return _wstati64( path_utf16, buf );
return -1;
}
#if !HAVE_WINRT
/* vfprintf() replacement that writes UTF-8 formatted text to a console as
 * UTF-16. Only stdout and stderr are candidates; any other stream, a
 * redirected stream, or a message that does not fit the 4096-byte scratch
 * buffer falls through to plain vfprintf().
 * Returns the formatted length in bytes on the console path, otherwise the
 * result of vfprintf(). */
int x264_vfprintf( FILE *stream, const char *format, va_list arg )
{
/* Stays NULL for streams other than stdout/stderr; the GetConsoleMode()
 * check below is what rejects that case. */
HANDLE console = NULL;
DWORD mode;
if( stream == stdout )
console = GetStdHandle( STD_OUTPUT_HANDLE );
else if( stream == stderr )
console = GetStdHandle( STD_ERROR_HANDLE );
/* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
if( GetConsoleMode( console, &mode ) )
{
char buf[4096];
wchar_t buf_utf16[4096];
va_list arg2;
/* Format through a copy so that arg is still unused for the fallback below. */
va_copy( arg2, arg );
int length = vsnprintf( buf, sizeof(buf), format, arg2 );
va_end( arg2 );
/* Empty, failed or truncated output is left to the vfprintf() fallback. */
if( length > 0 && length < sizeof(buf) )
{
/* WriteConsoleW is the most reliable way to output Unicode to a console. */
int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
DWORD written;
/* Neither the conversion result nor the write result is checked. */
WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
return length;
}
}
return vfprintf( stream, format, arg );
}
/* Probe a UTF-8 path with WaitNamedPipeW() using a timeout argument of 0.
 * Returns 0 when the path cannot be converted by utf8_to_utf16(); otherwise
 * returns the WaitNamedPipeW() result unchanged.
 * NOTE(review): a nonzero result presumably means the path names a pipe with
 * an available instance -- confirm against the Win32 documentation. */
int x264_is_pipe( const char *path )
{
wchar_t path_utf16[MAX_PATH];
if( utf8_to_utf16( path, path_utf16 ) )
return WaitNamedPipeW( path_utf16, 0 );
return 0;
}
#endif
#if defined(_MSC_VER) && _MSC_VER < 1900
/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
/* Variadic front end for x264_vsnprintf(): packs the trailing arguments into
 * a va_list, delegates, and returns the delegate's result. */
int x264_snprintf( char *s, size_t n, const char *fmt, ... )
{
va_list arg;
va_start( arg, fmt );
int length = x264_vsnprintf( s, n, fmt, arg );
va_end( arg );
return length;
}
/* vsnprintf() built on MSVC's _vsnprintf().
 * s/n   destination buffer and its size; nothing is written when n is 0.
 * Returns the _vsnprintf() result when it is non-negative; otherwise (n == 0
 * or the output did not fit) returns the length computed by _vscprintf().
 * Whenever n > 0 the buffer is left null-terminated. */
int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
{
int length = -1;
if( n )
{
va_list arg2;
/* Work on a copy so that arg can still be passed to _vscprintf() below. */
va_copy( arg2, arg );
length = _vsnprintf( s, n, fmt, arg2 );
va_end( arg2 );
/* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
if( length < 0 || length >= n )
s[n-1] = '\0';
}
/* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
if( length < 0 )
return _vscprintf( fmt, arg );
return length;
}
#endif
#endif
......@@ -43,6 +43,13 @@
#include <math.h>
#endif
#ifdef _WIN32
#include <windows.h>
#include <io.h>
#endif
#include "x264.h"
#if !HAVE_LOG2F
#define log2f(x) (logf(x)/0.693147180559945f)
#define log2(x) (log(x)/0.693147180559945)
......@@ -54,12 +61,6 @@
#define strncasecmp _strnicmp
#define strtok_r strtok_s
#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#if _MSC_VER < 1900
int x264_snprintf( char *s, size_t n, const char *fmt, ... );
int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
#define snprintf x264_snprintf
#define vsnprintf x264_vsnprintf
#endif
#else
#include <strings.h>
#endif
......@@ -76,14 +77,81 @@ int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
#define strtok_r(str,delim,save) strtok(str,delim)
#endif
#if defined(_MSC_VER) && _MSC_VER < 1900
/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
/* vsnprintf() substitute layered on MSVC's _vsnprintf().
 * Writes at most n bytes to s (nothing at all when n is 0) and always leaves
 * the buffer null-terminated when n > 0. Returns the _vsnprintf() result when
 * it is non-negative, otherwise the length reported by _vscprintf(). */
static inline int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
{
    int written = -1;

    if( n )
    {
        /* Format through a copy: arg must stay untouched for _vscprintf(). */
        va_list ap;
        va_copy( ap, arg );
        written = _vsnprintf( s, n, fmt, ap );
        va_end( ap );

        /* _vsnprintf() only terminates the string when it fits with room to spare. */
        if( written < 0 || written >= n )
            s[n-1] = '\0';
    }

    /* A negative count means the output did not fit (or n was 0), so measure it instead. */
    return written < 0 ? _vscprintf( fmt, arg ) : written;
}
/* snprintf() substitute: collects the variadic arguments and lets
 * x264_vsnprintf() do the formatting; its return value is passed through. */
static inline int x264_snprintf( char *s, size_t n, const char *fmt, ... )
{
    va_list ap;
    int ret;

    va_start( ap, fmt );
    ret = x264_vsnprintf( s, n, fmt, ap );
    va_end( ap );

    return ret;
}
#define snprintf x264_snprintf
#define vsnprintf x264_vsnprintf
#endif
#ifdef _WIN32
#define utf8_to_utf16( utf8, utf16 )\
MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
FILE *x264_fopen( const char *filename, const char *mode );
int x264_rename( const char *oldname, const char *newname );
/* Functions for dealing with Unicode on Windows. */
/* fopen() for UTF-8 arguments: converts the file name and the mode string to
 * UTF-16 and opens the file with _wfopen(). Returns NULL when either string
 * cannot be converted. */
static inline FILE *x264_fopen( const char *filename, const char *mode )
{
    /* utf8_to_utf16() sizes its output with sizeof(), so these must be true arrays. */
    wchar_t wide_name[MAX_PATH];
    wchar_t wide_mode[16];

    if( !utf8_to_utf16( filename, wide_name ) )
        return NULL;
    if( !utf8_to_utf16( mode, wide_mode ) )
        return NULL;

    return _wfopen( wide_name, wide_mode );
}
/* rename() for UTF-8 paths. Returns -1 when either path cannot be converted
 * to UTF-16, otherwise the result of _wrename(). */
static inline int x264_rename( const char *oldname, const char *newname )
{
    wchar_t wide_old[MAX_PATH];
    wchar_t wide_new[MAX_PATH];

    if( !utf8_to_utf16( oldname, wide_old ) || !utf8_to_utf16( newname, wide_new ) )
        return -1;

    /* Win32 rename does not replace an existing destination the way POSIX
     * rename() does, so remove it first; the unlink result is ignored. */
    _wunlink( wide_new );
    return _wrename( wide_old, wide_new );
}
#define x264_struct_stat struct _stati64
#define x264_fstat _fstati64
int x264_stat( const char *path, x264_struct_stat *buf );
/* stat() for a UTF-8 path: fills *buf via _wstati64(), or returns -1 when the
 * path cannot be converted to UTF-16. */
static inline int x264_stat( const char *path, x264_struct_stat *buf )
{
    wchar_t wide_path[MAX_PATH];

    return utf8_to_utf16( path, wide_path ) ? _wstati64( wide_path, buf ) : -1;
}
#else
#define x264_fopen fopen
#define x264_rename rename
......@@ -93,11 +161,49 @@ int x264_stat( const char *path, x264_struct_stat *buf );
#endif
/* mdate: return the current date in microsecond */
int64_t x264_mdate( void );
X264_API int64_t x264_mdate( void );
#if defined(_WIN32) && !HAVE_WINRT
int x264_vfprintf( FILE *stream, const char *format, va_list arg );
int x264_is_pipe( const char *path );
/* vfprintf() replacement that prints UTF-8 text to a console as UTF-16.
 * The console path is taken only for stdout/stderr that are attached to a
 * console and only for messages that fit the 4096-byte scratch buffer; every
 * other case is handled by plain vfprintf().
 * Returns the formatted byte length on the console path, otherwise the
 * vfprintf() result. */
static inline int x264_vfprintf( FILE *stream, const char *format, va_list arg )
{
    /* Stays NULL for streams other than stdout/stderr; GetConsoleMode() rejects it below. */
    HANDLE console = NULL;
    DWORD mode;

    if( stream == stdout )
        console = GetStdHandle( STD_OUTPUT_HANDLE );
    else if( stream == stderr )
        console = GetStdHandle( STD_ERROR_HANDLE );

    /* Redirected output (or no console at all) keeps the raw UTF-8 bytes. */
    if( !GetConsoleMode( console, &mode ) )
        return vfprintf( stream, format, arg );

    char utf8[4096];
    wchar_t wide[4096];

    /* Format through a copy so that arg is still pristine for the fallback. */
    va_list ap;
    va_copy( ap, arg );
    int len = vsnprintf( utf8, sizeof(utf8), format, ap );
    va_end( ap );

    /* Empty, failed or truncated output: let vfprintf() deal with it. */
    if( len <= 0 || len >= sizeof(utf8) )
        return vfprintf( stream, format, arg );

    /* WriteConsoleW is the most reliable way to output Unicode to a console. */
    int wide_len = MultiByteToWideChar( CP_UTF8, 0, utf8, len, wide, sizeof(wide)/sizeof(wchar_t) );
    DWORD written;
    WriteConsoleW( console, wide, wide_len, &written, NULL );
    return len;
}
/* Probe a UTF-8 path with WaitNamedPipeW() and a timeout argument of 0.
 * Returns that call's result, or 0 when the path cannot be converted to
 * UTF-16. */
static inline int x264_is_pipe( const char *path )
{
    wchar_t wide_path[MAX_PATH];

    return utf8_to_utf16( path, wide_path ) ? WaitNamedPipeW( wide_path, 0 ) : 0;
}
#else
#define x264_vfprintf vfprintf
#define x264_is_pipe(x) 0
......@@ -163,6 +269,12 @@ int x264_is_pipe( const char *path );
#define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16
#endif
/* REALIGN_STACK is a function-definition prefix. It expands to the GCC-style
 * force_align_arg_pointer attribute when STACK_ALIGNMENT is above 16, or above
 * 4 on ARCH_X86, and to nothing in every other configuration.
 * NOTE(review): the attribute spelling is GCC/Clang syntax and this guard does
 * not test for the compiler -- confirm other compilers never take this branch. */
#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)
#define REALIGN_STACK __attribute__((force_align_arg_pointer))
#else
#define REALIGN_STACK
#endif
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#define UNUSED __attribute__((unused))
#define ALWAYS_INLINE __attribute__((always_inline)) inline
......@@ -247,7 +359,7 @@ static inline int x264_pthread_create( x264_pthread_t *t, void *a, void *(*f)(vo
#endif
#if HAVE_WIN32THREAD || PTW32_STATIC_LIB
int x264_threading_init( void );
X264_API int x264_threading_init( void );
#else
#define x264_threading_init() 0
#endif
......
......@@ -31,18 +31,18 @@
# include "x86/pixel.h"
# include "x86/predict.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/pixel.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/pixel.h"
# include "arm/predict.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/pixel.h"
# include "aarch64/predict.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/pixel.h"
#endif
......@@ -508,7 +508,7 @@ SATD_X_DECL7( _avx512 )
#endif
#if !HIGH_BIT_DEPTH
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
SATD_X_DECL7( _neon )
#endif
#endif // !HIGH_BIT_DEPTH
......@@ -532,7 +532,7 @@ INTRA_MBCMP_8x8(sa8d,, _c )
INTRA_MBCMP_8x8( sad, _mmx2, _c )
INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 )
#endif
#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || ARCH_AARCH64)
#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || HAVE_AARCH64)
INTRA_MBCMP_8x8( sad, _neon, _neon )
INTRA_MBCMP_8x8(sa8d, _neon, _neon )
#endif
......@@ -602,7 +602,7 @@ INTRA_MBCMP(satd, 8x16, dc, h, v, c, _neon, _c )
INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon )
INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon )
#endif
#if !HIGH_BIT_DEPTH && ARCH_AARCH64
#if !HIGH_BIT_DEPTH && HAVE_AARCH64
INTRA_MBCMP( sad, 4x4, v, h, dc, , _neon, _neon )
INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, _neon )
INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon )
......@@ -1434,7 +1434,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
}
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
INIT8( sad, _neon );
......@@ -1475,7 +1475,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon;
pixf->ssim_end4 = x264_pixel_ssim_end4_neon;
}
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
#if HAVE_MSA
if( cpu&X264_CPU_MSA )
......
......@@ -34,16 +34,16 @@
#if HAVE_MMX
# include "x86/predict.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/predict.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/predict.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/predict.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/predict.h"
#endif
......@@ -906,7 +906,7 @@ void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] )
x264_predict_16x16_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_16x16_init_aarch64( cpu, pf );
#endif
......@@ -949,7 +949,7 @@ void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] )
x264_predict_8x8c_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x8c_init_aarch64( cpu, pf );
#endif
......@@ -981,7 +981,7 @@ void x264_predict_8x16c_init( int cpu, x264_predict_t pf[7] )
x264_predict_8x16c_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x16c_init_aarch64( cpu, pf );
#endif
}
......@@ -1010,7 +1010,7 @@ void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_
x264_predict_8x8_init_arm( cpu, pf, predict_filter );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x8_init_aarch64( cpu, pf, predict_filter );
#endif
......@@ -1047,7 +1047,7 @@ void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] )
x264_predict_4x4_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_4x4_init_aarch64( cpu, pf );
#endif
}
......
......@@ -31,16 +31,16 @@
#if HAVE_MMX
#include "x86/quant.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/quant.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/quant.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/quant.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/quant.h"
#endif
......@@ -756,7 +756,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->coeff_last8 = x264_coeff_last8_arm;
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf->quant_2x2_dc = x264_quant_2x2_dc_neon;
......@@ -776,7 +776,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->decimate_score64 = x264_decimate_score64_neon;
}
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_ARMV8 )
{
pf->coeff_last4 = x264_coeff_last4_aarch64;
......
......@@ -33,7 +33,7 @@ typedef struct
uint8_t i_size;
} vlc_t;
extern const x264_level_t x264_levels[];
X264_API extern const x264_level_t x264_levels[];
extern const uint8_t x264_exp2_lut[64];
extern const float x264_log2_lut[128];
......
......@@ -47,7 +47,7 @@ struct x264_threadpool_t
x264_sync_frame_list_t done; /* list of jobs that have finished processing */
};
static void *threadpool_thread_internal( x264_threadpool_t *pool )
REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool )
{
if( pool->init_func )
pool->init_func( pool->init_arg );
......@@ -72,11 +72,6 @@ static void *threadpool_thread_internal( x264_threadpool_t *pool )
return NULL;
}
/* Wrapper that invokes threadpool_thread_internal( pool ) through x264_stack_align
 * and hands back its result cast to void*. */
static void *threadpool_thread( x264_threadpool_t *pool )
{
return (void*)x264_stack_align( threadpool_thread_internal, pool );
}
int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
void (*init_func)(void *), void *init_arg )
{
......
......@@ -30,14 +30,14 @@ typedef struct x264_threadpool_t x264_threadpool_t;
#if HAVE_THREAD
#define x264_threadpool_init x264_template(threadpool_init)
int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
X264_API int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
void (*init_func)(void *), void *init_arg );
#define x264_threadpool_run x264_template(threadpool_run)
void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
X264_API void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
#define x264_threadpool_wait x264_template(threadpool_wait)
void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
X264_API void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
#define x264_threadpool_delete x264_template(threadpool_delete)
void x264_threadpool_delete( x264_threadpool_t *pool );
X264_API void x264_threadpool_delete( x264_threadpool_t *pool );
#else
#define x264_threadpool_init(p,t,f,a) -1
#define x264_threadpool_run(p,f,a)
......
......@@ -78,33 +78,7 @@ cglobal cpu_sfence
sfence
ret
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; intptr_t stack_align( void (*func)(void*), ... ); (up to 5 args)
;-----------------------------------------------------------------------------
cvisible stack_align
; Argument 0 is the function to call: keep it in rax, then shift arguments 1-5
; down into argument slots 0-4 so the callee sees them as its own arguments.
mov rax, r0mp
mov r0, r1mp
mov r1, r2mp
mov r2, r3mp
mov r3, r4mp
mov r4, r5mp
; Record the caller's rsp in rbp so `leave` can restore it after the call.
push rbp
mov rbp, rsp
%if WIN64
sub rsp, 40 ; shadow space + r4
%endif
; Clear the low bits of rsp with the STACK_ALIGNMENT mask
; (rounds down, assuming STACK_ALIGNMENT is a power of two).
and rsp, ~(STACK_ALIGNMENT-1)
%if WIN64
; Store the fifth forwarded argument at offset 32, inside the 40 bytes reserved above.
mov [rsp+32], r4
%endif
call rax
; leave restores rsp from rbp and pops rbp, undoing the alignment; rax is left as the callee set it.
leave
ret
%else
%if ARCH_X86_64 == 0
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
; return 0 if unsupported
......@@ -130,24 +104,4 @@ cglobal cpu_cpuid_test
pop ebx
popfd
ret
; 32-bit stack_align: calls the function pointer at [ebp+8] with five dword
; arguments copied onto a freshly aligned stack area.
cvisible stack_align
push ebp
mov ebp, esp
; Reserve 20 bytes (five 4-byte slots), then clear the low bits of esp with the
; STACK_ALIGNMENT mask (rounds down, assuming STACK_ALIGNMENT is a power of two).
sub esp, 20
and esp, ~(STACK_ALIGNMENT-1)
; Copy the caller's values at [ebp+12]..[ebp+28] into [esp+0]..[esp+16],
; using r0-r2 as scratch registers.
mov r0, [ebp+12]
mov r1, [ebp+16]
mov r2, [ebp+20]
mov [esp+ 0], r0
mov [esp+ 4], r1
mov [esp+ 8], r2
mov r0, [ebp+24]
mov r1, [ebp+28]
mov [esp+12], r0
mov [esp+16], r1
; [ebp+8] is the first stack argument: the function to invoke.
call [ebp+ 8]
; leave restores esp from ebp and pops ebp, discarding the aligned area.
leave
ret
%endif
......@@ -128,7 +128,7 @@ cl_ldflags() {
}
cc_check() {
if [ -z "$3" ]; then
if [ -z "$3$4" ]; then
if [ -z "$1$2" ]; then
log_check "whether $CC works"
elif [ -z "$1" ]; then
......@@ -138,7 +138,11 @@ cc_check() {
fi
elif [ -z "$1" ]; then
if [ -z "$2" ]; then
if [ -z "$3" ]; then
log_check "whether $CC supports $4"
else
log_check "whether $CC supports $3"
fi
else
log_check "whether $CC supports $3 with $2"
fi
......@@ -149,11 +153,14 @@ cc_check() {
for arg in $1; do
echo "#include <$arg>" >> conftest.c
done
# Optional 4th argument: text appended verbatim to conftest.c ahead of main(),
# i.e. at file scope.
if [ -n "$4" ]; then
echo "$4" >> conftest.c
fi
# 3rd argument: statement text placed inside the body of main(), before "return 0;".
echo "int main (void) { $3 return 0; }" >> conftest.c
if [ $compiler_style = MS ]; then
cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)"
cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)"
else
cc_cmd="$CC conftest.c $CFLAGS $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest"
cc_cmd="$CC conftest.c $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest"
fi
if $cc_cmd >conftest.log 2>&1; then
res=$?
......@@ -380,6 +387,8 @@ opencl="yes"
vsx="auto"
CFLAGS="$CFLAGS -Wall -I. -I\$(SRCPATH)"
CFLAGSSO="$CFLAGSSO"
CFLAGSCLI="$CFLAGSCLI"
LDFLAGS="$LDFLAGS"
LDFLAGSCLI="$LDFLAGSCLI"
ASFLAGS="$ASFLAGS -I. -I\$(SRCPATH)"
......@@ -394,7 +403,7 @@ NL="
"
# list of all preprocessor HAVE values we can define
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \
MSA MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
......@@ -611,6 +620,15 @@ if [ $compiler = GNU ]; then
if cc_check '' -Werror=unknown-warning-option ; then
CHECK_CFLAGS="$CHECK_CFLAGS -Werror=unknown-warning-option"
fi
# Add each attribute-related -Werror flag to CHECK_CFLAGS, one at a time, but only
# when cc_check reports that the compiler accepts it.
# NOTE(review): presumably this makes later attribute probes (e.g. the
# force_align_arg_pointer check) fail instead of merely warning - confirm.
if cc_check '' -Werror=unknown-attributes ; then
CHECK_CFLAGS="$CHECK_CFLAGS -Werror=unknown-attributes"
fi
if cc_check '' -Werror=attributes ; then
CHECK_CFLAGS="$CHECK_CFLAGS -Werror=attributes"
fi
if cc_check '' -Werror=ignored-attributes ; then
CHECK_CFLAGS="$CHECK_CFLAGS -Werror=ignored-attributes"
fi
fi
libm=""
......@@ -886,7 +904,8 @@ if [ $shared = yes -a \( $ARCH = "X86_64" -o $ARCH = "PPC" -o $ARCH = "ALPHA" -o
pic="yes"
fi
if [ $compiler = GNU -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
if cc_check '' '' '' '__attribute__((force_align_arg_pointer))' ; then
if [ $compiler = GNU -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
if cc_check '' -mpreferred-stack-boundary=6 ; then
CFLAGS="$CFLAGS -mpreferred-stack-boundary=6"
stack_alignment=64
......@@ -902,7 +921,7 @@ if [ $compiler = GNU -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
stack_alignment=16
fi
fi
elif [ $compiler = ICC -a $ARCH = X86 ]; then
elif [ $compiler = ICC -a $ARCH = X86 ]; then
# icc on linux has various degrees of mod16 stack support
if [ $SYS = LINUX ]; then
# >= 12 defaults to a mod16 stack
......@@ -915,6 +934,7 @@ elif [ $compiler = ICC -a $ARCH = X86 ]; then
fi
# < 11 is completely incapable of keeping a mod16 stack
fi
fi
fi
if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
......@@ -951,8 +971,10 @@ fi
# AArch64 asm auto-detection: define HAVE_AARCH64 together with HAVE_NEON when either
# the CL compiler's preprocessor defines _M_ARM64, or the compiler accepts the
# "cmeq v0.8h, v0.8h, #0" instruction as inline asm. The second path also appends -c to ASFLAGS.
if [ $asm = auto -a $ARCH = AARCH64 ] ; then
if [ $compiler = CL ] && cpp_check '' '' 'defined(_M_ARM64)' ; then
define HAVE_AARCH64
define HAVE_NEON
elif cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then
define HAVE_AARCH64
define HAVE_NEON
ASFLAGS="$ASFLAGS -c"
else
......@@ -1007,6 +1029,7 @@ if [ "$cli_libx264" = "system" -a "$shared" != "yes" ] ; then
[ "$static" = "yes" ] && die "Option --system-libx264 can not be used together with --enable-static"
if pkg_check x264 ; then
X264_LIBS="$($PKGCONFIG --libs x264)"
X264_CFLAGS="$($PKGCONFIG --cflags x264)"
X264_INCLUDE_DIR="${X264_INCLUDE_DIR-$($PKGCONFIG --variable=includedir x264)}"
configure_system_override "$X264_INCLUDE_DIR" || die "Detection of system libx264 configuration failed"
else
......@@ -1308,6 +1331,10 @@ if cc_check '' -Wmaybe-uninitialized ; then
CFLAGS="-Wno-maybe-uninitialized $CFLAGS"
fi
# For GNU-style compilers, append -fvisibility=hidden to CFLAGS when cc_check
# reports that the flag is accepted.
if [ $compiler = GNU ] && cc_check '' -fvisibility=hidden ; then
CFLAGS="$CFLAGS -fvisibility=hidden"
fi
if [ $compiler = ICC -o $compiler = ICL ] ; then
if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then
define HAVE_INTEL_DISPATCHER
......@@ -1365,12 +1392,22 @@ EOF
${SRCPATH}/version.sh >> x264_config.h
# Shared-library builds: add -DX264_API_EXPORTS to CFLAGSSO (the flag set written
# to config.mak as CFLAGSSO).
if [ "$shared" = "yes" ]; then
CFLAGSSO="$CFLAGSSO -DX264_API_EXPORTS"
fi
if [ "$cli_libx264" = "system" ] ; then
if [ "$shared" = "yes" ]; then
if [ "$SYS" = "WINDOWS" -o "$SYS" = "CYGWIN" ]; then
CLI_LIBX264='$(IMPLIBNAME)'
else
CLI_LIBX264='$(SONAME)'
fi
CFLAGSCLI="$CFLAGSCLI -DX264_API_IMPORTS"
else
CLI_LIBX264=
LDFLAGSCLI="$X264_LIBS $LDFLAGSCLI"
CFLAGSCLI="$CFLAGSCLI $X264_CFLAGS"
cc_check 'stdint.h x264.h' '' 'x264_encoder_open(0);' || die "System libx264 can't be used for compilation of this version"
fi
else
......@@ -1409,7 +1446,11 @@ else # gcc/icc
LIBX264=libx264.a
[ -n "$RC" ] && RCFLAGS="$RCFLAGS -I. -o "
fi
[ $compiler != GNU ] && CFLAGS="$(cc_cflags $CFLAGS)"
# Non-GNU compilers: run all three flag sets (common, shared-library, CLI) through cc_cflags.
# NOTE(review): presumably cc_cflags rewrites GNU-style options into the target
# compiler's syntax - confirm against its definition.
if [ $compiler != GNU ]; then
CFLAGS="$(cc_cflags $CFLAGS)"
CFLAGSSO="$(cc_cflags $CFLAGSSO)"
CFLAGSCLI="$(cc_cflags $CFLAGSCLI)"
fi
if [ $compiler = ICC -o $compiler = ICL ]; then
# icc does not define __SSE__ until SSE2 optimization and icl never defines it or _M_IX86_FP
[ \( $ARCH = X86_64 -o $ARCH = X86 \) -a $asm = yes ] && ! cpp_check "" "" "defined(__SSE__)" && define __SSE__
......@@ -1448,13 +1489,17 @@ SYS_ARCH=$ARCH
SYS=$SYS
CC=$CC
CFLAGS=$CFLAGS
CFLAGSSO=$CFLAGSSO
CFLAGSCLI=$CFLAGSCLI
COMPILER=$compiler
COMPILER_STYLE=$compiler_style
DEPMM=$DEPMM
DEPMT=$DEPMT
LD=$LD
LDFLAGS=$LDFLAGS
LDFLAGSCLI=$LDFLAGSCLI
LIBX264=$LIBX264
CLI_LIBX264=$CLI_LIBX264
AR=$AR
RANLIB=$RANLIB
STRIP=$STRIP
......@@ -1490,14 +1535,7 @@ if [ "$shared" = "yes" ]; then
echo "SONAME=libx264-$API.dll" >> config.mak
if [ $compiler_style = MS ]; then
echo 'IMPLIBNAME=libx264.dll.lib' >> config.mak
# GNU ld on windows defaults to exporting all global functions if there are no explicit __declspec(dllexport) declarations
# MSVC link does not act similarly, so it is required to make an export definition out of x264.h and use it at link time
echo "SOFLAGS=-dll -def:x264.def -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak
echo "EXPORTS" > x264.def
# export API functions
grep "^\(int\|void\|x264_t\).*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264.*\)(.*/\1/;s/open/open_$API/g" >> x264.def
# export API variables/data. must be flagged with the DATA keyword
grep "extern.*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264\w*\)\W.*/\1 DATA/;" >> x264.def
echo "SOFLAGS=-dll -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak
else
echo 'IMPLIBNAME=libx264.dll.a' >> config.mak
echo "SOFLAGS=-shared -Wl,--out-implib,\$(IMPLIBNAME) $SOFLAGS" >> config.mak
......@@ -1524,9 +1562,6 @@ if [ "$static" = "yes" ]; then
echo 'install: install-lib-static' >> config.mak
fi
echo "LDFLAGSCLI = $LDFLAGSCLI" >> config.mak
echo "CLI_LIBX264 = $CLI_LIBX264" >> config.mak
cat > x264.pc << EOF
prefix=$prefix
exec_prefix=$exec_prefix
......@@ -1538,7 +1573,7 @@ Description: H.264 (MPEG4 AVC) encoder library
Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//; s/ .*//')
Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl)
Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl)
Cflags: -I$includedir
Cflags: -I$includedir $([ "$shared" = "yes" ] && echo "-DX264_API_IMPORTS")
EOF
filters="crop select_every"
......
......@@ -73,7 +73,7 @@ typedef struct x264_api_t
int (*encoder_invalidate_reference)( x264_t *, int64_t pts );
} x264_api_t;
static x264_api_t *encoder_open( x264_param_t *param )
REALIGN_STACK x264_t *x264_encoder_open( x264_param_t *param )
{
x264_api_t *api = calloc( 1, sizeof( x264_api_t ) );
if( !api )
......@@ -118,82 +118,77 @@ static x264_api_t *encoder_open( x264_param_t *param )
return NULL;
}
return api;
}
x264_t *x264_encoder_open( x264_param_t *param )
{
/* x264_t is opaque */
return (x264_t *)x264_stack_align( encoder_open, param );
return (x264_t *)api;
}
void x264_encoder_close( x264_t *h )
REALIGN_STACK void x264_encoder_close( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
x264_stack_align( api->encoder_close, api->x264 );
api->encoder_close( api->x264 );
free( api );
}
void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
REALIGN_STACK void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
{
x264_api_t *api = (x264_api_t *)h;
x264_stack_align( api->nal_encode, api->x264, dst, nal );
api->nal_encode( api->x264, dst, nal );
}
int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
REALIGN_STACK int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
{
x264_api_t *api = (x264_api_t *)h;