Skip to content
Commits on Source (11)
......@@ -8,6 +8,9 @@ vpath %.S $(SRCPATH)
vpath %.asm $(SRCPATH)
vpath %.rc $(SRCPATH)
CFLAGS += $(CFLAGSPROF)
LDFLAGS += $(LDFLAGSPROF)
GENERATED =
all: default
......@@ -246,6 +249,8 @@ $(LIBX264): $(GENERATED) .depend $(OBJS) $(OBJASM)
$(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO)
$(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
$(IMPLIBNAME): $(SONAME)
ifneq ($(EXE),)
.PHONY: x264 checkasm8 checkasm10 example
x264: x264$(EXE)
......@@ -266,6 +271,9 @@ checkasm10$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_10) $(LIBX264)
example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264)
$(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS)
$(OBJS) $(OBJSO): CFLAGS += $(CFLAGSSO)
$(OBJCLI): CFLAGS += $(CFLAGSCLI)
$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) $(OBJEXAMPLE): .depend
%.o: %.c
......@@ -336,7 +344,7 @@ ifneq ($(wildcard .depend),)
include .depend
endif
OBJPROF = $(OBJS) $(OBJCLI)
OBJPROF = $(OBJS) $(OBJSO) $(OBJCLI)
# These should cover most of the important codepaths
OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb
OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0 --slice-max-mbs 50
......@@ -354,7 +362,7 @@ fprofiled:
@echo 'i.e. YUV with resolution in the filename, y4m, or avisynth.'
else
fprofiled: clean
$(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)"
$(MAKE) x264$(EXE) CFLAGSPROF="$(PROF_GEN_CC)" LDFLAGSPROF="$(PROF_GEN_LD)"
$(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;))
ifeq ($(COMPILER),CL)
# Because Visual Studio timestamps the object files within the PGD, it fails to build if they change - only the executable should be deleted
......@@ -362,7 +370,7 @@ ifeq ($(COMPILER),CL)
else
rm -f $(OBJPROF)
endif
$(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)"
$(MAKE) CFLAGSPROF="$(PROF_USE_CC)" LDFLAGSPROF="$(PROF_USE_LD)"
rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
endif
......
......@@ -99,13 +99,18 @@ void x264_log_internal( int i_level, const char *psz_fmt, ... )
/****************************************************************************
* x264_malloc:
****************************************************************************/
void *x264_malloc( int i_size )
void *x264_malloc( int64_t i_size )
{
#define HUGE_PAGE_SIZE 2*1024*1024
#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
if( i_size < 0 || i_size > (SIZE_MAX - HUGE_PAGE_SIZE) /*|| i_size > (SIZE_MAX - NATIVE_ALIGN - sizeof(void **))*/ )
{
x264_log_internal( X264_LOG_ERROR, "invalid size of malloc: %"PRId64"\n", i_size );
return NULL;
}
uint8_t *align_buf = NULL;
#if HAVE_MALLOC_H
#if HAVE_THP
#define HUGE_PAGE_SIZE 2*1024*1024
#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
/* Attempt to allocate huge pages to reduce TLB misses. */
if( i_size >= HUGE_PAGE_THRESHOLD )
{
......@@ -118,8 +123,6 @@ void *x264_malloc( int i_size )
}
}
else
#undef HUGE_PAGE_SIZE
#undef HUGE_PAGE_THRESHOLD
#endif
align_buf = memalign( NATIVE_ALIGN, i_size );
#else
......@@ -132,8 +135,10 @@ void *x264_malloc( int i_size )
}
#endif
if( !align_buf )
x264_log_internal( X264_LOG_ERROR, "malloc of size %d failed\n", i_size );
x264_log_internal( X264_LOG_ERROR, "malloc of size %"PRId64" failed\n", i_size );
return align_buf;
#undef HUGE_PAGE_SIZE
#undef HUGE_PAGE_THRESHOLD
}
/****************************************************************************
......@@ -196,7 +201,7 @@ error:
/****************************************************************************
* x264_picture_init:
****************************************************************************/
static void picture_init( x264_picture_t *pic )
REALIGN_STACK void x264_picture_init( x264_picture_t *pic )
{
memset( pic, 0, sizeof( x264_picture_t ) );
pic->i_type = X264_TYPE_AUTO;
......@@ -204,15 +209,10 @@ static void picture_init( x264_picture_t *pic )
pic->i_pic_struct = PIC_STRUCT_AUTO;
}
/* Exported entry point: forwards pic to the static picture_init() by way of
 * x264_stack_align() and returns nothing. */
void x264_picture_init( x264_picture_t *pic )
{
x264_stack_align( picture_init, pic );
}
/****************************************************************************
* x264_picture_alloc:
****************************************************************************/
static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
typedef struct
{
......@@ -243,16 +243,16 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
int csp = i_csp & X264_CSP_MASK;
if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
return -1;
picture_init( pic );
x264_picture_init( pic );
pic->img.i_csp = i_csp;
pic->img.i_plane = csp_tab[csp].planes;
int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
int plane_offset[3] = {0};
int frame_size = 0;
int64_t plane_offset[3] = {0};
int64_t frame_size = 0;
for( int i = 0; i < pic->img.i_plane; i++ )
{
int stride = (((int64_t)i_width * csp_tab[csp].width_fix8[i]) >> 8) * depth_factor;
int plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride;
int64_t plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride;
pic->img.i_stride[i] = stride;
plane_offset[i] = frame_size;
frame_size += plane_size;
......@@ -265,15 +265,10 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
return 0;
}
/* Exported entry point: forwards all four arguments to the static
 * picture_alloc() by way of x264_stack_align() and returns its result. */
int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
}
/****************************************************************************
* x264_picture_clean:
****************************************************************************/
static void picture_clean( x264_picture_t *pic )
REALIGN_STACK void x264_picture_clean( x264_picture_t *pic )
{
x264_free( pic->img.plane[0] );
......@@ -281,15 +276,10 @@ static void picture_clean( x264_picture_t *pic )
memset( pic, 0, sizeof( x264_picture_t ) );
}
/* Exported entry point: forwards pic to the static picture_clean() by way of
 * x264_stack_align() and returns nothing. */
void x264_picture_clean( x264_picture_t *pic )
{
x264_stack_align( picture_clean, pic );
}
/****************************************************************************
* x264_param_default:
****************************************************************************/
static void param_default( x264_param_t *param )
REALIGN_STACK void x264_param_default( x264_param_t *param )
{
/* */
memset( param, 0, sizeof( x264_param_t ) );
......@@ -434,11 +424,6 @@ static void param_default( x264_param_t *param )
param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC;
}
/* Exported entry point: forwards param to the static param_default() by way
 * of x264_stack_align() and returns nothing. */
void x264_param_default( x264_param_t *param )
{
x264_stack_align( param_default, param );
}
static int param_apply_preset( x264_param_t *param, const char *preset )
{
char *end;
......@@ -656,9 +641,9 @@ static int param_apply_tune( x264_param_t *param, const char *tune )
return 0;
}
static int param_default_preset( x264_param_t *param, const char *preset, const char *tune )
REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
param_default( param );
x264_param_default( param );
if( preset && param_apply_preset( param, preset ) < 0 )
return -1;
......@@ -667,12 +652,7 @@ static int param_default_preset( x264_param_t *param, const char *preset, const
return 0;
}
/* Exported entry point: forwards param, preset and tune to the static
 * param_default_preset() by way of x264_stack_align() and returns its result. */
int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
return x264_stack_align( param_default_preset, param, preset, tune );
}
static void param_apply_fastfirstpass( x264_param_t *param )
REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param )
{
/* Set faster options in case of turbo firstpass. */
if( param->rc.b_stat_write && !param->rc.b_stat_read )
......@@ -687,11 +667,6 @@ static void param_apply_fastfirstpass( x264_param_t *param )
}
}
/* Exported entry point: forwards param to the static
 * param_apply_fastfirstpass() by way of x264_stack_align(). */
void x264_param_apply_fastfirstpass( x264_param_t *param )
{
x264_stack_align( param_apply_fastfirstpass, param );
}
static int profile_string_to_int( const char *str )
{
if( !strcasecmp( str, "baseline" ) )
......@@ -709,7 +684,7 @@ static int profile_string_to_int( const char *str )
return -1;
}
static int param_apply_profile( x264_param_t *param, const char *profile )
REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
if( !profile )
return 0;
......@@ -776,11 +751,6 @@ static int param_apply_profile( x264_param_t *param, const char *profile )
return 0;
}
/* Exported entry point: forwards param and profile to the static
 * param_apply_profile() by way of x264_stack_align() and returns its result. */
int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
return x264_stack_align( param_apply_profile, param, profile );
}
static int parse_enum( const char *arg, const char * const *names, int *dst )
{
for( int i = 0; names[i]; i++ )
......@@ -842,7 +812,7 @@ static double atof_internal( const char *str, int *b_error )
#define atoi(str) atoi_internal( str, &b_error )
#define atof(str) atof_internal( str, &b_error )
static int param_parse( x264_param_t *p, const char *name, const char *value )
REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value )
{
char *name_buf = NULL;
int b_error = 0;
......@@ -1343,11 +1313,6 @@ static int param_parse( x264_param_t *p, const char *name, const char *value )
return b_error ? errortype : 0;
}
/* Exported entry point: forwards param, name and value to the static
 * param_parse() by way of x264_stack_align() and returns its result. */
int x264_param_parse( x264_param_t *param, const char *name, const char *value )
{
return x264_stack_align( param_parse, param, name, value );
}
/****************************************************************************
* x264_param2string:
****************************************************************************/
......
......@@ -47,7 +47,6 @@
#include <string.h>
#include <assert.h>
#include <limits.h>
#include "x264.h"
/****************************************************************************
* Macros
......@@ -256,23 +255,23 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
/****************************************************************************
* General functions
****************************************************************************/
void x264_reduce_fraction( uint32_t *n, uint32_t *d );
void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
X264_API void x264_reduce_fraction( uint32_t *n, uint32_t *d );
X264_API void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
void x264_log_internal( int i_level, const char *psz_fmt, ... );
X264_API void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
X264_API void x264_log_internal( int i_level, const char *psz_fmt, ... );
/* x264_malloc : will do or emulate a memalign
* you have to use x264_free for buffers allocated with x264_malloc */
void *x264_malloc( int );
void x264_free( void * );
X264_API void *x264_malloc( int64_t );
X264_API void x264_free( void * );
/* x264_slurp_file: malloc space for the whole file and read it */
char *x264_slurp_file( const char *filename );
X264_API char *x264_slurp_file( const char *filename );
/* x264_param2string: return a (malloced) string containing most of
* the encoding options */
char *x264_param2string( x264_param_t *p, int b_res );
X264_API char *x264_param2string( x264_param_t *p, int b_res );
/****************************************************************************
* Macros
......@@ -297,12 +296,12 @@ do {\
#define PREALLOC_INIT\
int prealloc_idx = 0;\
size_t prealloc_size = 0;\
int64_t prealloc_size = 0;\
uint8_t **preallocs[PREALLOC_BUF_SIZE];
#define PREALLOC( var, size )\
do {\
var = (void*)prealloc_size;\
var = (void*)(intptr_t)prealloc_size;\
preallocs[prealloc_idx++] = (uint8_t**)&var;\
prealloc_size += ALIGN(size, NATIVE_ALIGN);\
} while( 0 )
......
......@@ -45,7 +45,7 @@ static uint8_t *nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
#if HAVE_ARMV6
#include "arm/bitstream.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/bitstream.h"
#endif
......@@ -159,7 +159,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
if( cpu&X264_CPU_NEON )
pf->nal_escape = x264_nal_escape_neon;
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
pf->nal_escape = x264_nal_escape_neon;
#endif
......
......@@ -80,7 +80,7 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
#elif defined(ARCH_AARCH64)
#elif HAVE_AARCH64
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
......
......@@ -45,7 +45,7 @@
const x264_cpu_name_t x264_cpu_names[] =
{
#if HAVE_MMX
#if ARCH_X86 || ARCH_X86_64
// {"MMX", X264_CPU_MMX}, // we don't support asm on mmx1 cpus anymore
#define MMX2 X264_CPU_MMX|X264_CPU_MMX2
{"MMX2", MMX2},
......@@ -97,7 +97,7 @@ const x264_cpu_name_t x264_cpu_names[] =
{"", 0},
};
#if (ARCH_PPC && SYS_LINUX) || (ARCH_ARM && !HAVE_NEON)
#if (HAVE_ALTIVEC && SYS_LINUX) || (HAVE_ARMV6 && !HAVE_NEON)
#include <signal.h>
#include <setjmp.h>
static sigjmp_buf jmpbuf;
......@@ -298,7 +298,7 @@ uint32_t x264_cpu_detect( void )
return cpu;
}
#elif ARCH_PPC && HAVE_ALTIVEC
#elif HAVE_ALTIVEC
#if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD
#include <sys/sysctl.h>
......@@ -355,7 +355,7 @@ uint32_t x264_cpu_detect( void )
}
#endif
#elif ARCH_ARM
#elif HAVE_ARMV6
void x264_cpu_neon_test( void );
int x264_cpu_fast_neon_mrc_test( void );
......@@ -363,7 +363,6 @@ int x264_cpu_fast_neon_mrc_test( void );
uint32_t x264_cpu_detect( void )
{
int flags = 0;
#if HAVE_ARMV6
flags |= X264_CPU_ARMV6;
// don't do this hack if compiled with -mfpu=neon
......@@ -396,26 +395,25 @@ uint32_t x264_cpu_detect( void )
flags |= x264_cpu_fast_neon_mrc_test() ? X264_CPU_FAST_NEON_MRC : 0;
#endif
// TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
#endif
return flags;
}
#elif ARCH_AARCH64
#elif HAVE_AARCH64
/* Report the CPU capability flags for this build configuration.
 * X264_CPU_ARMV8 is always set; X264_CPU_NEON is added when the build
 * has HAVE_NEON enabled. No runtime probing is performed. */
uint32_t x264_cpu_detect( void )
{
    uint32_t cpu_flags = X264_CPU_ARMV8;
#if HAVE_NEON
    cpu_flags |= X264_CPU_NEON;
#endif
    return cpu_flags;
}
#elif ARCH_MIPS
#elif HAVE_MSA
uint32_t x264_cpu_detect( void )
{
uint32_t flags = 0;
#if HAVE_MSA
flags |= X264_CPU_MSA;
#endif
return flags;
return X264_CPU_MSA;
}
#else
......
......@@ -26,8 +26,8 @@
#ifndef X264_CPU_H
#define X264_CPU_H
uint32_t x264_cpu_detect( void );
int x264_cpu_num_processors( void );
X264_API uint32_t x264_cpu_detect( void );
X264_API int x264_cpu_num_processors( void );
void x264_cpu_emms( void );
void x264_cpu_sfence( void );
#if HAVE_MMX
......@@ -46,28 +46,11 @@ void x264_cpu_sfence( void );
#endif
#define x264_sfence x264_cpu_sfence
/* kludge:
* gcc can't give variables any greater alignment than the stack frame has.
* We need 32 byte alignment for AVX2, so here we make sure that the stack is
* aligned to 32 bytes.
* gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
* problem, but I don't want to require such a new version.
* aligning to 32 bytes only works if the compiler supports keeping that
* alignment between functions (osdep.h handles manual alignment of arrays
* if it doesn't).
*/
#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4))
intptr_t x264_stack_align( void (*func)(), ... );
#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
#else
#define x264_stack_align(func,...) func(__VA_ARGS__)
#endif
typedef struct
{
const char *name;
uint32_t flags;
} x264_cpu_name_t;
extern const x264_cpu_name_t x264_cpu_names[];
X264_API extern const x264_cpu_name_t x264_cpu_names[];
#endif
......@@ -29,16 +29,16 @@
#if HAVE_MMX
# include "x86/dct.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/dct.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/dct.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/dct.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/dct.h"
#endif
......@@ -682,7 +682,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
dctf->sub4x4_dct = x264_sub4x4_dct_neon;
......@@ -996,11 +996,11 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_altivec;
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
#if ARCH_AARCH64
#if HAVE_AARCH64
pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_neon;
pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_neon;
pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_neon;
......@@ -1010,9 +1010,9 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_neon;
pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
pf_progressive->sub_8x8 = x264_zigzag_sub_8x8_frame_neon;
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
}
#endif // HAVE_ARMV6 || ARCH_AARCH64
#endif // HAVE_ARMV6 || HAVE_AARCH64
#endif // HIGH_BIT_DEPTH
pf_interlaced->interleave_8x8_cavlc =
......@@ -1065,13 +1065,13 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
#endif // HIGH_BIT_DEPTH
#endif
#if !HIGH_BIT_DEPTH
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf_interlaced->interleave_8x8_cavlc =
pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_neon;
}
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
......
......@@ -667,13 +667,13 @@ void x264_macroblock_deblock( x264_t *h )
#if HAVE_MMX
#include "x86/deblock.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
#include "ppc/deblock.h"
#endif
#if HAVE_ARMV6
#include "arm/deblock.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/deblock.h"
#endif
#if HAVE_MSA
......@@ -782,7 +782,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
}
#endif // HAVE_ALTIVEC
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf->deblock_luma[1] = x264_deblock_v_luma_neon;
......
......@@ -162,7 +162,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
for( int p = 0; p < luma_plane_count; p++ )
{
int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
if( h->param.analyse.i_subpel_refine && b_fdec )
luma_plane_size *= 4;
......@@ -205,7 +205,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
if( h->frames.b_have_lowres )
{
int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) );
......@@ -244,7 +244,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
for( int p = 0; p < luma_plane_count; p++ )
{
int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
if( h->param.analyse.i_subpel_refine && b_fdec )
{
for( int i = 0; i < 4; i++ )
......@@ -274,7 +274,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
if( h->frames.b_have_lowres )
{
int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
for( int i = 0; i < 4; i++ )
frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;
......
......@@ -261,13 +261,14 @@ void x264_threadslice_cond_broadcast( x264_t *h, int pass );
void x264_threadslice_cond_wait( x264_t *h, int pass );
#define x264_frame_push x264_template(frame_push)
void x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
X264_API void x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
#define x264_frame_pop x264_template(frame_pop)
x264_frame_t *x264_frame_pop( x264_frame_t **list );
X264_API x264_frame_t *x264_frame_pop( x264_frame_t **list );
#define x264_frame_unshift x264_template(frame_unshift)
void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
X264_API void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
#define x264_frame_shift x264_template(frame_shift)
x264_frame_t *x264_frame_shift( x264_frame_t **list );
X264_API x264_frame_t *x264_frame_shift( x264_frame_t **list );
#define x264_frame_push_unused x264_template(frame_push_unused)
void x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
#define x264_frame_push_blank_unused x264_template(frame_push_blank_unused)
......
......@@ -29,16 +29,16 @@
#if HAVE_MMX
#include "x86/mc.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
#include "ppc/mc.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
#include "arm/mc.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/mc.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
#include "mips/mc.h"
#endif
......@@ -680,7 +680,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
#if HAVE_ARMV6
x264_mc_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_mc_init_aarch64( cpu, pf );
#endif
#if HAVE_MSA
......
......@@ -131,8 +131,11 @@ static cl_program opencl_cache_load( x264_t *h, const char *dev_name, const char
uint8_t *binary = NULL;
fseek( fp, 0, SEEK_END );
size_t size = ftell( fp );
rewind( fp );
int64_t file_size = ftell( fp );
fseek( fp, 0, SEEK_SET );
if( file_size < 0 || file_size > SIZE_MAX )
goto fail;
size_t size = file_size;
CHECKED_MALLOC( binary, size );
if( fread( binary, 1, size, fp ) != size )
......
......@@ -27,11 +27,6 @@
#include "osdep.h"
#ifdef _WIN32
#include <windows.h>
#include <io.h>
#endif
#if SYS_WINDOWS
#include <sys/types.h>
#include <sys/timeb.h>
......@@ -111,114 +106,3 @@ int x264_threading_init( void )
return 0;
}
#endif
#ifdef _WIN32
/* Functions for dealing with Unicode on Windows. */
/* fopen() taking UTF-8 arguments: both strings are converted to UTF-16 and
 * passed to _wfopen(). Returns NULL when either conversion fails. */
FILE *x264_fopen( const char *filename, const char *mode )
{
    wchar_t wide_name[MAX_PATH];
    wchar_t wide_mode[16];

    if( !utf8_to_utf16( filename, wide_name ) )
        return NULL;
    if( !utf8_to_utf16( mode, wide_mode ) )
        return NULL;
    return _wfopen( wide_name, wide_mode );
}
/* rename() taking UTF-8 paths. Any existing file at newname is unlinked
 * first, then _wrename() is called and its result returned.
 * Returns -1 when either path cannot be converted to UTF-16. */
int x264_rename( const char *oldname, const char *newname )
{
    wchar_t wide_old[MAX_PATH];
    wchar_t wide_new[MAX_PATH];

    if( !utf8_to_utf16( oldname, wide_old ) || !utf8_to_utf16( newname, wide_new ) )
        return -1;

    /* POSIX says that rename() removes the destination, but Win32 doesn't. */
    _wunlink( wide_new );
    return _wrename( wide_old, wide_new );
}
/* stat() taking a UTF-8 path: converts it to UTF-16 and returns the result
 * of _wstati64(), or -1 when the conversion fails. */
int x264_stat( const char *path, x264_struct_stat *buf )
{
    wchar_t wide_path[MAX_PATH];
    return utf8_to_utf16( path, wide_path ) ? _wstati64( wide_path, buf ) : -1;
}
#if !HAVE_WINRT
/* vfprintf() that emits Unicode correctly on a Windows console.
 * When stream is stdout/stderr and is attached to a console, the text is
 * formatted into a 4096-byte buffer, converted from UTF-8 to UTF-16 and
 * written with WriteConsoleW(); the UTF-8 length is returned.
 * In every other case the call is passed through to vfprintf(). */
int x264_vfprintf( FILE *stream, const char *format, va_list arg )
{
    HANDLE console = NULL;
    DWORD console_mode;

    if( stream == stdout )
        console = GetStdHandle( STD_OUTPUT_HANDLE );
    else if( stream == stderr )
        console = GetStdHandle( STD_ERROR_HANDLE );

    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
    if( !GetConsoleMode( console, &console_mode ) )
        return vfprintf( stream, format, arg );

    char utf8[4096];
    wchar_t utf16[4096];

    /* Format from a copy so that arg is still intact for the fallback path. */
    va_list ap;
    va_copy( ap, arg );
    int len = vsnprintf( utf8, sizeof(utf8), format, ap );
    va_end( ap );

    /* Nothing formatted, or the text did not fit: let vfprintf() deal with it. */
    if( len <= 0 || len >= sizeof(utf8) )
        return vfprintf( stream, format, arg );

    /* WriteConsoleW is the most reliable way to output Unicode to a console. */
    int len_utf16 = MultiByteToWideChar( CP_UTF8, 0, utf8, len, utf16, sizeof(utf16)/sizeof(wchar_t) );
    DWORD written;
    WriteConsoleW( console, utf16, len_utf16, &written, NULL );
    return len;
}
/* Converts the UTF-8 path to UTF-16 and returns the result of
 * WaitNamedPipeW() called with a timeout argument of 0.
 * Returns 0 when the path cannot be converted. */
int x264_is_pipe( const char *path )
{
    wchar_t wide_path[MAX_PATH];

    if( !utf8_to_utf16( path, wide_path ) )
        return 0;
    return WaitNamedPipeW( wide_path, 0 );
}
#endif
#if defined(_MSC_VER) && _MSC_VER < 1900
/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
/* Variadic front end for x264_vsnprintf(): packs the trailing arguments
 * into a va_list, delegates, and returns whatever x264_vsnprintf() returns. */
int x264_snprintf( char *s, size_t n, const char *fmt, ... )
{
    va_list ap;
    va_start( ap, fmt );
    int written = x264_vsnprintf( s, n, fmt, ap );
    va_end( ap );
    return written;
}
/* vsnprintf() replacement built on MSVC's _vsnprintf().
 * Formats into s (at most n bytes), forces a terminator when the output was
 * cut short, and returns the formatted length; when _vsnprintf() reports
 * failure/overflow (or n is 0) the length is obtained from _vscprintf(). */
int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
{
    int written = -1;

    if( n )
    {
        /* Format from a copy so that arg stays usable for _vscprintf() below. */
        va_list ap;
        va_copy( ap, arg );
        written = _vsnprintf( s, n, fmt, ap );
        va_end( ap );

        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
        if( written < 0 || written >= n )
            s[n-1] = '\0';
    }

    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
    return written < 0 ? _vscprintf( fmt, arg ) : written;
}
#endif
#endif
......@@ -43,6 +43,13 @@
#include <math.h>
#endif
#ifdef _WIN32
#include <windows.h>
#include <io.h>
#endif
#include "x264.h"
#if !HAVE_LOG2F
#define log2f(x) (logf(x)/0.693147180559945f)
#define log2(x) (log(x)/0.693147180559945)
......@@ -54,12 +61,6 @@
#define strncasecmp _strnicmp
#define strtok_r strtok_s
#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#if _MSC_VER < 1900
int x264_snprintf( char *s, size_t n, const char *fmt, ... );
int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
#define snprintf x264_snprintf
#define vsnprintf x264_vsnprintf
#endif
#else
#include <strings.h>
#endif
......@@ -76,14 +77,81 @@ int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
#define strtok_r(str,delim,save) strtok(str,delim)
#endif
#if defined(_MSC_VER) && _MSC_VER < 1900
/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
/* vsnprintf() replacement built on MSVC's _vsnprintf().
 * Formats into s (at most n bytes), forces a terminator when the output was
 * cut short, and returns the formatted length; when _vsnprintf() reports
 * failure/overflow (or n is 0) the length is obtained from _vscprintf(). */
static inline int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
{
    int written = -1;

    if( n )
    {
        /* Format from a copy so that arg stays usable for _vscprintf() below. */
        va_list ap;
        va_copy( ap, arg );
        written = _vsnprintf( s, n, fmt, ap );
        va_end( ap );

        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
        if( written < 0 || written >= n )
            s[n-1] = '\0';
    }

    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
    return written < 0 ? _vscprintf( fmt, arg ) : written;
}
/* Variadic front end for x264_vsnprintf(): packs the trailing arguments
 * into a va_list, delegates, and returns whatever x264_vsnprintf() returns. */
static inline int x264_snprintf( char *s, size_t n, const char *fmt, ... )
{
    va_list ap;
    va_start( ap, fmt );
    int written = x264_vsnprintf( s, n, fmt, ap );
    va_end( ap );
    return written;
}
#define snprintf x264_snprintf
#define vsnprintf x264_vsnprintf
#endif
#ifdef _WIN32
#define utf8_to_utf16( utf8, utf16 )\
MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
FILE *x264_fopen( const char *filename, const char *mode );
int x264_rename( const char *oldname, const char *newname );
/* Functions for dealing with Unicode on Windows. */
/* fopen() taking UTF-8 arguments: both strings are converted to UTF-16 and
 * passed to _wfopen(). Returns NULL when either conversion fails. */
static inline FILE *x264_fopen( const char *filename, const char *mode )
{
    wchar_t wide_name[MAX_PATH];
    wchar_t wide_mode[16];

    if( !utf8_to_utf16( filename, wide_name ) )
        return NULL;
    if( !utf8_to_utf16( mode, wide_mode ) )
        return NULL;
    return _wfopen( wide_name, wide_mode );
}
/* rename() taking UTF-8 paths. Any existing file at newname is unlinked
 * first, then _wrename() is called and its result returned.
 * Returns -1 when either path cannot be converted to UTF-16. */
static inline int x264_rename( const char *oldname, const char *newname )
{
    wchar_t wide_old[MAX_PATH];
    wchar_t wide_new[MAX_PATH];

    if( !utf8_to_utf16( oldname, wide_old ) || !utf8_to_utf16( newname, wide_new ) )
        return -1;

    /* POSIX says that rename() removes the destination, but Win32 doesn't. */
    _wunlink( wide_new );
    return _wrename( wide_old, wide_new );
}
#define x264_struct_stat struct _stati64
#define x264_fstat _fstati64
int x264_stat( const char *path, x264_struct_stat *buf );
/* stat() taking a UTF-8 path: converts it to UTF-16 and returns the result
 * of _wstati64(), or -1 when the conversion fails. */
static inline int x264_stat( const char *path, x264_struct_stat *buf )
{
    wchar_t wide_path[MAX_PATH];
    return utf8_to_utf16( path, wide_path ) ? _wstati64( wide_path, buf ) : -1;
}
#else
#define x264_fopen fopen
#define x264_rename rename
......@@ -93,11 +161,49 @@ int x264_stat( const char *path, x264_struct_stat *buf );
#endif
/* mdate: return the current date in microseconds */
int64_t x264_mdate( void );
X264_API int64_t x264_mdate( void );
#if defined(_WIN32) && !HAVE_WINRT
int x264_vfprintf( FILE *stream, const char *format, va_list arg );
int x264_is_pipe( const char *path );
/* vfprintf() that emits Unicode correctly on a Windows console.
 * When stream is stdout/stderr and is attached to a console, the text is
 * formatted into a 4096-byte buffer, converted from UTF-8 to UTF-16 and
 * written with WriteConsoleW(); the UTF-8 length is returned.
 * In every other case the call is passed through to vfprintf(). */
static inline int x264_vfprintf( FILE *stream, const char *format, va_list arg )
{
    HANDLE console = NULL;
    DWORD console_mode;

    if( stream == stdout )
        console = GetStdHandle( STD_OUTPUT_HANDLE );
    else if( stream == stderr )
        console = GetStdHandle( STD_ERROR_HANDLE );

    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
    if( !GetConsoleMode( console, &console_mode ) )
        return vfprintf( stream, format, arg );

    char utf8[4096];
    wchar_t utf16[4096];

    /* Format from a copy so that arg is still intact for the fallback path. */
    va_list ap;
    va_copy( ap, arg );
    int len = vsnprintf( utf8, sizeof(utf8), format, ap );
    va_end( ap );

    /* Nothing formatted, or the text did not fit: let vfprintf() deal with it. */
    if( len <= 0 || len >= sizeof(utf8) )
        return vfprintf( stream, format, arg );

    /* WriteConsoleW is the most reliable way to output Unicode to a console. */
    int len_utf16 = MultiByteToWideChar( CP_UTF8, 0, utf8, len, utf16, sizeof(utf16)/sizeof(wchar_t) );
    DWORD written;
    WriteConsoleW( console, utf16, len_utf16, &written, NULL );
    return len;
}
/* Converts the UTF-8 path to UTF-16 and returns the result of
 * WaitNamedPipeW() called with a timeout argument of 0.
 * Returns 0 when the path cannot be converted. */
static inline int x264_is_pipe( const char *path )
{
    wchar_t wide_path[MAX_PATH];

    if( !utf8_to_utf16( path, wide_path ) )
        return 0;
    return WaitNamedPipeW( wide_path, 0 );
}
#else
#define x264_vfprintf vfprintf
#define x264_is_pipe(x) 0
......@@ -163,6 +269,12 @@ int x264_is_pipe( const char *path );
#define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16
#endif
#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)
#define REALIGN_STACK __attribute__((force_align_arg_pointer))
#else
#define REALIGN_STACK
#endif
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#define UNUSED __attribute__((unused))
#define ALWAYS_INLINE __attribute__((always_inline)) inline
......@@ -247,7 +359,7 @@ static inline int x264_pthread_create( x264_pthread_t *t, void *a, void *(*f)(vo
#endif
#if HAVE_WIN32THREAD || PTW32_STATIC_LIB
int x264_threading_init( void );
X264_API int x264_threading_init( void );
#else
#define x264_threading_init() 0
#endif
......
......@@ -31,18 +31,18 @@
# include "x86/pixel.h"
# include "x86/predict.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/pixel.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/pixel.h"
# include "arm/predict.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/pixel.h"
# include "aarch64/predict.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/pixel.h"
#endif
......@@ -508,7 +508,7 @@ SATD_X_DECL7( _avx512 )
#endif
#if !HIGH_BIT_DEPTH
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
SATD_X_DECL7( _neon )
#endif
#endif // !HIGH_BIT_DEPTH
......@@ -532,7 +532,7 @@ INTRA_MBCMP_8x8(sa8d,, _c )
INTRA_MBCMP_8x8( sad, _mmx2, _c )
INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 )
#endif
#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || ARCH_AARCH64)
#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || HAVE_AARCH64)
INTRA_MBCMP_8x8( sad, _neon, _neon )
INTRA_MBCMP_8x8(sa8d, _neon, _neon )
#endif
......@@ -602,7 +602,7 @@ INTRA_MBCMP(satd, 8x16, dc, h, v, c, _neon, _c )
INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon )
INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon )
#endif
#if !HIGH_BIT_DEPTH && ARCH_AARCH64
#if !HIGH_BIT_DEPTH && HAVE_AARCH64
INTRA_MBCMP( sad, 4x4, v, h, dc, , _neon, _neon )
INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, _neon )
INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon )
......@@ -1434,7 +1434,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
}
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
INIT8( sad, _neon );
......@@ -1475,7 +1475,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon;
pixf->ssim_end4 = x264_pixel_ssim_end4_neon;
}
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
#if HAVE_MSA
if( cpu&X264_CPU_MSA )
......
......@@ -34,16 +34,16 @@
#if HAVE_MMX
# include "x86/predict.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/predict.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/predict.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/predict.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/predict.h"
#endif
......@@ -906,7 +906,7 @@ void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] )
x264_predict_16x16_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_16x16_init_aarch64( cpu, pf );
#endif
......@@ -949,7 +949,7 @@ void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] )
x264_predict_8x8c_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x8c_init_aarch64( cpu, pf );
#endif
......@@ -981,7 +981,7 @@ void x264_predict_8x16c_init( int cpu, x264_predict_t pf[7] )
x264_predict_8x16c_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x16c_init_aarch64( cpu, pf );
#endif
}
......@@ -1010,7 +1010,7 @@ void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_
x264_predict_8x8_init_arm( cpu, pf, predict_filter );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x8_init_aarch64( cpu, pf, predict_filter );
#endif
......@@ -1047,7 +1047,7 @@ void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] )
x264_predict_4x4_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_4x4_init_aarch64( cpu, pf );
#endif
}
......
......@@ -31,16 +31,16 @@
#if HAVE_MMX
#include "x86/quant.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/quant.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/quant.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/quant.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/quant.h"
#endif
......@@ -756,7 +756,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->coeff_last8 = x264_coeff_last8_arm;
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf->quant_2x2_dc = x264_quant_2x2_dc_neon;
......@@ -776,7 +776,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->decimate_score64 = x264_decimate_score64_neon;
}
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_ARMV8 )
{
pf->coeff_last4 = x264_coeff_last4_aarch64;
......
......@@ -33,7 +33,7 @@ typedef struct
uint8_t i_size;
} vlc_t;
extern const x264_level_t x264_levels[];
X264_API extern const x264_level_t x264_levels[];
extern const uint8_t x264_exp2_lut[64];
extern const float x264_log2_lut[128];
......
......@@ -47,7 +47,7 @@ struct x264_threadpool_t
x264_sync_frame_list_t done; /* list of jobs that have finished processing */
};
static void *threadpool_thread_internal( x264_threadpool_t *pool )
REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool )
{
if( pool->init_func )
pool->init_func( pool->init_arg );
......@@ -72,11 +72,6 @@ static void *threadpool_thread_internal( x264_threadpool_t *pool )
return NULL;
}
/* Runs threadpool_thread_internal() on pool by way of x264_stack_align()
 * and returns that call's result cast to void*. */
static void *threadpool_thread( x264_threadpool_t *pool )
{
return (void*)x264_stack_align( threadpool_thread_internal, pool );
}
int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
void (*init_func)(void *), void *init_arg )
{
......