Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • videolan/x264
  • EwoutH/x264
  • gramner/x264
  • BugMaster/x264
  • MaskRay/x264
  • thresh/x264
  • tpm/x264
  • wolfired/x264
  • ifb/x264
  • robinstorm/x264
  • ltnokiago/x264
  • janne/x264
  • Kromjunya/x264
  • trisnaayu0596/x264
  • felipegarcia1402/x264
  • coder2004/x264
  • philou/x264
  • walagnatalia/x264
  • DonDiego/x264
  • JHammler/x264
  • qyot27/x264
  • dwbuiten/x264
  • Kagami/x264
  • andriy-andreyev/x264
  • gxw/x264
  • trofi/x264
  • kierank/x264
  • aureliendavid/x264
  • galad/x264
  • roommini/x264
  • ocrete/x264
  • mstorsjo/x264
  • yinsj0116/x264
  • mamonet/x264
  • 1div0/x264
  • ko1265/x264
  • sergiomb2/x264
  • xutongda/x264
  • wenzhiwu/x264
  • arrowd/x264
  • FranceBB/x264
  • ziemek99/x264
  • longervision/x264
  • xopok/x264
  • jbk/x264
  • szatmary/x264
  • pekdon/x264
  • Jiangguyu/x264
  • jrtc27/x264
  • kankanol1/x264
  • gxwLite/x264
  • brad/x264
  • Gc6026/x264
  • jdek/x264
  • appcrash/x264
  • tguillem/x264
  • As/x264
  • wevian/x264
  • wangluls/x264
  • RellikJaeger/x264
  • hum/x264
  • rogerhardiman/x264
  • jankowalski12611/x264
  • zhijie1996/x264
  • yinshiyou/x264
  • Freed-Wu/x264
  • yajcoca/x264
  • bUd/x264
  • chienvannguyen2020/x264
  • nurbinakhatun386/x264
  • Siberiawind/x-264-meson
  • HecaiYuan/x264
  • david.chen/x264
  • Ytsejam76/x264
  • robUx4/x264
  • zhaoshiz/x-264-arm64ec
  • yintong.ustc/x-264-bd-ventana
  • nekobasu/x264
  • Courmisch/x264
  • BD-qjy/x264
  • quink/x264
  • markos/x264
82 results
Show changes
Commits on Source (11)
......@@ -8,6 +8,9 @@ vpath %.S $(SRCPATH)
vpath %.asm $(SRCPATH)
vpath %.rc $(SRCPATH)
CFLAGS += $(CFLAGSPROF)
LDFLAGS += $(LDFLAGSPROF)
GENERATED =
all: default
......@@ -246,6 +249,8 @@ $(LIBX264): $(GENERATED) .depend $(OBJS) $(OBJASM)
$(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO)
$(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
$(IMPLIBNAME): $(SONAME)
ifneq ($(EXE),)
.PHONY: x264 checkasm8 checkasm10 example
x264: x264$(EXE)
......@@ -266,6 +271,9 @@ checkasm10$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_10) $(LIBX264)
example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264)
$(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS)
$(OBJS) $(OBJSO): CFLAGS += $(CFLAGSSO)
$(OBJCLI): CFLAGS += $(CFLAGSCLI)
$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) $(OBJEXAMPLE): .depend
%.o: %.c
......@@ -336,7 +344,7 @@ ifneq ($(wildcard .depend),)
include .depend
endif
OBJPROF = $(OBJS) $(OBJCLI)
OBJPROF = $(OBJS) $(OBJSO) $(OBJCLI)
# These should cover most of the important codepaths
OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb
OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0 --slice-max-mbs 50
......@@ -354,7 +362,7 @@ fprofiled:
@echo 'i.e. YUV with resolution in the filename, y4m, or avisynth.'
else
fprofiled: clean
$(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)"
$(MAKE) x264$(EXE) CFLAGSPROF="$(PROF_GEN_CC)" LDFLAGSPROF="$(PROF_GEN_LD)"
$(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;))
ifeq ($(COMPILER),CL)
# Because Visual Studio timestamps the object files within the PGD, it fails to build if they change - only the executable should be deleted
......@@ -362,7 +370,7 @@ ifeq ($(COMPILER),CL)
else
rm -f $(OBJPROF)
endif
$(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)"
$(MAKE) CFLAGSPROF="$(PROF_USE_CC)" LDFLAGSPROF="$(PROF_USE_LD)"
rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
endif
......
......@@ -99,13 +99,18 @@ void x264_log_internal( int i_level, const char *psz_fmt, ... )
/****************************************************************************
* x264_malloc:
****************************************************************************/
void *x264_malloc( int i_size )
void *x264_malloc( int64_t i_size )
{
#define HUGE_PAGE_SIZE 2*1024*1024
#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
if( i_size < 0 || i_size > (SIZE_MAX - HUGE_PAGE_SIZE) /*|| i_size > (SIZE_MAX - NATIVE_ALIGN - sizeof(void **))*/ )
{
x264_log_internal( X264_LOG_ERROR, "invalid size of malloc: %"PRId64"\n", i_size );
return NULL;
}
uint8_t *align_buf = NULL;
#if HAVE_MALLOC_H
#if HAVE_THP
#define HUGE_PAGE_SIZE 2*1024*1024
#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
/* Attempt to allocate huge pages to reduce TLB misses. */
if( i_size >= HUGE_PAGE_THRESHOLD )
{
......@@ -118,8 +123,6 @@ void *x264_malloc( int i_size )
}
}
else
#undef HUGE_PAGE_SIZE
#undef HUGE_PAGE_THRESHOLD
#endif
align_buf = memalign( NATIVE_ALIGN, i_size );
#else
......@@ -132,8 +135,10 @@ void *x264_malloc( int i_size )
}
#endif
if( !align_buf )
x264_log_internal( X264_LOG_ERROR, "malloc of size %d failed\n", i_size );
x264_log_internal( X264_LOG_ERROR, "malloc of size %"PRId64" failed\n", i_size );
return align_buf;
#undef HUGE_PAGE_SIZE
#undef HUGE_PAGE_THRESHOLD
}
/****************************************************************************
......@@ -196,7 +201,7 @@ error:
/****************************************************************************
* x264_picture_init:
****************************************************************************/
static void picture_init( x264_picture_t *pic )
REALIGN_STACK void x264_picture_init( x264_picture_t *pic )
{
memset( pic, 0, sizeof( x264_picture_t ) );
pic->i_type = X264_TYPE_AUTO;
......@@ -204,15 +209,10 @@ static void picture_init( x264_picture_t *pic )
pic->i_pic_struct = PIC_STRUCT_AUTO;
}
/* Public entry point: invokes the static picture_init() via x264_stack_align(). */
void x264_picture_init( x264_picture_t *pic )
{
x264_stack_align( picture_init, pic );
}
/****************************************************************************
* x264_picture_alloc:
****************************************************************************/
static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
typedef struct
{
......@@ -243,16 +243,16 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
int csp = i_csp & X264_CSP_MASK;
if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
return -1;
picture_init( pic );
x264_picture_init( pic );
pic->img.i_csp = i_csp;
pic->img.i_plane = csp_tab[csp].planes;
int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
int plane_offset[3] = {0};
int frame_size = 0;
int64_t plane_offset[3] = {0};
int64_t frame_size = 0;
for( int i = 0; i < pic->img.i_plane; i++ )
{
int stride = (((int64_t)i_width * csp_tab[csp].width_fix8[i]) >> 8) * depth_factor;
int plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride;
int64_t plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride;
pic->img.i_stride[i] = stride;
plane_offset[i] = frame_size;
frame_size += plane_size;
......@@ -265,15 +265,10 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
return 0;
}
/* Public entry point: invokes the static picture_alloc() via x264_stack_align()
 * and returns its result. */
int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
}
/****************************************************************************
* x264_picture_clean:
****************************************************************************/
static void picture_clean( x264_picture_t *pic )
REALIGN_STACK void x264_picture_clean( x264_picture_t *pic )
{
x264_free( pic->img.plane[0] );
......@@ -281,15 +276,10 @@ static void picture_clean( x264_picture_t *pic )
memset( pic, 0, sizeof( x264_picture_t ) );
}
/* Public entry point: invokes the static picture_clean() via x264_stack_align(). */
void x264_picture_clean( x264_picture_t *pic )
{
x264_stack_align( picture_clean, pic );
}
/****************************************************************************
* x264_param_default:
****************************************************************************/
static void param_default( x264_param_t *param )
REALIGN_STACK void x264_param_default( x264_param_t *param )
{
/* */
memset( param, 0, sizeof( x264_param_t ) );
......@@ -434,11 +424,6 @@ static void param_default( x264_param_t *param )
param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC;
}
/* Public entry point: invokes the static param_default() via x264_stack_align(). */
void x264_param_default( x264_param_t *param )
{
x264_stack_align( param_default, param );
}
static int param_apply_preset( x264_param_t *param, const char *preset )
{
char *end;
......@@ -656,9 +641,9 @@ static int param_apply_tune( x264_param_t *param, const char *tune )
return 0;
}
static int param_default_preset( x264_param_t *param, const char *preset, const char *tune )
REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
param_default( param );
x264_param_default( param );
if( preset && param_apply_preset( param, preset ) < 0 )
return -1;
......@@ -667,12 +652,7 @@ static int param_default_preset( x264_param_t *param, const char *preset, const
return 0;
}
/* Public entry point: invokes the static param_default_preset() via
 * x264_stack_align() and returns its result. */
int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
return x264_stack_align( param_default_preset, param, preset, tune );
}
static void param_apply_fastfirstpass( x264_param_t *param )
REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param )
{
/* Set faster options in case of turbo firstpass. */
if( param->rc.b_stat_write && !param->rc.b_stat_read )
......@@ -687,11 +667,6 @@ static void param_apply_fastfirstpass( x264_param_t *param )
}
}
/* Public entry point: invokes the static param_apply_fastfirstpass() via
 * x264_stack_align(). */
void x264_param_apply_fastfirstpass( x264_param_t *param )
{
x264_stack_align( param_apply_fastfirstpass, param );
}
static int profile_string_to_int( const char *str )
{
if( !strcasecmp( str, "baseline" ) )
......@@ -709,7 +684,7 @@ static int profile_string_to_int( const char *str )
return -1;
}
static int param_apply_profile( x264_param_t *param, const char *profile )
REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
if( !profile )
return 0;
......@@ -776,11 +751,6 @@ static int param_apply_profile( x264_param_t *param, const char *profile )
return 0;
}
/* Public entry point: invokes the static param_apply_profile() via
 * x264_stack_align() and returns its result. */
int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
return x264_stack_align( param_apply_profile, param, profile );
}
static int parse_enum( const char *arg, const char * const *names, int *dst )
{
for( int i = 0; names[i]; i++ )
......@@ -842,7 +812,7 @@ static double atof_internal( const char *str, int *b_error )
#define atoi(str) atoi_internal( str, &b_error )
#define atof(str) atof_internal( str, &b_error )
static int param_parse( x264_param_t *p, const char *name, const char *value )
REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value )
{
char *name_buf = NULL;
int b_error = 0;
......@@ -1343,11 +1313,6 @@ static int param_parse( x264_param_t *p, const char *name, const char *value )
return b_error ? errortype : 0;
}
/* Public entry point: invokes the static param_parse() via x264_stack_align()
 * and returns its result. */
int x264_param_parse( x264_param_t *param, const char *name, const char *value )
{
return x264_stack_align( param_parse, param, name, value );
}
/****************************************************************************
* x264_param2string:
****************************************************************************/
......
......@@ -47,7 +47,6 @@
#include <string.h>
#include <assert.h>
#include <limits.h>
#include "x264.h"
/****************************************************************************
* Macros
......@@ -256,23 +255,23 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
/****************************************************************************
* General functions
****************************************************************************/
void x264_reduce_fraction( uint32_t *n, uint32_t *d );
void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
X264_API void x264_reduce_fraction( uint32_t *n, uint32_t *d );
X264_API void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
void x264_log_internal( int i_level, const char *psz_fmt, ... );
X264_API void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
X264_API void x264_log_internal( int i_level, const char *psz_fmt, ... );
/* x264_malloc : will do or emulate a memalign
* you have to use x264_free for buffers allocated with x264_malloc */
void *x264_malloc( int );
void x264_free( void * );
X264_API void *x264_malloc( int64_t );
X264_API void x264_free( void * );
/* x264_slurp_file: malloc space for the whole file and read it */
char *x264_slurp_file( const char *filename );
X264_API char *x264_slurp_file( const char *filename );
/* x264_param2string: return a (malloced) string containing most of
* the encoding options */
char *x264_param2string( x264_param_t *p, int b_res );
X264_API char *x264_param2string( x264_param_t *p, int b_res );
/****************************************************************************
* Macros
......@@ -297,12 +296,12 @@ do {\
#define PREALLOC_INIT\
int prealloc_idx = 0;\
size_t prealloc_size = 0;\
int64_t prealloc_size = 0;\
uint8_t **preallocs[PREALLOC_BUF_SIZE];
#define PREALLOC( var, size )\
do {\
var = (void*)prealloc_size;\
var = (void*)(intptr_t)prealloc_size;\
preallocs[prealloc_idx++] = (uint8_t**)&var;\
prealloc_size += ALIGN(size, NATIVE_ALIGN);\
} while( 0 )
......
......@@ -45,7 +45,7 @@ static uint8_t *nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
#if HAVE_ARMV6
#include "arm/bitstream.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/bitstream.h"
#endif
......@@ -159,7 +159,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
if( cpu&X264_CPU_NEON )
pf->nal_escape = x264_nal_escape_neon;
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
pf->nal_escape = x264_nal_escape_neon;
#endif
......
......@@ -80,7 +80,7 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
#elif defined(ARCH_AARCH64)
#elif HAVE_AARCH64
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
......
......@@ -45,7 +45,7 @@
const x264_cpu_name_t x264_cpu_names[] =
{
#if HAVE_MMX
#if ARCH_X86 || ARCH_X86_64
// {"MMX", X264_CPU_MMX}, // we don't support asm on mmx1 cpus anymore
#define MMX2 X264_CPU_MMX|X264_CPU_MMX2
{"MMX2", MMX2},
......@@ -97,7 +97,7 @@ const x264_cpu_name_t x264_cpu_names[] =
{"", 0},
};
#if (ARCH_PPC && SYS_LINUX) || (ARCH_ARM && !HAVE_NEON)
#if (HAVE_ALTIVEC && SYS_LINUX) || (HAVE_ARMV6 && !HAVE_NEON)
#include <signal.h>
#include <setjmp.h>
static sigjmp_buf jmpbuf;
......@@ -298,7 +298,7 @@ uint32_t x264_cpu_detect( void )
return cpu;
}
#elif ARCH_PPC && HAVE_ALTIVEC
#elif HAVE_ALTIVEC
#if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD
#include <sys/sysctl.h>
......@@ -355,7 +355,7 @@ uint32_t x264_cpu_detect( void )
}
#endif
#elif ARCH_ARM
#elif HAVE_ARMV6
void x264_cpu_neon_test( void );
int x264_cpu_fast_neon_mrc_test( void );
......@@ -363,7 +363,6 @@ int x264_cpu_fast_neon_mrc_test( void );
uint32_t x264_cpu_detect( void )
{
int flags = 0;
#if HAVE_ARMV6
flags |= X264_CPU_ARMV6;
// don't do this hack if compiled with -mfpu=neon
......@@ -396,26 +395,25 @@ uint32_t x264_cpu_detect( void )
flags |= x264_cpu_fast_neon_mrc_test() ? X264_CPU_FAST_NEON_MRC : 0;
#endif
// TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
#endif
return flags;
}
#elif ARCH_AARCH64
#elif HAVE_AARCH64
/* Reports the CPU feature flags for the AArch64 build branch: always
 * X264_CPU_ARMV8, plus X264_CPU_NEON when the build has HAVE_NEON set. */
uint32_t x264_cpu_detect( void )
{
    uint32_t flags = X264_CPU_ARMV8;
#if HAVE_NEON
    flags |= X264_CPU_NEON;
#endif
    return flags;
}
#elif ARCH_MIPS
#elif HAVE_MSA
/* Reports the CPU feature flags for the MSA build branch.
 * NOTE(review): as shown here the body contains two consecutive return
 * statements, so `return X264_CPU_MSA;` is unreachable. This looks like the
 * old and new revisions of the body interleaved by the diff view -- confirm
 * against the actual file before changing anything. */
uint32_t x264_cpu_detect( void )
{
uint32_t flags = 0;
#if HAVE_MSA
flags |= X264_CPU_MSA;
#endif
return flags;
return X264_CPU_MSA;
}
#else
......
......@@ -26,8 +26,8 @@
#ifndef X264_CPU_H
#define X264_CPU_H
uint32_t x264_cpu_detect( void );
int x264_cpu_num_processors( void );
X264_API uint32_t x264_cpu_detect( void );
X264_API int x264_cpu_num_processors( void );
void x264_cpu_emms( void );
void x264_cpu_sfence( void );
#if HAVE_MMX
......@@ -46,28 +46,11 @@ void x264_cpu_sfence( void );
#endif
#define x264_sfence x264_cpu_sfence
/* kludge:
* gcc can't give variables any greater alignment than the stack frame has.
* We need 32 byte alignment for AVX2, so here we make sure that the stack is
* aligned to 32 bytes.
* gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
* problem, but I don't want to require such a new version.
* aligning to 32 bytes only works if the compiler supports keeping that
* alignment between functions (osdep.h handles manual alignment of arrays
* if it doesn't).
*/
#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4))
intptr_t x264_stack_align( void (*func)(), ... );
#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
#else
#define x264_stack_align(func,...) func(__VA_ARGS__)
#endif
/* Associates a CPU capability name with its X264_CPU_* flag bits; this is the
 * element type of the x264_cpu_names[] table. */
typedef struct
{
const char *name; /* capability name string */
uint32_t flags; /* flag bits associated with that name */
} x264_cpu_name_t;
extern const x264_cpu_name_t x264_cpu_names[];
X264_API extern const x264_cpu_name_t x264_cpu_names[];
#endif
......@@ -29,16 +29,16 @@
#if HAVE_MMX
# include "x86/dct.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/dct.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/dct.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/dct.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/dct.h"
#endif
......@@ -682,7 +682,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
dctf->sub4x4_dct = x264_sub4x4_dct_neon;
......@@ -996,11 +996,11 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_altivec;
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
#if ARCH_AARCH64
#if HAVE_AARCH64
pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_neon;
pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_neon;
pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_neon;
......@@ -1010,9 +1010,9 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_neon;
pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
pf_progressive->sub_8x8 = x264_zigzag_sub_8x8_frame_neon;
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
}
#endif // HAVE_ARMV6 || ARCH_AARCH64
#endif // HAVE_ARMV6 || HAVE_AARCH64
#endif // HIGH_BIT_DEPTH
pf_interlaced->interleave_8x8_cavlc =
......@@ -1065,13 +1065,13 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
#endif // HIGH_BIT_DEPTH
#endif
#if !HIGH_BIT_DEPTH
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf_interlaced->interleave_8x8_cavlc =
pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_neon;
}
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
......
......@@ -667,13 +667,13 @@ void x264_macroblock_deblock( x264_t *h )
#if HAVE_MMX
#include "x86/deblock.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
#include "ppc/deblock.h"
#endif
#if HAVE_ARMV6
#include "arm/deblock.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/deblock.h"
#endif
#if HAVE_MSA
......@@ -782,7 +782,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
}
#endif // HAVE_ALTIVEC
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf->deblock_luma[1] = x264_deblock_v_luma_neon;
......
......@@ -162,7 +162,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
for( int p = 0; p < luma_plane_count; p++ )
{
int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
if( h->param.analyse.i_subpel_refine && b_fdec )
luma_plane_size *= 4;
......@@ -205,7 +205,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
if( h->frames.b_have_lowres )
{
int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) );
......@@ -244,7 +244,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
for( int p = 0; p < luma_plane_count; p++ )
{
int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
if( h->param.analyse.i_subpel_refine && b_fdec )
{
for( int i = 0; i < 4; i++ )
......@@ -274,7 +274,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
{
if( h->frames.b_have_lowres )
{
int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
for( int i = 0; i < 4; i++ )
frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;
......
......@@ -261,13 +261,14 @@ void x264_threadslice_cond_broadcast( x264_t *h, int pass );
void x264_threadslice_cond_wait( x264_t *h, int pass );
#define x264_frame_push x264_template(frame_push)
void x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
X264_API void x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
#define x264_frame_pop x264_template(frame_pop)
x264_frame_t *x264_frame_pop( x264_frame_t **list );
X264_API x264_frame_t *x264_frame_pop( x264_frame_t **list );
#define x264_frame_unshift x264_template(frame_unshift)
void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
X264_API void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
#define x264_frame_shift x264_template(frame_shift)
x264_frame_t *x264_frame_shift( x264_frame_t **list );
X264_API x264_frame_t *x264_frame_shift( x264_frame_t **list );
#define x264_frame_push_unused x264_template(frame_push_unused)
void x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
#define x264_frame_push_blank_unused x264_template(frame_push_blank_unused)
......
......@@ -29,16 +29,16 @@
#if HAVE_MMX
#include "x86/mc.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
#include "ppc/mc.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
#include "arm/mc.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
#include "aarch64/mc.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
#include "mips/mc.h"
#endif
......@@ -680,7 +680,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
#if HAVE_ARMV6
x264_mc_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_mc_init_aarch64( cpu, pf );
#endif
#if HAVE_MSA
......
......@@ -131,8 +131,11 @@ static cl_program opencl_cache_load( x264_t *h, const char *dev_name, const char
uint8_t *binary = NULL;
fseek( fp, 0, SEEK_END );
size_t size = ftell( fp );
rewind( fp );
int64_t file_size = ftell( fp );
fseek( fp, 0, SEEK_SET );
if( file_size < 0 || file_size > SIZE_MAX )
goto fail;
size_t size = file_size;
CHECKED_MALLOC( binary, size );
if( fread( binary, 1, size, fp ) != size )
......
......@@ -27,11 +27,6 @@
#include "osdep.h"
#ifdef _WIN32
#include <windows.h>
#include <io.h>
#endif
#if SYS_WINDOWS
#include <sys/types.h>
#include <sys/timeb.h>
......@@ -111,114 +106,3 @@ int x264_threading_init( void )
return 0;
}
#endif
#ifdef _WIN32
/* Functions for dealing with Unicode on Windows. */
/* Opens a file whose name and mode are given as UTF-8: both strings are
 * converted to UTF-16 and passed to _wfopen(). Returns NULL if either
 * conversion fails. */
FILE *x264_fopen( const char *filename, const char *mode )
{
    wchar_t wide_name[MAX_PATH];
    wchar_t wide_mode[16];

    if( !utf8_to_utf16( filename, wide_name ) )
        return NULL;
    if( !utf8_to_utf16( mode, wide_mode ) )
        return NULL;

    return _wfopen( wide_name, wide_mode );
}
/* Renames a file given UTF-8 paths. Both names are converted to UTF-16; if
 * either conversion fails, -1 is returned and nothing is touched. Otherwise
 * the result of _wrename() is returned. */
int x264_rename( const char *oldname, const char *newname )
{
    wchar_t wide_old[MAX_PATH];
    wchar_t wide_new[MAX_PATH];

    if( !utf8_to_utf16( oldname, wide_old ) || !utf8_to_utf16( newname, wide_new ) )
        return -1;

    /* POSIX says that rename() removes the destination, but Win32 doesn't. */
    _wunlink( wide_new );
    return _wrename( wide_old, wide_new );
}
/* Fills buf via _wstati64() for a UTF-8 path. Returns -1 when the path cannot
 * be converted to UTF-16, otherwise whatever _wstati64() returns. */
int x264_stat( const char *path, x264_struct_stat *buf )
{
    wchar_t wide_path[MAX_PATH];

    return utf8_to_utf16( path, wide_path ) ? _wstati64( wide_path, buf ) : -1;
}
#if !HAVE_WINRT
/* vfprintf() replacement that prints UTF-8 text correctly on a Windows console.
 *
 * When stream is stdout or stderr and the corresponding handle is a real
 * (non-redirected) console, the message is formatted into a 4096-byte buffer,
 * converted to UTF-16 and written with WriteConsoleW(); the return value is
 * then the length of the formatted UTF-8 message in bytes.
 *
 * In every other case -- any other stream, redirected output, an empty or
 * over-long message, or a failed conversion/console write -- the call falls
 * back to plain vfprintf() so that the message is never silently dropped. */
int x264_vfprintf( FILE *stream, const char *format, va_list arg )
{
    HANDLE console = NULL;
    DWORD mode;

    if( stream == stdout )
        console = GetStdHandle( STD_OUTPUT_HANDLE );
    else if( stream == stderr )
        console = GetStdHandle( STD_ERROR_HANDLE );

    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
    if( console && GetConsoleMode( console, &mode ) )
    {
        char buf[4096];
        wchar_t buf_utf16[4096];
        va_list arg2;

        /* Format from a copy so that `arg` is still usable by the fallback below. */
        va_copy( arg2, arg );
        int length = vsnprintf( buf, sizeof(buf), format, arg2 );
        va_end( arg2 );

        if( length > 0 && (size_t)length < sizeof(buf) )
        {
            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
            DWORD written;
            /* Report success only if the text was converted and actually written. */
            if( length_utf16 > 0 && WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL ) )
                return length;
        }
    }
    return vfprintf( stream, format, arg );
}
/* Converts the UTF-8 path to UTF-16 and returns the result of
 * WaitNamedPipeW() called on it with a timeout argument of 0. Returns 0 when
 * the path cannot be converted. */
int x264_is_pipe( const char *path )
{
    wchar_t wide_path[MAX_PATH];

    if( !utf8_to_utf16( path, wide_path ) )
        return 0;
    return WaitNamedPipeW( wide_path, 0 );
}
#endif
#if defined(_MSC_VER) && _MSC_VER < 1900
/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
/* Variadic front end for x264_vsnprintf(): packs the trailing arguments into
 * a va_list, forwards them, and returns x264_vsnprintf()'s result. */
int x264_snprintf( char *s, size_t n, const char *fmt, ... )
{
    va_list ap;
    int result;

    va_start( ap, fmt );
    result = x264_vsnprintf( s, n, fmt, ap );
    va_end( ap );

    return result;
}
/* vsnprintf() built on top of _vsnprintf().
 *
 * When n is non-zero the text is formatted into s and the buffer is always
 * left null-terminated. If _vsnprintf() reports a non-negative length, that
 * length is returned; if it reports a negative value, or n is zero, the value
 * of _vscprintf() for the same format and arguments is returned instead. */
int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
{
    if( n )
    {
        va_list probe;
        va_copy( probe, arg );
        int written = _vsnprintf( s, n, fmt, probe );
        va_end( probe );

        /* Fits with room for the terminator: _(v)snprintf already terminated it. */
        if( written >= 0 && (size_t)written < n )
            return written;

        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
        s[n-1] = '\0';
        if( written >= 0 )
            return written;
    }
    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
    return _vscprintf( fmt, arg );
}
#endif
#endif
......@@ -43,6 +43,13 @@
#include <math.h>
#endif
#ifdef _WIN32
#include <windows.h>
#include <io.h>
#endif
#include "x264.h"
#if !HAVE_LOG2F
#define log2f(x) (logf(x)/0.693147180559945f)
#define log2(x) (log(x)/0.693147180559945)
......@@ -54,12 +61,6 @@
#define strncasecmp _strnicmp
#define strtok_r strtok_s
#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#if _MSC_VER < 1900
int x264_snprintf( char *s, size_t n, const char *fmt, ... );
int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
#define snprintf x264_snprintf
#define vsnprintf x264_vsnprintf
#endif
#else
#include <strings.h>
#endif
......@@ -76,14 +77,81 @@ int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
#define strtok_r(str,delim,save) strtok(str,delim)
#endif
#if defined(_MSC_VER) && _MSC_VER < 1900
/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
/* vsnprintf() built on top of _vsnprintf().
 *
 * When n is non-zero the text is formatted into s and the buffer is always
 * left null-terminated. If _vsnprintf() reports a non-negative length, that
 * length is returned; if it reports a negative value, or n is zero, the value
 * of _vscprintf() for the same format and arguments is returned instead. */
static inline int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
{
    if( n )
    {
        va_list probe;
        va_copy( probe, arg );
        int written = _vsnprintf( s, n, fmt, probe );
        va_end( probe );

        /* Fits with room for the terminator: _(v)snprintf already terminated it. */
        if( written >= 0 && (size_t)written < n )
            return written;

        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
        s[n-1] = '\0';
        if( written >= 0 )
            return written;
    }
    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
    return _vscprintf( fmt, arg );
}
/* Variadic front end for x264_vsnprintf(): packs the trailing arguments into
 * a va_list, forwards them, and returns x264_vsnprintf()'s result. */
static inline int x264_snprintf( char *s, size_t n, const char *fmt, ... )
{
    va_list ap;
    int result;

    va_start( ap, fmt );
    result = x264_vsnprintf( s, n, fmt, ap );
    va_end( ap );

    return result;
}
#define snprintf x264_snprintf
#define vsnprintf x264_vsnprintf
#endif
#ifdef _WIN32
#define utf8_to_utf16( utf8, utf16 )\
MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
FILE *x264_fopen( const char *filename, const char *mode );
int x264_rename( const char *oldname, const char *newname );
/* Functions for dealing with Unicode on Windows. */
/* Opens a file whose name and mode are given as UTF-8: both strings are
 * converted to UTF-16 and passed to _wfopen(). Returns NULL if either
 * conversion fails. */
static inline FILE *x264_fopen( const char *filename, const char *mode )
{
    wchar_t wide_name[MAX_PATH];
    wchar_t wide_mode[16];

    if( !utf8_to_utf16( filename, wide_name ) )
        return NULL;
    if( !utf8_to_utf16( mode, wide_mode ) )
        return NULL;

    return _wfopen( wide_name, wide_mode );
}
/* Renames a file given UTF-8 paths. Both names are converted to UTF-16; if
 * either conversion fails, -1 is returned and nothing is touched. Otherwise
 * the result of _wrename() is returned. */
static inline int x264_rename( const char *oldname, const char *newname )
{
    wchar_t wide_old[MAX_PATH];
    wchar_t wide_new[MAX_PATH];

    if( !utf8_to_utf16( oldname, wide_old ) || !utf8_to_utf16( newname, wide_new ) )
        return -1;

    /* POSIX says that rename() removes the destination, but Win32 doesn't. */
    _wunlink( wide_new );
    return _wrename( wide_old, wide_new );
}
#define x264_struct_stat struct _stati64
#define x264_fstat _fstati64
int x264_stat( const char *path, x264_struct_stat *buf );
/* Fills buf via _wstati64() for a UTF-8 path. Returns -1 when the path cannot
 * be converted to UTF-16, otherwise whatever _wstati64() returns. */
static inline int x264_stat( const char *path, x264_struct_stat *buf )
{
    wchar_t wide_path[MAX_PATH];

    return utf8_to_utf16( path, wide_path ) ? _wstati64( wide_path, buf ) : -1;
}
#else
#define x264_fopen fopen
#define x264_rename rename
......@@ -93,11 +161,49 @@ int x264_stat( const char *path, x264_struct_stat *buf );
#endif
/* mdate: return the current date in microseconds */
int64_t x264_mdate( void );
X264_API int64_t x264_mdate( void );
#if defined(_WIN32) && !HAVE_WINRT
int x264_vfprintf( FILE *stream, const char *format, va_list arg );
int x264_is_pipe( const char *path );
/* vfprintf() replacement that prints UTF-8 text correctly on a Windows console.
 *
 * When stream is stdout or stderr and the corresponding handle is a real
 * (non-redirected) console, the message is formatted into a 4096-byte buffer,
 * converted to UTF-16 and written with WriteConsoleW(); the return value is
 * then the length of the formatted UTF-8 message in bytes.
 *
 * In every other case -- any other stream, redirected output, an empty or
 * over-long message, or a failed conversion/console write -- the call falls
 * back to plain vfprintf() so that the message is never silently dropped. */
static inline int x264_vfprintf( FILE *stream, const char *format, va_list arg )
{
    HANDLE console = NULL;
    DWORD mode;

    if( stream == stdout )
        console = GetStdHandle( STD_OUTPUT_HANDLE );
    else if( stream == stderr )
        console = GetStdHandle( STD_ERROR_HANDLE );

    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
    if( console && GetConsoleMode( console, &mode ) )
    {
        char buf[4096];
        wchar_t buf_utf16[4096];
        va_list arg2;

        /* Format from a copy so that `arg` is still usable by the fallback below. */
        va_copy( arg2, arg );
        int length = vsnprintf( buf, sizeof(buf), format, arg2 );
        va_end( arg2 );

        if( length > 0 && (size_t)length < sizeof(buf) )
        {
            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
            DWORD written;
            /* Report success only if the text was converted and actually written. */
            if( length_utf16 > 0 && WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL ) )
                return length;
        }
    }
    return vfprintf( stream, format, arg );
}
/* Converts the UTF-8 path to UTF-16 and returns the result of
 * WaitNamedPipeW() called on it with a timeout argument of 0. Returns 0 when
 * the path cannot be converted. */
static inline int x264_is_pipe( const char *path )
{
    wchar_t wide_path[MAX_PATH];

    if( !utf8_to_utf16( path, wide_path ) )
        return 0;
    return WaitNamedPipeW( wide_path, 0 );
}
#else
#define x264_vfprintf vfprintf
#define x264_is_pipe(x) 0
......@@ -163,6 +269,12 @@ int x264_is_pipe( const char *path );
#define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16
#endif
/* REALIGN_STACK: function attribute placed on externally callable functions.
 * It expands to __attribute__((force_align_arg_pointer)) when STACK_ALIGNMENT
 * is greater than 16, or greater than 4 on ARCH_X86; otherwise it expands to
 * nothing.
 * NOTE(review): the attribute syntax is GCC-style and is not guarded by a
 * compiler check here -- confirm that other compilers never take the first
 * branch. */
#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)
#define REALIGN_STACK __attribute__((force_align_arg_pointer))
#else
#define REALIGN_STACK
#endif
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#define UNUSED __attribute__((unused))
#define ALWAYS_INLINE __attribute__((always_inline)) inline
......@@ -247,7 +359,7 @@ static inline int x264_pthread_create( x264_pthread_t *t, void *a, void *(*f)(vo
#endif
#if HAVE_WIN32THREAD || PTW32_STATIC_LIB
int x264_threading_init( void );
X264_API int x264_threading_init( void );
#else
#define x264_threading_init() 0
#endif
......
......@@ -31,18 +31,18 @@
# include "x86/pixel.h"
# include "x86/predict.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/pixel.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/pixel.h"
# include "arm/predict.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/pixel.h"
# include "aarch64/predict.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/pixel.h"
#endif
......@@ -508,7 +508,7 @@ SATD_X_DECL7( _avx512 )
#endif
#if !HIGH_BIT_DEPTH
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
SATD_X_DECL7( _neon )
#endif
#endif // !HIGH_BIT_DEPTH
......@@ -532,7 +532,7 @@ INTRA_MBCMP_8x8(sa8d,, _c )
INTRA_MBCMP_8x8( sad, _mmx2, _c )
INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 )
#endif
#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || ARCH_AARCH64)
#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || HAVE_AARCH64)
INTRA_MBCMP_8x8( sad, _neon, _neon )
INTRA_MBCMP_8x8(sa8d, _neon, _neon )
#endif
......@@ -602,7 +602,7 @@ INTRA_MBCMP(satd, 8x16, dc, h, v, c, _neon, _c )
INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon )
INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon )
#endif
#if !HIGH_BIT_DEPTH && ARCH_AARCH64
#if !HIGH_BIT_DEPTH && HAVE_AARCH64
INTRA_MBCMP( sad, 4x4, v, h, dc, , _neon, _neon )
INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, _neon )
INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon )
......@@ -1434,7 +1434,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
}
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
INIT8( sad, _neon );
......@@ -1475,7 +1475,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon;
pixf->ssim_end4 = x264_pixel_ssim_end4_neon;
}
#endif // ARCH_AARCH64
#endif // HAVE_AARCH64
#if HAVE_MSA
if( cpu&X264_CPU_MSA )
......
......@@ -34,16 +34,16 @@
#if HAVE_MMX
# include "x86/predict.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/predict.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/predict.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/predict.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/predict.h"
#endif
......@@ -906,7 +906,7 @@ void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] )
x264_predict_16x16_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_16x16_init_aarch64( cpu, pf );
#endif
......@@ -949,7 +949,7 @@ void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] )
x264_predict_8x8c_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x8c_init_aarch64( cpu, pf );
#endif
......@@ -981,7 +981,7 @@ void x264_predict_8x16c_init( int cpu, x264_predict_t pf[7] )
x264_predict_8x16c_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x16c_init_aarch64( cpu, pf );
#endif
}
......@@ -1010,7 +1010,7 @@ void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_
x264_predict_8x8_init_arm( cpu, pf, predict_filter );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_8x8_init_aarch64( cpu, pf, predict_filter );
#endif
......@@ -1047,7 +1047,7 @@ void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] )
x264_predict_4x4_init_arm( cpu, pf );
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
x264_predict_4x4_init_aarch64( cpu, pf );
#endif
}
......
......@@ -31,16 +31,16 @@
#if HAVE_MMX
#include "x86/quant.h"
#endif
#if ARCH_PPC
#if HAVE_ALTIVEC
# include "ppc/quant.h"
#endif
#if ARCH_ARM
#if HAVE_ARMV6
# include "arm/quant.h"
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
# include "aarch64/quant.h"
#endif
#if ARCH_MIPS
#if HAVE_MSA
# include "mips/quant.h"
#endif
......@@ -756,7 +756,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->coeff_last8 = x264_coeff_last8_arm;
}
#endif
#if HAVE_ARMV6 || ARCH_AARCH64
#if HAVE_ARMV6 || HAVE_AARCH64
if( cpu&X264_CPU_NEON )
{
pf->quant_2x2_dc = x264_quant_2x2_dc_neon;
......@@ -776,7 +776,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->decimate_score64 = x264_decimate_score64_neon;
}
#endif
#if ARCH_AARCH64
#if HAVE_AARCH64
if( cpu&X264_CPU_ARMV8 )
{
pf->coeff_last4 = x264_coeff_last4_aarch64;
......
......@@ -33,7 +33,7 @@ typedef struct
uint8_t i_size;
} vlc_t;
extern const x264_level_t x264_levels[];
X264_API extern const x264_level_t x264_levels[];
extern const uint8_t x264_exp2_lut[64];
extern const float x264_log2_lut[128];
......
......@@ -47,7 +47,7 @@ struct x264_threadpool_t
x264_sync_frame_list_t done; /* list of jobs that have finished processing */
};
static void *threadpool_thread_internal( x264_threadpool_t *pool )
REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool )
{
if( pool->init_func )
pool->init_func( pool->init_arg );
......@@ -72,11 +72,6 @@ static void *threadpool_thread_internal( x264_threadpool_t *pool )
return NULL;
}
/* Wrapper that invokes threadpool_thread_internal with pool through
 * x264_stack_align and returns the result cast to void*.
 * NOTE(review): x264_stack_align is defined elsewhere; presumably it realigns
 * the stack before making the call -- confirm against its definition. */
static void *threadpool_thread( x264_threadpool_t *pool )
{
return (void*)x264_stack_align( threadpool_thread_internal, pool );
}
int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
void (*init_func)(void *), void *init_arg )
{
......