Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • videolan/x264
  • EwoutH/x264
  • gramner/x264
  • BugMaster/x264
  • MaskRay/x264
  • thresh/x264
  • tpm/x264
  • wolfired/x264
  • ifb/x264
  • robinstorm/x264
  • ltnokiago/x264
  • janne/x264
  • Kromjunya/x264
  • trisnaayu0596/x264
  • felipegarcia1402/x264
  • coder2004/x264
  • philou/x264
  • walagnatalia/x264
  • DonDiego/x264
  • JHammler/x264
  • qyot27/x264
  • dwbuiten/x264
  • Kagami/x264
  • andriy-andreyev/x264
  • gxw/x264
  • trofi/x264
  • kierank/x264
  • aureliendavid/x264
  • galad/x264
  • roommini/x264
  • ocrete/x264
  • mstorsjo/x264
  • yinsj0116/x264
  • mamonet/x264
  • 1div0/x264
  • ko1265/x264
  • sergiomb2/x264
  • xutongda/x264
  • wenzhiwu/x264
  • arrowd/x264
  • FranceBB/x264
  • ziemek99/x264
  • longervision/x264
  • xopok/x264
  • jbk/x264
  • szatmary/x264
  • pekdon/x264
  • Jiangguyu/x264
  • jrtc27/x264
  • kankanol1/x264
  • gxwLite/x264
  • brad/x264
  • Gc6026/x264
  • jdek/x264
  • appcrash/x264
  • tguillem/x264
  • As/x264
  • wevian/x264
  • wangluls/x264
  • RellikJaeger/x264
  • hum/x264
  • rogerhardiman/x264
  • jankowalski12611/x264
  • zhijie1996/x264
  • yinshiyou/x264
  • Freed-Wu/x264
  • yajcoca/x264
  • bUd/x264
  • chienvannguyen2020/x264
  • nurbinakhatun386/x264
  • Siberiawind/x-264-meson
  • HecaiYuan/x264
  • david.chen/x264
  • Ytsejam76/x264
  • robUx4/x264
  • zhaoshiz/x-264-arm64ec
  • yintong.ustc/x-264-bd-ventana
  • nekobasu/x264
  • Courmisch/x264
  • BD-qjy/x264
  • quink/x264
  • markos/x264
82 results
Show changes
Commits on Source (2)
  • Martin Storsjö's avatar
    configure: Check for support for AArch64 SVE and SVE2 · db9bc75b
    Martin Storsjö authored
    We don't expect the user to build the whole x264 codebase with
    SVE/SVE2 enabled, as we only enable this feature for the assembly
    files that use it, in order to have binaries that are portable
    and enable the SVE codepaths at runtime if supported.
    db9bc75b
  • Martin Storsjö's avatar
    Add cpu flags and runtime detection of SVE and SVE2 · 9c3c7168
    Martin Storsjö authored
    We could also use HWCAP_SVE and HWCAP2_SVE2 for detecting this,
    but these might not be available in all userland headers, while
    HWCAP_CPUID is available much earlier.
    
    The register ID_AA64ZFR0_EL1, which indicates if SVE2 is available,
    can only be accessed if SVE is available. If not building all the
    C code with SVE enabled (which could make it impossible to run
    on HW without SVE), binutils refuses to assemble an instruction
    reading ID_AA64ZFR0_EL1 - but if referring to it with the technical
    name S3_0_C0_C4_4, it can be assembled even without any extra
    extensions enabled.
    9c3c7168
......@@ -96,6 +96,8 @@ const x264_cpu_name_t x264_cpu_names[] =
#elif ARCH_AARCH64
{"ARMv8", X264_CPU_ARMV8},
{"NEON", X264_CPU_NEON},
{"SVE", X264_CPU_SVE},
{"SVE2", X264_CPU_SVE2},
#elif ARCH_MIPS
{"MSA", X264_CPU_MSA},
#elif ARCH_LOONGARCH
......@@ -418,13 +420,62 @@ uint32_t x264_cpu_detect( void )
#elif HAVE_AARCH64
#ifdef __linux__
#include <sys/auxv.h>
// Read the system register `reg` with an MRS instruction and store the
// result in `val`. `reg` is stringified straight into the instruction text,
// so it can be either an architectural register name or a generic encoded
// name (S<op0>_<op1>_C<n>_C<m>_<op2>).
#define get_cpu_feature_reg( reg, val ) \
__asm__( "mrs %0, " #reg : "=r" ( val ) )
/*
 * Runtime detection of optional AArch64 vector extensions (Linux only).
 *
 * Returns a mask containing X264_CPU_SVE and/or X264_CPU_SVE2 when the CPU
 * ID registers report them. Returns 0 when nothing was detected, which
 * includes builds whose userland headers lack AT_HWCAP or HWCAP_CPUID.
 */
static uint32_t detect_flags( void )
{
    uint32_t flags = 0;

#if defined( AT_HWCAP ) && defined( HWCAP_CPUID )
    unsigned long hwcap = getauxval( AT_HWCAP );

    // Only touch the ID registers when the kernel advertises HWCAP_CPUID.
    if ( hwcap & HWCAP_CPUID ) {
        // We could check for support directly with HWCAP_SVE and HWCAP2_SVE2,
        // but those were added into headers much later. By using direct
        // register access, we can detect these features even if compiled with
        // slightly older userland headers.
        // https://www.kernel.org/doc/html/latest/arm64/cpu-feature-registers.html
        uint64_t tmp;

        // ID_AA64PFR0_EL1 bits [35:32] hold the SVE field. ID register
        // fields are ordered (a larger value includes everything a smaller
        // one does), so test with >= rather than == to stay correct if a
        // later revision is reported.
        get_cpu_feature_reg( ID_AA64PFR0_EL1, tmp );
        if ( ( ( tmp >> 32 ) & 0xf ) >= 0x1 ) {
            flags |= X264_CPU_SVE;

            // ID_AA64ZFR0_EL1 may only be read once SVE is known to exist.
            // The generic name is used so the assembler accepts it without
            // SVE being enabled for the C code.
            get_cpu_feature_reg( S3_0_C0_C4_4, tmp ); // ID_AA64ZFR0_EL1

            // Bits [3:0] are SVEver: 0 = SVE only, 1 = SVE2, larger values
            // are later revisions (e.g. SVE2.1) that still include SVE2.
            if ( ( ( tmp >> 0 ) & 0xf ) >= 0x1 )
                flags |= X264_CPU_SVE2;
        }
    }
#endif

    return flags;
}
#endif
/*
 * Build the X264_CPU_* capability mask for AArch64.
 *
 * X264_CPU_ARMV8 is always set. NEON is added when the build has it
 * (HAVE_NEON). SVE/SVE2 are added when the compiler targets them
 * unconditionally, and additionally from runtime detection on Linux.
 * The flags are accumulated and returned once at the end so that every
 * detection step is actually reached.
 */
uint32_t x264_cpu_detect( void )
{
    uint32_t flags = X264_CPU_ARMV8;

#if HAVE_NEON
    flags |= X264_CPU_NEON;
#endif

    // If these features are enabled unconditionally in the compiler, we can
    // assume that they are available.
#ifdef __ARM_FEATURE_SVE
    flags |= X264_CPU_SVE;
#endif
#ifdef __ARM_FEATURE_SVE2
    flags |= X264_CPU_SVE2;
#endif

    // Where possible, try to do runtime detection as well.
#ifdef __linux__
    flags |= detect_flags();
#endif

    return flags;
}
#elif HAVE_MSA
......
......@@ -411,7 +411,8 @@ NL="
# list of all preprocessor HAVE values we can define
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \
MSA LSX MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
MSA LSX MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10 \
SVE SVE2"
# parse options
......@@ -1003,6 +1004,8 @@ if [ $asm = auto -a $ARCH = AARCH64 ] ; then
elif cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then
define HAVE_AARCH64
define HAVE_NEON
cc_check '' '' '__asm__(".arch armv8.2-a+sve \n ptrue p0.b, vl16");' && define HAVE_SVE
cc_check '' '' '__asm__(".arch armv8.2-a+sve2 \n smlalb z10.s, z2.h, z1.h");' && define HAVE_SVE2
ASFLAGS="$ASFLAGS -c"
else
echo "no NEON support, try adding -mfpu=neon to CFLAGS"
......
......@@ -214,6 +214,8 @@ static void print_bench(void)
b->cpu&X264_CPU_NEON ? "neon" :
b->cpu&X264_CPU_ARMV6 ? "armv6" :
#elif ARCH_AARCH64
b->cpu&X264_CPU_SVE2 ? "sve2" :
b->cpu&X264_CPU_SVE ? "sve" :
b->cpu&X264_CPU_NEON ? "neon" :
b->cpu&X264_CPU_ARMV8 ? "armv8" :
#elif ARCH_MIPS
......@@ -2979,6 +2981,10 @@ static int check_all_flags( void )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_ARMV8, "ARMv8" );
if( cpu_detect & X264_CPU_NEON )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_NEON, "NEON" );
if( cpu_detect & X264_CPU_SVE )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_SVE, "SVE" );
if( cpu_detect & X264_CPU_SVE2 )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_SVE2, "SVE2" );
#elif ARCH_MIPS
if( cpu_detect & X264_CPU_MSA )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_MSA, "MSA" );
......
......@@ -177,6 +177,8 @@ typedef struct x264_nal_t
#define X264_CPU_NEON 0x0000002U /* ARM NEON */
#define X264_CPU_FAST_NEON_MRC 0x0000004U /* Transfer from NEON to ARM register is fast (Cortex-A9) */
#define X264_CPU_ARMV8 0x0000008U
#define X264_CPU_SVE 0x0000010U /* AArch64 SVE */
#define X264_CPU_SVE2 0x0000020U /* AArch64 SVE2 */
/* MIPS */
#define X264_CPU_MSA 0x0000001U /* MIPS MSA */
......