Commit cc002bd5, authored by Rong Yan, committed by Henrik Gramner

ppc: Add little-endian PowerPC support

parent 145f3a62
......@@ -264,7 +264,7 @@ void x264_sub16x16_dct8_altivec( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix
vec_u8_t lv = vec_ld(0, dest); \
vec_u8_t dstv = vec_perm(lv, zero_u8v, (vec_u8_t)perm_ldv); \
vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \
vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \
vec_u16_t dst16 = vec_u8_to_u16_h(dstv); \
vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \
vec_u8_t idstsum8 = vec_s16_to_u8(idstsum); \
/* unaligned store */ \
......@@ -384,7 +384,7 @@ void x264_add16x16_idct_altivec( uint8_t *p_dst, int16_t dct[16][16] )
vec_u8_t lv = vec_ld( 7, dest ); \
vec_u8_t dstv = vec_perm( hv, lv, (vec_u8_t)perm_ldv ); \
vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \
vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \
vec_u16_t dst16 = vec_u8_to_u16_h(dstv); \
vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \
vec_u8_t idstsum8 = vec_packsu(zero_s16v, idstsum); \
/* unaligned store */ \
......
This diff is collapsed.
......@@ -81,10 +81,17 @@ typedef union {
/***********************************************************************
* 8 <-> 16 bits conversions
**********************************************************************/
/* Zero-extend one half of a u8 vector to 16-bit elements by interleaving it
 * with zero_u8v: the _h variants use vec_mergeh, the _l variants vec_mergel.
 * The u16/s16 variants differ only in the type the result is cast to.
 *
 * Operand order depends on WORDS_BIGENDIAN: the zero element has to end up as
 * the most-significant half of every widened element, which is the first
 * element of each merged pair on big-endian and the second on little-endian.
 *
 * The argument and the complete expansion are parenthesized so that an
 * expression argument is cast as a whole and the result can be embedded in a
 * larger expression without precedence surprises. */
#ifdef WORDS_BIGENDIAN
#define vec_u8_to_u16_h(v) ((vec_u16_t) vec_mergeh( zero_u8v, (vec_u8_t)(v) ))
#define vec_u8_to_u16_l(v) ((vec_u16_t) vec_mergel( zero_u8v, (vec_u8_t)(v) ))
#define vec_u8_to_s16_h(v) ((vec_s16_t) vec_mergeh( zero_u8v, (vec_u8_t)(v) ))
#define vec_u8_to_s16_l(v) ((vec_s16_t) vec_mergel( zero_u8v, (vec_u8_t)(v) ))
#else
#define vec_u8_to_u16_h(v) ((vec_u16_t) vec_mergeh( (vec_u8_t)(v), zero_u8v ))
#define vec_u8_to_u16_l(v) ((vec_u16_t) vec_mergel( (vec_u8_t)(v), zero_u8v ))
#define vec_u8_to_s16_h(v) ((vec_s16_t) vec_mergeh( (vec_u8_t)(v), zero_u8v ))
#define vec_u8_to_s16_l(v) ((vec_s16_t) vec_mergel( (vec_u8_t)(v), zero_u8v ))
#endif
/* The unsuffixed forms are shorthand for the _h variants. */
#define vec_u8_to_u16(v) vec_u8_to_u16_h(v)
#define vec_u8_to_s16(v) vec_u8_to_s16_h(v)
......@@ -96,10 +103,17 @@ typedef union {
/***********************************************************************
* 16 <-> 32 bits conversions
**********************************************************************/
/* Zero-extend one half of a u16 vector to 32-bit elements by interleaving it
 * with zero_u16v: the _h variants use vec_mergeh, the _l variants vec_mergel.
 * The u32/s32 variants differ only in the type the result is cast to.
 *
 * Operand order depends on WORDS_BIGENDIAN: the zero element has to end up as
 * the most-significant half of every widened element, which is the first
 * element of each merged pair on big-endian and the second on little-endian.
 *
 * The argument and the complete expansion are parenthesized so that an
 * expression argument is cast as a whole and the result can be embedded in a
 * larger expression without precedence surprises. */
#ifdef WORDS_BIGENDIAN
#define vec_u16_to_u32_h(v) ((vec_u32_t) vec_mergeh( zero_u16v, (vec_u16_t)(v) ))
#define vec_u16_to_u32_l(v) ((vec_u32_t) vec_mergel( zero_u16v, (vec_u16_t)(v) ))
#define vec_u16_to_s32_h(v) ((vec_s32_t) vec_mergeh( zero_u16v, (vec_u16_t)(v) ))
#define vec_u16_to_s32_l(v) ((vec_s32_t) vec_mergel( zero_u16v, (vec_u16_t)(v) ))
#else
#define vec_u16_to_u32_h(v) ((vec_u32_t) vec_mergeh( (vec_u16_t)(v), zero_u16v ))
#define vec_u16_to_u32_l(v) ((vec_u32_t) vec_mergel( (vec_u16_t)(v), zero_u16v ))
#define vec_u16_to_s32_h(v) ((vec_s32_t) vec_mergeh( (vec_u16_t)(v), zero_u16v ))
#define vec_u16_to_s32_l(v) ((vec_s32_t) vec_mergel( (vec_u16_t)(v), zero_u16v ))
#endif
/* The unsuffixed forms are shorthand for the _h variants. */
#define vec_u16_to_u32(v) vec_u16_to_u32_h(v)
#define vec_u16_to_s32(v) vec_u16_to_s32_h(v)
......
......@@ -251,6 +251,14 @@ int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64]
vec_st(dctv, 8*y, dct); \
}
/* Endian-aware aliases for the even/odd element multiplies.
 * When WORDS_BIGENDIAN is defined they map straight through; otherwise the
 * two intrinsics trade places.
 * NOTE(review): presumably needed because even/odd element numbering is
 * reversed on little-endian targets -- confirm against the compiler docs. */
#ifndef WORDS_BIGENDIAN
#define VEC_MULE vec_mulo
#define VEC_MULO vec_mule
#else
#define VEC_MULE vec_mule
#define VEC_MULO vec_mulo
#endif
#define DEQUANT_SHR() \
{ \
dctv = vec_ld(8*y, dct); \
......@@ -259,14 +267,14 @@ int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64]
mf1v = vec_ld(16*y, dequant_mf[i_mf]); \
mf2v = vec_ld(16+16*y, dequant_mf[i_mf]); \
\
multEvenvA = vec_mule(dct1v, (vec_s16_t)mf1v); \
multOddvA = vec_mulo(dct1v, (vec_s16_t)mf1v); \
multEvenvA = VEC_MULE(dct1v, (vec_s16_t)mf1v); \
multOddvA = VEC_MULO(dct1v, (vec_s16_t)mf1v); \
temp1v = vec_add(vec_sl(multEvenvA, sixteenv), multOddvA); \
temp1v = vec_add(temp1v, fv); \
temp1v = vec_sra(temp1v, i_qbitsv); \
\
multEvenvA = vec_mule(dct2v, (vec_s16_t)mf2v); \
multOddvA = vec_mulo(dct2v, (vec_s16_t)mf2v); \
multEvenvA = VEC_MULE(dct2v, (vec_s16_t)mf2v); \
multOddvA = VEC_MULO(dct2v, (vec_s16_t)mf2v); \
temp2v = vec_add(vec_sl(multEvenvA, sixteenv), multOddvA); \
temp2v = vec_add(temp2v, fv); \
temp2v = vec_sra(temp2v, i_qbitsv); \
......
......@@ -979,6 +979,9 @@ EOF
ppc64:Linux:*:*)
echo powerpc64-unknown-linux-gnu
exit ;;
ppc64le:Linux:*:*)
echo powerpc64le-unknown-linux-gnu
exit ;;
ppc:Linux:*:*)
echo powerpc-unknown-linux-gnu
exit ;;
......
......@@ -711,7 +711,7 @@ case $host_cpu in
ASFLAGS="$ASFLAGS -f elf64"
fi
;;
powerpc|powerpc64)
powerpc*)
ARCH="PPC"
if [ $asm = auto ] ; then
define HAVE_ALTIVEC
......@@ -881,11 +881,13 @@ define STACK_ALIGNMENT $stack_alignment
ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment"
# skip endianness check for Intel Compiler and MSVS, as all supported platforms are little. each have flags that will cause the check to fail as well
CPU_ENDIAN="little-endian"
if [ $compiler = GNU ]; then
echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
$CC $CFLAGS conftest.c -c -o conftest.o 2>/dev/null || die "endian test failed"
if (${cross_prefix}strings -a conftest.o | grep -q BIGE) && (${cross_prefix}strings -a conftest.o | grep -q FPendian) ; then
define WORDS_BIGENDIAN
CPU_ENDIAN="big-endian"
elif !(${cross_prefix}strings -a conftest.o | grep -q EGIB && ${cross_prefix}strings -a conftest.o | grep -q naidnePF) ; then
die "endian test failed"
fi
......@@ -1403,6 +1405,7 @@ gpl_filters=""
cat > conftest.log <<EOF
platform: $ARCH
byte order: $CPU_ENDIAN
system: $SYS
cli: $cli
libx264: $cli_libx264
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment