Commit 0d111333, authored by Luca Barbato and committed by Anton Mitrofanov
Browse files

ppc: Use xxpermdi in VEC_STORE8

Roughly a 2% speedup in overall encoding with --slow.
parent 18262ee3
......@@ -51,7 +51,6 @@ static inline void pixel_avg2_w8_altivec( uint8_t *dst, intptr_t i_dst,
uint8_t *src2, int i_height )
{
vec_u8_t src1v, src2v;
PREP_STORE8;
for( int y = 0; y < i_height; y++ )
{
......@@ -525,7 +524,6 @@ static void mc_chroma_8xh_altivec( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_
srcp = &src[i_src_stride];
LOAD_ZERO;
PREP_STORE8;
vec_u16_t coeff0v, coeff1v, coeff2v, coeff3v;
vec_u8_t src0v_8, src1v_8, src2v_8, src3v_8;
vec_u8_t dstuv, dstvv;
......@@ -1098,7 +1096,6 @@ static void mc_weight_w8_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, in
const x264_weight_t *weight, int i_height )
{
LOAD_ZERO;
PREP_STORE8;
vec_u8_t srcv;
vec_s16_t weightv;
vec_s16_t scalev, offsetv, denomv, roundv;
......
......@@ -146,19 +146,10 @@ typedef union {
#define vec_s32_to_u16(v) vec_packsu( v, zero_s32v )
/***********************************************************************
* PREP_STORE##n: declares required vectors to store n bytes to a
* potentially unaligned address
* VEC_STORE##n: stores n bytes from vector v to address p
**********************************************************************/
#define PREP_STORE8 \
vec_u8_t _tmp3v; \
vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F } \
#define VEC_STORE8( v, p ) \
_tmp3v = vec_vsx_ld( 0, p ); \
v = vec_perm( v, _tmp3v, mask ); \
vec_vsx_st( v, 0, p )
/* VEC_STORE8( v, p ): read-modify-write store of part of vector v to p.
 * Loads the full vector currently at p (vec_vsx_ld), merges it with v
 * using vec_xxpermdi with selector 1, and writes the merged vector back
 * to p (vec_vsx_st). The merge is a single expression, so no temporary
 * or mask needs to be declared by the caller and v itself is not
 * modified.
 * NOTE(review): selector 1 is presumably "one doubleword from v, the
 * other from the data already at p", so that only 8 bytes at p change --
 * confirm the lane choice against the vec_xxpermdi documentation for the
 * target endianness.
 * p appears twice in the expansion, so it must be free of side effects. */
#define VEC_STORE8( v, p ) \
vec_vsx_st( vec_xxpermdi( v, vec_vsx_ld( 0, p ), 1 ), 0, p )
/***********************************************************************
* VEC_TRANSPOSE_8
......
......@@ -58,8 +58,6 @@ static void predict_8x8c_p_altivec( uint8_t *src )
vec_s16_t induc_v = (vec_s16_t) CV(0, 1, 2, 3, 4, 5, 6, 7);
vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v);
PREP_STORE8;
for( int i = 0; i < 8; ++i )
{
vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);
......@@ -67,7 +65,6 @@ static void predict_8x8c_p_altivec( uint8_t *src )
VEC_STORE8(com_sat_v, &src[0]);
src += FDEC_STRIDE;
add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment