Commit 3ffbfed7, authored by Manuel Rommel, committed by Fiona Glaser

Fix Altivec SATD with small strides

Fixes chroma ME and some of lookahead on PPC.
parent 33edb51f
......@@ -283,19 +283,19 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
temp4v, temp5v, temp6v, temp7v;
vec_s32_t satdv;
PREP_LOAD_SRC( pix1 );
vec_u8_t _offset1v_ = vec_lvsl(0, pix2);
vec_u8_t _offset2v_ = vec_lvsl(0, pix2 + i_pix2);
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff0v, offset1v );
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff1v, offset2v );
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff2v, offset1v );
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff3v, offset2v );
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff4v, offset1v );
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff5v, offset2v );
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff6v, offset1v );
VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff7v, offset2v );
vec_u8_t _offset1_1v_ = vec_lvsl(0, pix1);
vec_u8_t _offset1_2v_ = vec_lvsl(0, pix1 + i_pix1);
vec_u8_t _offset2_1v_ = vec_lvsl(0, pix2);
vec_u8_t _offset2_2v_ = vec_lvsl(0, pix2 + i_pix2);
VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff0v, offset1_1v, offset2_1v );
VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff1v, offset1_2v, offset2_2v );
VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff2v, offset1_1v, offset2_1v );
VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff3v, offset1_2v, offset2_2v );
VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff4v, offset1_1v, offset2_1v );
VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff5v, offset1_2v, offset2_2v );
VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff6v, offset1_1v, offset2_1v );
VEC_DIFF_H_OFFSET( pix1, i_pix1, pix2, i_pix2, 8, diff7v, offset1_2v, offset2_2v );
VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v,
temp0v, temp1v, temp2v, temp3v );
......
......@@ -269,6 +269,16 @@ typedef union {
p1 += i1; \
p2 += i2
/***********************************************************************
* VEC_DIFF_H_OFFSET
***********************************************************************
* p1, p2: source pointers; each is advanced by i1 / i2 after the loads
* i1, i2: increments added to p1 / p2 (presumably the row strides)
* n:      width argument forwarded to VEC_LOAD for the p2 load
* d:      destination vector, receives pix1v - pix2v
* g1, g2: base names of the permute vectors for p1 and p2, passed
*         WITHOUT the surrounding underscores: g1 is pasted into _g1_
*         here, g2 is handed to VEC_LOAD (presumably pasted the same
*         way there -- confirm against VEC_LOAD)
*
* Unlike the p2 load, p1 is fetched with a single vec_ld( 0, p1 ) and
* permuted against zero_u8v, so p1 gets its own permute vector instead
* of sharing one with p2.
*
* Requires pix1v, pix2v and zero_u8v to be in scope at the call site.
* Expands to several statements that are not wrapped in a block, so it
* must not be used as the body of an unbraced if/for/while.
***********************************************************************/
#define VEC_DIFF_H_OFFSET(p1,i1,p2,i2,n,d,g1,g2) \
pix1v = (vec_s16_t)vec_perm( vec_ld( 0, p1 ), zero_u8v, _##g1##_ );\
pix1v = vec_u8_to_s16( pix1v ); \
VEC_LOAD( p2, pix2v, n, vec_s16_t, g2); \
pix2v = vec_u8_to_s16( pix2v ); \
d = vec_sub( pix1v, pix2v ); \
p1 += i1; \
p2 += i2
/***********************************************************************
* VEC_DIFF_HL
***********************************************************************
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment