Commit 77c52be0 authored by Henrik Gramner

Reorder the dav1d_filter_intra_taps array

Ordering the elements this way is more SIMD-friendly.
parent 93c4bea2
......@@ -553,7 +553,7 @@ static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
filt_idx &= 511;
assert(filt_idx < 5);
const int8_t (*const filter)[8] = dav1d_filter_intra_taps[filt_idx];
const int8_t *const filter = dav1d_filter_intra_taps[filt_idx];
int x, y;
ptrdiff_t left_stride;
const pixel *left, *topleft, *top;
......@@ -568,19 +568,18 @@ static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
const int p1 = top[0], p2 = top[1], p3 = top[2], p4 = top[3];
const int p5 = left[0 * left_stride], p6 = left[1 * left_stride];
pixel *ptr = &dst[x];
const int8_t (*flt_ptr)[8] = filter;
const int8_t *flt_ptr = filter;
for (int yy = 0; yy < 2; yy++) {
for (int xx = 0; xx < 4; xx++, flt_ptr++) {
int acc = flt_ptr[0][0] * p0 + flt_ptr[0][1] * p1 +
flt_ptr[0][2] * p2 + flt_ptr[0][3] * p3 +
flt_ptr[0][4] * p4 + flt_ptr[0][5] * p5 +
flt_ptr[0][6] * p6;
for (int xx = 0; xx < 4; xx++, flt_ptr += 2) {
int acc = flt_ptr[ 0] * p0 + flt_ptr[ 1] * p1 +
flt_ptr[16] * p2 + flt_ptr[17] * p3 +
flt_ptr[32] * p4 + flt_ptr[33] * p5 +
flt_ptr[48] * p6;
ptr[xx] = iclip_pixel((acc + 8) >> 4);
}
ptr += PXSTRIDE(stride);
}
left = &dst[x + 4 - 1];
left_stride = PXSTRIDE(stride);
top += 4;
......
......@@ -781,51 +781,51 @@ const int16_t dav1d_dr_intra_derivative[90] = {
3, 0, 0, // 87, ...
};
const int8_t dav1d_filter_intra_taps[5][8][8] = {
const int8_t ALIGN(dav1d_filter_intra_taps[5][64], 16) = {
{
{ -6, 10, 0, 0, 0, 12, 0, 0 },
{ -5, 2, 10, 0, 0, 9, 0, 0 },
{ -3, 1, 1, 10, 0, 7, 0, 0 },
{ -3, 1, 1, 2, 10, 5, 0, 0 },
{ -4, 6, 0, 0, 0, 2, 12, 0 },
{ -3, 2, 6, 0, 0, 2, 9, 0 },
{ -3, 2, 2, 6, 0, 2, 7, 0 },
{ -3, 1, 2, 2, 6, 3, 5, 0 },
-6, 10, -5, 2, -3, 1, -3, 1,
-4, 6, -3, 2, -3, 2, -3, 1,
0, 0, 10, 0, 1, 10, 1, 2,
0, 0, 6, 0, 2, 6, 2, 2,
0, 12, 0, 9, 0, 7, 10, 5,
0, 2, 0, 2, 0, 2, 6, 3,
0, 0, 0, 0, 0, 0, 0, 0,
12, 0, 9, 0, 7, 0, 5, 0
}, {
{ -10, 16, 0, 0, 0, 10, 0, 0 },
{ -6, 0, 16, 0, 0, 6, 0, 0 },
{ -4, 0, 0, 16, 0, 4, 0, 0 },
{ -2, 0, 0, 0, 16, 2, 0, 0 },
{ -10, 16, 0, 0, 0, 0, 10, 0 },
{ -6, 0, 16, 0, 0, 0, 6, 0 },
{ -4, 0, 0, 16, 0, 0, 4, 0 },
{ -2, 0, 0, 0, 16, 0, 2, 0 },
-10, 16, -6, 0, -4, 0, -2, 0,
-10, 16, -6, 0, -4, 0, -2, 0,
0, 0, 16, 0, 0, 16, 0, 0,
0, 0, 16, 0, 0, 16, 0, 0,
0, 10, 0, 6, 0, 4, 16, 2,
0, 0, 0, 0, 0, 0, 16, 0,
0, 0, 0, 0, 0, 0, 0, 0,
10, 0, 6, 0, 4, 0, 2, 0
}, {
{ -8, 8, 0, 0, 0, 16, 0, 0 },
{ -8, 0, 8, 0, 0, 16, 0, 0 },
{ -8, 0, 0, 8, 0, 16, 0, 0 },
{ -8, 0, 0, 0, 8, 16, 0, 0 },
{ -4, 4, 0, 0, 0, 0, 16, 0 },
{ -4, 0, 4, 0, 0, 0, 16, 0 },
{ -4, 0, 0, 4, 0, 0, 16, 0 },
{ -4, 0, 0, 0, 4, 0, 16, 0 },
-8, 8, -8, 0, -8, 0, -8, 0,
-4, 4, -4, 0, -4, 0, -4, 0,
0, 0, 8, 0, 0, 8, 0, 0,
0, 0, 4, 0, 0, 4, 0, 0,
0, 16, 0, 16, 0, 16, 8, 16,
0, 0, 0, 0, 0, 0, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0,
16, 0, 16, 0, 16, 0, 16, 0
}, {
{ -2, 8, 0, 0, 0, 10, 0, 0 },
{ -1, 3, 8, 0, 0, 6, 0, 0 },
{ -1, 2, 3, 8, 0, 4, 0, 0 },
{ 0, 1, 2, 3, 8, 2, 0, 0 },
{ -1, 4, 0, 0, 0, 3, 10, 0 },
{ -1, 3, 4, 0, 0, 4, 6, 0 },
{ -1, 2, 3, 4, 0, 4, 4, 0 },
{ -1, 2, 2, 3, 4, 3, 3, 0 },
-2, 8, -1, 3, -1, 2, 0, 1,
-1, 4, -1, 3, -1, 2, -1, 2,
0, 0, 8, 0, 3, 8, 2, 3,
0, 0, 4, 0, 3, 4, 2, 3,
0, 10, 0, 6, 0, 4, 8, 2,
0, 3, 0, 4, 0, 4, 4, 3,
0, 0, 0, 0, 0, 0, 0, 0,
10, 0, 6, 0, 4, 0, 3, 0
}, {
{ -12, 14, 0, 0, 0, 14, 0, 0 },
{ -10, 0, 14, 0, 0, 12, 0, 0 },
{ -9, 0, 0, 14, 0, 11, 0, 0 },
{ -8, 0, 0, 0, 14, 10, 0, 0 },
{ -10, 12, 0, 0, 0, 0, 14, 0 },
{ -9, 1, 12, 0, 0, 0, 12, 0 },
{ -8, 0, 0, 12, 0, 1, 11, 0 },
{ -7, 0, 0, 1, 12, 1, 9, 0 },
-12, 14, -10, 0, -9, 0, -8, 0,
-10, 12, -9, 1, -8, 0, -7, 0,
0, 0, 14, 0, 0, 14, 0, 0,
0, 0, 12, 0, 0, 12, 0, 1,
0, 14, 0, 12, 0, 11, 14, 10,
0, 0, 0, 0, 0, 1, 12, 1,
0, 0, 0, 0, 0, 0, 0, 0,
14, 0, 12, 0, 11, 0, 9, 0
}
};
......@@ -115,6 +115,6 @@ extern const int8_t dav1d_mc_warp_filter[][8];
extern const uint8_t dav1d_sm_weights[128];
extern const int16_t dav1d_dr_intra_derivative[90];
extern const int8_t dav1d_filter_intra_taps[5][8][8];
extern const int8_t dav1d_filter_intra_taps[5][64];
#endif /* __DAV1D_SRC_TABLES_H__ */
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment