Commit 14072e73 authored by Henrik Gramner

Downshift mc subpel multiplier constants

Downshift all the constants by one bit, and reduce the rounding shift by one
to match. This is mathematically equivalent since every constant is a multiple
of two, but allows for using 16-bit intermediates in the 1st pass of the 8-tap
filter.
parent 8bfd7f2f
......@@ -102,7 +102,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
src -= src_stride * 3;
do {
for (int x = 0; x < w; x++)
mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
mid_ptr += 128;
src += src_stride;
......@@ -111,7 +111,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
mid_ptr = mid + 128 * 3;
do {
for (int x = 0; x < w; x++)
dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 11);
dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10);
mid_ptr += 128;
dst += dst_stride;
......@@ -119,7 +119,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
} else {
do {
for (int x = 0; x < w; x++) {
const int px = FILTER_8TAP_RND(src, x, fh, 1, 3);
const int px = FILTER_8TAP_RND(src, x, fh, 1, 2);
dst[x] = iclip_pixel((px + 8) >> 4);
}
......@@ -130,7 +130,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
} else if (fv) {
do {
for (int x = 0; x < w; x++)
dst[x] = FILTER_8TAP_CLIP(src, x, fv, src_stride, 7);
dst[x] = FILTER_8TAP_CLIP(src, x, fv, src_stride, 6);
dst += dst_stride;
src += src_stride;
......@@ -155,7 +155,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
src -= src_stride * 3;
do {
for (int x = 0; x < w; x++)
mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
mid_ptr += 128;
src += src_stride;
......@@ -164,7 +164,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
mid_ptr = mid + 128 * 3;
do {
for (int x = 0; x < w; x++)
tmp[x] = FILTER_8TAP_RND(mid_ptr, x, fv, 128, 7);
tmp[x] = FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6);
mid_ptr += 128;
tmp += w;
......@@ -172,7 +172,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
} else {
do {
for (int x = 0; x < w; x++)
tmp[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
tmp[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
tmp += w;
src += src_stride;
......@@ -181,7 +181,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
} else if (fv) {
do {
for (int x = 0; x < w; x++)
tmp[x] = FILTER_8TAP_RND(src, x, fv, src_stride, 3);
tmp[x] = FILTER_8TAP_RND(src, x, fv, src_stride, 2);
tmp += w;
src += src_stride;
......
......@@ -29,6 +29,8 @@
#include <stdint.h>
#include "common/attributes.h"
#include "src/levels.h"
#include "src/tables.h"
......@@ -562,89 +564,88 @@ const int16_t sgr_one_by_x[] = {
293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164,
};
const int8_t dav1d_mc_subpel_filters[5][15][8] = {
ALIGN(const int8_t dav1d_mc_subpel_filters[5][15][8], 8 ) = {
[FILTER_8TAP_REGULAR] = {
{ 0, 2, -6, 126, 8, -2, 0, 0 },
{ 0, 2, -10, 122, 18, -4, 0, 0 },
{ 0, 2, -12, 116, 28, -8, 2, 0 },
{ 0, 2, -14, 110, 38, -10, 2, 0 },
{ 0, 2, -14, 102, 48, -12, 2, 0 },
{ 0, 2, -16, 94, 58, -12, 2, 0 },
{ 0, 2, -14, 84, 66, -12, 2, 0 },
{ 0, 2, -14, 76, 76, -14, 2, 0 },
{ 0, 2, -12, 66, 84, -14, 2, 0 },
{ 0, 2, -12, 58, 94, -16, 2, 0 },
{ 0, 2, -12, 48, 102, -14, 2, 0 },
{ 0, 2, -10, 38, 110, -14, 2, 0 },
{ 0, 2, -8, 28, 116, -12, 2, 0 },
{ 0, 0, -4, 18, 122, -10, 2, 0 },
{ 0, 0, -2, 8, 126, -6, 2, 0 }
}, [FILTER_8TAP_SHARP] = {
{ -2, 2, -6, 126, 8, -2, 2, 0 },
{ -2, 6, -12, 124, 16, -6, 4, -2 },
{ -2, 8, -18, 120, 26, -10, 6, -2 },
{ -4, 10, -22, 116, 38, -14, 6, -2 },
{ -4, 10, -22, 108, 48, -18, 8, -2 },
{ -4, 10, -24, 100, 60, -20, 8, -2 },
{ -4, 10, -24, 90, 70, -22, 10, -2 },
{ -4, 12, -24, 80, 80, -24, 12, -4 },
{ -2, 10, -22, 70, 90, -24, 10, -4 },
{ -2, 8, -20, 60, 100, -24, 10, -4 },
{ -2, 8, -18, 48, 108, -22, 10, -4 },
{ -2, 6, -14, 38, 116, -22, 10, -4 },
{ -2, 6, -10, 26, 120, -18, 8, -2 },
{ -2, 4, -6, 16, 124, -12, 6, -2 },
{ 0, 2, -2, 8, 126, -6, 2, -2 }
{ 0, 1, -3, 63, 4, -1, 0, 0 },
{ 0, 1, -5, 61, 9, -2, 0, 0 },
{ 0, 1, -6, 58, 14, -4, 1, 0 },
{ 0, 1, -7, 55, 19, -5, 1, 0 },
{ 0, 1, -7, 51, 24, -6, 1, 0 },
{ 0, 1, -8, 47, 29, -6, 1, 0 },
{ 0, 1, -7, 42, 33, -6, 1, 0 },
{ 0, 1, -7, 38, 38, -7, 1, 0 },
{ 0, 1, -6, 33, 42, -7, 1, 0 },
{ 0, 1, -6, 29, 47, -8, 1, 0 },
{ 0, 1, -6, 24, 51, -7, 1, 0 },
{ 0, 1, -5, 19, 55, -7, 1, 0 },
{ 0, 1, -4, 14, 58, -6, 1, 0 },
{ 0, 0, -2, 9, 61, -5, 1, 0 },
{ 0, 0, -1, 4, 63, -3, 1, 0 }
}, [FILTER_8TAP_SMOOTH] = {
{ 0, 2, 28, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 },
{ 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 },
{ 0, 0, 18, 58, 44, 8, 0, 0 },
{ 0, 0, 16, 56, 46, 10, 0, 0 },
{ 0, -2, 16, 54, 48, 12, 0, 0 },
{ 0, -2, 14, 52, 52, 14, -2, 0 },
{ 0, 0, 12, 48, 54, 16, -2, 0 },
{ 0, 0, 10, 46, 56, 16, 0, 0 },
{ 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 },
{ 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 },
{ 0, 0, 2, 34, 62, 28, 2, 0 },
},
{ 0, 1, 14, 31, 17, 1, 0, 0 },
{ 0, 0, 13, 31, 18, 2, 0, 0 },
{ 0, 0, 11, 31, 20, 2, 0, 0 },
{ 0, 0, 10, 30, 21, 3, 0, 0 },
{ 0, 0, 9, 29, 22, 4, 0, 0 },
{ 0, 0, 8, 28, 23, 5, 0, 0 },
{ 0, -1, 8, 27, 24, 6, 0, 0 },
{ 0, -1, 7, 26, 26, 7, -1, 0 },
{ 0, 0, 6, 24, 27, 8, -1, 0 },
{ 0, 0, 5, 23, 28, 8, 0, 0 },
{ 0, 0, 4, 22, 29, 9, 0, 0 },
{ 0, 0, 3, 21, 30, 10, 0, 0 },
{ 0, 0, 2, 20, 31, 11, 0, 0 },
{ 0, 0, 2, 18, 31, 13, 0, 0 },
{ 0, 0, 1, 17, 31, 14, 1, 0 }
}, [FILTER_8TAP_SHARP] = {
{ -1, 1, -3, 63, 4, -1, 1, 0 },
{ -1, 3, -6, 62, 8, -3, 2, -1 },
{ -1, 4, -9, 60, 13, -5, 3, -1 },
{ -2, 5, -11, 58, 19, -7, 3, -1 },
{ -2, 5, -11, 54, 24, -9, 4, -1 },
{ -2, 5, -12, 50, 30, -10, 4, -1 },
{ -2, 5, -12, 45, 35, -11, 5, -1 },
{ -2, 6, -12, 40, 40, -12, 6, -2 },
{ -1, 5, -11, 35, 45, -12, 5, -2 },
{ -1, 4, -10, 30, 50, -12, 5, -2 },
{ -1, 4, -9, 24, 54, -11, 5, -2 },
{ -1, 3, -7, 19, 58, -11, 5, -2 },
{ -1, 3, -5, 13, 60, -9, 4, -1 },
{ -1, 2, -3, 8, 62, -6, 3, -1 },
{ 0, 1, -1, 4, 63, -3, 1, -1 }
/* width <= 4 */
[3 + FILTER_8TAP_REGULAR] = {
{ 0, 0, -4, 126, 8, -2, 0, 0 },
{ 0, 0, -8, 122, 18, -4, 0, 0 },
{ 0, 0, -10, 116, 28, -6, 0, 0 },
{ 0, 0, -12, 110, 38, -8, 0, 0 },
{ 0, 0, -12, 102, 48, -10, 0, 0 },
{ 0, 0, -14, 94, 58, -10, 0, 0 },
{ 0, 0, -12, 84, 66, -10, 0, 0 },
{ 0, 0, -12, 76, 76, -12, 0, 0 },
{ 0, 0, -10, 66, 84, -12, 0, 0 },
{ 0, 0, -10, 58, 94, -14, 0, 0 },
{ 0, 0, -10, 48, 102, -12, 0, 0 },
{ 0, 0, -8, 38, 110, -12, 0, 0 },
{ 0, 0, -6, 28, 116, -10, 0, 0 },
{ 0, 0, -4, 18, 122, -8, 0, 0 },
{ 0, 0, -2, 8, 126, -4, 0, 0 }
}, [3 + FILTER_8TAP_REGULAR] = {
{ 0, 0, -2, 63, 4, -1, 0, 0 },
{ 0, 0, -4, 61, 9, -2, 0, 0 },
{ 0, 0, -5, 58, 14, -3, 0, 0 },
{ 0, 0, -6, 55, 19, -4, 0, 0 },
{ 0, 0, -6, 51, 24, -5, 0, 0 },
{ 0, 0, -7, 47, 29, -5, 0, 0 },
{ 0, 0, -6, 42, 33, -5, 0, 0 },
{ 0, 0, -6, 38, 38, -6, 0, 0 },
{ 0, 0, -5, 33, 42, -6, 0, 0 },
{ 0, 0, -5, 29, 47, -7, 0, 0 },
{ 0, 0, -5, 24, 51, -6, 0, 0 },
{ 0, 0, -4, 19, 55, -6, 0, 0 },
{ 0, 0, -3, 14, 58, -5, 0, 0 },
{ 0, 0, -2, 9, 61, -4, 0, 0 },
{ 0, 0, -1, 4, 63, -2, 0, 0 }
}, [3 + FILTER_8TAP_SMOOTH] = {
{ 0, 0, 30, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 },
{ 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 },
{ 0, 0, 18, 58, 44, 8, 0, 0 },
{ 0, 0, 16, 56, 46, 10, 0, 0 },
{ 0, 0, 14, 54, 48, 12, 0, 0 },
{ 0, 0, 12, 52, 52, 12, 0, 0 },
{ 0, 0, 12, 48, 54, 14, 0, 0 },
{ 0, 0, 10, 46, 56, 16, 0, 0 },
{ 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 },
{ 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 },
{ 0, 0, 2, 34, 62, 30, 0, 0 }
{ 0, 0, 15, 31, 17, 1, 0, 0 },
{ 0, 0, 13, 31, 18, 2, 0, 0 },
{ 0, 0, 11, 31, 20, 2, 0, 0 },
{ 0, 0, 10, 30, 21, 3, 0, 0 },
{ 0, 0, 9, 29, 22, 4, 0, 0 },
{ 0, 0, 8, 28, 23, 5, 0, 0 },
{ 0, 0, 7, 27, 24, 6, 0, 0 },
{ 0, 0, 6, 26, 26, 6, 0, 0 },
{ 0, 0, 6, 24, 27, 7, 0, 0 },
{ 0, 0, 5, 23, 28, 8, 0, 0 },
{ 0, 0, 4, 22, 29, 9, 0, 0 },
{ 0, 0, 3, 21, 30, 10, 0, 0 },
{ 0, 0, 2, 20, 31, 11, 0, 0 },
{ 0, 0, 2, 18, 31, 13, 0, 0 },
{ 0, 0, 1, 17, 31, 15, 0, 0 }
}
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment