Commit bfdfd1aa authored by Ronald S. Bultje

Simplify SGR C code

- remove unused entry from tables.h;
- use non-sized types for scalar values;
- reduce size of intermediate tables from int32 to int16.
parent 7d1d7d28
......@@ -408,7 +408,7 @@ static void boxsum5sqr(int32_t *dst, const pixel *const src, const int w,
}
}
static void selfguided_filter(int32_t *dst, const pixel *src,
static void selfguided_filter(int16_t *dst, const pixel *src,
const ptrdiff_t src_stride, const int w,
const int h, const int n, const int s)
{
......@@ -441,8 +441,8 @@ static void selfguided_filter(int32_t *dst, const pixel *src,
const int b =
(BB[i] + (1 << (BITDEPTH - 8) >> 1)) >> (BITDEPTH - 8);
const uint32_t p = (a * n >= b * b) * (a * n - b * b);
const uint32_t z = (p * s + (1 << 19)) >> 20;
const unsigned p = a * n - b * b;
const unsigned z = (p * s + (1 << 19)) >> 20;
const int x = dav1d_sgr_x_by_xplus1[imin(z, 255)];
// This is where we invert A and B, so that B is of size coef.
......@@ -462,8 +462,8 @@ static void selfguided_filter(int32_t *dst, const pixel *src,
P[i + 1 - REST_UNIT_STRIDE] + P[i + 1 + REST_UNIT_STRIDE]) * 5)
for (; j < h - 1; j+=2) {
for (int i = 0; i < w; i++) {
const int32_t a = SIX_NEIGHBORS(B, i);
const int32_t b = SIX_NEIGHBORS(A, i);
const int a = SIX_NEIGHBORS(B, i);
const int b = SIX_NEIGHBORS(A, i);
dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
}
dst += 384 /* Maximum restoration width is 384 (256 * 1.5) */;
......@@ -471,8 +471,8 @@ static void selfguided_filter(int32_t *dst, const pixel *src,
B += REST_UNIT_STRIDE;
A += REST_UNIT_STRIDE;
for (int i = 0; i < w; i++) {
const int32_t a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
const int32_t b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
const int a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
const int b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
dst[i] = (a * src[i] + b + (1 << 7)) >> 8;
}
dst += 384 /* Maximum restoration width is 384 (256 * 1.5) */;
......@@ -482,8 +482,8 @@ static void selfguided_filter(int32_t *dst, const pixel *src,
}
if (j + 1 == h) { // Last row, when number of rows is odd
for (int i = 0; i < w; i++) {
const int32_t a = SIX_NEIGHBORS(B, i);
const int32_t b = SIX_NEIGHBORS(A, i);
const int a = SIX_NEIGHBORS(B, i);
const int b = SIX_NEIGHBORS(A, i);
dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
}
}
......@@ -495,8 +495,8 @@ static void selfguided_filter(int32_t *dst, const pixel *src,
P[i + 1 - REST_UNIT_STRIDE] + P[i + 1 + REST_UNIT_STRIDE]) * 3)
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
const int32_t a = EIGHT_NEIGHBORS(B, i);
const int32_t b = EIGHT_NEIGHBORS(A, i);
const int a = EIGHT_NEIGHBORS(B, i);
const int b = EIGHT_NEIGHBORS(A, i);
dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
}
dst += 384;
......@@ -522,7 +522,7 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
// Selfguided filter outputs to a maximum stripe height of 64 and a
// maximum restoration width of 384 (256 * 1.5)
int32_t dst[64 * 384];
int16_t dst[64 * 384];
// both r1 and r0 can't be zero
if (!dav1d_sgr_params[sgr_idx][0]) {
......@@ -531,8 +531,8 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
const int w1 = (1 << 7) - sgr_w[1];
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
const int32_t u = (p[i] << 4);
const int32_t v = (u << 7) + w1 * (dst[j * 384 + i] - u);
const int u = (p[i] << 4);
const int v = (u << 7) + w1 * (dst[j * 384 + i] - u);
p[i] = iclip_pixel((v + (1 << 10)) >> 11);
}
p += PXSTRIDE(p_stride);
......@@ -543,14 +543,14 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
const int w0 = sgr_w[0];
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
const int32_t u = (p[i] << 4);
const int32_t v = (u << 7) + w0 * (dst[j * 384 + i] - u);
const int u = (p[i] << 4);
const int v = (u << 7) + w0 * (dst[j * 384 + i] - u);
p[i] = iclip_pixel((v + (1 << 10)) >> 11);
}
p += PXSTRIDE(p_stride);
}
} else {
int32_t dst1[64 * 384];
int16_t dst1[64 * 384];
const int s0 = dav1d_sgr_params[sgr_idx][2];
const int s1 = dav1d_sgr_params[sgr_idx][3];
const int w0 = sgr_w[0];
......@@ -559,9 +559,9 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
selfguided_filter(dst1, tmp, REST_UNIT_STRIDE, w, h, 9, s1);
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
const int32_t u = (p[i] << 4);
const int32_t v = (u << 7) + w0 * (dst[j * 384 + i] - u) +
w1 * (dst1[j * 384 + i] - u);
const int u = (p[i] << 4);
const int v = (u << 7) + w0 * (dst[j * 384 + i] - u) +
w1 * (dst1[j * 384 + i] - u);
p[i] = iclip_pixel((v + (1 << 10)) >> 11);
}
p += PXSTRIDE(p_stride);
......
......@@ -108,7 +108,6 @@ extern const WarpedMotionParams dav1d_default_wm_params;
extern const int16_t dav1d_sgr_params[16][4];
extern const int16_t dav1d_sgr_x_by_xplus1[256];
extern const int16_t dav1d_sgr_one_by_x[25];
extern const int8_t dav1d_mc_subpel_filters[5][15][8];
extern const int8_t dav1d_mc_warp_filter[][8];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment