Commit 7aea6858 authored by Ronald S. Bultje

av1: do C inverse transforms in int32_t precision

Fixes C part of #321.
parent cdf4a3bc
Pipeline #12186 passed with stages
in 7 minutes and 44 seconds
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* Copyright © 2018-2019, VideoLAN and dav1d authors
* Copyright © 2018-2019, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
......@@ -30,7 +30,9 @@
#include <stddef.h>
#include <stdint.h>
#include "common/attributes.h"
#include "common/intops.h"
#include "src/itx_1d.h"
#define CLIP(a) iclip(a, min, max)
......@@ -60,9 +62,9 @@
* wrap around.
*/
static void NOINLINE
inv_dct4_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int max)
void dav1d_inv_dct4_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int max)
{
const int min = -max - 1;
const int in0 = in[0 * in_s], in1 = in[1 * in_s];
......@@ -79,14 +81,14 @@ inv_dct4_1d(const coef *const in, const ptrdiff_t in_s,
out[3 * out_s] = CLIP(t0 - t3);
}
static void NOINLINE
inv_dct8_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int max)
void dav1d_inv_dct8_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int max)
{
const int min = -max - 1;
coef tmp[4];
int32_t tmp[4];
inv_dct4_1d(in, in_s * 2, tmp, 1, max);
dav1d_inv_dct4_1d_c(in, in_s * 2, tmp, 1, max);
const int in1 = in[1 * in_s], in3 = in[3 * in_s];
const int in5 = in[5 * in_s], in7 = in[7 * in_s];
......@@ -114,14 +116,14 @@ inv_dct8_1d(const coef *const in, const ptrdiff_t in_s,
out[7 * out_s] = CLIP(tmp[0] - t7);
}
static void NOINLINE
inv_dct16_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int max)
void dav1d_inv_dct16_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int max)
{
const int min = -max - 1;
coef tmp[8];
int32_t tmp[8];
inv_dct8_1d(in, in_s * 2, tmp, 1, max);
dav1d_inv_dct8_1d_c(in, in_s * 2, tmp, 1, max);
const int in1 = in[ 1 * in_s], in3 = in[ 3 * in_s];
const int in5 = in[ 5 * in_s], in7 = in[ 7 * in_s];
......@@ -183,14 +185,14 @@ inv_dct16_1d(const coef *const in, const ptrdiff_t in_s,
out[15 * out_s] = CLIP(tmp[0] - t15a);
}
static void NOINLINE
inv_dct32_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int max)
void dav1d_inv_dct32_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int max)
{
const int min = -max - 1;
coef tmp[16];
int32_t tmp[16];
inv_dct16_1d(in, in_s * 2, tmp, 1, max);
dav1d_inv_dct16_1d_c(in, in_s * 2, tmp, 1, max);
const int in1 = in[ 1 * in_s], in3 = in[ 3 * in_s];
const int in5 = in[ 5 * in_s], in7 = in[ 7 * in_s];
......@@ -330,14 +332,14 @@ inv_dct32_1d(const coef *const in, const ptrdiff_t in_s,
out[31 * out_s] = CLIP(tmp[ 0] - t31);
}
static void NOINLINE
inv_dct64_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int max)
void dav1d_inv_dct64_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int max)
{
const int min = -max - 1;
coef tmp[32];
int32_t tmp[32];
inv_dct32_1d(in, in_s * 2, tmp, 1, max);
dav1d_inv_dct32_1d_c(in, in_s * 2, tmp, 1, max);
const int in1 = in[ 1 * in_s], in3 = in[ 3 * in_s];
const int in5 = in[ 5 * in_s], in7 = in[ 7 * in_s];
......@@ -655,9 +657,9 @@ inv_dct64_1d(const coef *const in, const ptrdiff_t in_s,
out[63 * out_s] = CLIP(tmp[ 0] - t63a);
}
static void NOINLINE
inv_adst4_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int range)
void dav1d_inv_adst4_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int range)
{
const int in0 = in[0 * in_s], in1 = in[1 * in_s];
const int in2 = in[2 * in_s], in3 = in[3 * in_s];
......@@ -674,9 +676,9 @@ inv_adst4_1d(const coef *const in, const ptrdiff_t in_s,
in0 + in2 - in1;
}
static void NOINLINE
inv_adst8_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int max)
void dav1d_inv_adst8_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int max)
{
const int min = -max - 1;
const int in0 = in[0 * in_s], in1 = in[1 * in_s];
......@@ -723,9 +725,9 @@ inv_adst8_1d(const coef *const in, const ptrdiff_t in_s,
out[5 * out_s] = -(((t6 - t7) * 181 + 128) >> 8);
}
static void NOINLINE
inv_adst16_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int max)
void dav1d_inv_adst16_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int max)
{
const int min = -max - 1;
const int in0 = in[ 0 * in_s], in1 = in[ 1 * in_s];
......@@ -834,10 +836,11 @@ inv_adst16_1d(const coef *const in, const ptrdiff_t in_s,
}
/*
 * Generates the flipped variant of the sz-point inverse ADST. The generated
 * function forwards to the regular ADST with the output pointer moved to the
 * last element (index sz - 1) and the output stride negated, so the results
 * are stored in reverse order. Input pointer, input stride and the trailing
 * int argument are passed through unchanged.
 */
#define flip_inv_adst(sz) \
static void inv_flipadst##sz##_1d(const coef *const in, const ptrdiff_t in_s, \
coef *const out, const ptrdiff_t out_s, const int range) \
void dav1d_inv_flipadst##sz##_1d_c(const int32_t *const in, const ptrdiff_t in_s, \
int32_t *const out, const ptrdiff_t out_s, \
const int range) \
{ \
inv_adst##sz##_1d(in, in_s, &out[(sz - 1) * out_s], -out_s, range); \
dav1d_inv_adst##sz##_1d_c(in, in_s, &out[(sz - 1) * out_s], -out_s, range); \
}
flip_inv_adst(4)
......@@ -846,42 +849,41 @@ flip_inv_adst(16)
#undef flip_inv_adst
static void NOINLINE
inv_identity4_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int range)
/*
 * 4-point identity transform.
 *
 * For each of the four input samples (read with stride in_s) stores
 * v + ((v * 1697 + 2048) >> 12) to the output (written with stride out_s).
 * `range` is accepted only so the function matches the shared 1-D transform
 * signature; it is not used.
 */
void dav1d_inv_identity4_1d_c(const int32_t *const in, const ptrdiff_t in_s,
                              int32_t *const out, const ptrdiff_t out_s,
                              const int range)
{
    (void)range;

    for (ptrdiff_t k = 0; k < 4; k++) {
        const int32_t v = in[k * in_s];
        /* Rounded fractional part of the scale, in 12-bit fixed point. */
        const int32_t frac = (v * 1697 + 2048) >> 12;

        out[k * out_s] = v + frac;
    }
}
static void NOINLINE
inv_identity8_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int range)
/*
 * 8-point identity transform.
 *
 * Stores twice each of the eight input samples. Input is read with stride
 * in_s and output written with stride out_s. `range` is accepted only so the
 * function matches the shared 1-D transform signature; it is not used.
 */
void dav1d_inv_identity8_1d_c(const int32_t *const in, const ptrdiff_t in_s,
                              int32_t *const out, const ptrdiff_t out_s,
                              const int range)
{
    (void)range;

    for (ptrdiff_t k = 0; k < 8; k++) {
        const int32_t v = in[k * in_s];

        out[k * out_s] = v * 2;
    }
}
static void NOINLINE
inv_identity16_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int range)
/*
 * 16-point identity transform.
 *
 * For each of the sixteen input samples (read with stride in_s) stores
 * 2 * v + ((v * 1697 + 1024) >> 11) to the output (written with stride
 * out_s). `range` is accepted only so the function matches the shared 1-D
 * transform signature; it is not used.
 */
void dav1d_inv_identity16_1d_c(const int32_t *const in, const ptrdiff_t in_s,
                               int32_t *const out, const ptrdiff_t out_s,
                               const int range)
{
    (void)range;

    for (ptrdiff_t k = 0; k < 16; k++) {
        const int32_t v = in[k * in_s];
        /* Rounded fractional part of the scale, in 11-bit fixed point. */
        const int32_t frac = (v * 1697 + 1024) >> 11;

        out[k * out_s] = 2 * v + frac;
    }
}
static void NOINLINE
inv_identity32_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s, const int range)
/*
 * 32-point identity transform.
 *
 * Stores four times each of the thirty-two input samples. Input is read with
 * stride in_s and output written with stride out_s. `range` is accepted only
 * so the function matches the shared 1-D transform signature; it is not used.
 */
void dav1d_inv_identity32_1d_c(const int32_t *const in, const ptrdiff_t in_s,
                               int32_t *const out, const ptrdiff_t out_s,
                               const int range)
{
    (void)range;

    for (ptrdiff_t k = 0; k < 32; k++) {
        const int32_t v = in[k * in_s];

        out[k * out_s] = v * 4;
    }
}
static void NOINLINE
inv_wht4_1d(const coef *const in, const ptrdiff_t in_s,
coef *const out, const ptrdiff_t out_s,
const int pass)
void dav1d_inv_wht4_1d_c(const int32_t *const in, const ptrdiff_t in_s,
int32_t *const out, const ptrdiff_t out_s,
const int pass)
{
const int sh = 2 * !pass;
const int in0 = in[0 * in_s] >> sh, in1 = in[1 * in_s] >> sh;
......
/*
* Copyright © 2018-2019, VideoLAN and dav1d authors
* Copyright © 2018-2019, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stddef.h>
#include <stdint.h>
#ifndef DAV1D_SRC_ITX_1D_H
#define DAV1D_SRC_ITX_1D_H
/*
 * Declares a 1-D inverse transform function named `name` (or, when `name` is
 * written as `*ident`, a pointer to such a function).
 *
 *   in,  in_s   input samples and the stride between consecutive samples
 *   out, out_s  output samples and the stride between consecutive samples
 *   range       trailing int argument; the definitions name it `max`,
 *               `range` or `pass` depending on the transform
 */
#define decl_itx_1d_fn(name) \
void (name)(const int32_t *in, ptrdiff_t in_s, \
int32_t *out, ptrdiff_t out_s, const int range)
/* Pointer type for any function with the decl_itx_1d_fn signature. */
typedef decl_itx_1d_fn(*itx_1d_fn);
/* C implementations, one prototype per transform type and size. */
decl_itx_1d_fn(dav1d_inv_dct4_1d_c);
decl_itx_1d_fn(dav1d_inv_dct8_1d_c);
decl_itx_1d_fn(dav1d_inv_dct16_1d_c);
decl_itx_1d_fn(dav1d_inv_dct32_1d_c);
decl_itx_1d_fn(dav1d_inv_dct64_1d_c);
decl_itx_1d_fn(dav1d_inv_adst4_1d_c);
decl_itx_1d_fn(dav1d_inv_adst8_1d_c);
decl_itx_1d_fn(dav1d_inv_adst16_1d_c);
decl_itx_1d_fn(dav1d_inv_flipadst4_1d_c);
decl_itx_1d_fn(dav1d_inv_flipadst8_1d_c);
decl_itx_1d_fn(dav1d_inv_flipadst16_1d_c);
decl_itx_1d_fn(dav1d_inv_identity4_1d_c);
decl_itx_1d_fn(dav1d_inv_identity8_1d_c);
decl_itx_1d_fn(dav1d_inv_identity16_1d_c);
decl_itx_1d_fn(dav1d_inv_identity32_1d_c);
decl_itx_1d_fn(dav1d_inv_wht4_1d_c);
#endif /* DAV1D_SRC_ITX_1D_H */
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* Copyright © 2018-2019, VideoLAN and dav1d authors
* Copyright © 2018-2019, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
......@@ -35,11 +35,7 @@
#include "common/intops.h"
#include "src/itx.h"
#include "src/itx_1d.c"
typedef void (*itx_1d_fn)(const coef *in, ptrdiff_t in_s,
coef *out, ptrdiff_t out_s, const int range);
#include "src/itx_1d.h"
static void NOINLINE
inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
......@@ -73,29 +69,21 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
const ptrdiff_t sh = imin(h, 32), sw = imin(w, 32);
// Maximum value for h and w is 64
coef tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
int32_t tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
const int row_clip_max = (1 << (bitdepth + 8 - 1)) - 1;
const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1;
const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) - 1;
if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
for (i = 0; i < sh; i++) {
if (w != sw || is_rect2) {
for (j = 0; j < sw; j++) {
in_mem[j] = coeff[i + j * sh];
if (is_rect2)
in_mem[j] = (in_mem[j] * 2896 + 2048) >> 12;
}
first_1d_fn(in_mem, 1, &tmp[i * w], 1, row_clip_max);
} else {
first_1d_fn(&coeff[i], sh, &tmp[i * w], 1, row_clip_max);
for (j = 0; j < sw; j++) {
in_mem[j] = coeff[i + j * sh];
if (is_rect2)
in_mem[j] = (in_mem[j] * 2896 + 2048) >> 12;
}
first_1d_fn(in_mem, 1, &tmp[i * w], 1, row_clip_max);
for (j = 0; j < w; j++)
#if BITDEPTH == 8
tmp[i * w + j] = (tmp[i * w + j] + rnd) >> shift;
#else
tmp[i * w + j] = iclip((tmp[i * w + j] + rnd) >> shift,
-col_clip_max - 1, col_clip_max);
#endif
}
if (h != sh) memset(&tmp[sh * w], 0, w * (h - sh) * sizeof(*tmp));
......@@ -118,8 +106,8 @@ inv_txfm_add_##type1##_##type2##_##w##x##h##_c(pixel *dst, \
HIGHBD_DECL_SUFFIX) \
{ \
inv_txfm_add_c(dst, stride, coeff, eob, w, h, shift, \
inv_##type1##w##_1d, inv_##type2##h##_1d, has_dconly \
HIGHBD_TAIL_SUFFIX); \
dav1d_inv_##type1##w##_1d_c, dav1d_inv_##type2##h##_1d_c, \
has_dconly HIGHBD_TAIL_SUFFIX); \
}
#define inv_txfm_fn64(w, h, shift) \
......@@ -176,15 +164,18 @@ static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride,
const int bitdepth = bitdepth_from_max(bitdepth_max);
const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1;
const int col_clip_min = -col_clip_max - 1;
coef tmp[4 * 4], out[4];
int32_t tmp[4 * 4], out[4], in_mem[4];
for (int i = 0; i < 4; i++)
inv_wht4_1d(&coeff[i], 4, &tmp[i * 4], 1, 0);
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++)
in_mem[j] = coeff[i + j * 4];
dav1d_inv_wht4_1d_c(in_mem, 1, &tmp[i * 4], 1, 0);
}
for (int k = 0; k < 4 * 4; k++)
tmp[k] = iclip(tmp[k], col_clip_min, col_clip_max);
for (int i = 0; i < 4; i++) {
inv_wht4_1d(&tmp[i], 4, out, 1, 1);
dav1d_inv_wht4_1d_c(&tmp[i], 4, out, 1, 1);
for (int j = 0; j < 4; j++)
dst[i + j * PXSTRIDE(stride)] =
iclip_pixel(dst[i + j * PXSTRIDE(stride)] + out[j]);
......
......@@ -35,6 +35,7 @@ libdav1d_sources = files(
'dequant_tables.c',
'getbits.c',
'intra_edge.c',
'itx_1d.c',
'lf_mask.c',
'log.c',
'msac.c',
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment