Commit 2e6c8a92 authored by Ronald S. Bultje

Change type of MC intermediates from coef to int16_t

Coef was originally chosen to accommodate 12 bits/component with
4 extra bits of intermediate precision + some under/overflow range, but
it turns out that 12 bits/component only uses 2 extra bits of
intermediate precision, so we don't need coef.
parent 012fced2
......@@ -278,7 +278,7 @@ struct Dav1dTileContext {
uint8_t *pal_idx;
int16_t *ac;
pixel *interintra, *lap;
coef *compinter;
int16_t *compinter;
} scratch;
ALIGN(uint8_t scratch_seg_mask[128 * 128], 32);
......
......@@ -124,7 +124,7 @@ int dav1d_open(Dav1dContext **const c_out,
t->f = f;
t->cf = dav1d_alloc_aligned(32 * 32 * sizeof(int32_t), 32);
if (!t->cf) goto error;
t->scratch.mem = dav1d_alloc_aligned(128 * 128 * 8, 32);
t->scratch.mem = dav1d_alloc_aligned(128 * 128 * 4, 32);
if (!t->scratch.mem) goto error;
memset(t->cf, 0, 32 * 32 * sizeof(int32_t));
t->emu_edge =
......
......@@ -54,40 +54,40 @@ void (name)(pixel *dst, ptrdiff_t dst_stride, \
typedef decl_warp8x8_fn(*warp8x8_fn);
#define decl_mct_fn(name) \
void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \
void (name)(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, \
int w, int h, int mx, int my)
typedef decl_mct_fn(*mct_fn);
#define decl_mct_scaled_fn(name) \
void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \
void (name)(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, \
int w, int h, int mx, int my, int dx, int dy)
typedef decl_mct_scaled_fn(*mct_scaled_fn);
#define decl_warp8x8t_fn(name) \
void (name)(coef *tmp, const ptrdiff_t tmp_stride, \
void (name)(int16_t *tmp, const ptrdiff_t tmp_stride, \
const pixel *src, ptrdiff_t src_stride, \
const int16_t *abcd, int mx, int my)
typedef decl_warp8x8t_fn(*warp8x8t_fn);
#define decl_avg_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const coef *tmp1, const coef *tmp2, int w, int h)
const int16_t *tmp1, const int16_t *tmp2, int w, int h)
typedef decl_avg_fn(*avg_fn);
#define decl_w_avg_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const coef *tmp1, const coef *tmp2, int w, int h, int weight)
const int16_t *tmp1, const int16_t *tmp2, int w, int h, int weight)
typedef decl_w_avg_fn(*w_avg_fn);
#define decl_mask_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const coef *tmp1, const coef *tmp2, int w, int h, \
const int16_t *tmp1, const int16_t *tmp2, int w, int h, \
const uint8_t *mask)
typedef decl_mask_fn(*mask_fn);
#define decl_w_mask_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const coef *tmp1, const coef *tmp2, int w, int h, \
const int16_t *tmp1, const int16_t *tmp2, int w, int h, \
uint8_t *mask, int sign)
typedef decl_w_mask_fn(*w_mask_fn);
......
......@@ -50,7 +50,7 @@ put_c(pixel *dst, const ptrdiff_t dst_stride,
}
static NOINLINE void
prep_c(coef *tmp, const pixel *src, const ptrdiff_t src_stride,
prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride,
const int w, int h)
{
do {
......@@ -105,7 +105,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
if (fh) {
if (fv) {
int tmp_h = h + 7;
coef mid[128 * 135], *mid_ptr = mid;
int16_t mid[128 * 135], *mid_ptr = mid;
src -= src_stride * 3;
do {
......@@ -154,7 +154,7 @@ put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
const int dx, const int dy, const int filter_type)
{
int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
coef mid[128 * (256 + 7)], *mid_ptr = mid;
int16_t mid[128 * (256 + 7)], *mid_ptr = mid;
src_stride = PXSTRIDE(src_stride);
src -= src_stride * 3;
......@@ -191,7 +191,7 @@ put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
}
static NOINLINE void
prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, const int my,
const int filter_type)
{
......@@ -201,7 +201,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
if (fh) {
if (fv) {
int tmp_h = h + 7;
coef mid[128 * 135], *mid_ptr = mid;
int16_t mid[128 * 135], *mid_ptr = mid;
src -= src_stride * 3;
do {
......@@ -242,12 +242,12 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
}
static NOINLINE void
prep_8tap_scaled_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, int my,
const int dx, const int dy, const int filter_type)
{
int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
coef mid[128 * (256 + 7)], *mid_ptr = mid;
int16_t mid[128 * (256 + 7)], *mid_ptr = mid;
src_stride = PXSTRIDE(src_stride);
src -= src_stride * 3;
......@@ -304,7 +304,7 @@ static void put_8tap_##type##_scaled_c(pixel *const dst, \
put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
type_h | (type_v << 2)); \
} \
static void prep_8tap_##type##_c(coef *const tmp, \
static void prep_8tap_##type##_c(int16_t *const tmp, \
const pixel *const src, \
const ptrdiff_t src_stride, \
const int w, const int h, \
......@@ -313,7 +313,7 @@ static void prep_8tap_##type##_c(coef *const tmp, \
prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \
type_h | (type_v << 2)); \
} \
static void prep_8tap_##type##_scaled_c(coef *const tmp, \
static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \
const pixel *const src, \
const ptrdiff_t src_stride, \
const int w, const int h, \
......@@ -352,7 +352,7 @@ static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride,
if (mx) {
if (my) {
coef mid[128 * 129], *mid_ptr = mid;
int16_t mid[128 * 129], *mid_ptr = mid;
int tmp_h = h + 1;
do {
......@@ -398,7 +398,7 @@ static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride,
const int dx, const int dy)
{
int tmp_h = (((h - 1) * dy + my) >> 10) + 2;
coef mid[128 * (256 + 1)], *mid_ptr = mid;
int16_t mid[128 * (256 + 1)], *mid_ptr = mid;
do {
int x;
......@@ -429,7 +429,7 @@ static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride,
} while (--h);
}
static void prep_bilin_c(coef *tmp,
static void prep_bilin_c(int16_t *tmp,
const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, const int my)
{
......@@ -437,7 +437,7 @@ static void prep_bilin_c(coef *tmp,
if (mx) {
if (my) {
coef mid[128 * 129], *mid_ptr = mid;
int16_t mid[128 * 129], *mid_ptr = mid;
int tmp_h = h + 1;
do {
......@@ -477,13 +477,13 @@ static void prep_bilin_c(coef *tmp,
prep_c(tmp, src, src_stride, w, h);
}
static void prep_bilin_scaled_c(coef *tmp,
static void prep_bilin_scaled_c(int16_t *tmp,
const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, int my,
const int dx, const int dy)
{
int tmp_h = (((h - 1) * dy + my) >> 10) + 2;
coef mid[128 * (256 + 1)], *mid_ptr = mid;
int16_t mid[128 * (256 + 1)], *mid_ptr = mid;
do {
int x;
......@@ -515,7 +515,7 @@ static void prep_bilin_scaled_c(coef *tmp,
}
static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h)
const int16_t *tmp1, const int16_t *tmp2, const int w, int h)
{
do {
for (int x = 0; x < w; x++)
......@@ -528,7 +528,7 @@ static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
}
static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h,
const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
const int weight)
{
do {
......@@ -543,7 +543,7 @@ static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride,
}
static void mask_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h,
const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
const uint8_t *mask)
{
do {
......@@ -601,7 +601,7 @@ static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
}
static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h,
const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
uint8_t *mask, const int sign,
const int ss_hor, const int ss_ver)
{
......@@ -642,7 +642,7 @@ static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
#define w_mask_fns(ssn, ss_hor, ss_ver) \
static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \
const coef *const tmp1, const coef *const tmp2, \
const int16_t *const tmp1, const int16_t *const tmp2, \
const int w, const int h, uint8_t *mask, \
const int sign) \
{ \
......@@ -675,7 +675,7 @@ static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel *src, const ptrdiff_t src_stride,
const int16_t *const abcd, int mx, int my)
{
coef mid[15 * 8], *mid_ptr = mid;
int16_t mid[15 * 8], *mid_ptr = mid;
src -= 3 * PXSTRIDE(src_stride);
for (int y = 0; y < 15; y++, mx += abcd[1]) {
......@@ -702,11 +702,11 @@ static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride,
}
}
static void warp_affine_8x8t_c(coef *tmp, const ptrdiff_t tmp_stride,
static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride,
const pixel *src, const ptrdiff_t src_stride,
const int16_t *const abcd, int mx, int my)
{
coef mid[15 * 8], *mid_ptr = mid;
int16_t mid[15 * 8], *mid_ptr = mid;
src -= 3 * PXSTRIDE(src_stride);
for (int y = 0; y < 15; y++, mx += abcd[1]) {
......
......@@ -493,7 +493,7 @@ void bytefn(dav1d_read_coef_blocks)(Dav1dTileContext *const t,
}
static int mc(Dav1dTileContext *const t,
pixel *const dst8, coef *const dst16, const ptrdiff_t dst_stride,
pixel *const dst8, int16_t *const dst16, const ptrdiff_t dst_stride,
const int bw4, const int bh4,
const int bx, const int by, const int pl,
const mv mv, const Dav1dThreadPicture *const refp, const int refidx,
......@@ -671,7 +671,7 @@ static int obmc(Dav1dTileContext *const t,
}
static int warp_affine(Dav1dTileContext *const t,
pixel *dst8, coef *dst16, const ptrdiff_t dstride,
pixel *dst8, int16_t *dst16, const ptrdiff_t dstride,
const uint8_t *const b_dim, const int pl,
const Dav1dThreadPicture *const refp,
const Dav1dWarpedMotionParams *const wmp)
......@@ -1357,7 +1357,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
} else {
const enum Filter2d filter_2d = b->filter2d;
// Maximum super block size is 128x128
coef (*tmp)[128 * 128] = (coef (*)[128 * 128]) t->scratch.compinter;
int16_t (*tmp)[128 * 128] = (int16_t (*)[128 * 128]) t->scratch.compinter;
int jnt_weight;
uint8_t *const seg_mask = t->scratch_seg_mask;
const uint8_t *mask;
......@@ -1372,8 +1372,6 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
} else {
res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
b->mv[i], refp, b->ref[i], filter_2d);
if (DEBUG_BLOCK_INFO)
coef_dump(tmp[i], bw4*4, bh4*4, 3, "med");
if (res) return res;
}
}
......
......@@ -80,14 +80,14 @@ static void check_mc(Dav1dMCDSPContext *const c) {
static void check_mct(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(pixel, src_buf, 135 * 135,);
ALIGN_STK_32(coef, c_tmp, 128 * 128,);
ALIGN_STK_32(coef, a_tmp, 128 * 128,);
ALIGN_STK_32(int16_t, c_tmp, 128 * 128,);
ALIGN_STK_32(int16_t, a_tmp, 128 * 128,);
const pixel *src = src_buf + 135 * 3 + 3;
for (int i = 0; i < 135 * 135; i++)
src_buf[i] = rand();
declare_func(void, coef *tmp, const pixel *src, ptrdiff_t src_stride,
declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
int w, int h, int mx, int my);
for (int filter = 0; filter < N_2D_FILTERS; filter++)
......@@ -113,7 +113,7 @@ static void check_mct(Dav1dMCDSPContext *const c) {
}
static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
coef (*const tmp)[128 * 128])
int16_t (*const tmp)[128 * 128])
{
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 135 * 135; j++)
......@@ -125,14 +125,14 @@ static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
}
static void check_avg(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(coef, tmp, 2, [128 * 128]);
ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);
ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,);
init_tmp(c, c_dst, tmp);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1,
const coef *tmp2, int w, int h);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
const int16_t *tmp2, int w, int h);
for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH))
......@@ -149,14 +149,14 @@ static void check_avg(Dav1dMCDSPContext *const c) {
}
static void check_w_avg(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(coef, tmp, 2, [128 * 128]);
ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);
ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,);
init_tmp(c, c_dst, tmp);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1,
const coef *tmp2, int w, int h, int weight);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
const int16_t *tmp2, int w, int h, int weight);
for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH))
......@@ -175,7 +175,7 @@ static void check_w_avg(Dav1dMCDSPContext *const c) {
}
static void check_mask(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(coef, tmp, 2, [128 * 128]);
ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);
ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,);
ALIGN_STK_32(uint8_t, mask, 128 * 128,);
......@@ -184,8 +184,8 @@ static void check_mask(Dav1dMCDSPContext *const c) {
for (int i = 0; i < 128 * 128; i++)
mask[i] = rand() % 65;
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1,
const coef *tmp2, int w, int h, const uint8_t *mask);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
const int16_t *tmp2, int w, int h, const uint8_t *mask);
for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH))
......@@ -202,7 +202,7 @@ static void check_mask(Dav1dMCDSPContext *const c) {
}
static void check_w_mask(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(coef, tmp, 2, [128 * 128]);
ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);
ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,);
ALIGN_STK_32(uint8_t, c_mask, 128 * 128,);
......@@ -210,8 +210,8 @@ static void check_w_mask(Dav1dMCDSPContext *const c) {
init_tmp(c, c_dst, tmp);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1,
const coef *tmp2, int w, int h, uint8_t *mask, int sign);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
const int16_t *tmp2, int w, int h, uint8_t *mask, int sign);
static const uint16_t ss[] = { 444, 422, 420 };
......@@ -360,13 +360,13 @@ static void check_warp8x8(Dav1dMCDSPContext *const c) {
static void check_warp8x8t(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(pixel, src_buf, 15 * 15,);
ALIGN_STK_32(coef, c_tmp, 8 * 8,);
ALIGN_STK_32(coef, a_tmp, 8 * 8,);
ALIGN_STK_32(int16_t, c_tmp, 8 * 8,);
ALIGN_STK_32(int16_t, a_tmp, 8 * 8,);
int16_t abcd[4];
const pixel *src = src_buf + 15 * 3 + 3;
const ptrdiff_t src_stride = 15 * sizeof(pixel);
declare_func(void, coef *tmp, ptrdiff_t tmp_stride, const pixel *src,
declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,
ptrdiff_t src_stride, const int16_t *abcd, int mx, int my);
if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment