Commit 2e6c8a92 authored by Ronald S. Bultje

Change type of MC intermediates from coef to int16_t

Coef was originally chosen to accommodate 12 bits/component with
4 extra precision intermediates + some under/overflow range, but
it turns out that 12 bits/component only uses 2 extra precision
intermediates, so we don't need coef.
parent 012fced2
...@@ -278,7 +278,7 @@ struct Dav1dTileContext { ...@@ -278,7 +278,7 @@ struct Dav1dTileContext {
uint8_t *pal_idx; uint8_t *pal_idx;
int16_t *ac; int16_t *ac;
pixel *interintra, *lap; pixel *interintra, *lap;
coef *compinter; int16_t *compinter;
} scratch; } scratch;
ALIGN(uint8_t scratch_seg_mask[128 * 128], 32); ALIGN(uint8_t scratch_seg_mask[128 * 128], 32);
......
...@@ -124,7 +124,7 @@ int dav1d_open(Dav1dContext **const c_out, ...@@ -124,7 +124,7 @@ int dav1d_open(Dav1dContext **const c_out,
t->f = f; t->f = f;
t->cf = dav1d_alloc_aligned(32 * 32 * sizeof(int32_t), 32); t->cf = dav1d_alloc_aligned(32 * 32 * sizeof(int32_t), 32);
if (!t->cf) goto error; if (!t->cf) goto error;
t->scratch.mem = dav1d_alloc_aligned(128 * 128 * 8, 32); t->scratch.mem = dav1d_alloc_aligned(128 * 128 * 4, 32);
if (!t->scratch.mem) goto error; if (!t->scratch.mem) goto error;
memset(t->cf, 0, 32 * 32 * sizeof(int32_t)); memset(t->cf, 0, 32 * 32 * sizeof(int32_t));
t->emu_edge = t->emu_edge =
......
...@@ -54,40 +54,40 @@ void (name)(pixel *dst, ptrdiff_t dst_stride, \ ...@@ -54,40 +54,40 @@ void (name)(pixel *dst, ptrdiff_t dst_stride, \
typedef decl_warp8x8_fn(*warp8x8_fn); typedef decl_warp8x8_fn(*warp8x8_fn);
#define decl_mct_fn(name) \ #define decl_mct_fn(name) \
void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \ void (name)(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, \
int w, int h, int mx, int my) int w, int h, int mx, int my)
typedef decl_mct_fn(*mct_fn); typedef decl_mct_fn(*mct_fn);
#define decl_mct_scaled_fn(name) \ #define decl_mct_scaled_fn(name) \
void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \ void (name)(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, \
int w, int h, int mx, int my, int dx, int dy) int w, int h, int mx, int my, int dx, int dy)
typedef decl_mct_scaled_fn(*mct_scaled_fn); typedef decl_mct_scaled_fn(*mct_scaled_fn);
#define decl_warp8x8t_fn(name) \ #define decl_warp8x8t_fn(name) \
void (name)(coef *tmp, const ptrdiff_t tmp_stride, \ void (name)(int16_t *tmp, const ptrdiff_t tmp_stride, \
const pixel *src, ptrdiff_t src_stride, \ const pixel *src, ptrdiff_t src_stride, \
const int16_t *abcd, int mx, int my) const int16_t *abcd, int mx, int my)
typedef decl_warp8x8t_fn(*warp8x8t_fn); typedef decl_warp8x8t_fn(*warp8x8t_fn);
#define decl_avg_fn(name) \ #define decl_avg_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \ void (name)(pixel *dst, ptrdiff_t dst_stride, \
const coef *tmp1, const coef *tmp2, int w, int h) const int16_t *tmp1, const int16_t *tmp2, int w, int h)
typedef decl_avg_fn(*avg_fn); typedef decl_avg_fn(*avg_fn);
#define decl_w_avg_fn(name) \ #define decl_w_avg_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \ void (name)(pixel *dst, ptrdiff_t dst_stride, \
const coef *tmp1, const coef *tmp2, int w, int h, int weight) const int16_t *tmp1, const int16_t *tmp2, int w, int h, int weight)
typedef decl_w_avg_fn(*w_avg_fn); typedef decl_w_avg_fn(*w_avg_fn);
#define decl_mask_fn(name) \ #define decl_mask_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \ void (name)(pixel *dst, ptrdiff_t dst_stride, \
const coef *tmp1, const coef *tmp2, int w, int h, \ const int16_t *tmp1, const int16_t *tmp2, int w, int h, \
const uint8_t *mask) const uint8_t *mask)
typedef decl_mask_fn(*mask_fn); typedef decl_mask_fn(*mask_fn);
#define decl_w_mask_fn(name) \ #define decl_w_mask_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \ void (name)(pixel *dst, ptrdiff_t dst_stride, \
const coef *tmp1, const coef *tmp2, int w, int h, \ const int16_t *tmp1, const int16_t *tmp2, int w, int h, \
uint8_t *mask, int sign) uint8_t *mask, int sign)
typedef decl_w_mask_fn(*w_mask_fn); typedef decl_w_mask_fn(*w_mask_fn);
......
...@@ -50,7 +50,7 @@ put_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -50,7 +50,7 @@ put_c(pixel *dst, const ptrdiff_t dst_stride,
} }
static NOINLINE void static NOINLINE void
prep_c(coef *tmp, const pixel *src, const ptrdiff_t src_stride, prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride,
const int w, int h) const int w, int h)
{ {
do { do {
...@@ -105,7 +105,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride, ...@@ -105,7 +105,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
if (fh) { if (fh) {
if (fv) { if (fv) {
int tmp_h = h + 7; int tmp_h = h + 7;
coef mid[128 * 135], *mid_ptr = mid; int16_t mid[128 * 135], *mid_ptr = mid;
src -= src_stride * 3; src -= src_stride * 3;
do { do {
...@@ -154,7 +154,7 @@ put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -154,7 +154,7 @@ put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
const int dx, const int dy, const int filter_type) const int dx, const int dy, const int filter_type)
{ {
int tmp_h = (((h - 1) * dy + my) >> 10) + 8; int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
coef mid[128 * (256 + 7)], *mid_ptr = mid; int16_t mid[128 * (256 + 7)], *mid_ptr = mid;
src_stride = PXSTRIDE(src_stride); src_stride = PXSTRIDE(src_stride);
src -= src_stride * 3; src -= src_stride * 3;
...@@ -191,7 +191,7 @@ put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -191,7 +191,7 @@ put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
} }
static NOINLINE void static NOINLINE void
prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride, prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, const int my, const int w, int h, const int mx, const int my,
const int filter_type) const int filter_type)
{ {
...@@ -201,7 +201,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride, ...@@ -201,7 +201,7 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
if (fh) { if (fh) {
if (fv) { if (fv) {
int tmp_h = h + 7; int tmp_h = h + 7;
coef mid[128 * 135], *mid_ptr = mid; int16_t mid[128 * 135], *mid_ptr = mid;
src -= src_stride * 3; src -= src_stride * 3;
do { do {
...@@ -242,12 +242,12 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride, ...@@ -242,12 +242,12 @@ prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
} }
static NOINLINE void static NOINLINE void
prep_8tap_scaled_c(coef *tmp, const pixel *src, ptrdiff_t src_stride, prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, int my, const int w, int h, const int mx, int my,
const int dx, const int dy, const int filter_type) const int dx, const int dy, const int filter_type)
{ {
int tmp_h = (((h - 1) * dy + my) >> 10) + 8; int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
coef mid[128 * (256 + 7)], *mid_ptr = mid; int16_t mid[128 * (256 + 7)], *mid_ptr = mid;
src_stride = PXSTRIDE(src_stride); src_stride = PXSTRIDE(src_stride);
src -= src_stride * 3; src -= src_stride * 3;
...@@ -304,7 +304,7 @@ static void put_8tap_##type##_scaled_c(pixel *const dst, \ ...@@ -304,7 +304,7 @@ static void put_8tap_##type##_scaled_c(pixel *const dst, \
put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
type_h | (type_v << 2)); \ type_h | (type_v << 2)); \
} \ } \
static void prep_8tap_##type##_c(coef *const tmp, \ static void prep_8tap_##type##_c(int16_t *const tmp, \
const pixel *const src, \ const pixel *const src, \
const ptrdiff_t src_stride, \ const ptrdiff_t src_stride, \
const int w, const int h, \ const int w, const int h, \
...@@ -313,7 +313,7 @@ static void prep_8tap_##type##_c(coef *const tmp, \ ...@@ -313,7 +313,7 @@ static void prep_8tap_##type##_c(coef *const tmp, \
prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \
type_h | (type_v << 2)); \ type_h | (type_v << 2)); \
} \ } \
static void prep_8tap_##type##_scaled_c(coef *const tmp, \ static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \
const pixel *const src, \ const pixel *const src, \
const ptrdiff_t src_stride, \ const ptrdiff_t src_stride, \
const int w, const int h, \ const int w, const int h, \
...@@ -352,7 +352,7 @@ static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride, ...@@ -352,7 +352,7 @@ static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride,
if (mx) { if (mx) {
if (my) { if (my) {
coef mid[128 * 129], *mid_ptr = mid; int16_t mid[128 * 129], *mid_ptr = mid;
int tmp_h = h + 1; int tmp_h = h + 1;
do { do {
...@@ -398,7 +398,7 @@ static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride, ...@@ -398,7 +398,7 @@ static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride,
const int dx, const int dy) const int dx, const int dy)
{ {
int tmp_h = (((h - 1) * dy + my) >> 10) + 2; int tmp_h = (((h - 1) * dy + my) >> 10) + 2;
coef mid[128 * (256 + 1)], *mid_ptr = mid; int16_t mid[128 * (256 + 1)], *mid_ptr = mid;
do { do {
int x; int x;
...@@ -429,7 +429,7 @@ static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride, ...@@ -429,7 +429,7 @@ static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride,
} while (--h); } while (--h);
} }
static void prep_bilin_c(coef *tmp, static void prep_bilin_c(int16_t *tmp,
const pixel *src, ptrdiff_t src_stride, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, const int my) const int w, int h, const int mx, const int my)
{ {
...@@ -437,7 +437,7 @@ static void prep_bilin_c(coef *tmp, ...@@ -437,7 +437,7 @@ static void prep_bilin_c(coef *tmp,
if (mx) { if (mx) {
if (my) { if (my) {
coef mid[128 * 129], *mid_ptr = mid; int16_t mid[128 * 129], *mid_ptr = mid;
int tmp_h = h + 1; int tmp_h = h + 1;
do { do {
...@@ -477,13 +477,13 @@ static void prep_bilin_c(coef *tmp, ...@@ -477,13 +477,13 @@ static void prep_bilin_c(coef *tmp,
prep_c(tmp, src, src_stride, w, h); prep_c(tmp, src, src_stride, w, h);
} }
static void prep_bilin_scaled_c(coef *tmp, static void prep_bilin_scaled_c(int16_t *tmp,
const pixel *src, ptrdiff_t src_stride, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, int my, const int w, int h, const int mx, int my,
const int dx, const int dy) const int dx, const int dy)
{ {
int tmp_h = (((h - 1) * dy + my) >> 10) + 2; int tmp_h = (((h - 1) * dy + my) >> 10) + 2;
coef mid[128 * (256 + 1)], *mid_ptr = mid; int16_t mid[128 * (256 + 1)], *mid_ptr = mid;
do { do {
int x; int x;
...@@ -515,7 +515,7 @@ static void prep_bilin_scaled_c(coef *tmp, ...@@ -515,7 +515,7 @@ static void prep_bilin_scaled_c(coef *tmp,
} }
static void avg_c(pixel *dst, const ptrdiff_t dst_stride, static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h) const int16_t *tmp1, const int16_t *tmp2, const int w, int h)
{ {
do { do {
for (int x = 0; x < w; x++) for (int x = 0; x < w; x++)
...@@ -528,7 +528,7 @@ static void avg_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -528,7 +528,7 @@ static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
} }
static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride, static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h, const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
const int weight) const int weight)
{ {
do { do {
...@@ -543,7 +543,7 @@ static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -543,7 +543,7 @@ static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride,
} }
static void mask_c(pixel *dst, const ptrdiff_t dst_stride, static void mask_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h, const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
const uint8_t *mask) const uint8_t *mask)
{ {
do { do {
...@@ -601,7 +601,7 @@ static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, ...@@ -601,7 +601,7 @@ static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
} }
static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride, static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h, const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
uint8_t *mask, const int sign, uint8_t *mask, const int sign,
const int ss_hor, const int ss_ver) const int ss_hor, const int ss_ver)
{ {
...@@ -642,7 +642,7 @@ static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -642,7 +642,7 @@ static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
#define w_mask_fns(ssn, ss_hor, ss_ver) \ #define w_mask_fns(ssn, ss_hor, ss_ver) \
static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \ static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \
const coef *const tmp1, const coef *const tmp2, \ const int16_t *const tmp1, const int16_t *const tmp2, \
const int w, const int h, uint8_t *mask, \ const int w, const int h, uint8_t *mask, \
const int sign) \ const int sign) \
{ \ { \
...@@ -675,7 +675,7 @@ static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -675,7 +675,7 @@ static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel *src, const ptrdiff_t src_stride, const pixel *src, const ptrdiff_t src_stride,
const int16_t *const abcd, int mx, int my) const int16_t *const abcd, int mx, int my)
{ {
coef mid[15 * 8], *mid_ptr = mid; int16_t mid[15 * 8], *mid_ptr = mid;
src -= 3 * PXSTRIDE(src_stride); src -= 3 * PXSTRIDE(src_stride);
for (int y = 0; y < 15; y++, mx += abcd[1]) { for (int y = 0; y < 15; y++, mx += abcd[1]) {
...@@ -702,11 +702,11 @@ static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -702,11 +702,11 @@ static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride,
} }
} }
static void warp_affine_8x8t_c(coef *tmp, const ptrdiff_t tmp_stride, static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride,
const pixel *src, const ptrdiff_t src_stride, const pixel *src, const ptrdiff_t src_stride,
const int16_t *const abcd, int mx, int my) const int16_t *const abcd, int mx, int my)
{ {
coef mid[15 * 8], *mid_ptr = mid; int16_t mid[15 * 8], *mid_ptr = mid;
src -= 3 * PXSTRIDE(src_stride); src -= 3 * PXSTRIDE(src_stride);
for (int y = 0; y < 15; y++, mx += abcd[1]) { for (int y = 0; y < 15; y++, mx += abcd[1]) {
......
...@@ -493,7 +493,7 @@ void bytefn(dav1d_read_coef_blocks)(Dav1dTileContext *const t, ...@@ -493,7 +493,7 @@ void bytefn(dav1d_read_coef_blocks)(Dav1dTileContext *const t,
} }
static int mc(Dav1dTileContext *const t, static int mc(Dav1dTileContext *const t,
pixel *const dst8, coef *const dst16, const ptrdiff_t dst_stride, pixel *const dst8, int16_t *const dst16, const ptrdiff_t dst_stride,
const int bw4, const int bh4, const int bw4, const int bh4,
const int bx, const int by, const int pl, const int bx, const int by, const int pl,
const mv mv, const Dav1dThreadPicture *const refp, const int refidx, const mv mv, const Dav1dThreadPicture *const refp, const int refidx,
...@@ -671,7 +671,7 @@ static int obmc(Dav1dTileContext *const t, ...@@ -671,7 +671,7 @@ static int obmc(Dav1dTileContext *const t,
} }
static int warp_affine(Dav1dTileContext *const t, static int warp_affine(Dav1dTileContext *const t,
pixel *dst8, coef *dst16, const ptrdiff_t dstride, pixel *dst8, int16_t *dst16, const ptrdiff_t dstride,
const uint8_t *const b_dim, const int pl, const uint8_t *const b_dim, const int pl,
const Dav1dThreadPicture *const refp, const Dav1dThreadPicture *const refp,
const Dav1dWarpedMotionParams *const wmp) const Dav1dWarpedMotionParams *const wmp)
...@@ -1357,7 +1357,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize ...@@ -1357,7 +1357,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
} else { } else {
const enum Filter2d filter_2d = b->filter2d; const enum Filter2d filter_2d = b->filter2d;
// Maximum super block size is 128x128 // Maximum super block size is 128x128
coef (*tmp)[128 * 128] = (coef (*)[128 * 128]) t->scratch.compinter; int16_t (*tmp)[128 * 128] = (int16_t (*)[128 * 128]) t->scratch.compinter;
int jnt_weight; int jnt_weight;
uint8_t *const seg_mask = t->scratch_seg_mask; uint8_t *const seg_mask = t->scratch_seg_mask;
const uint8_t *mask; const uint8_t *mask;
...@@ -1372,8 +1372,6 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize ...@@ -1372,8 +1372,6 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
} else { } else {
res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0, res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
b->mv[i], refp, b->ref[i], filter_2d); b->mv[i], refp, b->ref[i], filter_2d);
if (DEBUG_BLOCK_INFO)
coef_dump(tmp[i], bw4*4, bh4*4, 3, "med");
if (res) return res; if (res) return res;
} }
} }
......
...@@ -80,14 +80,14 @@ static void check_mc(Dav1dMCDSPContext *const c) { ...@@ -80,14 +80,14 @@ static void check_mc(Dav1dMCDSPContext *const c) {
static void check_mct(Dav1dMCDSPContext *const c) { static void check_mct(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(pixel, src_buf, 135 * 135,); ALIGN_STK_32(pixel, src_buf, 135 * 135,);
ALIGN_STK_32(coef, c_tmp, 128 * 128,); ALIGN_STK_32(int16_t, c_tmp, 128 * 128,);
ALIGN_STK_32(coef, a_tmp, 128 * 128,); ALIGN_STK_32(int16_t, a_tmp, 128 * 128,);
const pixel *src = src_buf + 135 * 3 + 3; const pixel *src = src_buf + 135 * 3 + 3;
for (int i = 0; i < 135 * 135; i++) for (int i = 0; i < 135 * 135; i++)
src_buf[i] = rand(); src_buf[i] = rand();
declare_func(void, coef *tmp, const pixel *src, ptrdiff_t src_stride, declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
int w, int h, int mx, int my); int w, int h, int mx, int my);
for (int filter = 0; filter < N_2D_FILTERS; filter++) for (int filter = 0; filter < N_2D_FILTERS; filter++)
...@@ -113,7 +113,7 @@ static void check_mct(Dav1dMCDSPContext *const c) { ...@@ -113,7 +113,7 @@ static void check_mct(Dav1dMCDSPContext *const c) {
} }
static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf, static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
coef (*const tmp)[128 * 128]) int16_t (*const tmp)[128 * 128])
{ {
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
for (int j = 0; j < 135 * 135; j++) for (int j = 0; j < 135 * 135; j++)
...@@ -125,14 +125,14 @@ static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf, ...@@ -125,14 +125,14 @@ static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
} }
static void check_avg(Dav1dMCDSPContext *const c) { static void check_avg(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(coef, tmp, 2, [128 * 128]); ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);
ALIGN_STK_32(pixel, c_dst, 135 * 135,); ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,); ALIGN_STK_32(pixel, a_dst, 128 * 128,);
init_tmp(c, c_dst, tmp); init_tmp(c, c_dst, tmp);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1, declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
const coef *tmp2, int w, int h); const int16_t *tmp2, int w, int h);
for (int w = 4; w <= 128; w <<= 1) for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH)) if (check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH))
...@@ -149,14 +149,14 @@ static void check_avg(Dav1dMCDSPContext *const c) { ...@@ -149,14 +149,14 @@ static void check_avg(Dav1dMCDSPContext *const c) {
} }
static void check_w_avg(Dav1dMCDSPContext *const c) { static void check_w_avg(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(coef, tmp, 2, [128 * 128]); ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);
ALIGN_STK_32(pixel, c_dst, 135 * 135,); ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,); ALIGN_STK_32(pixel, a_dst, 128 * 128,);
init_tmp(c, c_dst, tmp); init_tmp(c, c_dst, tmp);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1, declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
const coef *tmp2, int w, int h, int weight); const int16_t *tmp2, int w, int h, int weight);
for (int w = 4; w <= 128; w <<= 1) for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH)) if (check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH))
...@@ -175,7 +175,7 @@ static void check_w_avg(Dav1dMCDSPContext *const c) { ...@@ -175,7 +175,7 @@ static void check_w_avg(Dav1dMCDSPContext *const c) {
} }
static void check_mask(Dav1dMCDSPContext *const c) { static void check_mask(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(coef, tmp, 2, [128 * 128]); ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);
ALIGN_STK_32(pixel, c_dst, 135 * 135,); ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,); ALIGN_STK_32(pixel, a_dst, 128 * 128,);
ALIGN_STK_32(uint8_t, mask, 128 * 128,); ALIGN_STK_32(uint8_t, mask, 128 * 128,);
...@@ -184,8 +184,8 @@ static void check_mask(Dav1dMCDSPContext *const c) { ...@@ -184,8 +184,8 @@ static void check_mask(Dav1dMCDSPContext *const c) {
for (int i = 0; i < 128 * 128; i++) for (int i = 0; i < 128 * 128; i++)
mask[i] = rand() % 65; mask[i] = rand() % 65;
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1, declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
const coef *tmp2, int w, int h, const uint8_t *mask); const int16_t *tmp2, int w, int h, const uint8_t *mask);
for (int w = 4; w <= 128; w <<= 1) for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH)) if (check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH))
...@@ -202,7 +202,7 @@ static void check_mask(Dav1dMCDSPContext *const c) { ...@@ -202,7 +202,7 @@ static void check_mask(Dav1dMCDSPContext *const c) {
} }
static void check_w_mask(Dav1dMCDSPContext *const c) { static void check_w_mask(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(coef, tmp, 2, [128 * 128]); ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);
ALIGN_STK_32(pixel, c_dst, 135 * 135,); ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,); ALIGN_STK_32(pixel, a_dst, 128 * 128,);
ALIGN_STK_32(uint8_t, c_mask, 128 * 128,); ALIGN_STK_32(uint8_t, c_mask, 128 * 128,);
...@@ -210,8 +210,8 @@ static void check_w_mask(Dav1dMCDSPContext *const c) { ...@@ -210,8 +210,8 @@ static void check_w_mask(Dav1dMCDSPContext *const c) {
init_tmp(c, c_dst, tmp); init_tmp(c, c_dst, tmp);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1, declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
const coef *tmp2, int w, int h, uint8_t *mask, int sign); const int16_t *tmp2, int w, int h, uint8_t *mask, int sign);
static const uint16_t ss[] = { 444, 422, 420 }; static const uint16_t ss[] = { 444, 422, 420 };
...@@ -360,13 +360,13 @@ static void check_warp8x8(Dav1dMCDSPContext *const c) { ...@@ -360,13 +360,13 @@ static void check_warp8x8(Dav1dMCDSPContext *const c) {
static void check_warp8x8t(Dav1dMCDSPContext *const c) { static void check_warp8x8t(Dav1dMCDSPContext *const c) {
ALIGN_STK_32(pixel, src_buf, 15 * 15,); ALIGN_STK_32(pixel, src_buf, 15 * 15,);
ALIGN_STK_32(coef, c_tmp, 8 * 8,); ALIGN_STK_32(int16_t, c_tmp, 8 * 8,);
ALIGN_STK_32(coef, a_tmp, 8 * 8,); ALIGN_STK_32(int16_t, a_tmp, 8 * 8,);
int16_t abcd[4]; int16_t abcd[4];
const pixel *src = src_buf + 15 * 3 + 3; const pixel *src = src_buf + 15 * 3 + 3;
const ptrdiff_t src_stride = 15 * sizeof(pixel); const ptrdiff_t src_stride = 15 * sizeof(pixel);
declare_func(void, coef *tmp, ptrdiff_t tmp_stride, const pixel *src, declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,
ptrdiff_t src_stride, const int16_t *abcd, int mx, int my); ptrdiff_t src_stride, const int16_t *abcd, int mx, int my);
if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) { if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment