Commit 2f251bd1 authored by Ronald S. Bultje

Add a max_width/height argument to angular_ipred_fn

This is used in z2 to limit the number of pixels over which the
filter is applied, as per "numPx" in 7.11.2.4 point 4 in the AV1
specification. This only applies to z2, because in z1/3, the edge
filter is (incomprehensibly) lengthened by the opposite side's edge
length, which undoes the limit on the filter length (like a bug
undoing another bug).

I admit the code is getting rather complex, so we may want to
redesign this to make writing SIMD easier.
parent 4b0683a6
......@@ -41,7 +41,7 @@
*/
#define decl_angular_ipred_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \
int width, int height, int angle)
int width, int height, int angle, int max_width, int max_height)
typedef decl_angular_ipred_fn(*angular_ipred_fn);
/*
......
......@@ -83,8 +83,7 @@ cfl_pred(pixel *dst, const ptrdiff_t stride,
}
}
static unsigned dc_gen_top(const pixel *const topleft, const int width)
{
static unsigned dc_gen_top(const pixel *const topleft, const int width) {
unsigned dc = width >> 1;
for (int i = 0; i < width; i++)
dc += topleft[1 + i];
......@@ -93,7 +92,8 @@ static unsigned dc_gen_top(const pixel *const topleft, const int width)
static void ipred_dc_top_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
splat_dc(dst, stride, width, height, dc_gen_top(topleft, width));
}
......@@ -106,8 +106,7 @@ static void ipred_cfl_top_c(pixel *dst, const ptrdiff_t stride,
cfl_pred(dst, stride, width, height, dc_gen_top(topleft, width), ac, alpha);
}
static unsigned dc_gen_left(const pixel *const topleft, const int height)
{
static unsigned dc_gen_left(const pixel *const topleft, const int height) {
unsigned dc = height >> 1;
for (int i = 0; i < height; i++)
dc += topleft[-(1 + i)];
......@@ -116,7 +115,8 @@ static unsigned dc_gen_left(const pixel *const topleft, const int height)
static void ipred_dc_left_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
splat_dc(dst, stride, width, height, dc_gen_left(topleft, height));
}
......@@ -140,8 +140,8 @@ static void ipred_cfl_left_c(pixel *dst, const ptrdiff_t stride,
#define BASE_SHIFT 17
#endif
static unsigned
dc_gen(const pixel *const topleft, const int width, const int height)
static unsigned dc_gen(const pixel *const topleft,
const int width, const int height)
{
unsigned dc = (width + height) >> 1;
for (int i = 0; i < width; i++)
......@@ -160,7 +160,8 @@ dc_gen(const pixel *const topleft, const int width, const int height)
static void ipred_dc_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
splat_dc(dst, stride, width, height, dc_gen(topleft, width, height));
}
......@@ -180,7 +181,8 @@ static void ipred_cfl_c(pixel *dst, const ptrdiff_t stride,
static void ipred_dc_128_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
splat_dc(dst, stride, width, height, 1 << (BITDEPTH - 1));
}
......@@ -195,7 +197,8 @@ static void ipred_cfl_128_c(pixel *dst, const ptrdiff_t stride,
static void ipred_v_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
for (int y = 0; y < height; y++) {
pixel_copy(dst, topleft + 1, width);
......@@ -205,7 +208,8 @@ static void ipred_v_c(pixel *dst, const ptrdiff_t stride,
static void ipred_h_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
for (int y = 0; y < height; y++) {
pixel_set(dst, topleft[-(1 + y)], width);
......@@ -215,7 +219,8 @@ static void ipred_h_c(pixel *dst, const ptrdiff_t stride,
static void ipred_paeth_c(pixel *dst, const ptrdiff_t stride,
const pixel *const tl_ptr,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
const int topleft = tl_ptr[0];
for (int y = 0; y < height; y++) {
......@@ -236,7 +241,8 @@ static void ipred_paeth_c(pixel *dst, const ptrdiff_t stride,
static void ipred_smooth_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
const uint8_t *const weights_hor = &dav1d_sm_weights[width];
const uint8_t *const weights_ver = &dav1d_sm_weights[height];
......@@ -256,7 +262,8 @@ static void ipred_smooth_c(pixel *dst, const ptrdiff_t stride,
static void ipred_smooth_v_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
const uint8_t *const weights_ver = &dav1d_sm_weights[height];
const int bottom = topleft[-height];
......@@ -273,7 +280,8 @@ static void ipred_smooth_v_c(pixel *dst, const ptrdiff_t stride,
static void ipred_smooth_h_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a)
const int width, const int height, const int a,
const int max_width, const int max_height)
{
const uint8_t *const weights_hor = &dav1d_sm_weights[width];
const int right = topleft[width];
......@@ -328,7 +336,9 @@ static int get_filter_strength(const unsigned blk_wh, const unsigned d,
return strength;
}
static void filter_edge(pixel *const out, const int sz, const pixel *const in,
static void filter_edge(pixel *const out, const int sz,
const int lim_from, const int lim_to,
const pixel *const in,
const int from, const int to, const unsigned strength)
{
static const uint8_t kernel[3][5] = {
......@@ -338,12 +348,17 @@ static void filter_edge(pixel *const out, const int sz, const pixel *const in,
};
assert(strength > 0);
for (int i = 0; i < sz; i++) {
int i = 0;
for (; i < lim_from; i++)
out[i] = in[iclip(i, from, to - 1)];
for (; i < imin(lim_to, sz); i++) {
int s = 0;
for (int j = 0; j < 5; j++)
s += in[iclip(i - 2 + j, from, to - 1)] * kernel[strength - 1][j];
out[i] = (s + 8) >> 4;
}
for (; i < sz; i++)
out[i] = in[iclip(i, from, to - 1)];
}
static int get_upsample(const int blk_wh, const unsigned d, const int type) {
......@@ -369,7 +384,8 @@ static void upsample_edge(pixel *const out, const int hsz,
static void ipred_z1_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
const int width, const int height, int angle)
const int width, const int height, int angle,
const int max_width, const int max_height)
{
const int is_sm = angle >> 9;
angle &= 511;
......@@ -389,7 +405,7 @@ static void ipred_z1_c(pixel *dst, const ptrdiff_t stride,
get_filter_strength(width + height, 90 - angle, is_sm);
if (filter_strength) {
filter_edge(top_out, width + height,
filter_edge(top_out, width + height, 0, width + height,
&topleft_in[1], -1, width + imin(width, height),
filter_strength);
top = top_out;
......@@ -421,7 +437,8 @@ static void ipred_z1_c(pixel *dst, const ptrdiff_t stride,
static void ipred_z2_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
const int width, const int height, int angle)
const int width, const int height, int angle,
const int max_width, const int max_height)
{
const int is_sm = angle >> 9;
angle &= 511;
......@@ -440,7 +457,8 @@ static void ipred_z2_c(pixel *dst, const ptrdiff_t stride,
get_filter_strength(width + height, angle - 90, is_sm);
if (filter_strength) {
filter_edge(&topleft[1], width, &topleft_in[1], -1, width,
filter_edge(&topleft[1], width, 0, max_width,
&topleft_in[1], -1, width,
filter_strength);
} else {
pixel_copy(&topleft[1], &topleft_in[1], width);
......@@ -453,7 +471,8 @@ static void ipred_z2_c(pixel *dst, const ptrdiff_t stride,
get_filter_strength(width + height, 180 - angle, is_sm);
if (filter_strength) {
filter_edge(&topleft[-height], height, &topleft_in[-height],
filter_edge(&topleft[-height], height, height - max_height, height,
&topleft_in[-height],
0, height + 1, filter_strength);
} else {
pixel_copy(&topleft[-height], &topleft_in[-height], height);
......@@ -492,7 +511,8 @@ static void ipred_z2_c(pixel *dst, const ptrdiff_t stride,
static void ipred_z3_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
const int width, const int height, int angle)
const int width, const int height, int angle,
const int max_width, const int max_height)
{
const int is_sm = angle >> 9;
angle &= 511;
......@@ -513,7 +533,7 @@ static void ipred_z3_c(pixel *dst, const ptrdiff_t stride,
get_filter_strength(width + height, angle - 180, is_sm);
if (filter_strength) {
filter_edge(left_out, width + height,
filter_edge(left_out, width + height, 0, width + height,
&topleft_in[-(width + height)],
imax(width - height, 0), width + height + 1,
filter_strength);
......@@ -548,7 +568,8 @@ static void ipred_z3_c(pixel *dst, const ptrdiff_t stride,
/* Up to 32x32 only */
static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
const int width, const int height, int filt_idx)
const int width, const int height, int filt_idx,
const int max_width, const int max_height)
{
filt_idx &= 511;
assert(filt_idx < 5);
......
......@@ -766,7 +766,9 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
t_dim->w, t_dim->h, edge);
dsp->ipred.intra_pred[m](dst, f->cur.p.stride[0], edge,
t_dim->w * 4, t_dim->h * 4,
angle | sm_fl);
angle | sm_fl,
f->cur.p.p.w - 4 * t->bx,
f->cur.p.p.h - 4 * t->by);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
......@@ -981,7 +983,11 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
dsp->ipred.intra_pred[m](dst, stride, edge,
uv_t_dim->w * 4,
uv_t_dim->h * 4,
angle | sm_uv_fl);
angle | sm_uv_fl,
(f->cur.p.p.w + ss_hor -
4 * (t->bx & ~ss_hor)) >> ss_hor,
(f->cur.p.p.w + ss_ver -
4 * (t->by & ~ss_ver)) >> ss_ver);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
uv_t_dim->h * 4, 2, "l");
......@@ -1136,7 +1142,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
0, dst, f->cur.p.stride[0], top_sb_edge,
m, &angle, bw4, bh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
tl_edge, bw4 * 4, bh4 * 4, 0);
tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0);
const uint8_t *const ii_mask =
b->interintra_type == INTER_INTRA_BLEND ?
dav1d_ii_masks[bs][0][b->interintra_mode] :
......@@ -1273,7 +1279,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize
top_sb_edge, m,
&angle, cbw4, cbh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
tl_edge, cbw4 * 4, cbh4 * 4, 0);
tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0);
dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp,
cbw4 * 4, cbh4 * 4, ii_mask);
}
......
......@@ -68,7 +68,7 @@ static void check_intra_pred(Dav1dIntraPredDSPContext *const c) {
pixel *const topleft = topleft_buf + 128;
declare_func(void, pixel *dst, ptrdiff_t stride, const pixel *topleft,
int width, int height, int angle);
int width, int height, int angle, int max_width, int max_height);
for (int mode = 0; mode < N_IMPL_INTRA_PRED_MODES; mode++)
for (int w = 4; w <= (mode == FILTER_PRED ? 32 : 64); w <<= 1)
......@@ -89,12 +89,13 @@ static void check_intra_pred(Dav1dIntraPredDSPContext *const c) {
for (int i = -h * 2; i <= w * 2; i++)
topleft[i] = rand() & ((1 << BITDEPTH) - 1);
call_ref(c_dst, stride, topleft, w, h, a);
call_new(a_dst, stride, topleft, w, h, a);
const int maxw = 1 + (rand() % 128), maxh = 1 + (rand() % 128);
call_ref(c_dst, stride, topleft, w, h, a, maxw, maxh);
call_new(a_dst, stride, topleft, w, h, a, maxw, maxh);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
bench_new(a_dst, stride, topleft, w, h, a);
bench_new(a_dst, stride, topleft, w, h, a, 128, 128);
}
}
report("intra_pred");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment