Commit 3aac0252 authored by Martin Storsjö, committed by Jean-Baptiste Kempf
Browse files

arm64: filmgrain16: Guard against out of range pixels in the gather function

In 16 bpc, the pixels are stored as 16 bit integers, but valid pixel
values only go up to 12 bits, and the scaling buffer only contains
4096 elements.

The src pixels are, normally, supposed to be valid pixels, but when
processing blocks of 32 pixels at a time, it can operate on
uninitialized pixels past the right edge.

Before:               Cortex A53      A72      A73  Apple M1
fgy_32x32xn_16bpc_neon:  10372.5   8194.4   8612.1      24.2
After:
fgy_32x32xn_16bpc_neon:  10837.9   8469.5   8885.1      24.6
parent 1cf1b309
Pipeline #95994 passed with stages
in 15 minutes and 27 seconds
......@@ -188,6 +188,7 @@ function fgy_loop_neon
.macro fgy ox, oy
L(loop_\ox\oy):
1:
mov w16, #0xfff
ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x1], x2 // src
.if \ox
ld1 {v20.4h}, [x4], x9 // grain_lut old
......@@ -198,8 +199,15 @@ L(loop_\ox\oy):
.if \ox && \oy
ld1 {v14.4h}, [x8], x9 // grain_lut top old
.endif
dup v4.8h, w16
ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x5], x9 // grain_lut
// Make sure that uninitialized pixels out of range past the right
// edge are in range; their actual values shouldn't matter.
and v0.16b, v0.16b, v4.16b
and v1.16b, v1.16b, v4.16b
and v2.16b, v2.16b, v4.16b
and v3.16b, v3.16b, v4.16b
bl gather_neon
.if \ox
......
......@@ -183,10 +183,6 @@ static void check_fgy_sbrow(const Dav1dFilmGrainDSPContext *const dsp) {
generate_scaling(bitdepth_from_max(bitdepth_max), fg_data[0].y_points,
fg_data[0].num_y_points, scaling);
for (int y = 0; y < 32; y++)
for (int x = 0; x < 128; x++)
src[y * PXSTRIDE(stride) + x] = rnd() & bitdepth_max;
fg_data[0].clip_to_restricted_range = rnd() & 1;
fg_data[0].scaling_shift = (rnd() & 3) + 8;
for (fg_data[0].overlap_flag = 0; fg_data[0].overlap_flag <= 1;
......@@ -204,6 +200,14 @@ static void check_fgy_sbrow(const Dav1dFilmGrainDSPContext *const dsp) {
row_num = rnd() & 0x7ff;
}
for (int y = 0; y < 32; y++) {
// Src pixels past the right edge can be uninitialized
for (int x = 0; x < 128; x++)
src[y * PXSTRIDE(stride) + x] = rnd();
for (int x = 0; x < w; x++)
src[y * PXSTRIDE(stride) + x] &= bitdepth_max;
}
CLEAR_PIXEL_RECT(c_dst);
CLEAR_PIXEL_RECT(a_dst);
call_ref(c_dst, src, stride, fg_data, w, scaling, grain_lut, h,
......@@ -275,12 +279,6 @@ static void check_fguv_sbrow(const Dav1dFilmGrainDSPContext *const dsp) {
dsp->generate_grain_uv[layout_idx](grain_lut[1], grain_lut[0],
fg_data, uv_pl HIGHBD_TAIL_SUFFIX);
for (int y = 0; y < 32; y++)
for (int x = 0; x < 128; x++)
src[y * PXSTRIDE(stride) + x] = rnd() & bitdepth_max;
for (int y = 0; y < 32; y++)
for (int x = 0; x < 128; x++)
luma_src[y * PXSTRIDE(lstride) + x] = rnd() & bitdepth_max;
if (csfl) {
fg_data[0].num_y_points = 2 + (rnd() % 13);
const int pad = 0xff / fg_data[0].num_y_points;
......@@ -325,6 +323,18 @@ static void check_fguv_sbrow(const Dav1dFilmGrainDSPContext *const dsp) {
row_num = rnd() & 0x7ff;
}
for (int y = 0; y < 32; y++) {
// Src pixels past the right edge can be uninitialized
for (int x = 0; x < 128; x++) {
src[y * PXSTRIDE(stride) + x] = rnd();
luma_src[y * PXSTRIDE(lstride) + x] = rnd();
}
for (int x = 0; x < w; x++)
src[y * PXSTRIDE(stride) + x] &= bitdepth_max;
for (int x = 0; x < (w << ss_x); x++)
luma_src[y * PXSTRIDE(lstride) + x] &= bitdepth_max;
}
CLEAR_PIXEL_RECT(c_dst);
CLEAR_PIXEL_RECT(a_dst);
call_ref(c_dst, src, stride, fg_data, w, scaling, grain_lut[1], h,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment