Commit b12229cc authored by Nathan Egge, committed by Jean-Baptiste Kempf

Round and clip with one step, mc_8tap_regular_h_c

Relative speed-ups compared with gcc-9.2.0:

                                  Before     After
mc_8tap_regular_w2_h_16bpc_c:      276.6     219.9
mc_8tap_regular_w4_h_16bpc_c:      489.5     374.5
mc_8tap_regular_w8_h_16bpc_c:      897.7     686.8
mc_8tap_regular_w16_h_16bpc_c:    2573.7    2314.2
mc_8tap_regular_w32_h_16bpc_c:    7647.3    7012.4
mc_8tap_regular_w64_h_16bpc_c:   28163.8   25057.4
mc_8tap_regular_w128_h_16bpc_c:  77678.4   73570.0
parent 0bd57c6b
Pipeline #56591 passed with stages
in 3 minutes and 59 seconds
......@@ -87,9 +87,15 @@ prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride,
/* Evaluates FILTER_8TAP(src, x, F, stride) (defined elsewhere), adds a
 * rounding bias of half of (1 << sh), and shifts the sum right by sh.
 * The bias is ((1 << sh) >> 1), so it is 0 when sh is 0.
 * `sh` is expanded three times; pass an expression without side effects. */
#define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \
((FILTER_8TAP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))
/* Variant of DAV1D_FILTER_8TAP_RND that takes the rounding bias from the
 * caller: evaluates FILTER_8TAP(src, x, F, stride), adds `rnd`, and shifts
 * the sum right by `sh`. Unlike DAV1D_FILTER_8TAP_RND, the bias is not
 * derived from `sh`, so the caller may pass any precomputed constant. */
#define DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh) \
((FILTER_8TAP(src, x, F, stride) + (rnd)) >> (sh))
/* Passes the result of DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) through
 * iclip_pixel().
 * NOTE(review): iclip_pixel is defined elsewhere; presumably it clamps the
 * value to the valid pixel range for the current bitdepth - confirm. */
#define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \
iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh))
/* Passes the result of DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh)
 * through iclip_pixel(), i.e. filter, add the caller-supplied bias `rnd`,
 * shift right by `sh`, then clip - all in a single expression.
 * NOTE(review): iclip_pixel is defined elsewhere; presumably it clamps the
 * value to the valid pixel range for the current bitdepth - confirm. */
#define DAV1D_FILTER_8TAP_CLIP2(src, x, F, stride, rnd, sh) \
iclip_pixel(DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh))
#define GET_H_FILTER(mx) \
const int8_t *const fh = !(mx) ? NULL : w > 4 ? \
dav1d_mc_subpel_filters[filter_type & 3][(mx) - 1] : \
......@@ -111,7 +117,7 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
const int filter_type HIGHBD_DECL_SUFFIX)
{
const int intermediate_bits = get_intermediate_bits(bitdepth_max);
const int intermediate_rnd = (1 << intermediate_bits) >> 1;
const int intermediate_rnd = 32 + ((1 << (6 - intermediate_bits)) >> 1);
GET_FILTERS();
dst_stride = PXSTRIDE(dst_stride);
......@@ -144,9 +150,8 @@ put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
} else {
do {
for (int x = 0; x < w; x++) {
const int px = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,
6 - intermediate_bits);
dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits);
dst[x] = DAV1D_FILTER_8TAP_CLIP2(src, x, fh, 1,
intermediate_rnd, 6);
}
dst += dst_stride;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment