Commit c3980e39 authored by Ronald S. Bultje

12 bits/component support

parent 2e6c8a92
Pipeline #3285 passed with stages
in 8 minutes and 1 second
......@@ -34,6 +34,9 @@
#if !defined(BITDEPTH)
typedef void pixel;
typedef void coef;
#define HIGHBD_DECL_SUFFIX /* nothing */
#define HIGHBD_CALL_SUFFIX /* nothing */
#define HIGHBD_TAIL_SUFFIX /* nothing */
#elif BITDEPTH == 8
typedef uint8_t pixel;
typedef int16_t coef;
......@@ -41,28 +44,37 @@ typedef int16_t coef;
#define pixel_set memset
#define iclip_pixel iclip_u8
#define PIX_HEX_FMT "%02x"
#define bytefn(x) x##_8bpc
#define bitfn(x) x##_8bpc
/* Identity in this branch (pixel is uint8_t here); presumably a stride given
 * in bytes is already a stride in pixels -- confirm against callers.
 * The expansion is parenthesized so that surrounding operators cannot
 * re-associate with the argument, e.g. 2 * PXSTRIDE(a + b). */
#define PXSTRIDE(x) (x)
#elif BITDEPTH == 10 || BITDEPTH == 12
#define highbd_only(x)
#define HIGHBD_DECL_SUFFIX /* nothing */
#define HIGHBD_CALL_SUFFIX /* nothing */
#define HIGHBD_TAIL_SUFFIX /* nothing */
#define bitdepth_from_max(x) 8
#elif BITDEPTH == 16
typedef uint16_t pixel;
typedef int32_t coef;
#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
#define iclip_pixel(x) iclip(x, 0, ((1 << BITDEPTH) - 1))
/*
 * Store `val` into each of the first `num` elements starting at `dst`.
 * Nothing is written when `num` is zero or negative.
 * The 8-bit branch of this header maps pixel_set to memset instead; this
 * branch assigns element by element.
 */
static inline void pixel_set(pixel *const dst, const int val, const int num) {
    pixel *out = dst;
    for (int left = num; left > 0; left--)
        *out++ = val;
}
#define PIX_HEX_FMT "%03x"
#define bytefn(x) x##_16bpc
#if BITDEPTH == 10
#define bitfn(x) x##_10bpc
#else
#define bitfn(x) x##_12bpc
#endif
#define iclip_pixel(x) iclip(x, 0, bitdepth_max)
#define HIGHBD_DECL_SUFFIX , const int bitdepth_max
#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
#define HIGHBD_TAIL_SUFFIX , bitdepth_max
#define bitdepth_from_max(bitdepth_max) (32 - clz(bitdepth_max))
#define bitfn(x) x##_16bpc
/* Halves x; presumably converts a stride given in bytes into a stride counted
 * in 2-byte pixels -- confirm against callers.
 * The argument is parenthesized so that low-precedence expressions such as
 * PXSTRIDE(c ? a : b) or PXSTRIDE(a | b) are shifted as a whole. */
#define PXSTRIDE(x) ((x) >> 1)
#define highbd_only(x) x
#else
#error invalid value for bitdepth
#endif
#define bytefn(x) bitfn(x)
/*
 * Declare the two bitdepth variants of one function in a single statement.
 * `name` is the return type plus base identifier (e.g.
 * `void dav1d_cdef_dsp_init`); the variadic arguments are the parameter list.
 * Expands to `name_8bpc(args); name_16bpc(args)` -- the second declaration
 * has no trailing semicolon, so the invocation must supply it.
 */
#define bitfn_decls(name, ...) \
name##_8bpc(__VA_ARGS__); \
name##_16bpc(__VA_ARGS__)
#endif /* __DAV1D_COMMON_BITDEPTH_H__ */
......@@ -55,7 +55,7 @@ dav1d_inc_dirs = include_directories(['.', 'include', 'include/dav1d'])
# Bitdepth option
dav1d_bitdepths = get_option('bitdepths')
foreach bitdepth : ['8', '10']
foreach bitdepth : ['8', '16']
cdata.set10('CONFIG_@0@BPC'.format(bitdepth), dav1d_bitdepths.contains(bitdepth))
endforeach
......
......@@ -2,7 +2,7 @@
option('bitdepths',
type: 'array',
choices: ['8', '10'],
choices: ['8', '16'],
description: 'Enable only specified bitdepths')
option('build_asm',
......
......@@ -53,11 +53,11 @@ typedef const void *const_left_pixel_row_2px;
#define decl_cdef_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const_left_pixel_row_2px left, \
/*const*/ pixel *const top[2], int pri_strength, int sec_strength, \
int dir, int damping, enum CdefEdgeFlags edges)
int dir, int damping, enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX)
typedef decl_cdef_fn(*cdef_fn);
#define decl_cdef_dir_fn(name) \
int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var)
int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var HIGHBD_DECL_SUFFIX)
typedef decl_cdef_dir_fn(*cdef_dir_fn);
typedef struct Dav1dCdefDSPContext {
......@@ -65,10 +65,7 @@ typedef struct Dav1dCdefDSPContext {
cdef_fn fb[3 /* 444/luma, 422, 420 */];
} Dav1dCdefDSPContext;
void dav1d_cdef_dsp_init_8bpc(Dav1dCdefDSPContext *c);
void dav1d_cdef_dsp_init_10bpc(Dav1dCdefDSPContext *c);
void dav1d_cdef_dsp_init_x86_8bpc(Dav1dCdefDSPContext *c);
void dav1d_cdef_dsp_init_x86_10bpc(Dav1dCdefDSPContext *c);
bitfn_decls(void dav1d_cdef_dsp_init, Dav1dCdefDSPContext *c);
bitfn_decls(void dav1d_cdef_dsp_init_x86, Dav1dCdefDSPContext *c);
#endif /* __DAV1D_SRC_CDEF_H__ */
......@@ -83,12 +83,13 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
const Av1Filter *const lflvl,
const int by_start, const int by_end)
{
const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
const Dav1dDSPContext *const dsp = f->dsp;
enum CdefEdgeFlags edges = HAVE_BOTTOM | (by_start > 0 ? HAVE_TOP : 0);
pixel *ptrs[3] = { p[0], p[1], p[2] };
const int sbsz = 16;
const int sb64w = f->sb128w << 1;
const int damping = f->frame_hdr->cdef.damping + BITDEPTH - 8;
const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
const enum Dav1dPixelLayout layout = f->cur.p.layout;
const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
......@@ -156,17 +157,17 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
}
// the actual filter
const int y_pri_lvl = (y_lvl >> 2) << (BITDEPTH - 8);
const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
int y_sec_lvl = y_lvl & 3;
y_sec_lvl += y_sec_lvl == 3;
y_sec_lvl <<= BITDEPTH - 8;
const int uv_pri_lvl = (uv_lvl >> 2) << (BITDEPTH - 8);
y_sec_lvl <<= bitdepth_min_8;
const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
int uv_sec_lvl = uv_lvl & 3;
uv_sec_lvl += uv_sec_lvl == 3;
uv_sec_lvl <<= BITDEPTH - 8;
uv_sec_lvl <<= bitdepth_min_8;
unsigned variance;
const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
&variance);
&variance HIGHBD_CALL_SUFFIX);
if (y_lvl) {
dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
(pixel *const [2]) {
......@@ -175,7 +176,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
},
adjust_strength(y_pri_lvl, variance),
y_sec_lvl, y_pri_lvl ? dir : 0,
damping, edges);
damping, edges HIGHBD_CALL_SUFFIX);
}
if (uv_lvl && has_chroma) {
const int uvdir =
......@@ -190,7 +191,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
},
uv_pri_lvl, uv_sec_lvl,
uv_pri_lvl ? uvdir : 0,
damping - 1, edges);
damping - 1, edges HIGHBD_CALL_SUFFIX);
}
}
......
......@@ -97,7 +97,8 @@ cdef_filter_block_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel (*left)[2], /*const*/ pixel *const top[2],
const int w, const int h, const int pri_strength,
const int sec_strength, const int dir,
const int damping, const enum CdefEdgeFlags edges)
const int damping, const enum CdefEdgeFlags edges
HIGHBD_DECL_SUFFIX)
{
static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
{ -1 * 12 + 1, -2 * 12 + 2 },
......@@ -115,7 +116,8 @@ cdef_filter_block_c(pixel *dst, const ptrdiff_t dst_stride,
assert((w == 4 || w == 8) && (h == 4 || h == 8));
uint16_t tmp_buf[144]; // 12*12 is the maximum value of tmp_stride * (h + 4)
uint16_t *tmp = tmp_buf + 2 * tmp_stride + 2;
const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> bitdepth_min_8) & 1];
padding(tmp, tmp_stride, dst, dst_stride, left, top, w, h, edges);
......@@ -170,10 +172,11 @@ static void cdef_filter_block_##w##x##h##_c(pixel *const dst, \
const int sec_strength, \
const int dir, \
const int damping, \
const enum CdefEdgeFlags edges) \
const enum CdefEdgeFlags edges \
HIGHBD_DECL_SUFFIX) \
{ \
cdef_filter_block_c(dst, stride, left, top, w, h, pri_strength, sec_strength, \
dir, damping, edges); \
dir, damping, edges HIGHBD_TAIL_SUFFIX); \
}
cdef_fn(4, 4);
......@@ -181,15 +184,16 @@ cdef_fn(4, 8);
cdef_fn(8, 8);
static int cdef_find_dir_c(const pixel *img, const ptrdiff_t stride,
unsigned *const var)
unsigned *const var HIGHBD_DECL_SUFFIX)
{
const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
int partial_sum_hv[2][8] = { { 0 } };
int partial_sum_diag[2][15] = { { 0 } };
int partial_sum_alt[4][11] = { { 0 } };
for (int y = 0; y < 8; y++) {
for (int x = 0; x < 8; x++) {
const int px = (img[x] >> (BITDEPTH - 8)) - 128;
const int px = (img[x] >> bitdepth_min_8) - 128;
partial_sum_diag[0][ y + x ] += px;
partial_sum_alt [0][ y + (x >> 1)] += px;
......
......@@ -3013,7 +3013,6 @@ int dav1d_submit_frame(Dav1dContext *const c) {
switch (bpc) {
#define assign_bitdepth_case(bd) \
case bd: \
dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
dav1d_itx_dsp_init_##bd##bpc(&dsp->itx); \
......@@ -3022,10 +3021,13 @@ int dav1d_submit_frame(Dav1dContext *const c) {
dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
break
#if CONFIG_8BPC
assign_bitdepth_case(8);
case 8:
assign_bitdepth_case(8);
#endif
#if CONFIG_10BPC
assign_bitdepth_case(10);
#if CONFIG_16BPC
case 10:
case 12:
assign_bitdepth_case(16);
#endif
#undef assign_bitdepth_case
default:
......@@ -3047,7 +3049,7 @@ int dav1d_submit_frame(Dav1dContext *const c) {
assign_bitdepth_case(8);
#endif
} else {
#if CONFIG_10BPC
#if CONFIG_16BPC
assign_bitdepth_case(16);
#endif
}
......@@ -3168,6 +3170,7 @@ int dav1d_submit_frame(Dav1dContext *const c) {
f->sb_step = 16 << f->seq_hdr->sb128;
f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
f->b4_stride = (f->bw + 31) & ~31;
f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
// ref_mvs
if ((f->frame_hdr->frame_type & 1) || f->frame_hdr->allow_intrabc) {
......
......@@ -160,5 +160,70 @@ const uint16_t dav1d_dq_tbl[][QINDEX_RANGE][2] = {
{ 3586, 5916, }, { 3702, 6032, }, { 3823, 6148, }, { 3953, 6268, },
{ 4089, 6388, }, { 4236, 6512, }, { 4394, 6640, }, { 4559, 6768, },
{ 4737, 6900, }, { 4929, 7036, }, { 5130, 7172, }, { 5347, 7312, },
}, {
{ 4, 4 }, { 12, 13 }, { 18, 19 }, { 25, 27 },
{ 33, 35 }, { 41, 44 }, { 50, 54 }, { 60, 64 },
{ 70, 75 }, { 80, 87 }, { 91, 99 }, { 103, 112 },
{ 115, 126 }, { 127, 139 }, { 140, 154 }, { 153, 168 },
{ 166, 183 }, { 180, 199 }, { 194, 214 }, { 208, 230 },
{ 222, 247 }, { 237, 263 }, { 251, 280 }, { 266, 297 },
{ 281, 314 }, { 296, 331 }, { 312, 349 }, { 327, 366 },
{ 343, 384 }, { 358, 402 }, { 374, 420 }, { 390, 438 },
{ 405, 456 }, { 421, 475 }, { 437, 493 }, { 453, 511 },
{ 469, 530 }, { 484, 548 }, { 500, 567 }, { 516, 586 },
{ 532, 604 }, { 548, 623 }, { 564, 642 }, { 580, 660 },
{ 596, 679 }, { 611, 698 }, { 627, 716 }, { 643, 735 },
{ 659, 753 }, { 674, 772 }, { 690, 791 }, { 706, 809 },
{ 721, 828 }, { 737, 846 }, { 752, 865 }, { 768, 884 },
{ 783, 902 }, { 798, 920 }, { 814, 939 }, { 829, 957 },
{ 844, 976 }, { 859, 994 }, { 874, 1012 }, { 889, 1030 },
{ 904, 1049 }, { 919, 1067 }, { 934, 1085 }, { 949, 1103 },
{ 964, 1121 }, { 978, 1139 }, { 993, 1157 }, { 1008, 1175 },
{ 1022, 1193 }, { 1037, 1211 }, { 1051, 1229 }, { 1065, 1246 },
{ 1080, 1264 }, { 1094, 1282 }, { 1108, 1299 }, { 1122, 1317 },
{ 1136, 1335 }, { 1151, 1352 }, { 1165, 1370 }, { 1179, 1387 },
{ 1192, 1405 }, { 1206, 1422 }, { 1220, 1440 }, { 1234, 1457 },
{ 1248, 1474 }, { 1261, 1491 }, { 1275, 1509 }, { 1288, 1526 },
{ 1302, 1543 }, { 1315, 1560 }, { 1329, 1577 }, { 1342, 1595 },
{ 1368, 1627 }, { 1393, 1660 }, { 1419, 1693 }, { 1444, 1725 },
{ 1469, 1758 }, { 1494, 1791 }, { 1519, 1824 }, { 1544, 1856 },
{ 1569, 1889 }, { 1594, 1922 }, { 1618, 1954 }, { 1643, 1987 },
{ 1668, 2020 }, { 1692, 2052 }, { 1717, 2085 }, { 1741, 2118 },
{ 1765, 2150 }, { 1789, 2183 }, { 1814, 2216 }, { 1838, 2248 },
{ 1862, 2281 }, { 1885, 2313 }, { 1909, 2346 }, { 1933, 2378 },
{ 1957, 2411 }, { 1992, 2459 }, { 2027, 2508 }, { 2061, 2556 },
{ 2096, 2605 }, { 2130, 2653 }, { 2165, 2701 }, { 2199, 2750 },
{ 2233, 2798 }, { 2267, 2847 }, { 2300, 2895 }, { 2334, 2943 },
{ 2367, 2992 }, { 2400, 3040 }, { 2434, 3088 }, { 2467, 3137 },
{ 2499, 3185 }, { 2532, 3234 }, { 2575, 3298 }, { 2618, 3362 },
{ 2661, 3426 }, { 2704, 3491 }, { 2746, 3555 }, { 2788, 3619 },
{ 2830, 3684 }, { 2872, 3748 }, { 2913, 3812 }, { 2954, 3876 },
{ 2995, 3941 }, { 3036, 4005 }, { 3076, 4069 }, { 3127, 4149 },
{ 3177, 4230 }, { 3226, 4310 }, { 3275, 4390 }, { 3324, 4470 },
{ 3373, 4550 }, { 3421, 4631 }, { 3469, 4711 }, { 3517, 4791 },
{ 3565, 4871 }, { 3621, 4967 }, { 3677, 5064 }, { 3733, 5160 },
{ 3788, 5256 }, { 3843, 5352 }, { 3897, 5448 }, { 3951, 5544 },
{ 4005, 5641 }, { 4058, 5737 }, { 4119, 5849 }, { 4181, 5961 },
{ 4241, 6073 }, { 4301, 6185 }, { 4361, 6297 }, { 4420, 6410 },
{ 4479, 6522 }, { 4546, 6650 }, { 4612, 6778 }, { 4677, 6906 },
{ 4742, 7034 }, { 4807, 7162 }, { 4871, 7290 }, { 4942, 7435 },
{ 5013, 7579 }, { 5083, 7723 }, { 5153, 7867 }, { 5222, 8011 },
{ 5291, 8155 }, { 5367, 8315 }, { 5442, 8475 }, { 5517, 8635 },
{ 5591, 8795 }, { 5665, 8956 }, { 5745, 9132 }, { 5825, 9308 },
{ 5905, 9484 }, { 5984, 9660 }, { 6063, 9836 }, { 6149, 10028 },
{ 6234, 10220 }, { 6319, 10412 }, { 6404, 10604 }, { 6495, 10812 },
{ 6587, 11020 }, { 6678, 11228 }, { 6769, 11437 }, { 6867, 11661 },
{ 6966, 11885 }, { 7064, 12109 }, { 7163, 12333 }, { 7269, 12573 },
{ 7376, 12813 }, { 7483, 13053 }, { 7599, 13309 }, { 7715, 13565 },
{ 7832, 13821 }, { 7958, 14093 }, { 8085, 14365 }, { 8214, 14637 },
{ 8352, 14925 }, { 8492, 15213 }, { 8635, 15502 }, { 8788, 15806 },
{ 8945, 16110 }, { 9104, 16414 }, { 9275, 16734 }, { 9450, 17054 },
{ 9639, 17390 }, { 9832, 17726 }, { 10031, 18062 }, { 10245, 18414 },
{ 10465, 18766 }, { 10702, 19134 }, { 10946, 19502 }, { 11210, 19886 },
{ 11482, 20270 }, { 11776, 20670 }, { 12081, 21070 }, { 12409, 21486 },
{ 12750, 21902 }, { 13118, 22334 }, { 13501, 22766 }, { 13913, 23214 },
{ 14343, 23662 }, { 14807, 24126 }, { 15290, 24590 }, { 15812, 25070 },
{ 16356, 25551 }, { 16943, 26047 }, { 17575, 26559 }, { 18237, 27071 },
{ 18949, 27599 }, { 19718, 28143 }, { 20521, 28687 }, { 21387, 29247 },
}
};
......@@ -30,10 +30,7 @@
#include "dav1d/dav1d.h"
void dav1d_apply_grain_8bpc(Dav1dPicture *const out,
const Dav1dPicture *const in);
void dav1d_apply_grain_10bpc(Dav1dPicture *const out,
const Dav1dPicture *const in);
bitfn_decls(void dav1d_apply_grain, Dav1dPicture *const out,
const Dav1dPicture *const in);
#endif /* __DAV1D_SRC_FILM_GRAIN_H__ */
This diff is collapsed.
......@@ -176,6 +176,7 @@ struct Dav1dFrameContext {
int a_sz /* w*tile_rows */;
AV1_COMMON *libaom_cm; // FIXME
uint8_t jnt_weights[7][7];
int bitdepth_max;
struct {
struct thread_data td;
......
......@@ -43,7 +43,8 @@
*/
#define decl_angular_ipred_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \
int width, int height, int angle, int max_width, int max_height)
int width, int height, int angle, int max_width, int max_height \
HIGHBD_DECL_SUFFIX)
typedef decl_angular_ipred_fn(*angular_ipred_fn);
/*
......@@ -63,7 +64,8 @@ typedef decl_cfl_ac_fn(*cfl_ac_fn);
*/
#define decl_cfl_pred_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \
int width, int height, const int16_t *ac, int alpha)
int width, int height, const int16_t *ac, int alpha \
HIGHBD_DECL_SUFFIX)
typedef decl_cfl_pred_fn(*cfl_pred_fn);
/*
......@@ -86,10 +88,7 @@ typedef struct Dav1dIntraPredDSPContext {
pal_pred_fn pal_pred;
} Dav1dIntraPredDSPContext;
void dav1d_intra_pred_dsp_init_8bpc(Dav1dIntraPredDSPContext *c);
void dav1d_intra_pred_dsp_init_10bpc(Dav1dIntraPredDSPContext *c);
void dav1d_intra_pred_dsp_init_x86_8bpc(Dav1dIntraPredDSPContext *c);
void dav1d_intra_pred_dsp_init_x86_10bpc(Dav1dIntraPredDSPContext *c);
bitfn_decls(void dav1d_intra_pred_dsp_init, Dav1dIntraPredDSPContext *c);
bitfn_decls(void dav1d_intra_pred_dsp_init_x86, Dav1dIntraPredDSPContext *c);
#endif /* __DAV1D_SRC_IPRED_H__ */
......@@ -81,7 +81,8 @@ enum IntraPredMode
const pixel *dst, ptrdiff_t stride,
const pixel *prefilter_toplevel_sb_edge,
enum IntraPredMode mode, int *angle,
int tw, int th, pixel *topleft_out);
int tw, int th, pixel *topleft_out
HIGHBD_DECL_SUFFIX);
// These flags are OR'd with the angle argument into intra predictors.
// ANGLE_USE_EDGE_FILTER_FLAG signals that edges should be convolved
......
......@@ -83,8 +83,9 @@ bytefn(dav1d_prepare_intra_edges)(const int x, const int have_left,
const pixel *prefilter_toplevel_sb_edge,
enum IntraPredMode mode, int *const angle,
const int tw, const int th,
pixel *const topleft_out)
pixel *const topleft_out HIGHBD_DECL_SUFFIX)
{
const int bitdepth = bitdepth_from_max(bitdepth_max);
assert(y < h && x < w);
switch (mode) {
......@@ -144,7 +145,7 @@ bytefn(dav1d_prepare_intra_edges)(const int x, const int have_left,
if (px_have < sz)
pixel_set(left, left[sz - px_have], sz - px_have);
} else {
pixel_set(left, have_top ? *dst_top : ((1 << BITDEPTH) >> 1) + 1, sz);
pixel_set(left, have_top ? *dst_top : ((1 << bitdepth) >> 1) + 1, sz);
}
if (av1_intra_prediction_edges[mode].needs_bottomleft) {
......@@ -174,7 +175,7 @@ bytefn(dav1d_prepare_intra_edges)(const int x, const int have_left,
if (px_have < sz)
pixel_set(top + px_have, top[px_have - 1], sz - px_have);
} else {
pixel_set(top, have_left ? dst[-1] : ((1 << BITDEPTH) >> 1) - 1, sz);
pixel_set(top, have_left ? dst[-1] : ((1 << bitdepth) >> 1) - 1, sz);
}
if (av1_intra_prediction_edges[mode].needs_topright) {
......@@ -198,7 +199,7 @@ bytefn(dav1d_prepare_intra_edges)(const int x, const int have_left,
if (have_left) {
*topleft_out = have_top ? dst_top[-1] : dst[-1];
} else {
*topleft_out = have_top ? *dst_top : (1 << BITDEPTH) >> 1;
*topleft_out = have_top ? *dst_top : (1 << bitdepth) >> 1;
}
if (mode == Z2_PRED && tw + th >= 6)
*topleft_out = (topleft_out[-1] * 5 + topleft_out[0] * 6 +
......
This diff is collapsed.
......@@ -35,17 +35,15 @@
#include "src/levels.h"
#define decl_itx_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob)
void (name)(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob \
HIGHBD_DECL_SUFFIX)
typedef decl_itx_fn(*itxfm_fn);
typedef struct Dav1dInvTxfmDSPContext {
itxfm_fn itxfm_add[N_RECT_TX_SIZES][N_TX_TYPES_PLUS_LL];
} Dav1dInvTxfmDSPContext;
void dav1d_itx_dsp_init_8bpc(Dav1dInvTxfmDSPContext *c);
void dav1d_itx_dsp_init_10bpc(Dav1dInvTxfmDSPContext *c);
void dav1d_itx_dsp_init_x86_8bpc(Dav1dInvTxfmDSPContext *c);
void dav1d_itx_dsp_init_x86_10bpc(Dav1dInvTxfmDSPContext *c);
bitfn_decls(void dav1d_itx_dsp_init, Dav1dInvTxfmDSPContext *c);
bitfn_decls(void dav1d_itx_dsp_init_x86, Dav1dInvTxfmDSPContext *c);
#endif /* __DAV1D_SRC_ITX_H__ */
......@@ -46,7 +46,8 @@ static void NOINLINE
inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
coef *const coeff, const int eob,
const int w, const int h, const int shift1, const int shift2,
const itx_1d_fn first_1d_fn, const itx_1d_fn second_1d_fn)
const itx_1d_fn first_1d_fn, const itx_1d_fn second_1d_fn
HIGHBD_DECL_SUFFIX)
{
int i, j;
const ptrdiff_t sh = imin(h, 32), sw = imin(w, 32);
......@@ -54,8 +55,9 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
// Maximum value for h and w is 64
coef tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
const int is_rect2 = w * 2 == h || h * 2 == w;
const int row_clip_max = (1 << (BITDEPTH + 8 - 1)) - 1;
const int col_clip_max = (1 << (imax(BITDEPTH + 6, 16) - 1)) -1;
const int bitdepth = bitdepth_from_max(bitdepth_max);
const int row_clip_max = (1 << (bitdepth + 8 - 1)) - 1;
const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1;
const int col_clip_min = -col_clip_max - 1;
if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
......@@ -93,10 +95,12 @@ static void \
inv_txfm_add_##type1##_##type2##_##w##x##h##_c(pixel *dst, \
const ptrdiff_t stride, \
coef *const coeff, \
const int eob) \
const int eob \
HIGHBD_DECL_SUFFIX) \
{ \
inv_txfm_add_c(dst, stride, coeff, eob, w, h, shift1, shift2, \
inv_##type1##w##_1d, inv_##type2##h##_1d); \
inv_##type1##w##_1d, inv_##type2##h##_1d \
HIGHBD_TAIL_SUFFIX); \
}
#define inv_txfm_fn64(w, h, shift1, shift2) \
......@@ -147,9 +151,11 @@ inv_txfm_fn64(64, 32, 1, 4)
inv_txfm_fn64(64, 64, 2, 4)
static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride,
coef *const coeff, const int eob)
coef *const coeff, const int eob
HIGHBD_DECL_SUFFIX)
{
const int col_clip_max = (1 << (imax(BITDEPTH + 6, 16) - 1)) -1;
const int bitdepth = bitdepth_from_max(bitdepth_max);
const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1;
const int col_clip_min = -col_clip_max - 1;
coef tmp[4 * 4], out[4];
......
......@@ -66,7 +66,7 @@ static inline void filter_plane_cols_y(const Dav1dFrameContext *const f,
hmask[3] = 0;
dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
(const uint8_t(*)[4]) &lvl[x][0], b4_stride,
&f->lf.lim_lut, endy4 - starty4);
&f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
}
}
......@@ -96,7 +96,7 @@ static inline void filter_plane_rows_y(const Dav1dFrameContext *const f,
};
dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
(const uint8_t(*)[4]) &lvl[0][1], b4_stride,
&f->lf.lim_lut, w);
&f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
}
}
......@@ -130,10 +130,10 @@ static inline void filter_plane_cols_uv(const Dav1dFrameContext *const f,
hmask[2] = 0;
dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
(const uint8_t(*)[4]) &lvl[x][2], b4_stride,
&f->lf.lim_lut, endy4 - starty4);
&f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
(const uint8_t(*)[4]) &lvl[x][3], b4_stride,
&f->lf.lim_lut, endy4 - starty4);
&f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
}
}
......@@ -164,10 +164,10 @@ static inline void filter_plane_rows_uv(const Dav1dFrameContext *const f,
};
dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
(const uint8_t(*)[4]) &lvl[0][2], b4_stride,
&f->lf.lim_lut, w);
&f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
(const uint8_t(*)[4]) &lvl[0][3], b4_stride,
&f->lf.lim_lut, w);
&f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
}
}
......
......@@ -264,9 +264,10 @@ static int output_image(Dav1dContext *const c, Dav1dPicture *const out,
dav1d_apply_grain_8bpc(out, in);
break;
#endif
#if CONFIG_10BPC
#if CONFIG_16BPC
case 10:
dav1d_apply_grain_10bpc(out, in);
case 12:
dav1d_apply_grain_16bpc(out, in);
break;
#endif
default:
......
......@@ -39,7 +39,7 @@
#define decl_loopfilter_sb_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const uint32_t *mask, \
const uint8_t (*lvl)[4], ptrdiff_t lvl_stride, \
const Av1FilterLUT *lut, int w)
const Av1FilterLUT *lut, int w HIGHBD_DECL_SUFFIX)
typedef decl_loopfilter_sb_fn(*loopfilter_sb_fn);
typedef struct Dav1dLoopFilterDSPContext {
......@@ -52,10 +52,7 @@ typedef struct Dav1dLoopFilterDSPContext {
loopfilter_sb_fn loop_filter_sb[2][2];
} Dav1dLoopFilterDSPContext;
void dav1d_loop_filter_dsp_init_8bpc(Dav1dLoopFilterDSPContext *c);
void dav1d_loop_filter_dsp_init_10bpc(Dav1dLoopFilterDSPContext *c);
void dav1d_loop_filter_dsp_init_x86_8bpc(Dav1dLoopFilterDSPContext *c);
void dav1d_loop_filter_dsp_init_x86_10bpc(Dav1dLoopFilterDSPContext *c);
bitfn_decls(void dav1d_loop_filter_dsp_init, Dav1dLoopFilterDSPContext *c);
bitfn_decls(void dav1d_loop_filter_dsp_init_x86, Dav1dLoopFilterDSPContext *c);
#endif /* __DAV1D_SRC_LOOPFILTER_H__ */
......@@ -36,12 +36,14 @@
static NOINLINE void
loop_filter(pixel *dst, int E, int I, int H,
const ptrdiff_t stridea, const ptrdiff_t strideb, const int wd)
const ptrdiff_t stridea, const ptrdiff_t strideb, const int wd
HIGHBD_DECL_SUFFIX)
{
const int F = 1 << (BITDEPTH - 8);
E <<= BITDEPTH - 8;
I <<= BITDEPTH - 8;
H <<= BITDEPTH - 8;
const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
const int F = 1 << bitdepth_min_8;
E <<= bitdepth_min_8;
I <<= bitdepth_min_8;
H <<= bitdepth_min_8;
for (int i = 0; i < 4; i++, dst += stridea) {
int p6, p5, p4, p3, p2;
......@@ -128,23 +130,23 @@ loop_filter(pixel *dst, int E, int I, int H,
} else {
const int hev = abs(p1 - p0) > H || abs(q1 - q0) > H;
#define iclip_diff(v) iclip(v, -128 * (1 << (BITDEPTH - 8)), \
128 * (1 << (BITDEPTH - 8)) - 1)
#define iclip_diff(v) iclip(v, -128 * (1 << bitdepth_min_8), \
128 * (1 << bitdepth_min_8) - 1)
if (hev) {
int f = iclip_diff(p1 - q1), f1, f2;
f = iclip_diff(3 * (q0 - p0) + f);
f1 = imin(f + 4, (128 << (BITDEPTH - 8)) - 1) >> 3;
f2 = imin(f + 3, (128 << (BITDEPTH - 8)) - 1) >> 3;
f1 = imin(f + 4, (128 << bitdepth_min_8) - 1) >> 3;
f2 = imin(f + 3, (128 << bitdepth_min_8) - 1) >> 3;
dst[strideb * -1] = iclip_pixel(p0 + f2);
dst[strideb * +0] = iclip_pixel(q0 - f1);
} else {
int f = iclip_diff(3 * (q0 - p0)), f1, f2;
f1 = imin(f + 4, (128 << (BITDEPTH - 8)) - 1) >> 3;
f2 = imin(f + 3, (128 << (BITDEPTH - 8)) - 1) >> 3;
f1 = imin(f + 4, (128 << bitdepth_min_8) - 1) >> 3;
f2 = imin(f + 3, (128 << bitdepth_min_8) - 1) >> 3;
dst[strideb * -1] = iclip_pixel(p0 + f2);
dst[strideb * +0] = iclip_pixel(q0 - f1);
......@@ -161,7 +163,8 @@ loop_filter(pixel *dst, int E, int I, int H,
static void loop_filter_h_sb128y_c(pixel *dst, const ptrdiff_t stride,
const uint32_t *const vmask,
const uint8_t (*l)[4], ptrdiff_t b4_stride,
const Av1FilterLUT *lut, const int h)
const Av1FilterLUT *lut, const int h
HIGHBD_DECL_SUFFIX)
{
const unsigned vm = vmask[0] | vmask[1] | vmask[2];
for (unsigned y = 1; vm & ~(y - 1);
......@@ -173,7 +176,8 @@ static void loop_filter_h_sb128y_c(pixel *dst, const ptrdiff_t stride,
const int H = L >> 4;
const int E = lut->e[L], I = lut->i[L];
const int idx = (vmask[2] & y) ? 2 : !!(vmask[1] & y);
loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 << idx);
loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 << idx
HIGHBD_TAIL_SUFFIX);
}
}
}
......@@ -181,7 +185,8 @@ static void loop_filter_h_sb128y_c(pixel *dst, const ptrdiff_t stride,
static void loop_filter_v_sb128y_c(pixel *dst, const ptrdiff_t stride,
const uint32_t *const vmask,
const uint8_t (*l)[4], ptrdiff_t b4_stride,
const Av1FilterLUT *lut, const int w)
const Av1FilterLUT *lut, const int w
HIGHBD_DECL_SUFFIX)
{
const unsigned vm = vmask[0] | vmask[1] | vmask[2];
for (unsigned x = 1; vm & ~(x - 1); x <<= 1, dst += 4, l++) {
......@@ -191,7 +196,8 @@ static void loop_filter_v_sb128y_c(pixel *dst, const ptrdiff_t stride,
const int H = L >> 4;
const int E = lut->e[L], I = lut->i[L];
const int idx = (vmask[2] & x) ? 2 : !!(vmask[1] & x);
loop_filter(dst, E, I, H, 1, PXSTRIDE(stride), 4 << idx);
loop_filter(dst, E, I, H, 1, PXSTRIDE(stride), 4 << idx
HIGHBD_TAIL_SUFFIX);
}
}
}
......@@ -199,7 +205,8 @@ static void loop_filter_v_sb128y_c(pixel *dst, const ptrdiff_t stride,
static void loop_filter_h_sb128uv_c(pixel *dst, const ptrdiff_t stride,
const uint32_t *const vmask,
const uint8_t (*l)[4], ptrdiff_t b4_stride,
const Av1FilterLUT *lut, const int h)
const Av1FilterLUT *lut, const int h
HIGHBD_DECL_SUFFIX)
{
const unsigned vm = vmask[0] | vmask[1];
for (unsigned y = 1; vm & ~(y - 1);
......@@ -211,7 +218,8 @@ static void loop_filter_h_sb128uv_c(pixel *dst, const ptrdiff_t stride,
const int H = L >> 4;
const int E = lut->e[L], I = lut->i[L];
const int idx = !!(vmask[1] & y);
loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 + 2 * idx);
loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 + 2 * idx
HIGHBD_TAIL_SUFFIX);
}
}
}
......@@ -219,7 +227,8 @@ static void loop_filter_h_sb128uv_c(pixel *dst, const ptrdiff_t stride,
static void loop_filter_v_sb128uv_c(pixel *dst, const ptrdiff_t stride,
const uint32_t *<