Commit c3980e39 authored by Ronald S. Bultje

12 bits/component support

parent 2e6c8a92
...@@ -34,6 +34,9 @@ ...@@ -34,6 +34,9 @@
#if !defined(BITDEPTH) #if !defined(BITDEPTH)
typedef void pixel; typedef void pixel;
typedef void coef; typedef void coef;
#define HIGHBD_DECL_SUFFIX /* nothing */
#define HIGHBD_CALL_SUFFIX /* nothing */
#define HIGHBD_TAIL_SUFFIX /* nothing */
#elif BITDEPTH == 8 #elif BITDEPTH == 8
typedef uint8_t pixel; typedef uint8_t pixel;
typedef int16_t coef; typedef int16_t coef;
...@@ -41,28 +44,37 @@ typedef int16_t coef; ...@@ -41,28 +44,37 @@ typedef int16_t coef;
#define pixel_set memset #define pixel_set memset
#define iclip_pixel iclip_u8 #define iclip_pixel iclip_u8
#define PIX_HEX_FMT "%02x" #define PIX_HEX_FMT "%02x"
#define bytefn(x) x##_8bpc
#define bitfn(x) x##_8bpc #define bitfn(x) x##_8bpc
#define PXSTRIDE(x) x #define PXSTRIDE(x) x
#elif BITDEPTH == 10 || BITDEPTH == 12 #define highbd_only(x)
#define HIGHBD_DECL_SUFFIX /* nothing */
#define HIGHBD_CALL_SUFFIX /* nothing */
#define HIGHBD_TAIL_SUFFIX /* nothing */
#define bitdepth_from_max(x) 8
#elif BITDEPTH == 16
typedef uint16_t pixel; typedef uint16_t pixel;
typedef int32_t coef; typedef int32_t coef;
#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1) #define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
#define iclip_pixel(x) iclip(x, 0, ((1 << BITDEPTH) - 1))
static inline void pixel_set(pixel *const dst, const int val, const int num) { static inline void pixel_set(pixel *const dst, const int val, const int num) {
for (int n = 0; n < num; n++) for (int n = 0; n < num; n++)
dst[n] = val; dst[n] = val;
} }
#define PIX_HEX_FMT "%03x" #define PIX_HEX_FMT "%03x"
#define bytefn(x) x##_16bpc #define iclip_pixel(x) iclip(x, 0, bitdepth_max)
#if BITDEPTH == 10 #define HIGHBD_DECL_SUFFIX , const int bitdepth_max
#define bitfn(x) x##_10bpc #define HIGHBD_CALL_SUFFIX , f->bitdepth_max
#else #define HIGHBD_TAIL_SUFFIX , bitdepth_max
#define bitfn(x) x##_12bpc #define bitdepth_from_max(bitdepth_max) (32 - clz(bitdepth_max))
#endif #define bitfn(x) x##_16bpc
#define PXSTRIDE(x) (x >> 1) #define PXSTRIDE(x) (x >> 1)
#define highbd_only(x) x
#else #else
#error invalid value for bitdepth #error invalid value for bitdepth
#endif #endif
#define bytefn(x) bitfn(x)
#define bitfn_decls(name, ...) \
name##_8bpc(__VA_ARGS__); \
name##_16bpc(__VA_ARGS__)
#endif /* __DAV1D_COMMON_BITDEPTH_H__ */ #endif /* __DAV1D_COMMON_BITDEPTH_H__ */
...@@ -55,7 +55,7 @@ dav1d_inc_dirs = include_directories(['.', 'include', 'include/dav1d']) ...@@ -55,7 +55,7 @@ dav1d_inc_dirs = include_directories(['.', 'include', 'include/dav1d'])
# Bitdepth option # Bitdepth option
dav1d_bitdepths = get_option('bitdepths') dav1d_bitdepths = get_option('bitdepths')
foreach bitdepth : ['8', '10'] foreach bitdepth : ['8', '16']
cdata.set10('CONFIG_@0@BPC'.format(bitdepth), dav1d_bitdepths.contains(bitdepth)) cdata.set10('CONFIG_@0@BPC'.format(bitdepth), dav1d_bitdepths.contains(bitdepth))
endforeach endforeach
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
option('bitdepths', option('bitdepths',
type: 'array', type: 'array',
choices: ['8', '10'], choices: ['8', '16'],
description: 'Enable only specified bitdepths') description: 'Enable only specified bitdepths')
option('build_asm', option('build_asm',
......
...@@ -53,11 +53,11 @@ typedef const void *const_left_pixel_row_2px; ...@@ -53,11 +53,11 @@ typedef const void *const_left_pixel_row_2px;
#define decl_cdef_fn(name) \ #define decl_cdef_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const_left_pixel_row_2px left, \ void (name)(pixel *dst, ptrdiff_t stride, const_left_pixel_row_2px left, \
/*const*/ pixel *const top[2], int pri_strength, int sec_strength, \ /*const*/ pixel *const top[2], int pri_strength, int sec_strength, \
int dir, int damping, enum CdefEdgeFlags edges) int dir, int damping, enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX)
typedef decl_cdef_fn(*cdef_fn); typedef decl_cdef_fn(*cdef_fn);
#define decl_cdef_dir_fn(name) \ #define decl_cdef_dir_fn(name) \
int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var) int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var HIGHBD_DECL_SUFFIX)
typedef decl_cdef_dir_fn(*cdef_dir_fn); typedef decl_cdef_dir_fn(*cdef_dir_fn);
typedef struct Dav1dCdefDSPContext { typedef struct Dav1dCdefDSPContext {
...@@ -65,10 +65,7 @@ typedef struct Dav1dCdefDSPContext { ...@@ -65,10 +65,7 @@ typedef struct Dav1dCdefDSPContext {
cdef_fn fb[3 /* 444/luma, 422, 420 */]; cdef_fn fb[3 /* 444/luma, 422, 420 */];
} Dav1dCdefDSPContext; } Dav1dCdefDSPContext;
void dav1d_cdef_dsp_init_8bpc(Dav1dCdefDSPContext *c); bitfn_decls(void dav1d_cdef_dsp_init, Dav1dCdefDSPContext *c);
void dav1d_cdef_dsp_init_10bpc(Dav1dCdefDSPContext *c); bitfn_decls(void dav1d_cdef_dsp_init_x86, Dav1dCdefDSPContext *c);
void dav1d_cdef_dsp_init_x86_8bpc(Dav1dCdefDSPContext *c);
void dav1d_cdef_dsp_init_x86_10bpc(Dav1dCdefDSPContext *c);
#endif /* __DAV1D_SRC_CDEF_H__ */ #endif /* __DAV1D_SRC_CDEF_H__ */
...@@ -83,12 +83,13 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -83,12 +83,13 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
const Av1Filter *const lflvl, const Av1Filter *const lflvl,
const int by_start, const int by_end) const int by_start, const int by_end)
{ {
const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
const Dav1dDSPContext *const dsp = f->dsp; const Dav1dDSPContext *const dsp = f->dsp;
enum CdefEdgeFlags edges = HAVE_BOTTOM | (by_start > 0 ? HAVE_TOP : 0); enum CdefEdgeFlags edges = HAVE_BOTTOM | (by_start > 0 ? HAVE_TOP : 0);
pixel *ptrs[3] = { p[0], p[1], p[2] }; pixel *ptrs[3] = { p[0], p[1], p[2] };
const int sbsz = 16; const int sbsz = 16;
const int sb64w = f->sb128w << 1; const int sb64w = f->sb128w << 1;
const int damping = f->frame_hdr->cdef.damping + BITDEPTH - 8; const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
const enum Dav1dPixelLayout layout = f->cur.p.layout; const enum Dav1dPixelLayout layout = f->cur.p.layout;
const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout; const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400; const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
...@@ -156,17 +157,17 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -156,17 +157,17 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
} }
// the actual filter // the actual filter
const int y_pri_lvl = (y_lvl >> 2) << (BITDEPTH - 8); const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
int y_sec_lvl = y_lvl & 3; int y_sec_lvl = y_lvl & 3;
y_sec_lvl += y_sec_lvl == 3; y_sec_lvl += y_sec_lvl == 3;
y_sec_lvl <<= BITDEPTH - 8; y_sec_lvl <<= bitdepth_min_8;
const int uv_pri_lvl = (uv_lvl >> 2) << (BITDEPTH - 8); const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
int uv_sec_lvl = uv_lvl & 3; int uv_sec_lvl = uv_lvl & 3;
uv_sec_lvl += uv_sec_lvl == 3; uv_sec_lvl += uv_sec_lvl == 3;
uv_sec_lvl <<= BITDEPTH - 8; uv_sec_lvl <<= bitdepth_min_8;
unsigned variance; unsigned variance;
const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0], const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
&variance); &variance HIGHBD_CALL_SUFFIX);
if (y_lvl) { if (y_lvl) {
dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
(pixel *const [2]) { (pixel *const [2]) {
...@@ -175,7 +176,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -175,7 +176,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
}, },
adjust_strength(y_pri_lvl, variance), adjust_strength(y_pri_lvl, variance),
y_sec_lvl, y_pri_lvl ? dir : 0, y_sec_lvl, y_pri_lvl ? dir : 0,
damping, edges); damping, edges HIGHBD_CALL_SUFFIX);
} }
if (uv_lvl && has_chroma) { if (uv_lvl && has_chroma) {
const int uvdir = const int uvdir =
...@@ -190,7 +191,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -190,7 +191,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
}, },
uv_pri_lvl, uv_sec_lvl, uv_pri_lvl, uv_sec_lvl,
uv_pri_lvl ? uvdir : 0, uv_pri_lvl ? uvdir : 0,
damping - 1, edges); damping - 1, edges HIGHBD_CALL_SUFFIX);
} }
} }
......
...@@ -97,7 +97,8 @@ cdef_filter_block_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -97,7 +97,8 @@ cdef_filter_block_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel (*left)[2], /*const*/ pixel *const top[2], const pixel (*left)[2], /*const*/ pixel *const top[2],
const int w, const int h, const int pri_strength, const int w, const int h, const int pri_strength,
const int sec_strength, const int dir, const int sec_strength, const int dir,
const int damping, const enum CdefEdgeFlags edges) const int damping, const enum CdefEdgeFlags edges
HIGHBD_DECL_SUFFIX)
{ {
static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = { static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
{ -1 * 12 + 1, -2 * 12 + 2 }, { -1 * 12 + 1, -2 * 12 + 2 },
...@@ -115,7 +116,8 @@ cdef_filter_block_c(pixel *dst, const ptrdiff_t dst_stride, ...@@ -115,7 +116,8 @@ cdef_filter_block_c(pixel *dst, const ptrdiff_t dst_stride,
assert((w == 4 || w == 8) && (h == 4 || h == 8)); assert((w == 4 || w == 8) && (h == 4 || h == 8));
uint16_t tmp_buf[144]; // 12*12 is the maximum value of tmp_stride * (h + 4) uint16_t tmp_buf[144]; // 12*12 is the maximum value of tmp_stride * (h + 4)
uint16_t *tmp = tmp_buf + 2 * tmp_stride + 2; uint16_t *tmp = tmp_buf + 2 * tmp_stride + 2;
const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1]; const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> bitdepth_min_8) & 1];
padding(tmp, tmp_stride, dst, dst_stride, left, top, w, h, edges); padding(tmp, tmp_stride, dst, dst_stride, left, top, w, h, edges);
...@@ -170,10 +172,11 @@ static void cdef_filter_block_##w##x##h##_c(pixel *const dst, \ ...@@ -170,10 +172,11 @@ static void cdef_filter_block_##w##x##h##_c(pixel *const dst, \
const int sec_strength, \ const int sec_strength, \
const int dir, \ const int dir, \
const int damping, \ const int damping, \
const enum CdefEdgeFlags edges) \ const enum CdefEdgeFlags edges \
HIGHBD_DECL_SUFFIX) \
{ \ { \
cdef_filter_block_c(dst, stride, left, top, w, h, pri_strength, sec_strength, \ cdef_filter_block_c(dst, stride, left, top, w, h, pri_strength, sec_strength, \
dir, damping, edges); \ dir, damping, edges HIGHBD_TAIL_SUFFIX); \
} }
cdef_fn(4, 4); cdef_fn(4, 4);
...@@ -181,15 +184,16 @@ cdef_fn(4, 8); ...@@ -181,15 +184,16 @@ cdef_fn(4, 8);
cdef_fn(8, 8); cdef_fn(8, 8);
static int cdef_find_dir_c(const pixel *img, const ptrdiff_t stride, static int cdef_find_dir_c(const pixel *img, const ptrdiff_t stride,
unsigned *const var) unsigned *const var HIGHBD_DECL_SUFFIX)
{ {
const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
int partial_sum_hv[2][8] = { { 0 } }; int partial_sum_hv[2][8] = { { 0 } };
int partial_sum_diag[2][15] = { { 0 } }; int partial_sum_diag[2][15] = { { 0 } };
int partial_sum_alt[4][11] = { { 0 } }; int partial_sum_alt[4][11] = { { 0 } };
for (int y = 0; y < 8; y++) { for (int y = 0; y < 8; y++) {
for (int x = 0; x < 8; x++) { for (int x = 0; x < 8; x++) {
const int px = (img[x] >> (BITDEPTH - 8)) - 128; const int px = (img[x] >> bitdepth_min_8) - 128;
partial_sum_diag[0][ y + x ] += px; partial_sum_diag[0][ y + x ] += px;
partial_sum_alt [0][ y + (x >> 1)] += px; partial_sum_alt [0][ y + (x >> 1)] += px;
......
...@@ -3013,7 +3013,6 @@ int dav1d_submit_frame(Dav1dContext *const c) { ...@@ -3013,7 +3013,6 @@ int dav1d_submit_frame(Dav1dContext *const c) {
switch (bpc) { switch (bpc) {
#define assign_bitdepth_case(bd) \ #define assign_bitdepth_case(bd) \
case bd: \
dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \ dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \ dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
dav1d_itx_dsp_init_##bd##bpc(&dsp->itx); \ dav1d_itx_dsp_init_##bd##bpc(&dsp->itx); \
...@@ -3022,10 +3021,13 @@ int dav1d_submit_frame(Dav1dContext *const c) { ...@@ -3022,10 +3021,13 @@ int dav1d_submit_frame(Dav1dContext *const c) {
dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \ dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
break break
#if CONFIG_8BPC #if CONFIG_8BPC
assign_bitdepth_case(8); case 8:
assign_bitdepth_case(8);
#endif #endif
#if CONFIG_10BPC #if CONFIG_16BPC
assign_bitdepth_case(10); case 10:
case 12:
assign_bitdepth_case(16);
#endif #endif
#undef assign_bitdepth_case #undef assign_bitdepth_case
default: default:
...@@ -3047,7 +3049,7 @@ int dav1d_submit_frame(Dav1dContext *const c) { ...@@ -3047,7 +3049,7 @@ int dav1d_submit_frame(Dav1dContext *const c) {
assign_bitdepth_case(8); assign_bitdepth_case(8);
#endif #endif
} else { } else {
#if CONFIG_10BPC #if CONFIG_16BPC
assign_bitdepth_case(16); assign_bitdepth_case(16);
#endif #endif
} }
...@@ -3168,6 +3170,7 @@ int dav1d_submit_frame(Dav1dContext *const c) { ...@@ -3168,6 +3170,7 @@ int dav1d_submit_frame(Dav1dContext *const c) {
f->sb_step = 16 << f->seq_hdr->sb128; f->sb_step = 16 << f->seq_hdr->sb128;
f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift; f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
f->b4_stride = (f->bw + 31) & ~31; f->b4_stride = (f->bw + 31) & ~31;
f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
// ref_mvs // ref_mvs
if ((f->frame_hdr->frame_type & 1) || f->frame_hdr->allow_intrabc) { if ((f->frame_hdr->frame_type & 1) || f->frame_hdr->allow_intrabc) {
......
...@@ -160,5 +160,70 @@ const uint16_t dav1d_dq_tbl[][QINDEX_RANGE][2] = { ...@@ -160,5 +160,70 @@ const uint16_t dav1d_dq_tbl[][QINDEX_RANGE][2] = {
{ 3586, 5916, }, { 3702, 6032, }, { 3823, 6148, }, { 3953, 6268, }, { 3586, 5916, }, { 3702, 6032, }, { 3823, 6148, }, { 3953, 6268, },
{ 4089, 6388, }, { 4236, 6512, }, { 4394, 6640, }, { 4559, 6768, }, { 4089, 6388, }, { 4236, 6512, }, { 4394, 6640, }, { 4559, 6768, },
{ 4737, 6900, }, { 4929, 7036, }, { 5130, 7172, }, { 5347, 7312, }, { 4737, 6900, }, { 4929, 7036, }, { 5130, 7172, }, { 5347, 7312, },
}, {
{ 4, 4 }, { 12, 13 }, { 18, 19 }, { 25, 27 },
{ 33, 35 }, { 41, 44 }, { 50, 54 }, { 60, 64 },
{ 70, 75 }, { 80, 87 }, { 91, 99 }, { 103, 112 },
{ 115, 126 }, { 127, 139 }, { 140, 154 }, { 153, 168 },
{ 166, 183 }, { 180, 199 }, { 194, 214 }, { 208, 230 },
{ 222, 247 }, { 237, 263 }, { 251, 280 }, { 266, 297 },
{ 281, 314 }, { 296, 331 }, { 312, 349 }, { 327, 366 },
{ 343, 384 }, { 358, 402 }, { 374, 420 }, { 390, 438 },
{ 405, 456 }, { 421, 475 }, { 437, 493 }, { 453, 511 },
{ 469, 530 }, { 484, 548 }, { 500, 567 }, { 516, 586 },
{ 532, 604 }, { 548, 623 }, { 564, 642 }, { 580, 660 },
{ 596, 679 }, { 611, 698 }, { 627, 716 }, { 643, 735 },
{ 659, 753 }, { 674, 772 }, { 690, 791 }, { 706, 809 },
{ 721, 828 }, { 737, 846 }, { 752, 865 }, { 768, 884 },
{ 783, 902 }, { 798, 920 }, { 814, 939 }, { 829, 957 },
{ 844, 976 }, { 859, 994 }, { 874, 1012 }, { 889, 1030 },
{ 904, 1049 }, { 919, 1067 }, { 934, 1085 }, { 949, 1103 },
{ 964, 1121 }, { 978, 1139 }, { 993, 1157 }, { 1008, 1175 },
{ 1022, 1193 }, { 1037, 1211 }, { 1051, 1229 }, { 1065, 1246 },
{ 1080, 1264 }, { 1094, 1282 }, { 1108, 1299 }, { 1122, 1317 },
{ 1136, 1335 }, { 1151, 1352 }, { 1165, 1370 }, { 1179, 1387 },
{ 1192, 1405 }, { 1206, 1422 }, { 1220, 1440 }, { 1234, 1457 },
{ 1248, 1474 }, { 1261, 1491 }, { 1275, 1509 }, { 1288, 1526 },
{ 1302, 1543 }, { 1315, 1560 }, { 1329, 1577 }, { 1342, 1595 },
{ 1368, 1627 }, { 1393, 1660 }, { 1419, 1693 }, { 1444, 1725 },
{ 1469, 1758 }, { 1494, 1791 }, { 1519, 1824 }, { 1544, 1856 },
{ 1569, 1889 }, { 1594, 1922 }, { 1618, 1954 }, { 1643, 1987 },
{ 1668, 2020 }, { 1692, 2052 }, { 1717, 2085 }, { 1741, 2118 },
{ 1765, 2150 }, { 1789, 2183 }, { 1814, 2216 }, { 1838, 2248 },
{ 1862, 2281 }, { 1885, 2313 }, { 1909, 2346 }, { 1933, 2378 },
{ 1957, 2411 }, { 1992, 2459 }, { 2027, 2508 }, { 2061, 2556 },
{ 2096, 2605 }, { 2130, 2653 }, { 2165, 2701 }, { 2199, 2750 },
{ 2233, 2798 }, { 2267, 2847 }, { 2300, 2895 }, { 2334, 2943 },
{ 2367, 2992 }, { 2400, 3040 }, { 2434, 3088 }, { 2467, 3137 },
{ 2499, 3185 }, { 2532, 3234 }, { 2575, 3298 }, { 2618, 3362 },
{ 2661, 3426 }, { 2704, 3491 }, { 2746, 3555 }, { 2788, 3619 },
{ 2830, 3684 }, { 2872, 3748 }, { 2913, 3812 }, { 2954, 3876 },
{ 2995, 3941 }, { 3036, 4005 }, { 3076, 4069 }, { 3127, 4149 },
{ 3177, 4230 }, { 3226, 4310 }, { 3275, 4390 }, { 3324, 4470 },
{ 3373, 4550 }, { 3421, 4631 }, { 3469, 4711 }, { 3517, 4791 },
{ 3565, 4871 }, { 3621, 4967 }, { 3677, 5064 }, { 3733, 5160 },
{ 3788, 5256 }, { 3843, 5352 }, { 3897, 5448 }, { 3951, 5544 },
{ 4005, 5641 }, { 4058, 5737 }, { 4119, 5849 }, { 4181, 5961 },
{ 4241, 6073 }, { 4301, 6185 }, { 4361, 6297 }, { 4420, 6410 },
{ 4479, 6522 }, { 4546, 6650 }, { 4612, 6778 }, { 4677, 6906 },
{ 4742, 7034 }, { 4807, 7162 }, { 4871, 7290 }, { 4942, 7435 },
{ 5013, 7579 }, { 5083, 7723 }, { 5153, 7867 }, { 5222, 8011 },
{ 5291, 8155 }, { 5367, 8315 }, { 5442, 8475 }, { 5517, 8635 },
{ 5591, 8795 }, { 5665, 8956 }, { 5745, 9132 }, { 5825, 9308 },
{ 5905, 9484 }, { 5984, 9660 }, { 6063, 9836 }, { 6149, 10028 },
{ 6234, 10220 }, { 6319, 10412 }, { 6404, 10604 }, { 6495, 10812 },
{ 6587, 11020 }, { 6678, 11228 }, { 6769, 11437 }, { 6867, 11661 },
{ 6966, 11885 }, { 7064, 12109 }, { 7163, 12333 }, { 7269, 12573 },
{ 7376, 12813 }, { 7483, 13053 }, { 7599, 13309 }, { 7715, 13565 },
{ 7832, 13821 }, { 7958, 14093 }, { 8085, 14365 }, { 8214, 14637 },
{ 8352, 14925 }, { 8492, 15213 }, { 8635, 15502 }, { 8788, 15806 },
{ 8945, 16110 }, { 9104, 16414 }, { 9275, 16734 }, { 9450, 17054 },
{ 9639, 17390 }, { 9832, 17726 }, { 10031, 18062 }, { 10245, 18414 },
{ 10465, 18766 }, { 10702, 19134 }, { 10946, 19502 }, { 11210, 19886 },
{ 11482, 20270 }, { 11776, 20670 }, { 12081, 21070 }, { 12409, 21486 },
{ 12750, 21902 }, { 13118, 22334 }, { 13501, 22766 }, { 13913, 23214 },
{ 14343, 23662 }, { 14807, 24126 }, { 15290, 24590 }, { 15812, 25070 },
{ 16356, 25551 }, { 16943, 26047 }, { 17575, 26559 }, { 18237, 27071 },
{ 18949, 27599 }, { 19718, 28143 }, { 20521, 28687 }, { 21387, 29247 },
} }
}; };
...@@ -30,10 +30,7 @@ ...@@ -30,10 +30,7 @@
#include "dav1d/dav1d.h" #include "dav1d/dav1d.h"
void dav1d_apply_grain_8bpc(Dav1dPicture *const out, bitfn_decls(void dav1d_apply_grain, Dav1dPicture *const out,
const Dav1dPicture *const in); const Dav1dPicture *const in);
void dav1d_apply_grain_10bpc(Dav1dPicture *const out,
const Dav1dPicture *const in);
#endif /* __DAV1D_SRC_FILM_GRAIN_H__ */ #endif /* __DAV1D_SRC_FILM_GRAIN_H__ */
...@@ -51,7 +51,11 @@ enum { ...@@ -51,7 +51,11 @@ enum {
SUB_GRAIN_HEIGHT = 38, SUB_GRAIN_HEIGHT = 38,
SUB_GRAIN_OFFSET = 6, SUB_GRAIN_OFFSET = 6,
BLOCK_SIZE = 32, BLOCK_SIZE = 32,
SCALING_SIZE = 1 << BITDEPTH, #if BITDEPTH == 8
SCALING_SIZE = 256
#else
SCALING_SIZE = 4096
#endif
}; };
static inline int get_random_number(const int bits, unsigned *state) { static inline int get_random_number(const int bits, unsigned *state) {
...@@ -66,18 +70,14 @@ static inline int round2(const int x, const int shift) { ...@@ -66,18 +70,14 @@ static inline int round2(const int x, const int shift) {
return (x + ((1 << shift) >> 1)) >> shift; return (x + ((1 << shift) >> 1)) >> shift;
} }
enum {
GRAIN_CENTER = 128 << (BITDEPTH - 8),
GRAIN_MIN = -GRAIN_CENTER,
GRAIN_MAX = (256 << (BITDEPTH - 8)) - 1 - GRAIN_CENTER,
};
static void generate_grain_y(const Dav1dPicture *const in, static void generate_grain_y(const Dav1dPicture *const in,
entry buf[GRAIN_HEIGHT][GRAIN_WIDTH]) entry buf[GRAIN_HEIGHT][GRAIN_WIDTH])
{ {
const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data; const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data;
unsigned seed = data->seed; unsigned seed = data->seed;
const int shift = 12 - BITDEPTH + data->grain_scale_shift; const int shift = 12 - in->p.bpc + data->grain_scale_shift;
const int grain_ctr = 128 << (in->p.bpc - 8);
const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
for (int y = 0; y < GRAIN_HEIGHT; y++) { for (int y = 0; y < GRAIN_HEIGHT; y++) {
for (int x = 0; x < GRAIN_WIDTH; x++) { for (int x = 0; x < GRAIN_WIDTH; x++) {
...@@ -102,7 +102,7 @@ static void generate_grain_y(const Dav1dPicture *const in, ...@@ -102,7 +102,7 @@ static void generate_grain_y(const Dav1dPicture *const in,
} }
int grain = buf[y][x] + round2(sum, data->ar_coeff_shift); int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
buf[y][x] = iclip(grain, GRAIN_MIN, GRAIN_MAX); buf[y][x] = iclip(grain, grain_min, grain_max);
} }
} }
} }
...@@ -113,7 +113,9 @@ static void generate_grain_uv(const Dav1dPicture *const in, int uv, ...@@ -113,7 +113,9 @@ static void generate_grain_uv(const Dav1dPicture *const in, int uv,
{ {
const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data; const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data;
unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524); unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524);
const int shift = 12 - BITDEPTH + data->grain_scale_shift; const int shift = 12 - in->p.bpc + data->grain_scale_shift;
const int grain_ctr = 128 << (in->p.bpc - 8);
const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
const int subx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444; const int subx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
...@@ -160,15 +162,17 @@ static void generate_grain_uv(const Dav1dPicture *const in, int uv, ...@@ -160,15 +162,17 @@ static void generate_grain_uv(const Dav1dPicture *const in, int uv,
} }
const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift); const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
buf[y][x] = iclip(grain, GRAIN_MIN, GRAIN_MAX); buf[y][x] = iclip(grain, grain_min, grain_max);
} }
} }
} }
static void generate_scaling(const uint8_t points[][2], int num, static void generate_scaling(const int bitdepth,
const uint8_t points[][2], int num,
uint8_t scaling[SCALING_SIZE]) uint8_t scaling[SCALING_SIZE])
{ {
const int shift_x = BITDEPTH - 8; const int shift_x = bitdepth - 8;
const int scaling_size = 1 << bitdepth;
// Fill up the preceding entries with the initial value // Fill up the preceding entries with the initial value
for (int i = 0; i < points[0][0] << shift_x; i++) for (int i = 0; i < points[0][0] << shift_x; i++)
...@@ -190,7 +194,7 @@ static void generate_scaling(const uint8_t points[][2], int num, ...@@ -190,7 +194,7 @@ static void generate_scaling(const uint8_t points[][2], int num,
} }
// Fill up the remaining entries with the final value // Fill up the remaining entries with the final value
for (int i = points[num - 1][0] << shift_x; i < SCALING_SIZE; i++) for (int i = points[num - 1][0] << shift_x; i < scaling_size; i++)
scaling[i] = points[num - 1][1]; scaling[i] = points[num - 1][1];
} }
...@@ -213,14 +217,17 @@ static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in ...@@ -213,14 +217,17 @@ static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in
{ {
const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
const int rows = 1 + (data->overlap_flag && row_num > 0); const int rows = 1 + (data->overlap_flag && row_num > 0);
const int bitdepth_min_8 = in->p.bpc - 8;
const int grain_ctr = 128 << bitdepth_min_8;
const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
int min_value, max_value; int min_value, max_value;
if (data->clip_to_restricted_range) { if (data->clip_to_restricted_range) {
min_value = 16 << (BITDEPTH - 8); min_value = 16 << bitdepth_min_8;
max_value = 235 << (BITDEPTH - 8); max_value = 235 << bitdepth_min_8;
} else { } else {
min_value = 0; min_value = 0;
max_value = (1 << BITDEPTH) - 1; max_value = (1U << in->p.bpc) - 1;
} }
// seed[0] contains the current row, seed[1] contains the previous // seed[0] contains the current row, seed[1] contains the previous
...@@ -278,7 +285,7 @@ static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in ...@@ -278,7 +285,7 @@ static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y); int old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
grain = round2(old * w[x][0] + grain * w[x][1], 5); grain = round2(old * w[x][0] + grain * w[x][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX); grain = iclip(grain, grain_min, grain_max);
add_noise_y(x, y, grain); add_noise_y(x, y, grain);
} }
} }
...@@ -289,7 +296,7 @@ static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in ...@@ -289,7 +296,7 @@ static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y); int old = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
grain = round2(old * w[y][0] + grain * w[y][1], 5); grain = round2(old * w[y][0] + grain * w[y][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX); grain = iclip(grain, grain_min, grain_max);
add_noise_y(x, y, grain); add_noise_y(x, y, grain);
} }
...@@ -299,17 +306,17 @@ static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in ...@@ -299,17 +306,17 @@ static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in
int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y); int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y); int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y);
top = round2(old * w[x][0] + top * w[x][1], 5); top = round2(old * w[x][0] + top * w[x][1], 5);
top = iclip(top, GRAIN_MIN, GRAIN_MAX); top = iclip(top, grain_min, grain_max);
// Blend the current pixel with the left block // Blend the current pixel with the left block
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y); int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y); old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
grain = round2(old * w[x][0] + grain * w[x][1], 5); grain = round2(old * w[x][0] + grain * w[x][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX); grain = iclip(grain, grain_min, grain_max);
// Mix the row rows together and apply grain // Mix the row rows together and apply grain
grain = round2(top * w[y][0] + grain * w[y][1], 5); grain = round2(top * w[y][0] + grain * w[y][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX); grain = iclip(grain, grain_min, grain_max);
add_noise_y(x, y, grain); add_noise_y(x, y, grain);
} }
} }
...@@ -322,18 +329,22 @@ static void apply_to_row_uv(Dav1dPicture *const out, const Dav1dPicture *const i ...@@ -322,18 +329,22 @@ static void apply_to_row_uv(Dav1dPicture *const out, const Dav1dPicture *const i
{ {
const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
const int rows = 1 + (data->overlap_flag && row_num > 0); const int rows = 1 + (data->overlap_flag && row_num > 0);
const int bitdepth_max = (1 << in->p.bpc) - 1;
const int bitdepth_min_8 = in->p.bpc - 8;
const int grain_ctr = 128 << bitdepth_min_8;
const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
int min_value, max_value; int min_value, max_value;
if (data->clip_to_restricted_range) { if (data->clip_to_restricted_range) {
min_value = 16 << (BITDEPTH - 8); min_value = 16 << bitdepth_min_8;
if (out->seq_hdr->mtrx == DAV1D_MC_IDENTITY) { if (out->seq_hdr->mtrx == DAV1D_MC_IDENTITY) {
max_value = 235 << (BITDEPTH - 8); max_value = 235 << bitdepth_min_8;
} else { } else {
max_value = 240 << (BITDEPTH - 8); max_value = 240 << bitdepth_min_8;
} }
} else { } else {
min_value = 0; min_value = 0;
max_value = (1 << BITDEPTH) - 1;