Commit cfa986fe, authored by Niklas Haas, committed by Ronald S. Bultje

film_grain: implement film grain synthesis

This is using a slightly adapted version of my GPU-based algorithm. The
major difference to the algorithm suggested by the spec (and implemented
in libaom) is that instead of using a line buffer to hold the previous
row's film grain blocks, we compute each row/block fully independently.

This opens the door to exploiting parallelism in the future, since we
don't have any left->right or top->down dependency except for the PRNG
state (which we could pre-compute for a massively parallel / GPU
implementation).

That being said, it's probably somewhat slower than using a line buffer
for the serial / single CPU case, although most likely not by much
(since the areas with the most redundant work get progressively smaller,
down to a single 2x2 square for the worst case).
parent 20e9f4df
......@@ -45,6 +45,7 @@ typedef struct Dav1dSettings {
int n_frame_threads;
int n_tile_threads;
Dav1dPicAllocator allocator;
int apply_grain;
} Dav1dSettings;
/**
......
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __DAV1D_SRC_FILM_GRAIN_H__
#define __DAV1D_SRC_FILM_GRAIN_H__
#include "dav1d/dav1d.h"
/*
 * Film grain synthesis entry points, one per supported bit depth.
 *
 * Source pixels are read from `in` and the grained result is written to
 * `out`. The film grain parameters are taken from out->p.film_grain, and the
 * implementation asserts that both pictures use the same strides.
 *
 * Note: the implementation zero-fills the padding of `in` (right of the
 * picture width and below the picture height, up to the next block boundary)
 * even though `in` is declared const.
 */
void dav1d_apply_grain_8bpc(Dav1dPicture *const out,
const Dav1dPicture *const in);
void dav1d_apply_grain_10bpc(Dav1dPicture *const out,
const Dav1dPicture *const in);
#endif /* __DAV1D_SRC_FILM_GRAIN_H__ */
/*
* Copyright © 2018, Niklas Haas
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <assert.h>
#include <stdint.h>
#include "common.h"
#include "common/intops.h"
#include "common/bitdepth.h"
#include "tables.h"
#include "film_grain.h"
// Storage type for a single grain sample. The 8-bit build uses a narrower
// type than the other bit depths.
#if BITDEPTH == 8
typedef int8_t entry;
#else
typedef int16_t entry;
#endif
// Dimensions of the grain templates and of the blocks grain is applied in
enum {
GRAIN_WIDTH = 82, // columns of a full-size grain template
GRAIN_HEIGHT = 73, // rows of a full-size grain template
SUB_GRAIN_WIDTH = 44, // columns generated for horizontally subsampled chroma
SUB_GRAIN_HEIGHT = 38, // rows generated for vertically subsampled chroma
SUB_GRAIN_OFFSET = 6, // not referenced by the code in this file section
BLOCK_SIZE = 32, // grain is applied in BLOCK_SIZE x BLOCK_SIZE luma blocks
SCALING_SIZE = 1 << BITDEPTH, // scaling LUTs are indexed by pixel value
};
/*
 * Advances the 16-bit shift register stored in *state by one step and
 * returns the `bits` most significant bits of the updated register.
 *
 * The new top bit is the XOR of register bits 0, 1, 3 and 12.
 */
static inline int get_random_number(const int bits, unsigned *state) {
    const int reg = *state;
    unsigned feedback = reg ^ (reg >> 1) ^ (reg >> 3) ^ (reg >> 12);
    feedback &= 1;

    // Shift right by one and insert the feedback bit at the top
    *state = (reg >> 1) | (feedback << 15);

    const int mask = (1 << bits) - 1;
    return (*state >> (16 - bits)) & mask;
}
/*
 * Divides x by 2^shift, adding half of the divisor before shifting.
 * A shift of 0 returns x unchanged.
 */
static inline int round2(const int x, const int shift) {
    const int half = (1 << shift) >> 1;
    return (x + half) >> shift;
}
// Range that grain samples are clipped to. It scales with BITDEPTH:
// [-128, 127] for 8 bits, [-512, 511] for 10 bits.
enum {
GRAIN_CENTER = 128 << (BITDEPTH - 8),
GRAIN_MIN = -GRAIN_CENTER,
GRAIN_MAX = (256 << (BITDEPTH - 8)) - 1 - GRAIN_CENTER,
};
/*
 * Builds the luma grain template in `buf`.
 *
 * First the whole template is filled with pseudo-random samples taken from
 * dav1d_gaussian_sequence, scaled down according to the bit depth and
 * grain_scale_shift. Then an auto-regressive filter is run over it in raster
 * order, leaving a 3-sample border on the top, left and right untouched. Each
 * filtered sample depends on already-filtered neighbours, so the traversal
 * order matters.
 *
 * in:  picture whose p.film_grain parameters are used
 * buf: output template, GRAIN_HEIGHT x GRAIN_WIDTH
 */
static void generate_grain_y(const Dav1dPicture *const in,
                             entry buf[GRAIN_HEIGHT][GRAIN_WIDTH])
{
    const Dav1dFilmGrainData *const fg = &in->p.film_grain;
    const int noise_shift = 12 - BITDEPTH + fg->grain_scale_shift;
    unsigned rng = fg->seed;

    // Pass 1: unfiltered noise
    for (int row = 0; row < GRAIN_HEIGHT; row++) {
        entry *const line = buf[row];
        for (int col = 0; col < GRAIN_WIDTH; col++) {
            const int idx = get_random_number(11, &rng);
            line[col] = round2(dav1d_gaussian_sequence[idx], noise_shift);
        }
    }

    // Pass 2: auto-regressive filter
    const int border = 3;
    const int lag = fg->ar_coeff_lag;

    for (int row = border; row < GRAIN_HEIGHT; row++) {
        for (int col = border; col < GRAIN_WIDTH - border; col++) {
            const int8_t *c = fg->ar_coeffs_y;
            int acc = 0;

            for (int dy = -lag; dy <= 0; dy++) {
                // Rows above contribute a full window; the current row only
                // contributes the samples to the left of (col, row)
                const int last_dx = dy ? lag : -1;
                for (int dx = -lag; dx <= last_dx; dx++)
                    acc += *(c++) * buf[row + dy][col + dx];
            }

            const int filtered = buf[row][col] + round2(acc, fg->ar_coeff_shift);
            buf[row][col] = iclip(filtered, GRAIN_MIN, GRAIN_MAX);
        }
    }
}
/*
 * Builds the grain template for one chroma plane in `buf`.
 *
 * Works like the luma generator, with three differences: the seed is XORed
 * with a per-plane constant, only the subsampled portion of the template is
 * generated for subsampled layouts, and when luma scaling points are present
 * the auto-regressive filter gets one extra tap fed by the (averaged)
 * co-located luma grain.
 *
 * in:    picture whose layout and p.film_grain parameters are used
 * uv:    chroma plane index (0 or 1), selects the seed and coefficients
 * buf:   output template
 * buf_y: finished luma template, read only
 */
static void generate_grain_uv(const Dav1dPicture *const in, int uv,
                              entry buf[GRAIN_HEIGHT][GRAIN_WIDTH],
                              entry buf_y[GRAIN_HEIGHT][GRAIN_WIDTH])
{
    const Dav1dFilmGrainData *const fg = &in->p.film_grain;
    const int noise_shift = 12 - BITDEPTH + fg->grain_scale_shift;
    const int ss_hor = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int width = ss_hor ? SUB_GRAIN_WIDTH : GRAIN_WIDTH;
    const int height = ss_ver ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT;
    unsigned rng = fg->seed ^ (uv ? 0x49d8 : 0xb524);

    // Pass 1: unfiltered noise
    for (int row = 0; row < height; row++) {
        entry *const line = buf[row];
        for (int col = 0; col < width; col++) {
            const int idx = get_random_number(11, &rng);
            line[col] = round2(dav1d_gaussian_sequence[idx], noise_shift);
        }
    }

    // Pass 2: auto-regressive filter
    const int border = 3;
    const int lag = fg->ar_coeff_lag;

    for (int row = border; row < height; row++) {
        for (int col = border; col < width - border; col++) {
            const int8_t *c = fg->ar_coeffs_uv[uv];
            int acc = 0;

            // Chroma neighbours: full window on the rows above, and only the
            // samples to the left of (col, row) on the current row
            for (int dy = -lag; dy <= 0; dy++) {
                const int last_dx = dy ? lag : -1;
                for (int dx = -lag; dx <= last_dx; dx++)
                    acc += *(c++) * buf[row + dy][col + dx];
            }

            // The coefficient after the last neighbour weighs the luma grain
            // covering the same position, averaged over the subsampling area
            if (fg->num_y_points) {
                const int luma_col = ((col - border) << ss_hor) + border;
                const int luma_row = ((row - border) << ss_ver) + border;
                int luma = 0;
                for (int i = 0; i <= ss_ver; i++) {
                    for (int j = 0; j <= ss_hor; j++)
                        luma += buf_y[luma_row + i][luma_col + j];
                }
                acc += round2(luma, ss_hor + ss_ver) * (*c);
            }

            const int filtered = buf[row][col] + round2(acc, fg->ar_coeff_shift);
            buf[row][col] = iclip(filtered, GRAIN_MIN, GRAIN_MAX);
        }
    }
}
/*
 * Expands a piecewise-linear scaling function, given as `num` control points
 * (x, y), into a lookup table with one entry per pixel value.
 *
 * points:  control points; x coordinates are 8-bit and are shifted up to the
 *          current bit depth
 * num:     number of control points; with no points the table is zeroed
 * scaling: output table of SCALING_SIZE entries
 *
 * Entries before the first point take the first point's value, entries from
 * the last point onwards take the last point's value, and entries in between
 * are interpolated in 16.16 fixed point.
 */
static void generate_scaling(const uint8_t points[][2], int num,
                             uint8_t scaling[SCALING_SIZE])
{
    const int shift_x = BITDEPTH - 8;

    // Without control points there is nothing to interpolate; avoid reading
    // points[0] / points[num - 1] and produce a zero-strength table instead
    if (num <= 0) {
        for (int i = 0; i < SCALING_SIZE; i++)
            scaling[i] = 0;
        return;
    }

    // Fill up the preceding entries with the initial value
    for (int i = 0; i < points[0][0] << shift_x; i++)
        scaling[i] = points[0][1];

    // Linearly interpolate the values in the middle
    for (int i = 0; i < num - 1; i++) {
        const int bx = points[i][0] << shift_x;
        const int by = points[i][1];
        const int ex = points[i+1][0] << shift_x;
        const int ey = points[i+1][1];
        const int dx = ex - bx;
        const int dy = ey - by;

        // Points are expected to have strictly increasing x; skip a
        // degenerate segment rather than dividing by zero
        if (dx <= 0)
            continue;

        // The per-step slope is the rounded reciprocal of dx (16.16 fixed
        // point) multiplied by dy. The division must happen before the
        // multiplication to match the reference rounding.
        const int delta = dy * ((0x10000 + (dx >> 1)) / dx);
        for (int x = 0; x < dx; x++) {
            const int v = by + ((x * delta + 0x8000) >> 16);
            scaling[bx + x] = v;
        }
    }

    // Fill up the remaining entries with the final value
    for (int i = points[num - 1][0] << shift_x; i < SCALING_SIZE; i++)
        scaling[i] = points[num - 1][1];
}
/*
 * Fetches one grain sample for position (x, y) inside a block.
 *
 * offsets[bx][by] holds the random value of the block to sample for: bx/by
 * are 0 for the current block and 1 for the previous block in that direction.
 * The high nibble of the random value selects the horizontal offset into the
 * template and the low nibble the vertical one. For a previous block, the
 * position is additionally moved by one (subsampled) block size so that the
 * sample lines up with where that block's grain would have continued.
 */
static inline entry sample_lut(entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
                               int offsets[2][2], int subx, int suby,
                               int bx, int by, int x, int y)
{
    const int rnd = offsets[bx][by];
    const int rnd_x = rnd >> 4;
    const int rnd_y = rnd & 0xF;

    const int block_w = BLOCK_SIZE >> subx;
    const int block_h = BLOCK_SIZE >> suby;

    const int col = 3 + (2 >> subx) * (3 + rnd_x) + x + block_w * bx;
    const int row = 3 + (2 >> suby) * (3 + rnd_y) + y + block_h * by;

    return grain_lut[row][col];
}
/*
 * Adds grain to one row of luma blocks (BLOCK_SIZE pixel rows).
 *
 * out:       destination picture; its p.film_grain parameters drive the process
 * in:        source picture; must have the same luma stride as `out`
 * grain_lut: luma grain template
 * scaling:   luma scaling LUT, indexed by the source pixel value
 * row_num:   index of the block row to process
 *
 * Every block row is computed independently: the random offsets of the
 * previous block row are regenerated from its seed instead of being carried
 * over, so block rows can be processed in any order.
 *
 * Note that the padding of `in` right of / below the picture is overwritten
 * with zeros, although `in` is declared const.
 */
static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in,
entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
uint8_t scaling[SCALING_SIZE], int row_num)
{
const Dav1dFilmGrainData *const data = &out->p.film_grain;
// With overlap enabled, every block row except the first also needs the
// offsets of the block row above it
const int rows = 1 + (data->overlap_flag && row_num > 0);
// Output clipping range
int min_value, max_value;
if (data->clip_to_restricted_range) {
min_value = 16 << (BITDEPTH - 8);
max_value = 235 << (BITDEPTH - 8);
} else {
min_value = 0;
max_value = (1 << BITDEPTH) - 1;
}
// seed[0] contains the current row, seed[1] contains the previous
unsigned seed[2];
for (int i = 0; i < rows; i++) {
seed[i] = data->seed;
seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8;
seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
}
const ptrdiff_t stride = out->stride[0];
// Whole blocks are processed, so a line must hold a whole number of blocks
assert(stride % (BLOCK_SIZE * sizeof(pixel)) == 0);
assert(stride == in->stride[0]);
// NOTE(review): the pointer arithmetic below is done on void pointers,
// which relies on a compiler extension (byte-sized steps)
void *const src_row = in->data[0] + stride * row_num * BLOCK_SIZE;
void *const dst_row = out->data[0] + stride * row_num * BLOCK_SIZE;
// Zero the source pixels between the picture width and the next block
// boundary, so that the partial block at the right edge reads defined values
const int row_len = (out->p.w + BLOCK_SIZE - 1) & ~(BLOCK_SIZE - 1);
for (int x = out->p.w; x < row_len; x++) {
for (int y = 0; y < BLOCK_SIZE; y++) {
pixel *src = src_row + y * stride + x * sizeof(pixel);
*src = 0;
}
}
// Likewise zero the source lines below the picture height that belong to
// this block row (only has an effect on a partial bottom block row)
const int row_h = (row_num + 1) * BLOCK_SIZE;
for (int y = out->p.h; y < row_h; y++)
memset(in->data[0] + stride * y, 0, row_len * sizeof(pixel));
// offsets[0][*] belongs to the current block column, offsets[1][*] to the
// previous one; the second index is 0 for this block row, 1 for the one above
int offsets[2 /* col offset */][2 /* row offset */];
// process this row in BLOCK_SIZE^2 blocks
for (int bx = 0; bx < out->p.w; bx += BLOCK_SIZE) {
if (data->overlap_flag && bx) {
// shift previous offsets left
for (int i = 0; i < rows; i++)
offsets[1][i] = offsets[0][i];
}
// update current offsets
for (int i = 0; i < rows; i++)
offsets[0][i] = get_random_number(8, &seed[i]);
// x/y block offsets to compensate for overlapped regions
const int ystart = data->overlap_flag && row_num ? 2 : 0;
const int xstart = data->overlap_flag && bx ? 2 : 0;
// Blend weights for the two overlapped pixels, as { old, new } pairs; the
// weighted sum is rounded with a shift of 5
static const int w[2][2] = { { 27, 17 }, { 17, 27 } };
// Scales `grain` by the LUT entry of the source pixel and adds it to that
// pixel. The macro declares locals, so it can be expanded only once per scope.
#define add_noise_y(x, y, grain) \
pixel *src = src_row + (y) * stride + (bx + (x)) * sizeof(pixel); \
pixel *dst = dst_row + (y) * stride + (bx + (x)) * sizeof(pixel); \
int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
*dst = iclip(*src + noise, min_value, max_value);
for (int y = ystart; y < BLOCK_SIZE; y++) {
// Non-overlapped image region (straightforward)
for (int x = xstart; x < BLOCK_SIZE; x++) {
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
add_noise_y(x, y, grain);
}
// Special case for overlapped column
for (int x = 0; x < xstart; x++) {
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
grain = round2(old * w[x][0] + grain * w[x][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_y(x, y, grain);
}
}
for (int y = 0; y < ystart; y++) {
// Special case for overlapped row (sans corner)
for (int x = xstart; x < BLOCK_SIZE; x++) {
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
grain = round2(old * w[y][0] + grain * w[y][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_y(x, y, grain);
}
// Special case for doubly-overlapped corner
for (int x = 0; x < xstart; x++) {
// Blend the top pixel with the top left block
int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y);
top = round2(old * w[x][0] + top * w[x][1], 5);
top = iclip(top, GRAIN_MIN, GRAIN_MAX);
// Blend the current pixel with the left block
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
grain = round2(old * w[x][0] + grain * w[x][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
// Mix the two rows together and apply grain
grain = round2(top * w[y][0] + grain * w[y][1], 5);
grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
add_noise_y(x, y, grain);
}
}
}
}
/*
 * Adds grain to one row of chroma blocks of plane `uv`.
 *
 * out:       destination picture; its p.film_grain parameters drive the process
 * in:        source picture; must have the same chroma stride as `out`
 * grain_lut: grain template of this chroma plane
 * scaling:   scaling LUT to use for this plane
 * uv:        chroma plane index (0 or 1)
 * row_num:   index of the block row to process (in luma block rows)
 *
 * The noise strength of each chroma pixel is looked up either directly from
 * the co-located luma value (chroma_scaling_from_luma) or from a weighted
 * combination of the luma and chroma values.
 *
 * The luma value is always taken from `in`: the luma plane of `out` either
 * already has grain applied or, when luma has no scaling points, has not been
 * written yet at the time this function runs.
 *
 * Note that the padding of `in` right of / below the picture is overwritten
 * with zeros, although `in` is declared const.
 */
static void apply_to_row_uv(Dav1dPicture *const out, const Dav1dPicture *const in,
                            entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
                            uint8_t scaling[SCALING_SIZE], int uv, int row_num)
{
    const Dav1dFilmGrainData *const data = &out->p.film_grain;
    // With overlap enabled, every block row except the first also needs the
    // offsets of the block row above it
    const int rows = 1 + (data->overlap_flag && row_num > 0);

    // Output clipping range
    int min_value, max_value;
    if (data->clip_to_restricted_range) {
        min_value = 16 << (BITDEPTH - 8);
        if (out->p.mtrx == DAV1D_MC_IDENTITY) {
            max_value = 235 << (BITDEPTH - 8);
        } else {
            max_value = 240 << (BITDEPTH - 8);
        }
    } else {
        min_value = 0;
        max_value = (1 << BITDEPTH) - 1;
    }

    const int sx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int sy = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;

    // Chroma plane dimensions. These round up, so that the last column / row
    // of an odd-sized picture is treated as picture content, not as padding.
    const int chroma_w = (out->p.w + sx) >> sx;
    const int chroma_h = (out->p.h + sy) >> sy;

    // seed[0] contains the current row, seed[1] contains the previous
    unsigned seed[2];
    for (int i = 0; i < rows; i++) {
        seed[i] = data->seed;
        seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8;
        seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
    }

    const ptrdiff_t stride = out->stride[1];
    assert(stride % (BLOCK_SIZE * sizeof(pixel)) == 0);
    assert(stride == in->stride[1]);

    const int by = row_num * (BLOCK_SIZE >> sy);
    void *const dst_row = out->data[1 + uv] + stride * by;
    void *const src_row = in->data[1 + uv] + stride * by;

    // Luma is read from the source picture, i.e. before grain was added
    const ptrdiff_t luma_stride = in->stride[0];
    void *const luma_row = in->data[0] + luma_stride * row_num * BLOCK_SIZE;

    // Zero the source pixels between the chroma width and the next block
    // boundary, so that the partial block at the right edge reads defined
    // values
    const int row_len = (chroma_w + (BLOCK_SIZE >> sx) - 1)
                        & ~((BLOCK_SIZE >> sx) - 1);
    for (int x = chroma_w; x < row_len; x++) {
        for (int y = 0; y < BLOCK_SIZE >> sy; y++) {
            pixel *src = src_row + y * stride + x * sizeof(pixel);
            *src = 0;
        }
    }

    // Likewise zero the source lines below the chroma height that belong to
    // this block row (only has an effect on a partial bottom block row)
    const int row_h = (row_num + 1) * (BLOCK_SIZE >> sy);
    for (int y = chroma_h; y < row_h; y++)
        memset(in->data[1 + uv] + stride * y, 0, row_len * sizeof(pixel));

    // offsets[0][*] belongs to the current block column, offsets[1][*] to the
    // previous one; the second index is 0 for this block row, 1 for the one
    // above
    int offsets[2 /* col offset */][2 /* row offset */];

    // process this row in BLOCK_SIZE^2 blocks (subsampled)
    for (int bx = 0; bx < chroma_w; bx += BLOCK_SIZE >> sx) {
        if (data->overlap_flag && bx) {
            // shift previous offsets left
            for (int i = 0; i < rows; i++)
                offsets[1][i] = offsets[0][i];
        }

        // update current offsets
        for (int i = 0; i < rows; i++)
            offsets[0][i] = get_random_number(8, &seed[i]);

        // x/y block offsets to compensate for overlapped regions
        const int ystart = data->overlap_flag && row_num ? (2 >> sy) : 0;
        const int xstart = data->overlap_flag && bx ? (2 >> sx) : 0;

        // Blend weights as { old, new } pairs: two overlapped pixels without
        // subsampling, a single one with subsampling
        static const int w[2 /* sub */][2 /* off */][2] = {
            { { 27, 17 }, { 17, 27 } },
            { { 23, 22 } },
        };

        // Adds `grain` to the chroma pixel at (x, y) of the current block.
        // For horizontally subsampled layouts the luma value is the rounded
        // average of the two luma pixels covering the chroma pixel, unless the
        // second one lies outside the picture.
#define add_noise_uv(x, y, grain)                                               \
        do {                                                                    \
            const int lx = (bx + (x)) << sx;                                    \
            const int ly = (y) << sy;                                           \
            pixel *luma = luma_row + ly * luma_stride + lx * sizeof(pixel);     \
            pixel avg = luma[0];                                                \
            if (sx && lx + 1 < out->p.w)                                        \
                avg = (avg + luma[1] + 1) >> 1;                                 \
                                                                                \
            pixel *src = src_row + (y) * stride + (bx + (x)) * sizeof(pixel);   \
            pixel *dst = dst_row + (y) * stride + (bx + (x)) * sizeof(pixel);   \
            int val = avg;                                                      \
            if (!data->chroma_scaling_from_luma) {                              \
                int combined = avg * data->uv_luma_mult[uv] +                   \
                               *src * data->uv_mult[uv];                        \
                val = iclip_pixel( (combined >> 6) +                            \
                                   (data->uv_offset[uv] << (BITDEPTH - 8)) );   \
            }                                                                   \
                                                                                \
            int noise = round2(scaling[ val ] * (grain), data->scaling_shift);  \
            *dst = iclip(*src + noise, min_value, max_value);                   \
        } while (0)

        for (int y = ystart; y < BLOCK_SIZE >> sy; y++) {
            // Non-overlapped image region (straightforward)
            for (int x = xstart; x < BLOCK_SIZE >> sx; x++) {
                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
                add_noise_uv(x, y, grain);
            }

            // Special case for overlapped column
            for (int x = 0; x < xstart; x++) {
                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
                int old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
                grain = (old * w[sx][x][0] + grain * w[sx][x][1] + 16) >> 5;
                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
                add_noise_uv(x, y, grain);
            }
        }

        for (int y = 0; y < ystart; y++) {
            // Special case for overlapped row (sans corner)
            for (int x = xstart; x < BLOCK_SIZE >> sx; x++) {
                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
                int old = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
                grain = (old * w[sy][y][0] + grain * w[sy][y][1] + 16) >> 5;
                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
                add_noise_uv(x, y, grain);
            }

            // Special case for doubly-overlapped corner
            for (int x = 0; x < xstart; x++) {
                // Blend the top pixel with the top left block
                int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
                int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y);
                top = (old * w[sx][x][0] + top * w[sx][x][1] + 16) >> 5;
                top = iclip(top, GRAIN_MIN, GRAIN_MAX);

                // Blend the current pixel with the left block
                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
                old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
                grain = (old * w[sx][x][0] + grain * w[sx][x][1] + 16) >> 5;
                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);

                // Mix the two rows together and apply to image
                grain = (top * w[sy][y][0] + grain * w[sy][y][1] + 16) >> 5;
                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
                add_noise_uv(x, y, grain);
            }
        }
    }
}
/*
 * Applies film grain to a whole picture.
 *
 * out: destination picture; its p.film_grain parameters describe the grain
 * in:  source picture; must use the same strides as `out`
 *
 * The grain templates and scaling LUTs are generated once, then grain is
 * added block row by block row. Planes that receive no grain are copied from
 * `in` to `out` unchanged.
 */
void bitfn(dav1d_apply_grain)(Dav1dPicture *const out,
                              const Dav1dPicture *const in)
{
    const Dav1dFilmGrainData *const data = &out->p.film_grain;

    entry grain_lut[3][GRAIN_HEIGHT][GRAIN_WIDTH];
    uint8_t scaling[3][SCALING_SIZE];

    // Generate grain LUTs as needed
    generate_grain_y(out, grain_lut[0]); // always needed
    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
        generate_grain_uv(out, 0, grain_lut[1], grain_lut[0]);
    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
        generate_grain_uv(out, 1, grain_lut[2], grain_lut[0]);

    // Generate scaling LUTs as needed
    if (data->num_y_points)
        generate_scaling(data->y_points, data->num_y_points, scaling[0]);
    else if (data->chroma_scaling_from_luma)
        // The chroma planes index the luma LUT in this mode. Without luma
        // points, use zero strength instead of reading uninitialized memory.
        memset(scaling[0], 0, sizeof(scaling[0]));
    if (data->num_uv_points[0])
        generate_scaling(data->uv_points[0], data->num_uv_points[0], scaling[1]);
    if (data->num_uv_points[1])
        generate_scaling(data->uv_points[1], data->num_uv_points[1], scaling[2]);

    // Synthesize grain for the affected planes. The number of block rows is
    // rounded up so that a partial block row at the bottom is processed too.
    // NOTE(review): this relies on the picture buffers being padded to a
    // multiple of BLOCK_SIZE lines, as the edge handling in the row functions
    // already assumes - confirm against the picture allocator.
    const int rows = (out->p.h + BLOCK_SIZE - 1) / BLOCK_SIZE;
    for (int row = 0; row < rows; row++) {
        if (data->num_y_points)
            apply_to_row_y(out, in, grain_lut[0], scaling[0], row);

        if (data->chroma_scaling_from_luma) {
            apply_to_row_uv(out, in, grain_lut[1], scaling[0], 0, row);
            apply_to_row_uv(out, in, grain_lut[2], scaling[0], 1, row);
        } else {
            if (data->num_uv_points[0])
                apply_to_row_uv(out, in, grain_lut[1], scaling[1], 0, row);
            if (data->num_uv_points[1])
                apply_to_row_uv(out, in, grain_lut[2], scaling[2], 1, row);
        }
    }

    // Copy over the non-modified planes
    // TODO: eliminate in favor of per-plane refs
    if (!data->num_y_points) {
        assert(out->stride[0] == in->stride[0]);
        memcpy(out->data[0], in->data[0], out->p.h * out->stride[0]);
    }

    for (int i = 0; i < 2; i++) {
        if (!data->num_uv_points[i] && !data->chroma_scaling_from_luma) {
            const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
            // Round up so the last chroma line of an odd-height picture is
            // copied as well
            const int chroma_h = (out->p.h + suby) >> suby;
            assert(out->stride[1] == in->stride[1]);
            memcpy(out->data[1+i], in->data[1+i], chroma_h * out->stride[1]);
        }
    }
}
......@@ -115,6 +115,7 @@ struct Dav1dContext {
} intra_edge;
Dav1dPicAllocator allocator;
int apply_grain;
};
struct Dav1dFrameContext {
......
......@@ -43,6 +43,7 @@
#include "src/ref.h"
#include "src/thread_task.h"
#include "src/wedge.h"
#include "src/film_grain.h"
static void init_internal(void) {
dav1d_init_wedge_masks();
......@@ -57,6 +58,7 @@ const char *dav1d_version(void) {
void dav1d_default_settings(Dav1dSettings *const s) {
s->n_frame_threads = 1;
s->n_tile_threads = 1;
s->apply_grain = 1;
s->allocator.cookie = NULL;
s->allocator.alloc_picture_callback = default_picture_allocator;
s->allocator.release_picture_callback = default_picture_release;
......@@ -84,6 +86,7 @@ int dav1d_open(Dav1dContext **const c_out,
memset(c, 0, sizeof(*c));
c->allocator = s->allocator;
c->apply_grain = s->apply_grain;
c->n_fc = s->n_frame_threads;
c->fc = dav1d_alloc_aligned(sizeof(*c->fc) * s->n_frame_threads, 32);
if (!c->fc) goto error;
......@@ -170,6 +173,39 @@ int dav1d_send_data(Dav1dContext *const c, Dav1dData *const in)
return 0;
}
/*
 * Hands a decoded picture to the caller, applying film grain if requested.
 *
 * c:   decoder context; c->apply_grain enables grain synthesis
 * out: receives the picture to output
 * in:  decoded picture; its reference is consumed on success
 *
 * When grain is disabled or the picture carries no scaling points, the
 * reference is simply moved from `in` to `out`. Otherwise a new picture is
 * allocated for the grained result, so that `in` itself is not modified by
 * the grain being added.
 *
 * Returns 0 on success, or the negative result of the picture allocation.
 */
static int output_image(Dav1dContext *const c, Dav1dPicture *const out,
                        Dav1dPicture *const in)
{
    const Dav1dFilmGrainData *const grain = &in->p.film_grain;
    const int grain_present = grain->num_y_points ||
                              grain->num_uv_points[0] ||
                              grain->num_uv_points[1];

    if (c->apply_grain && grain_present) {
        // Grain goes into a separate picture
        const int err = dav1d_picture_alloc_copy(out, in);
        if (err < 0)
            return err;

        switch (out->p.bpc) {
        case 8:
            dav1d_apply_grain_8bpc(out, in);
            break;
        case 10:
            dav1d_apply_grain_10bpc(out, in);
            break;
        default:
            assert(!"apply_grain: missing bit depth");
        }

        dav1d_picture_unref(in);
    } else {
        // Nothing to synthesize: skip the allocation/copy
        dav1d_picture_move_ref(out, in);
    }

    return 0;
}
int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out)
{
int res;
......@@ -220,16 +256,12 @@ int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out)
in->sz -= res;
in->data += res;
if (!in->sz) dav1d_data_unref(in);
if (c->out.data[0]) {
dav1d_picture_move_ref(out, &c->out);
return 0;
}
if (c->out.data[0])
break;
}
if (c->out.data[0]) {
dav1d_picture_move_ref(out, &c->out);
return 0;
}
if (c->out.data[0])
return output_image(c, out, &c->out);
return -EAGAIN;
}
......
......@@ -62,7 +62,8 @@ libdav1d_tmpl_sources = files(
'cdef_tmpl.c',
'lr_apply_tmpl.c',
'looprestoration_tmpl.c',
'recon_tmpl.c'
'recon_tmpl.c',
'film_grain_tmpl.c',
)
# libdav1d entrypoint source files
......
......@@ -180,6 +180,21 @@ int dav1d_thread_picture_alloc(Dav1dThreadPicture *const p,
return res;
}
/*
 * Allocates a new picture `dst` with the same dimensions, layout and bit
 * depth as `src`, using the allocator stored with `src`'s buffer reference.
 *
 * On success the picture order count and the picture parameters of `src` are
 * copied to `dst`. Pixel data is not copied.
 *
 * Returns 0 on success, otherwise the result of dav1d_picture_alloc().
 */
int dav1d_picture_alloc_copy(Dav1dPicture *const dst,
                             const Dav1dPicture *const src)
{
    struct pic_ctx_context *const ctx = src->ref->user_data;
    const int ret = dav1d_picture_alloc(dst, src->p.w, src->p.h,
                                        src->p.layout, src->p.bpc,
                                        &ctx->allocator);
    if (ret)
        return ret;

    dst->poc = src->poc;
    dst->p = src->p;
    return 0;
}
void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) {
validate_input(dst != NULL);
validate_input(dst->data[0] == NULL);
......
......@@ -63,6 +63,11 @@ int dav1d_thread_picture_alloc(Dav1dThreadPicture *p, int w, int h,
struct thread_data *t, int <