Skip to content
Snippets Groups Projects

YCbCr output support

Merged Niklas Haas requested to merge yuv_output into master
1 file
+ 7
6
Compare changes
  • Side-by-side
  • Inline
  • e9a2087d
    The scaler code only checks whether or not the FBOFMT is storable,
    but in the case of e.g. draw_overlays, we have to check the actual FBO
    (provided by the user) instead.
    
    Solve this by just checking both. (Note: In theory, if the FBO is
    storable but FBOFMT is not, this would be a false negative. But
    whatever.)
+ 488
331
@@ -21,17 +21,6 @@
#include "shaders.h"
#include "dispatch.h"
enum {
// The scalers for each plane are set up to be just the index itself
SCALER_PLANE0 = 0,
SCALER_PLANE1 = 1,
SCALER_PLANE2 = 2,
SCALER_PLANE3 = 3,
SCALER_MAIN,
SCALER_COUNT,
};
struct sampler {
struct pl_shader_obj *upscaler_state;
struct pl_shader_obj *downscaler_state;
@@ -67,9 +56,11 @@ struct pl_renderer {
struct pl_shader_obj *grain_state[4];
const struct pl_tex **fbos;
int num_fbos;
struct sampler samplers[SCALER_COUNT];
struct sampler *osd_samplers;
int num_osd_samplers;
struct sampler sampler_main;
struct sampler samplers_src[4];
struct sampler samplers_dst[4];
struct sampler *samplers_osd;
int num_samplers_osd;
};
static void find_fbo_format(struct pl_renderer *rr)
@@ -168,10 +159,13 @@ void pl_renderer_destroy(struct pl_renderer **p_rr)
pl_shader_obj_destroy(&rr->grain_state[i]);
// Free all samplers
for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers); i++)
sampler_destroy(rr, &rr->samplers[i]);
for (int i = 0; i < rr->num_osd_samplers; i++)
sampler_destroy(rr, &rr->osd_samplers[i]);
sampler_destroy(rr, &rr->sampler_main);
for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_src); i++)
sampler_destroy(rr, &rr->samplers_src[i]);
for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_dst); i++)
sampler_destroy(rr, &rr->samplers_dst[i]);
for (int i = 0; i < rr->num_samplers_osd; i++)
sampler_destroy(rr, &rr->samplers_osd[i]);
pl_dispatch_destroy(&rr->dp);
TA_FREEP(p_rr);
@@ -237,6 +231,15 @@ struct img {
int comps;
};
// Plane 'type', ordered by incrementing priority
enum plane_type {
PLANE_ALPHA,
PLANE_CHROMA,
PLANE_LUMA,
PLANE_RGB,
PLANE_XYZ,
};
struct pass_state {
void *tmp;
@@ -249,17 +252,22 @@ struct pass_state {
struct img img;
// Represents the "reference rect". Canonically, this is functionallity
// equivalent to `pl_image.src_rect`, but both guaranteed to be valid, and
// also updates as the refplane evolves (e.g. due to user hook prescalers)
// equivalent to `image.crop`, but both guaranteed to be valid, and also
// updates as the refplane evolves (e.g. due to user hook prescalers)
struct pl_rect2df ref_rect;
// Integer version of `target.dst_rect`. Semantically identical.
// Integer version of `target.crop`. Semantically identical.
struct pl_rect2d dst_rect;
// Cached copies of the `image` / `target` for this rendering pass,
// corrected to make sure all rects etc. are properly defaulted/inferred.
struct pl_image image;
struct pl_render_target target;
struct pl_frame image;
struct pl_frame target;
// Some extra plane metadata, inferred from `planes`
enum plane_type src_type[4];
enum plane_type dst_type[4];
int src_ref, dst_ref; // index into `planes`
// Metadata for `rr->fbos`
bool *fbos_used;
@@ -438,7 +446,7 @@ static struct sampler_info sample_src_info(struct pl_renderer *rr,
}
static void dispatch_sampler(struct pass_state *pass, struct pl_shader *sh,
struct sampler *sampler,
struct sampler *sampler, bool no_compute,
const struct pl_render_params *params,
const struct pl_sample_src *src)
{
@@ -478,7 +486,7 @@ static void dispatch_sampler(struct pass_state *pass, struct pl_shader *sh,
.lut_entries = params->lut_entries,
.cutoff = params->polar_cutoff,
.antiring = params->antiringing_strength,
.no_compute = rr->disable_compute,
.no_compute = rr->disable_compute || no_compute,
.no_widening = params->skip_anti_aliasing,
.lut = lut,
};
@@ -520,7 +528,23 @@ fallback:
pl_shader_sample_direct(sh, src);
}
static void swizzle_color(struct pl_shader *sh, int comps, const int comp_map[4])
{
ident_t orig = sh_fresh(sh, "orig_color");
GLSL("vec4 %s = color; \n"
"color = vec4(0.0); \n", orig);
static const int def_map[4] = {0, 1, 2, 3};
comp_map = PL_DEF(comp_map, def_map);
for (int c = 0; c < comps; c++) {
if (comp_map[c] >= 0)
GLSL("color[%d] = %s[%d]; \n", c, orig, comp_map[c]);
}
}
static void draw_overlays(struct pass_state *pass, const struct pl_tex *fbo,
int comps, const int comp_map[4],
const struct pl_overlay *overlays, int num,
struct pl_color_space color, struct pl_color_repr repr,
bool use_sigmoid, struct pl_transform2x2 *scale,
@@ -537,8 +561,8 @@ static void draw_overlays(struct pass_state *pass, const struct pl_tex *fbo,
rr->disable_blending = true;
}
while (num > rr->num_osd_samplers) {
TARRAY_APPEND(rr, rr->osd_samplers, rr->num_osd_samplers,
while (num > rr->num_samplers_osd) {
TARRAY_APPEND(rr, rr->samplers_osd, rr->num_samplers_osd,
(struct sampler) {0});
}
@@ -569,12 +593,12 @@ static void draw_overlays(struct pass_state *pass, const struct pl_tex *fbo,
},
};
struct sampler *sampler = &rr->osd_samplers[n];
struct sampler *sampler = &rr->samplers_osd[n];
if (params->disable_overlay_sampling)
sampler = NULL;
struct pl_shader *sh = pl_dispatch_begin(rr->dp);
dispatch_sampler(pass, sh, sampler, params, &src);
dispatch_sampler(pass, sh, sampler, !fbo->params.storable, params, &src);
GLSL("vec4 osd_color;\n");
for (int c = 0; c < src.components; c++) {
@@ -607,6 +631,7 @@ static void draw_overlays(struct pass_state *pass, const struct pl_tex *fbo,
pl_shader_sigmoidize(sh, params->sigmoid_params);
pl_shader_encode_color(sh, &repr);
swizzle_color(sh, comps, comp_map);
static const struct pl_blend_params blend_params = {
.src_rgb = PL_BLEND_SRC_ALPHA,
@@ -778,7 +803,7 @@ enum {
static int deband_src(struct pass_state *pass, struct pl_shader *psh,
struct pl_sample_src *psrc,
const struct pl_image *image,
const struct pl_frame *image,
const struct pl_render_params *params)
{
struct pl_renderer *rr = pass->rr;
@@ -895,13 +920,10 @@ cleanup:
pl_shader_obj_destroy(&rr->peak_detect_state);
}
// Plane 'type', ordered by incrementing priority
enum plane_type {
PLANE_ALPHA,
PLANE_CHROMA,
PLANE_LUMA,
PLANE_RGB,
PLANE_XYZ,
struct plane_state {
enum plane_type type;
struct pl_plane plane;
struct img img; // for per-plane shaders
};
static const char *plane_type_names[] = {
@@ -912,12 +934,6 @@ static const char *plane_type_names[] = {
[PLANE_XYZ] = "xyz",
};
struct plane_state {
enum plane_type type;
struct pl_plane plane;
struct img img; // for per-plane shaders
};
static void log_plane_info(struct pl_renderer *rr, const struct plane_state *st)
{
const struct pl_plane *plane = &st->plane;
@@ -960,47 +976,11 @@ static void log_plane_info(struct pl_renderer *rr, const struct plane_state *st)
st->img.repr.bits.bit_shift);
}
static enum plane_type detect_plane_type(const struct plane_state *st)
{
const struct pl_plane *plane = &st->plane;
if (pl_color_system_is_ycbcr_like(st->img.repr.sys)) {
int t = -1;
for (int c = 0; c < plane->components; c++) {
switch (plane->component_mapping[c]) {
case PL_CHANNEL_Y: t = PL_MAX(t, PLANE_LUMA); continue;
case PL_CHANNEL_A: t = PL_MAX(t, PLANE_ALPHA); continue;
case PL_CHANNEL_CB:
case PL_CHANNEL_CR:
t = PL_MAX(t, PLANE_CHROMA);
continue;
default: continue;
}
}
pl_assert(t >= 0);
return t;
}
// Extra test for exclusive / separated alpha plane
if (plane->components == 1 && plane->component_mapping[0] == PL_CHANNEL_A)
return PLANE_ALPHA;
switch (st->img.repr.sys) {
case PL_COLOR_SYSTEM_UNKNOWN: // fall through to RGB
case PL_COLOR_SYSTEM_RGB: return PLANE_RGB;
case PL_COLOR_SYSTEM_XYZ: return PLANE_XYZ;
default: abort();
}
}
// Returns true if grain was applied
static bool plane_av1_grain(struct pass_state *pass, int plane_idx,
struct plane_state *st,
const struct plane_state *ref,
const struct pl_image *image,
const struct pl_frame *image,
const struct pl_render_params *params)
{
struct pl_renderer *rr = pass->rr;
@@ -1074,25 +1054,18 @@ static bool plane_user_hooks(struct pass_state *pass, struct plane_state *st,
return pass_hook(pass, &st->img, plane_stages[st->type], params);
}
static void fix_rects(struct pass_state *pass, const struct pl_tex *ref_tex);
// This scales and merges all of the source images, and initializes pass->img.
static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass,
const struct pl_render_params *params)
{
struct pl_image *image = &pass->image;
struct pl_frame *image = &pass->image;
// First of all, we have to pick a "reference" plane for alignment.
// This should ideally be the plane that most closely matches the target
// image size
struct plane_state planes[4];
struct plane_state *ref = NULL;
struct plane_state *ref = &planes[pass->src_ref];
// Do a first pass to figure out each plane's type and find the ref plane
pl_assert(image->num_planes < PL_ARRAY_SIZE(planes));
for (int i = 0; i < image->num_planes; i++) {
struct plane_state *st = &planes[i];
*st = (struct plane_state) {
planes[i] = (struct plane_state) {
.type = pass->src_type[i],
.plane = image->planes[i],
.img = {
.w = image->planes[i].texture->params.w,
@@ -1103,39 +1076,12 @@ static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass,
.comps = image->planes[i].components,
},
};
st->type = detect_plane_type(st);
pl_assert(st->type >= 0);
switch (st->type) {
case PLANE_RGB:
case PLANE_LUMA:
case PLANE_XYZ:
ref = st;
break;
default: break;
}
}
if (!ref) {
PL_ERR(rr, "Image contains no LUMA, RGB or XYZ planes?");
return false;
}
// Original ref texture, even after preprocessing
const struct pl_tex *ref_tex = ref->plane.texture;
// At this point in time we can finally infer/fix the image/target rects
fix_rects(pass, ref_tex);
// Guess the source primaries based on resolution
if (!image->color.primaries) {
image->color.primaries = pl_color_primaries_guess(ref_tex->params.w,
ref_tex->params.h);
}
pass->ref_rect = image->src_rect;
// Do a second pass to compute the rc of each plane
// Compute the sampling rc of each plane
for (int i = 0; i < image->num_planes; i++) {
struct plane_state *st = &planes[i];
float rx = (float) ref_tex->params.w / st->plane.texture->params.w,
@@ -1151,10 +1097,10 @@ static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass,
sy = st->plane.shift_y;
st->img.rect = (struct pl_rect2df) {
.x0 = (image->src_rect.x0 - sx) / rrx,
.y0 = (image->src_rect.y0 - sy) / rry,
.x1 = (image->src_rect.x1 - sx) / rrx,
.y1 = (image->src_rect.y1 - sy) / rry,
.x0 = (image->crop.x0 - sx) / rrx,
.y0 = (image->crop.y0 - sy) / rry,
.x1 = (image->crop.x1 - sx) / rrx,
.y1 = (image->crop.y1 - sy) / rry,
};
PL_TRACE(rr, "Plane %d:", i);
@@ -1232,7 +1178,7 @@ static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass,
struct pl_shader *psh = pl_dispatch_begin_ex(rr->dp, true);
if (deband_src(pass, psh, &src, image, params) != DEBAND_SCALED)
dispatch_sampler(pass, psh, &rr->samplers[i], params, &src);
dispatch_sampler(pass, psh, &rr->samplers_src[i], false, params, &src);
ident_t sub = sh_subpass(sh, psh);
if (!sub) {
@@ -1325,7 +1271,7 @@ static bool pass_scale_main(struct pl_renderer *rr, struct pass_state *pass,
.rect = img->rect,
};
const struct pl_image *image = &pass->image;
const struct pl_frame *image = &pass->image;
bool need_fbo = image->num_overlays > 0;
need_fbo |= rr->peak_detect_state && !params->allow_delayed_peak_detect;
@@ -1361,6 +1307,8 @@ static bool pass_scale_main(struct pl_renderer *rr, struct pass_state *pass,
}
if (info.type == SAMPLER_DIRECT && !need_fbo) {
img->w = src.new_w;
img->h = src.new_h;
PL_TRACE(rr, "Skipping main scaler (free sampling)");
return true;
}
@@ -1397,27 +1345,28 @@ static bool pass_scale_main(struct pl_renderer *rr, struct pass_state *pass,
// Draw overlay on top of the intermediate image if needed, accounting
// for possible stretching needed due to mismatch between the ref and src
struct pl_transform2x2 tf = pl_transform2x2_identity;
if (!pl_rect2d_eq(img->rect, image->src_rect)) {
float rx = pl_rect_w(img->rect) / pl_rect_w(image->src_rect),
ry = pl_rect_w(img->rect) / pl_rect_w(image->src_rect);
if (!pl_rect2d_eq(img->rect, image->crop)) {
float rx = pl_rect_w(img->rect) / pl_rect_w(image->crop),
ry = pl_rect_w(img->rect) / pl_rect_w(image->crop);
tf = (struct pl_transform2x2) {
.mat = {{{ rx, 0.0 }, { 0.0, ry }}},
.c = {
img->rect.x0 - image->src_rect.x0 * rx,
img->rect.y0 - image->src_rect.y0 * ry
img->rect.x0 - image->crop.x0 * rx,
img->rect.y0 - image->crop.y0 * ry
},
};
}
draw_overlays(pass, img->tex, image->overlays, image->num_overlays,
img->color, img->repr, use_sigmoid, &tf, params);
draw_overlays(pass, img->tex, img->comps, NULL, image->overlays,
image->num_overlays, img->color, img->repr, use_sigmoid,
&tf, params);
pass_hook(pass, img, PL_HOOK_PRE_KERNEL, params);
src.tex = img_tex(pass, img);
struct pl_shader *sh = pl_dispatch_begin_ex(rr->dp, true);
dispatch_sampler(pass, sh, &rr->samplers[SCALER_MAIN], params, &src);
dispatch_sampler(pass, sh, &rr->sampler_main, false, params, &src);
*img = (struct img) {
.sh = sh,
.w = src.new_w,
@@ -1440,18 +1389,17 @@ static bool pass_scale_main(struct pl_renderer *rr, struct pass_state *pass,
static bool pass_output_target(struct pl_renderer *rr, struct pass_state *pass,
const struct pl_render_params *params)
{
const struct pl_image *image = &pass->image;
const struct pl_render_target *target = &pass->target;
const struct pl_tex *fbo = target->fbo;
const struct pl_frame *image = &pass->image;
const struct pl_frame *target = &pass->target;
struct img *img = &pass->img;
struct pl_shader *sh = img_sh(pass, img);
// Color management
bool prelinearized = false;
struct pl_color_space ref = image->color;
assert(ref.primaries == pass->img.color.primaries);
assert(ref.light == pass->img.color.light);
if (pass->img.color.transfer == PL_COLOR_TRC_LINEAR)
struct pl_color_space ref_csp = image->color;
assert(ref_csp.primaries == img->color.primaries);
assert(ref_csp.light == img->color.light);
if (img->color.transfer == PL_COLOR_TRC_LINEAR)
prelinearized = true;
bool need_icc = (image->profile.data || target->profile.data) &&
@@ -1465,7 +1413,7 @@ static bool pass_output_target(struct pl_renderer *rr, struct pass_state *pass,
if (use_3dlut) {
struct pl_3dlut_profile src = {
.color = ref,
.color = ref_csp,
.profile = image->profile,
};
@@ -1484,7 +1432,7 @@ static bool pass_output_target(struct pl_renderer *rr, struct pass_state *pass,
}
// current -> 3DLUT in
pl_shader_color_map(sh, params->color_map_params, ref, res.src_color,
pl_shader_color_map(sh, params->color_map_params, ref_csp, res.src_color,
&rr->peak_detect_state, prelinearized);
// 3DLUT in -> 3DLUT out
pl_3dlut_apply(sh, &rr->lut3d_state);
@@ -1508,7 +1456,7 @@ fallback:
if (!use_3dlut) {
// current -> target
pl_shader_color_map(sh, params->color_map_params, ref, target->color,
pl_shader_color_map(sh, params->color_map_params, ref_csp, target->color,
&rr->peak_detect_state, prelinearized);
}
@@ -1516,59 +1464,315 @@ fallback:
if (params->cone_params)
pl_shader_cone_distort(sh, target->color, params->cone_params);
bool is_comp = pl_shader_is_compute(sh);
if (is_comp && !fbo->params.storable) {
if (!img_tex(pass, &pass->img)) {
PL_ERR(rr, "Failed dispatching compute shader to intermediate FBO?");
// Apply the color scale separately, after encoding is done, to make sure
// that the intermediate FBO (if any) has the correct precision.
struct pl_color_repr repr = target->repr;
float scale = pl_color_repr_normalize(&repr);
pl_shader_encode_color(sh, &repr);
pass_hook(pass, img, PL_HOOK_OUTPUT, params);
sh = NULL;
const struct pl_plane *ref = &target->planes[pass->dst_ref];
bool flipped_x = pass->dst_rect.x1 < pass->dst_rect.x0,
flipped_y = pass->dst_rect.y1 < pass->dst_rect.y0;
for (int p = 0; p < target->num_planes; p++) {
const struct pl_plane *plane = &target->planes[p];
float rx = (float) plane->texture->params.w / ref->texture->params.w,
ry = (float) plane->texture->params.h / ref->texture->params.h;
// Only accept integer scaling ratios. This accounts for the fact
// that fractionally subsampled planes get rounded up to the
// nearest integer size, which we want to over-render.
float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx),
rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry);
float sx = plane->shift_x, sy = plane->shift_y;
struct pl_rect2df dst_rectf = {
.x0 = (pass->dst_rect.x0 - sx) * rrx,
.y0 = (pass->dst_rect.y0 - sy) * rry,
.x1 = (pass->dst_rect.x1 - sx) * rrx,
.y1 = (pass->dst_rect.y1 - sy) * rry,
};
// Normalize to make the math easier
pl_rect2df_normalize(&dst_rectf);
// Round the output rect
int rx0 = floorf(dst_rectf.x0), ry0 = floorf(dst_rectf.y0),
rx1 = ceilf(dst_rectf.x1), ry1 = ceilf(dst_rectf.y1);
PL_TRACE(rr, "Subsampled target %d: {%f %f %f %f} -> {%d %d %d %d}",
p, dst_rectf.x0, dst_rectf.y0,
dst_rectf.x1, dst_rectf.y1,
rx0, ry0, rx1, ry1);
if (target->num_planes > 1) {
// Planar input, so we need to sample from an intermediate FBO
struct pl_sample_src src = {
.tex = img_tex(pass, img),
.new_w = rx1 - rx0,
.new_h = ry1 - ry0,
.rect = {
.x0 = (rx0 - dst_rectf.x0) / rrx,
.x1 = (rx1 - dst_rectf.x0) / rrx,
.y0 = (ry0 - dst_rectf.y0) / rry,
.y1 = (ry1 - dst_rectf.y0) / rry,
},
};
if (!src.tex) {
PL_ERR(rr, "Output requires multiple planes, but FBOs are "
"unavailable. This combination is unsupported.");
return false;
}
PL_TRACE(rr, "Sampling %dx%d img aligned from {%f %f %f %f}",
pass->img.w, pass->img.h,
src.rect.x0, src.rect.y0,
src.rect.x1, src.rect.y1);
for (int c = 0; c < plane->components; c++) {
if (plane->component_mapping[c] < 0)
continue;
src.component_mask |= 1 << plane->component_mapping[c];
}
sh = pl_dispatch_begin(rr->dp);
dispatch_sampler(pass, sh, &rr->samplers_dst[p],
!plane->texture->params.storable, params, &src);
GLSL("vec4 orig_color = color; \n");
} else {
// Single plane, so we can directly re-use the img shader unless
// it's incompatible with the FBO capabilities
bool is_comp = pl_shader_is_compute(img_sh(pass, img));
if (is_comp && !plane->texture->params.storable) {
if (!img_tex(pass, img)) {
PL_ERR(rr, "Rendering requires compute shaders, but output "
"is not storable, and FBOs are unavailable. This "
"combination is unsupported.");
return false;
}
}
sh = img_sh(pass, img);
img->sh = NULL;
}
GLSL("color *= vec4(1.0 / %f); \n", scale);
swizzle_color(sh, plane->components, plane->component_mapping);
if (params->dither_params) {
// Ignore dithering for > 16-bit FBOs by default, since it makes
// little sense to do so (and probably just adds errors)
int depth = repr.bits.sample_depth;
if (depth && (depth <= 16 || params->force_dither))
pl_shader_dither(sh, depth, &rr->dither_state, params->dither_params);
}
bool ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
.shader = &sh,
.target = plane->texture,
.rect = {
.x0 = flipped_x ? rx1 : rx0,
.y0 = flipped_y ? ry1 : ry0,
.x1 = flipped_x ? rx0 : rx1,
.y1 = flipped_y ? ry0 : ry1,
},
});
if (!ok)
return false;
// Render any overlays, including overlays that need to be rendered
// from the `image` itself, but which couldn't be rendered as
// part of the intermediate scaling pass due to missing FBOs.
if (image->num_overlays > 0 && !FBOFMT) {
// The original image dimensions need to be scaled by the effective
// end-to-end scaling ratio to compensate for the mismatch in
// pixel coordinates between the image and target.
float scale_x = pl_rect_w(dst_rectf) / pl_rect_w(image->crop),
scale_y = pl_rect_h(dst_rectf) / pl_rect_h(image->crop);
struct pl_transform2x2 iscale = {
.mat = {{{ scale_x, 0.0 }, { 0.0, scale_y }}},
.c = {
// If the image was rendered with an offset relative to the
// target crop, we also need to shift the overlays.
dst_rectf.x0 - image->crop.x0 * scale_x,
dst_rectf.y0 - image->crop.y0 * scale_y,
},
};
draw_overlays(pass, plane->texture, plane->components,
plane->component_mapping, image->overlays,
image->num_overlays, target->color, target->repr,
false, &iscale, params);
}
struct pl_transform2x2 tscale = {
.mat = {{{ rrx, 0.0 }, { 0.0, rry }}},
};
draw_overlays(pass, plane->texture, plane->components,
plane->component_mapping, target->overlays,
target->num_overlays, target->color, target->repr,
false, &tscale, params);
}
*img = (struct img) {0};
return true;
}
#define require(expr) \
do { \
if (!(expr)) { \
PL_ERR(rr, "Validation failed: %s (%s:%d)", \
#expr, __FILE__, __LINE__); \
return false; \
} \
} while (0)
#define validate_plane(plane, param) \
do { \
require((plane).texture); \
require((plane).texture->params.param); \
require((plane).components > 0 && (plane).components <= 4); \
for (int c = 0; c < (plane).components; c++) { \
require((plane).component_mapping[c] >= PL_CHANNEL_NONE && \
(plane).component_mapping[c] <= PL_CHANNEL_A); \
} \
} while (0)
// Perform some basic validity checks on incoming structs to help catch invalid
// API usage. This is not an exhaustive check. In particular, enums are not
// bounds checked. This is because most functions accepting enums already
// abort() in the default case, and because it's not the intent of this check
// to catch all instances of memory corruption - just common logic bugs.
static bool validate_structs(struct pl_renderer *rr,
const struct pl_frame *image,
const struct pl_frame *target)
{
// Rendering to/from a frame with no planes is technically allowed, but so
// pointless that it's more likely to be a user error worth catching.
require(image->num_planes > 0 && image->num_planes < PL_MAX_PLANES);
require(target->num_planes > 0 && target->num_planes < PL_MAX_PLANES);
for (int i = 0; i < image->num_planes; i++)
validate_plane(image->planes[i], sampleable);
for (int i = 0; i < target->num_planes; i++)
validate_plane(target->planes[i], renderable);
float src_w = pl_rect_w(image->crop), src_h = pl_rect_h(image->crop);
float dst_w = pl_rect_w(target->crop), dst_h = pl_rect_h(target->crop);
require(!src_w == !src_h);
require(!dst_w == !dst_h);
require(image->num_overlays >= 0);
require(target->num_overlays >= 0);
for (int i = 0; i < image->num_overlays; i++) {
const struct pl_overlay *overlay = &image->overlays[i];
validate_plane(overlay->plane, sampleable);
require(pl_rect_w(overlay->rect) && pl_rect_h(overlay->rect));
}
for (int i = 0; i < target->num_overlays; i++) {
const struct pl_overlay *overlay = &target->overlays[i];
validate_plane(overlay->plane, sampleable);
require(pl_rect_w(overlay->rect) && pl_rect_h(overlay->rect));
}
return true;
}
pl_shader_encode_color(img_sh(pass, img), &target->repr);
pass_hook(pass, &pass->img, PL_HOOK_OUTPUT, params);
// FIXME: What if this ends up being a compute shader?? Should we do the
// is_compute unredirection *after* encode_color, or will that fuck up
// the bit depth?
// FIXME: Technically we should try dithering before bit shifting if we're
// going to be encoding to a low bit depth, since the caller might end up
// discarding the extra bits. Ideally, we would pull the `bit_shift` out
// of the `target->repr` and apply it separately after dithering.
if (params->dither_params) {
int depth = target->repr.bits.sample_depth;
// Ignore dithering for >16-bit FBOs, since it's pretty pointless
if (depth > 0 && (depth <= 16 || params->force_dither)) {
pl_shader_dither(img_sh(pass, img), depth, &rr->dither_state,
params->dither_params);
static inline enum plane_type detect_plane_type(const struct pl_plane *plane,
const struct pl_color_repr *repr)
{
if (pl_color_system_is_ycbcr_like(repr->sys)) {
int t = -1;
for (int c = 0; c < plane->components; c++) {
switch (plane->component_mapping[c]) {
case PL_CHANNEL_Y: t = PL_MAX(t, PLANE_LUMA); continue;
case PL_CHANNEL_A: t = PL_MAX(t, PLANE_ALPHA); continue;
case PL_CHANNEL_CB:
case PL_CHANNEL_CR:
t = PL_MAX(t, PLANE_CHROMA);
continue;
default: continue;
}
}
pl_assert(t >= 0);
return t;
}
sh = img_sh(pass, img);
pl_assert(fbo->params.renderable);
bool ok = pl_dispatch_finish(rr->dp, &(struct pl_dispatch_params) {
.shader = &sh,
.target = fbo,
.rect = pass->dst_rect,
});
// Extra test for exclusive / separated alpha plane
if (plane->components == 1 && plane->component_mapping[0] == PL_CHANNEL_A)
return PLANE_ALPHA;
*img = (struct img) {0};
return ok;
switch (repr->sys) {
case PL_COLOR_SYSTEM_UNKNOWN: // fall through to RGB
case PL_COLOR_SYSTEM_RGB: return PLANE_RGB;
case PL_COLOR_SYSTEM_XYZ: return PLANE_XYZ;
default: abort();
}
}
static void fix_rects(struct pass_state *pass, const struct pl_tex *ref_tex)
static inline void default_rect(struct pl_rect2df *rc,
const struct pl_rect2df *backup)
{
struct pl_rect2df *src = &pass->image.src_rect,
*dst = &pass->target.dst_rect;
if (!rc->x0 && !rc->y0 && !rc->x1 && !rc->y1)
*rc = *backup;
}
static void fix_refs_and_rects(struct pass_state *pass)
{
struct pl_frame *image = &pass->image;
struct pl_frame *target = &pass->target;
// Find the ref planes
for (int i = 0; i < image->num_planes; i++) {
pass->src_type[i] = detect_plane_type(&image->planes[i], &image->repr);
switch (pass->src_type[i]) {
case PLANE_RGB:
case PLANE_LUMA:
case PLANE_XYZ:
pass->src_ref = i;
break;
default: break;
}
}
for (int i = 0; i < target->num_planes; i++) {
pass->dst_type[i] = detect_plane_type(&target->planes[i], &target->repr);
switch (pass->dst_type[i]) {
case PLANE_RGB:
case PLANE_LUMA:
case PLANE_XYZ:
pass->dst_ref = i;
break;
default: break;
}
}
// Fix the rendering rects
struct pl_rect2df *src = &image->crop, *dst = &target->crop;
const struct pl_tex *src_ref = pass->image.planes[pass->src_ref].texture;
const struct pl_tex *dst_ref = pass->target.planes[pass->dst_ref].texture;
if ((!src->x0 && !src->x1) || (!src->y0 && !src->y1)) {
src->x1 = ref_tex->params.w;
src->y1 = ref_tex->params.h;
src->x1 = src_ref->params.w;
src->y1 = src_ref->params.h;
};
if ((!dst->x0 && !dst->x1) || (!dst->y0 && !dst->y1)) {
dst->x1 = pass->target.fbo->params.w;
dst->y1 = pass->target.fbo->params.h;
dst->x1 = dst_ref->params.w;
dst->y1 = dst_ref->params.h;
}
// Keep track of whether the end-to-end rendering is flipped
@@ -1582,8 +1786,8 @@ static void fix_rects(struct pass_state *pass, const struct pl_tex *ref_tex)
// Round the output rect and clip it to the framebuffer dimensions
float rx0 = roundf(PL_MAX(dst->x0, 0.0)),
ry0 = roundf(PL_MAX(dst->y0, 0.0)),
rx1 = roundf(PL_MIN(dst->x1, pass->target.fbo->params.w)),
ry1 = roundf(PL_MIN(dst->y1, pass->target.fbo->params.h));
rx1 = roundf(PL_MIN(dst->x1, dst_ref->params.w)),
ry1 = roundf(PL_MIN(dst->y1, dst_ref->params.h));
// Adjust the src rect corresponding to the rounded crop
float scale_x = pl_rect_w(*src) / pl_rect_w(*dst),
@@ -1597,8 +1801,8 @@ static void fix_rects(struct pass_state *pass, const struct pl_tex *ref_tex)
src->y1 = base_y + (ry1 - dst->y0) * scale_y;
// Update dst_rect to the rounded values and re-apply flip if needed. We
// always do this in the `dst_rect` rather than the `src_rect` because this
// allows e.g. polar sampling compute shaders to work.
// always do this in the `dst` rather than the `src`` because this allows
// e.g. polar sampling compute shaders to work.
*dst = (struct pl_rect2df) {
.x0 = flipped_x ? rx1 : rx0,
.y0 = flipped_y ? ry1 : ry0,
@@ -1606,78 +1810,44 @@ static void fix_rects(struct pass_state *pass, const struct pl_tex *ref_tex)
.y1 = flipped_y ? ry0 : ry1,
};
// Integer copy of the above, for convenience
// Copies of the above, for convenience
pass->ref_rect = *src;
pass->dst_rect = (struct pl_rect2d) {
dst->x0, dst->y0, dst->x1, dst->y1,
};
}
#define require(expr) \
do { \
if (!(expr)) { \
PL_ERR(rr, "Validation failed: %s (%s:%d)", \
#expr, __FILE__, __LINE__); \
return false; \
} \
} while (0)
#define validate_plane(plane) \
do { \
require((plane).texture); \
require((plane).components > 0 && (plane).components <= 4); \
for (int c = 0; c < (plane).components; c++) { \
require((plane).component_mapping[c] >= PL_CHANNEL_NONE && \
(plane).component_mapping[c] <= PL_CHANNEL_A); \
} \
} while (0)
// Perform some basic validity checks on incoming structs to help catch invalid
// API usage. This is not an exhaustive check. In particular, enums are not
// bounds checked. This is because most functions accepting enums already
// abort() in the default case, and because it's not the intent of this check
// to catch all instances of memory corruption - just common logic bugs.
static bool validate_structs(struct pl_renderer *rr,
const struct pl_image *image,
const struct pl_render_target *target)
static const struct pl_tex *frame_ref(const struct pl_frame *frame)
{
// Rendering an image with no planes technically works, but is pointless
require(image->num_planes > 0 && image->num_planes < PL_MAX_PLANES);
for (int i = 0; i < image->num_planes; i++)
validate_plane(image->planes[i]);
float src_w = pl_rect_w(image->src_rect),
src_h = pl_rect_h(image->src_rect);
require(!src_w == !src_h);
require(image->num_overlays >= 0);
for (int i = 0; i < image->num_overlays; i++) {
const struct pl_overlay *overlay = &image->overlays[i];
validate_plane(overlay->plane);
require(pl_rect_w(overlay->rect) && pl_rect_h(overlay->rect));
pl_assert(frame->num_planes);
for (int i = 0; i < frame->num_planes; i++) {
switch (detect_plane_type(&frame->planes[i], &frame->repr)) {
case PLANE_RGB:
case PLANE_LUMA:
case PLANE_XYZ:
return frame->planes[i].texture;
default: continue;
}
}
require(target->fbo);
require(target->fbo->params.renderable);
return frame->planes[0].texture;
}
float dst_w = pl_rect_w(target->dst_rect),
dst_h = pl_rect_h(target->dst_rect);
require(!dst_w == !dst_h);
static void fix_color_space(struct pl_frame *frame)
{
const struct pl_tex *tex = frame_ref(frame);
for (int i = 0; i < target->num_overlays; i++) {
const struct pl_overlay *overlay = &target->overlays[i];
validate_plane(overlay->plane);
require(pl_rect_w(overlay->rect) && pl_rect_h(overlay->rect));
}
// If the primaries are not known, guess them based on the resolution
if (!frame->color.primaries)
frame->color.primaries = pl_color_primaries_guess(tex->params.w, tex->params.h);
return true;
}
pl_color_space_infer(&frame->color);
static void fix_bits(struct pl_bit_encoding *bits, const struct pl_tex *tex)
{
// For UNORM formats, we can infer the sampled bit depth from the texture
// itself. This is ignored for other format types, because the logic
// doesn't really work out for them anyways, and it's best not to do
// anything too crazy unless the user provides explicit details.
struct pl_bit_encoding *bits = &frame->repr.bits;
if (!bits->sample_depth && tex->params.format->type == PL_FMT_UNORM) {
// Just assume the first component's depth is canonical. This works in
// practice, since for cases like rgb565 we want to use the lower depth
@@ -1695,38 +1865,45 @@ static void fix_bits(struct pl_bit_encoding *bits, const struct pl_tex *tex)
}
}
bool pl_render_image(struct pl_renderer *rr, const struct pl_image *pimage,
const struct pl_render_target *ptarget,
bool pl_render_image(struct pl_renderer *rr, const struct pl_frame *pimage,
const struct pl_frame *ptarget,
const struct pl_render_params *params)
{
params = PL_DEF(params, &pl_render_default_params);
if (!validate_structs(rr, pimage, ptarget))
return false;
struct pass_state pass = {
.tmp = talloc_new(NULL),
.rr = rr,
.image = *pimage,
.target = *ptarget,
};
pass.fbos_used = talloc_zero_array(pass.tmp, bool, rr->num_fbos);
struct pl_image *image = &pass.image;
struct pl_render_target *target = &pass.target;
// Backwards compatibility hacks
struct pl_frame *image = &pass.image;
struct pl_frame *target = &pass.target;
default_rect(&image->crop, &image->src_rect);
default_rect(&target->crop, &target->src_rect);
if (!target->num_planes && target->fbo) {
target->num_planes = 1;
target->planes[0] = (struct pl_plane) {
.texture = target->fbo,
.components = target->fbo->params.format->num_components,
.component_mapping = {0, 1, 2, 3},
};
}
pl_color_space_infer(&image->color);
pl_color_space_infer(&target->color);
if (!validate_structs(rr, image, target))
return false;
fix_bits(&image->repr.bits, image->planes[0].texture);
fix_bits(&target->repr.bits, target->fbo);
fix_refs_and_rects(&pass);
fix_color_space(image);
// Note: the rects are fixed as part of `pass_read_image`
// Infer the target color space info based on the image's
target->color.primaries = PL_DEF(target->color.primaries, image->color.primaries);
target->color.transfer = PL_DEF(target->color.transfer, image->color.transfer);
fix_color_space(target);
// As a special case, don't infer the image primaries just yet, since
// that's done in a resolution-dependent way in pass_read_image
if (!pimage->color.primaries)
image->color.primaries = PL_COLOR_PRIM_UNKNOWN;
pass.tmp = talloc_new(NULL),
pass.fbos_used = talloc_zero_array(pass.tmp, bool, rr->num_fbos);
// TODO: output caching
pl_dispatch_reset_frame(rr->dp);
@@ -1745,28 +1922,6 @@ bool pl_render_image(struct pl_renderer *rr, const struct pl_image *pimage,
if (!pass_output_target(rr, &pass, params))
goto error;
// If we don't have FBOs available, simulate the on-image overlays at
// this stage
if (image->num_overlays > 0 && !FBOFMT) {
float rx = pl_rect_w(target->dst_rect) / pl_rect_w(image->src_rect),
ry = pl_rect_h(target->dst_rect) / pl_rect_h(image->src_rect);
struct pl_transform2x2 scale = {
.mat = {{{ rx, 0.0 }, { 0.0, ry }}},
.c = {
target->dst_rect.x0 - image->src_rect.x0 * rx,
target->dst_rect.y0 - image->src_rect.y0 * ry
},
};
draw_overlays(&pass, target->fbo, image->overlays, image->num_overlays,
target->color, target->repr, false, &scale, params);
}
// Draw the final output overlays
draw_overlays(&pass, target->fbo, target->overlays, target->num_overlays,
target->color, target->repr, false, NULL, params);
talloc_free(pass.tmp);
return true;
@@ -1777,44 +1932,18 @@ error:
return false;
}
void pl_image_set_chroma_location(struct pl_image *image,
void pl_frame_set_chroma_location(struct pl_frame *frame,
enum pl_chroma_location chroma_loc)
{
// Re-use our internal helpers for this
struct plane_state planes[4];
struct plane_state *ref = NULL;
pl_assert(image->num_planes < PL_ARRAY_SIZE(planes));
for (int i = 0; i < image->num_planes; i++) {
struct plane_state *st = &planes[i];
*st = (struct plane_state) {
.plane = image->planes[i],
.img = { .repr = image->repr, },
};
const struct pl_tex *ref = frame_ref(frame);
st->type = detect_plane_type(st);
pl_assert(st->type >= 0);
switch (st->type) {
case PLANE_RGB:
case PLANE_LUMA:
case PLANE_XYZ:
ref = st;
break;
default: break;
}
}
if (!ref)
return; // no planes
if (ref->plane.texture) {
if (ref) {
// Texture dimensions are already known, so apply the chroma location
// only to subsampled planes
int ref_w = ref->plane.texture->params.w,
ref_h = ref->plane.texture->params.h;
int ref_w = ref->params.w, ref_h = ref->params.h;
for (int i = 0; i < image->num_planes; i++) {
struct pl_plane *plane = &image->planes[i];
for (int i = 0; i < frame->num_planes; i++) {
struct pl_plane *plane = &frame->planes[i];
const struct pl_tex *tex = plane->texture;
bool subsampled = tex->params.w < ref_w || tex->params.h < ref_h;
if (subsampled)
@@ -1823,42 +1952,70 @@ void pl_image_set_chroma_location(struct pl_image *image,
} else {
// Texture dimensions are not yet known, so apply the chroma location
// to all chroma planes, regardless of subsampling
for (int i = 0; i < image->num_planes; i++) {
struct pl_plane *plane = &image->planes[i];
if (planes[i].type == PLANE_CHROMA)
for (int i = 0; i < frame->num_planes; i++) {
struct pl_plane *plane = &frame->planes[i];
if (detect_plane_type(plane, &frame->repr) == PLANE_CHROMA)
pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y);
}
}
}
void pl_render_target_from_swapchain(struct pl_render_target *out_target,
const struct pl_swapchain_frame *frame)
void pl_frame_from_swapchain(struct pl_frame *out_frame,
const struct pl_swapchain_frame *frame)
{
const struct pl_tex *fbo = frame->fbo;
*out_target = (struct pl_render_target) {
.fbo = fbo,
.dst_rect = { 0, 0, fbo->params.w, fbo->params.h },
*out_frame = (struct pl_frame) {
.num_planes = 1,
.planes = {{
.texture = fbo,
.components = fbo->params.format->num_components,
.component_mapping = {0, 1, 2, 3},
}},
.crop = { 0, 0, fbo->params.w, fbo->params.h },
.repr = frame->color_repr,
.color = frame->color_space,
};
if (frame->flipped)
PL_SWAP(out_target->dst_rect.y0, out_target->dst_rect.y1);
PL_SWAP(out_frame->crop.y0, out_frame->crop.y1);
}
bool pl_render_target_partial(const struct pl_render_target *target)
bool pl_frame_is_cropped(const struct pl_frame *frame)
{
int x0 = roundf(PL_MIN(target->dst_rect.x0, target->dst_rect.x1)),
y0 = roundf(PL_MIN(target->dst_rect.y0, target->dst_rect.y1)),
x1 = roundf(PL_MAX(target->dst_rect.x0, target->dst_rect.x1)),
y1 = roundf(PL_MAX(target->dst_rect.y0, target->dst_rect.y1)),
fbo_w = target->fbo->params.w,
fbo_h = target->fbo->params.h;
int x0 = roundf(PL_MIN(frame->crop.x0, frame->crop.x1)),
y0 = roundf(PL_MIN(frame->crop.y0, frame->crop.y1)),
x1 = roundf(PL_MAX(frame->crop.x0, frame->crop.x1)),
y1 = roundf(PL_MAX(frame->crop.y0, frame->crop.y1));
const struct pl_tex *ref = frame_ref(frame);
pl_assert(ref);
if (!x0 && !x1)
x1 = fbo_w;
x1 = ref->params.w;
if (!y0 && !y1)
y1 = fbo_h;
y1 = ref->params.h;
return x0 > 0 || y0 > 0 || x1 < fbo_w || y1 < fbo_h;
return x0 > 0 || y0 > 0 || x1 < ref->params.w || y1 < ref->params.h;
}
void pl_frame_clear(const struct pl_gpu *gpu, const struct pl_frame *frame,
const float rgb[3])
{
struct pl_color_repr repr = frame->repr;
struct pl_transform3x3 tr = pl_color_repr_decode(&repr, NULL);
pl_transform3x3_invert(&tr);
float encoded[3] = { rgb[0], rgb[1], rgb[2] };
pl_transform3x3_apply(&tr, encoded);
for (int p = 0; p < frame->num_planes; p++) {
const struct pl_plane *plane = &frame->planes[p];
float clear[4] = { 0.0, 0.0, 0.0, 1.0 };
for (int c = 0; c < plane->components; c++) {
if (plane->component_mapping[c] >= 0)
clear[c] = encoded[plane->component_mapping[c]];
}
pl_tex_clear(gpu, plane->texture, clear);
}
}
Loading