/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#include "common.h"
#include "context.h"
#include "shaders.h"
#include "gpu.h"

int pl_optimal_transfer_stride(const struct pl_gpu *gpu, int dimension)
{
    return PL_ALIGN2(dimension, gpu->limits.align_tex_xfer_stride);
}
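// Worked example (illustrative values, not part of the API): PL_ALIGN2 rounds
// up to the next multiple of a power-of-two alignment, so on a hypothetical
// GPU with limits.align_tex_xfer_stride == 256:
//
//   pl_optimal_transfer_stride(gpu, 1000) == 1024  // rounded up
//   pl_optimal_transfer_stride(gpu, 1280) == 1280  // already aligned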

void pl_gpu_destroy(const struct pl_gpu *gpu)
{
    if (!gpu)
        return;

    gpu->impl->destroy(gpu);
}

void pl_gpu_print_info(const struct pl_gpu *gpu, enum pl_log_level lev)
{
    PL_MSG(gpu, lev, "GPU information:");
    PL_MSG(gpu, lev, "    GLSL version: %d%s", gpu->glsl.version,
           gpu->glsl.vulkan ? " (vulkan)" : gpu->glsl.gles ? " es" : "");
    PL_MSG(gpu, lev, "    Capabilities: 0x%x", (unsigned int) gpu->caps);
    PL_MSG(gpu, lev, "    Limits:");

#define LOG(fmt, field) \
    PL_MSG(gpu, lev, "      %-26s %" fmt, #field ":", gpu->limits.field)

    LOG(PRIu32, max_tex_1d_dim);
    LOG(PRIu32, max_tex_2d_dim);
    LOG(PRIu32, max_tex_3d_dim);
    LOG("zu", max_pushc_size);
    LOG("zu", max_xfer_size);
    LOG("zu", max_ubo_size);
    LOG("zu", max_ssbo_size);
    LOG(PRIu64, max_buffer_texels);
    LOG(PRId16, min_gather_offset);
    LOG(PRId16, max_gather_offset);

    if (gpu->caps & PL_GPU_CAP_COMPUTE) {
        LOG("zu", max_shmem_size);
        LOG(PRIu32, max_group_threads);
        LOG(PRIu32, max_group_size[0]);
        LOG(PRIu32, max_group_size[1]);
        LOG(PRIu32, max_group_size[2]);
        LOG(PRIu32, max_dispatch[0]);
        LOG(PRIu32, max_dispatch[1]);
        LOG(PRIu32, max_dispatch[2]);
    }

    LOG(PRIu32, align_tex_xfer_stride);
    LOG("zu", align_tex_xfer_offset);
#undef LOG

    if (pl_gpu_supports_interop(gpu)) {
        PL_MSG(gpu, lev, "    External API interop:");

        // Pretty-print the device UUID
        static const char *hexdigits = "0123456789ABCDEF";
        char buf[3 * sizeof(gpu->uuid)];
        for (int i = 0; i < sizeof(gpu->uuid); i++) {
            uint8_t x = gpu->uuid[i];
            buf[3 * i + 0] = hexdigits[x >> 4];
            buf[3 * i + 1] = hexdigits[x & 0xF];
            buf[3 * i + 2] = i == sizeof(gpu->uuid) - 1 ? '\0' : ':';
        }

        PL_MSG(gpu, lev, "      UUID: %s", buf);
        PL_MSG(gpu, lev, "      buf export caps: 0x%x",
               (unsigned int) gpu->export_caps.buf);
        PL_MSG(gpu, lev, "      buf import caps: 0x%x",
               (unsigned int) gpu->import_caps.buf);
        PL_MSG(gpu, lev, "      tex export caps: 0x%x",
               (unsigned int) gpu->export_caps.tex);
        PL_MSG(gpu, lev, "      tex import caps: 0x%x",
               (unsigned int) gpu->import_caps.tex);
        PL_MSG(gpu, lev, "      sync export caps: 0x%x",
               (unsigned int) gpu->export_caps.sync);
        PL_MSG(gpu, lev, "      sync import caps: 0x%x",
               (unsigned int) gpu->import_caps.sync);
    }
}

static int cmp_fmt(const void *pa, const void *pb)
{
    const struct pl_fmt *a = *(const struct pl_fmt **)pa;
    const struct pl_fmt *b = *(const struct pl_fmt **)pb;

    // Always prefer non-opaque formats
    if (a->opaque != b->opaque)
        return PL_CMP(a->opaque, b->opaque);

    // Always prefer non-emulated formats
    if (a->emulated != b->emulated)
        return PL_CMP(a->emulated, b->emulated);

    int ca = __builtin_popcount(a->caps),
        cb = __builtin_popcount(b->caps);
    if (ca != cb)
        return -PL_CMP(ca, cb); // invert to sort higher values first

    // If the population count is the same but the caps are different, prefer
    // the caps with a "lower" value (which tend to be more fundamental caps)
    if (a->caps != b->caps)
        return PL_CMP(a->caps, b->caps);

    // If the capabilities are equal, sort based on the component attributes
    for (int i = 0; i < PL_ARRAY_SIZE(a->component_depth); i++) {
        int da = a->component_depth[i],
            db = b->component_depth[i];
        if (da != db)
            return PL_CMP(da, db);

        int ha = a->host_bits[i],
            hb = b->host_bits[i];
        if (ha != hb)
            return PL_CMP(ha, hb);

        int oa = a->sample_order[i],
            ob = b->sample_order[i];
        if (oa != ob)
            return PL_CMP(oa, ob);
    }

    // Fall back to sorting by the name (for stability)
    return strcmp(a->name, b->name);
}

void pl_gpu_sort_formats(struct pl_gpu *gpu)
{
    qsort(gpu->formats, gpu->num_formats, sizeof(struct pl_fmt *), cmp_fmt);
}

void pl_gpu_print_formats(const struct pl_gpu *gpu, enum pl_log_level lev)
{
    if (!pl_msg_test(gpu->ctx, lev))
        return;

    PL_MSG(gpu, lev, "GPU texture formats:");
    PL_MSG(gpu, lev, "    %-10s %-6s %-6s %-4s %-4s %-13s %-13s %-10s %-10s",
           "NAME", "TYPE", "CAPS", "SIZE", "COMP", "DEPTH", "BITS",
           "GLSL_TYPE", "GLSL_FMT");
    for (int n = 0; n < gpu->num_formats; n++) {
        const struct pl_fmt *fmt = gpu->formats[n];

        static const char *types[] = {
            [PL_FMT_UNKNOWN] = "UNKNOWN",
            [PL_FMT_UNORM]   = "UNORM",
            [PL_FMT_SNORM]   = "SNORM",
            [PL_FMT_UINT]    = "UINT",
            [PL_FMT_SINT]    = "SINT",
            [PL_FMT_FLOAT]   = "FLOAT",
        };

        static const char idx_map[4] = {'R', 'G', 'B', 'A'};
        char indices[4] = {' ', ' ', ' ', ' '};
        if (!fmt->opaque) {
            for (int i = 0; i < fmt->num_components; i++)
                indices[i] = idx_map[fmt->sample_order[i]];
        }

#define IDX4(f) (f)[0], (f)[1], (f)[2], (f)[3]

        PL_MSG(gpu, lev, "    %-10s %-6s 0x%-4x %-4zu %c%c%c%c "
               "{%-2d %-2d %-2d %-2d} {%-2d %-2d %-2d %-2d} %-10s %-10s",
               fmt->name, types[fmt->type], (unsigned int) fmt->caps,
               fmt->texel_size, IDX4(indices), IDX4(fmt->component_depth),
               IDX4(fmt->host_bits), PL_DEF(fmt->glsl_type, ""),
               PL_DEF(fmt->glsl_format, ""));

#undef IDX4
    }
}

bool pl_fmt_is_ordered(const struct pl_fmt *fmt)
{
    bool ret = !fmt->opaque;
    for (int i = 0; i < fmt->num_components; i++)
        ret &= fmt->sample_order[i] == i;
    return ret;
}
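// For example, an rgba8-style format (sample_order {0, 1, 2, 3}) is ordered,
// while a bgra8-style format (sample_order {2, 1, 0, 3}) is not.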

struct glsl_fmt {
    enum pl_fmt_type type;
    int num_components;
    int depth[4];
    const char *glsl_format;
};

// List taken from the GLSL specification. (Yes, GLSL supports only exactly
// these formats with exactly these names)
static const struct glsl_fmt pl_glsl_fmts[] = {
    {PL_FMT_FLOAT, 1, {16},             "r16f"},
    {PL_FMT_FLOAT, 1, {32},             "r32f"},
    {PL_FMT_FLOAT, 2, {16, 16},         "rg16f"},
    {PL_FMT_FLOAT, 2, {32, 32},         "rg32f"},
    {PL_FMT_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"},
    {PL_FMT_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"},
    {PL_FMT_FLOAT, 3, {11, 11, 10},     "r11f_g11f_b10f"},

    {PL_FMT_UNORM, 1, {8},              "r8"},
    {PL_FMT_UNORM, 1, {16},             "r16"},
    {PL_FMT_UNORM, 2, {8,  8},          "rg8"},
    {PL_FMT_UNORM, 2, {16, 16},         "rg16"},
    {PL_FMT_UNORM, 4, {8,  8,  8,  8},  "rgba8"},
    {PL_FMT_UNORM, 4, {16, 16, 16, 16}, "rgba16"},
    {PL_FMT_UNORM, 4, {10, 10, 10,  2}, "rgb10_a2"},

    {PL_FMT_SNORM, 1, {8},              "r8_snorm"},
    {PL_FMT_SNORM, 1, {16},             "r16_snorm"},
    {PL_FMT_SNORM, 2, {8,  8},          "rg8_snorm"},
    {PL_FMT_SNORM, 2, {16, 16},         "rg16_snorm"},
    {PL_FMT_SNORM, 4, {8,  8,  8,  8},  "rgba8_snorm"},
    {PL_FMT_SNORM, 4, {16, 16, 16, 16}, "rgba16_snorm"},

    {PL_FMT_UINT,  1, {8},              "r8ui"},
    {PL_FMT_UINT,  1, {16},             "r16ui"},
    {PL_FMT_UINT,  1, {32},             "r32ui"},
    {PL_FMT_UINT,  2, {8,  8},          "rg8ui"},
    {PL_FMT_UINT,  2, {16, 16},         "rg16ui"},
    {PL_FMT_UINT,  2, {32, 32},         "rg32ui"},
    {PL_FMT_UINT,  4, {8,  8,  8,  8},  "rgba8ui"},
    {PL_FMT_UINT,  4, {16, 16, 16, 16}, "rgba16ui"},
    {PL_FMT_UINT,  4, {32, 32, 32, 32}, "rgba32ui"},
    {PL_FMT_UINT,  4, {10, 10, 10,  2}, "rgb10_a2ui"},

    {PL_FMT_SINT,  1, {8},              "r8i"},
    {PL_FMT_SINT,  1, {16},             "r16i"},
    {PL_FMT_SINT,  1, {32},             "r32i"},
    {PL_FMT_SINT,  2, {8,  8},          "rg8i"},
    {PL_FMT_SINT,  2, {16, 16},         "rg16i"},
    {PL_FMT_SINT,  2, {32, 32},         "rg32i"},
    {PL_FMT_SINT,  4, {8,  8,  8,  8},  "rgba8i"},
    {PL_FMT_SINT,  4, {16, 16, 16, 16}, "rgba16i"},
    {PL_FMT_SINT,  4, {32, 32, 32, 32}, "rgba32i"},
};

const char *pl_fmt_glsl_format(const struct pl_fmt *fmt)
{
    if (fmt->opaque)
        return NULL;

    int components = fmt->num_components;
    if (fmt->emulated && components == 3)
        components = 4;

    for (int n = 0; n < PL_ARRAY_SIZE(pl_glsl_fmts); n++) {
        const struct glsl_fmt *gfmt = &pl_glsl_fmts[n];

        if (fmt->type != gfmt->type)
            continue;
        if (components != gfmt->num_components)
            continue;

        // The component order is irrelevant, so we need to sort the depth
        // based on the component's index
        int depth[4] = {0};
        for (int i = 0; i < fmt->num_components; i++)
            depth[fmt->sample_order[i]] = fmt->component_depth[i];

        // Copy over any emulated components
        for (int i = fmt->num_components; i < components; i++)
            depth[i] = gfmt->depth[i];

        for (int i = 0; i < PL_ARRAY_SIZE(depth); i++) {
            if (depth[i] != gfmt->depth[i])
                goto next_fmt;
        }

        return gfmt->glsl_format;

next_fmt: ; // equivalent to `continue`
    }

    return NULL;
}
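// Worked example (hypothetical emulated format, for illustration): a
// 3-component PL_FMT_FLOAT with component_depth {32, 32, 32} and
// fmt->emulated set is padded to 4 components above, with the emulated
// fourth depth copied from the table entry itself, so it matches "rgba32f".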

const struct pl_fmt *pl_find_fmt(const struct pl_gpu *gpu, enum pl_fmt_type type,
                                 int num_components, int min_depth,
                                 int host_bits, enum pl_fmt_caps caps)
{
    for (int n = 0; n < gpu->num_formats; n++) {
        const struct pl_fmt *fmt = gpu->formats[n];
        if (fmt->type != type || fmt->num_components != num_components)
            continue;
        if ((fmt->caps & caps) != caps)
            continue;

        // When specifying some particular host representation, ensure the
        // format is non-opaque, ordered and unpadded
        if (host_bits && fmt->opaque)
            continue;
        if (host_bits && fmt->texel_size * 8 != host_bits * num_components)
            continue;
        if (host_bits && !pl_fmt_is_ordered(fmt))
            continue;

        for (int i = 0; i < fmt->num_components; i++) {
            if (fmt->component_depth[i] < min_depth)
                goto next_fmt;
            if (host_bits && fmt->host_bits[i] != host_bits)
                goto next_fmt;
        }

        return fmt;

next_fmt: ; // equivalent to `continue`
    }

    // ran out of formats
    PL_DEBUG(gpu, "No matching format found");
    return NULL;
}

const struct pl_fmt *pl_find_vertex_fmt(const struct pl_gpu *gpu,
                                        enum pl_fmt_type type, int comps)
{
    static const size_t sizes[] = {
        [PL_FMT_FLOAT] = sizeof(float),
        [PL_FMT_UNORM] = sizeof(unsigned),
        [PL_FMT_UINT]  = sizeof(unsigned),
        [PL_FMT_SNORM] = sizeof(int),
        [PL_FMT_SINT]  = sizeof(int),
    };

    return pl_find_fmt(gpu, type, comps, 0, 8 * sizes[type], PL_FMT_CAP_VERTEX);
}

const struct pl_fmt *pl_find_named_fmt(const struct pl_gpu *gpu, const char *name)
{
    if (!name)
        return NULL;

    for (int i = 0; i < gpu->num_formats; i++) {
        const struct pl_fmt *fmt = gpu->formats[i];
        if (strcmp(name, fmt->name) == 0)
            return fmt;
    }

    // ran out of formats
    return NULL;
}

const struct pl_tex *pl_tex_create(const struct pl_gpu *gpu,
                                   const struct pl_tex_params *params)
{
    pl_assert(!params->import_handle || !params->export_handle);
    if (params->export_handle) {
        pl_assert(params->export_handle & gpu->export_caps.tex);
        pl_assert(PL_ISPOT(params->export_handle));
    }
    if (params->import_handle) {
        pl_assert(params->import_handle & gpu->import_caps.tex);
        pl_assert(PL_ISPOT(params->import_handle));
    }

    switch (pl_tex_params_dimension(*params)) {
    case 1:
        pl_assert(params->w > 0);
        pl_assert(params->w <= gpu->limits.max_tex_1d_dim);
        pl_assert(!params->renderable);
        break;
    case 2:
        pl_assert(params->w > 0 && params->h > 0);
        pl_assert(params->w <= gpu->limits.max_tex_2d_dim);
        pl_assert(params->h <= gpu->limits.max_tex_2d_dim);
        break;
    case 3:
        pl_assert(params->w > 0 && params->h > 0 && params->d > 0);
        pl_assert(params->w <= gpu->limits.max_tex_3d_dim);
        pl_assert(params->h <= gpu->limits.max_tex_3d_dim);
        pl_assert(params->d <= gpu->limits.max_tex_3d_dim);
        pl_assert(!params->renderable);
        break;
    }

    const struct pl_fmt *fmt = params->format;
    pl_assert(fmt);
    pl_assert(!params->sampleable || fmt->caps & PL_FMT_CAP_SAMPLEABLE);
    pl_assert(!params->renderable || fmt->caps & PL_FMT_CAP_RENDERABLE);
    pl_assert(!params->storable   || fmt->caps & PL_FMT_CAP_STORABLE);
    pl_assert(!params->blit_src   || fmt->caps & PL_FMT_CAP_BLITTABLE);
    pl_assert(!params->blit_dst   || fmt->caps & PL_FMT_CAP_BLITTABLE);
    pl_assert(params->sample_mode != PL_TEX_SAMPLE_LINEAR || fmt->caps & PL_FMT_CAP_LINEAR);

    return gpu->impl->tex_create(gpu, params);
}

static bool pl_tex_params_superset(struct pl_tex_params a, struct pl_tex_params b)
{
    return a.w == b.w && a.h == b.h && a.d == b.d &&
           a.format          == b.format &&
           a.sample_mode     == b.sample_mode &&
           a.address_mode    == b.address_mode &&
           (a.sampleable     || !b.sampleable) &&
           (a.renderable     || !b.renderable) &&
           (a.storable       || !b.storable) &&
           (a.blit_src       || !b.blit_src) &&
           (a.blit_dst       || !b.blit_dst) &&
           (a.host_writable  || !b.host_writable) &&
           (a.host_readable  || !b.host_readable);
}

bool pl_tex_recreate(const struct pl_gpu *gpu, const struct pl_tex **tex,
                     const struct pl_tex_params *params)
{
    if (params->initial_data) {
        PL_ERR(gpu, "pl_tex_recreate may not be used with `initial_data`!");
        return false;
    }

    if (*tex && pl_tex_params_superset((*tex)->params, *params)) {
        pl_tex_invalidate(gpu, *tex);
        return true;
    }

    PL_INFO(gpu, "(Re)creating %dx%dx%d texture", params->w, params->h, params->d);
    pl_tex_destroy(gpu, tex);
    *tex = pl_tex_create(gpu, params);

    return !!*tex;
}

void pl_tex_destroy(const struct pl_gpu *gpu, const struct pl_tex **tex)
{
    if (!*tex)
        return;

    gpu->impl->tex_destroy(gpu, *tex);
    *tex = NULL;
}

void pl_tex_clear(const struct pl_gpu *gpu, const struct pl_tex *dst,
                  const float color[4])
{
    pl_assert(dst->params.blit_dst);

    pl_tex_invalidate(gpu, dst);
    gpu->impl->tex_clear(gpu, dst, color);
}

void pl_tex_invalidate(const struct pl_gpu *gpu, const struct pl_tex *tex)
{
    if (gpu->impl->tex_invalidate)
        gpu->impl->tex_invalidate(gpu, tex);
}

static void strip_coords(const struct pl_tex *tex, struct pl_rect3d *rc)
{
    if (!tex->params.d) {
        rc->z0 = 0;
        rc->z1 = 1;
    }

    if (!tex->params.h) {
        rc->y0 = 0;
        rc->y1 = 1;
    }
}

void pl_tex_blit(const struct pl_gpu *gpu,
                 const struct pl_tex *dst, const struct pl_tex *src,
                 struct pl_rect3d dst_rc, struct pl_rect3d src_rc)
{
    const struct pl_fmt *src_fmt = src->params.format;
    const struct pl_fmt *dst_fmt = dst->params.format;
    pl_assert(src_fmt->texel_size == dst_fmt->texel_size);
    pl_assert((src_fmt->type == PL_FMT_UINT) == (dst_fmt->type == PL_FMT_UINT));
    pl_assert((src_fmt->type == PL_FMT_SINT) == (dst_fmt->type == PL_FMT_SINT));
    pl_assert(src->params.blit_src);
    pl_assert(dst->params.blit_dst);
    pl_assert(src_rc.x0 >= 0 && src_rc.x0 < src->params.w);
    pl_assert(src_rc.x1 > 0 && src_rc.x1 <= src->params.w);
    pl_assert(dst_rc.x0 >= 0 && dst_rc.x0 < dst->params.w);
    pl_assert(dst_rc.x1 > 0 && dst_rc.x1 <= dst->params.w);

    if (src->params.h) {
        pl_assert(dst->params.h);
        pl_assert(src_rc.y0 >= 0 && src_rc.y0 < src->params.h);
        pl_assert(src_rc.y1 > 0 && src_rc.y1 <= src->params.h);
    }
    if (dst->params.h) {
        pl_assert(dst_rc.y0 >= 0 && dst_rc.y0 < dst->params.h);
        pl_assert(dst_rc.y1 > 0 && dst_rc.y1 <= dst->params.h);
    }
    if (src->params.d) {
        pl_assert(dst->params.d);
        pl_assert(src_rc.z0 >= 0 && src_rc.z0 < src->params.d);
        pl_assert(src_rc.z1 > 0 && src_rc.z1 <= src->params.d);
    }
    if (dst->params.d) {
        pl_assert(dst_rc.z0 >= 0 && dst_rc.z0 < dst->params.d);
        pl_assert(dst_rc.z1 > 0 && dst_rc.z1 <= dst->params.d);
    }

    strip_coords(src, &src_rc);
    strip_coords(dst, &dst_rc);

    struct pl_rect3d full = {0, 0, 0, dst->params.w, dst->params.h, dst->params.d};
    strip_coords(dst, &full);

    struct pl_rect3d rcnorm = dst_rc;
    pl_rect3d_normalize(&rcnorm);
    if (pl_rect3d_eq(rcnorm, full))
        pl_tex_invalidate(gpu, dst);

    gpu->impl->tex_blit(gpu, dst, src, dst_rc, src_rc);
}

size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par)
{
    const struct pl_tex *tex = par->tex;
    int w = pl_rect_w(par->rc), h = pl_rect_h(par->rc), d = pl_rect_d(par->rc);

    // This generates the absolute bare minimum size of a buffer required to
    // hold the data of a texture upload/download, by including stride padding
    // only where strictly necessary.
    int texels = ((d - 1) * par->stride_h + (h - 1)) * par->stride_w + w;
    return texels * tex->params.format->texel_size;
}
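// Worked example (illustrative values): a 2D transfer with w = 100, h = 50
// (so d = 1) and stride_w = 128 needs
//   ((1 - 1) * stride_h + (50 - 1)) * 128 + 100 = 6372 texels,
// i.e. every row except the last one is padded out to the stride.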

static void fix_tex_transfer(const struct pl_gpu *gpu,
                             struct pl_tex_transfer_params *params)
{
    const struct pl_tex *tex = params->tex;
    struct pl_rect3d rc = params->rc;

    // Infer the default values
    if (!rc.x0 && !rc.x1)
        rc.x1 = tex->params.w;
    if (!rc.y0 && !rc.y1)
        rc.y1 = tex->params.h;
    if (!rc.z0 && !rc.z1)
        rc.z1 = tex->params.d;

    if (!params->stride_w)
        params->stride_w = pl_rect_w(rc);
    if (!params->stride_h)
        params->stride_h = pl_rect_h(rc);

    // Sanitize superfluous coordinates for the benefit of the GPU
    strip_coords(tex, &rc);
    if (!tex->params.w)
        params->stride_w = 1;
    if (!tex->params.h)
        params->stride_h = 1;

    params->rc = rc;

    // Check the parameters for sanity
#ifndef NDEBUG
    switch (pl_tex_params_dimension(tex->params))
    {
    case 3:
        pl_assert(rc.z1 > rc.z0);
        pl_assert(rc.z0 >= 0 && rc.z0 <  tex->params.d);
        pl_assert(rc.z1 >  0 && rc.z1 <= tex->params.d);
        pl_assert(params->stride_h >= pl_rect_h(rc));
        // fall through
    case 2:
        pl_assert(rc.y1 > rc.y0);
        pl_assert(rc.y0 >= 0 && rc.y0 <  tex->params.h);
        pl_assert(rc.y1 >  0 && rc.y1 <= tex->params.h);
        pl_assert(params->stride_w >= pl_rect_w(rc));
        // fall through
    case 1:
        pl_assert(rc.x1 > rc.x0);
        pl_assert(rc.x0 >= 0 && rc.x0 <  tex->params.w);
        pl_assert(rc.x1 >  0 && rc.x1 <= tex->params.w);
        break;
    }

    pl_assert(!params->buf ^ !params->ptr); // exactly one
    if (params->buf) {
        const struct pl_buf *buf = params->buf;
        size_t size = pl_tex_transfer_size(params);
        size_t texel = tex->params.format->texel_size;
        pl_assert(params->buf_offset == PL_ALIGN(params->buf_offset, texel));
        pl_assert(params->buf_offset + size <= buf->params.size);
    }
#endif
}

bool pl_tex_upload(const struct pl_gpu *gpu,
                   const struct pl_tex_transfer_params *params)
{
    const struct pl_tex *tex = params->tex;
    pl_assert(tex);
    pl_assert(tex->params.host_writable);

    struct pl_tex_transfer_params fixed = *params;
    fix_tex_transfer(gpu, &fixed);
    return gpu->impl->tex_upload(gpu, &fixed);
}

bool pl_tex_download(const struct pl_gpu *gpu,
                     const struct pl_tex_transfer_params *params)
{
    const struct pl_tex *tex = params->tex;
    pl_assert(tex);
    pl_assert(tex->params.host_readable);

    struct pl_tex_transfer_params fixed = *params;
    fix_tex_transfer(gpu, &fixed);
    return gpu->impl->tex_download(gpu, &fixed);
}

const struct pl_buf *pl_buf_create(const struct pl_gpu *gpu,
                                   const struct pl_buf_params *params)
{
    if (params->handle_type) {
        pl_assert(params->handle_type & gpu->export_caps.buf);
        pl_assert(PL_ISPOT(params->handle_type));
    }

    switch (params->type) {
    case PL_BUF_TEX_TRANSFER:
        pl_assert(gpu->limits.max_xfer_size);
        pl_assert(params->size <= gpu->limits.max_xfer_size);
        break;
    case PL_BUF_UNIFORM:
        pl_assert(gpu->limits.max_ubo_size);
        pl_assert(params->size <= gpu->limits.max_ubo_size);
        break;
    case PL_BUF_STORAGE:
        pl_assert(gpu->limits.max_ssbo_size);
        pl_assert(params->size <= gpu->limits.max_ssbo_size);
        break;
    case PL_BUF_TEXEL_UNIFORM: {
        pl_assert(params->format);
        pl_assert(params->format->caps & PL_FMT_CAP_TEXEL_UNIFORM);
        size_t limit = gpu->limits.max_buffer_texels * params->format->texel_size;
        pl_assert(params->size <= limit);
        break;
    }
    case PL_BUF_TEXEL_STORAGE: {
        pl_assert(params->format);
        pl_assert(params->format->caps & PL_FMT_CAP_TEXEL_STORAGE);
        size_t limit = gpu->limits.max_buffer_texels * params->format->texel_size;
        pl_assert(params->size <= limit);
        break;
    }
    case PL_BUF_PRIVATE: break;
    default: abort();
    }

    const struct pl_buf *buf = gpu->impl->buf_create(gpu, params);
    if (buf)
        pl_assert(buf->data || !params->host_mapped);

    return buf;
}

static bool pl_buf_params_superset(struct pl_buf_params a, struct pl_buf_params b)
{
    return a.type            == b.type &&
           a.format          == b.format &&
           a.size            >= b.size &&
           (a.host_mapped    || !b.host_mapped) &&
           (a.host_writable  || !b.host_writable) &&
           (a.host_readable  || !b.host_readable);
}

bool pl_buf_recreate(const struct pl_gpu *gpu, const struct pl_buf **buf,
                     const struct pl_buf_params *params)
{
    if (params->initial_data) {
        PL_ERR(gpu, "pl_buf_recreate may not be used with `initial_data`!");
        return false;
    }

    if (*buf && pl_buf_params_superset((*buf)->params, *params))
        return true;

    PL_INFO(gpu, "(Re)creating %zu byte buffer", params->size);
    pl_buf_destroy(gpu, buf);
    *buf = pl_buf_create(gpu, params);

    return !!*buf;
}

void pl_buf_destroy(const struct pl_gpu *gpu, const struct pl_buf **buf)
{
    if (!*buf)
        return;

    gpu->impl->buf_destroy(gpu, *buf);
    *buf = NULL;
}

void pl_buf_write(const struct pl_gpu *gpu, const struct pl_buf *buf,
                  size_t buf_offset, const void *data, size_t size)
{
    pl_assert(buf->params.host_writable);
    pl_assert(buf_offset + size <= buf->params.size);
    pl_assert(buf_offset == PL_ALIGN2(buf_offset, 4));
    gpu->impl->buf_write(gpu, buf, buf_offset, data, size);
}

bool pl_buf_read(const struct pl_gpu *gpu, const struct pl_buf *buf,
                 size_t buf_offset, void *dest, size_t size)
{
    pl_assert(buf->params.host_readable);
    pl_assert(buf_offset + size <= buf->params.size);
    pl_assert(buf_offset == PL_ALIGN2(buf_offset, 4));
    return gpu->impl->buf_read(gpu, buf, buf_offset, dest, size);
}

bool pl_buf_export(const struct pl_gpu *gpu, const struct pl_buf *buf)
{
    pl_assert(buf->params.handle_type);
    return gpu->impl->buf_export(gpu, buf);
}

bool pl_buf_poll(const struct pl_gpu *gpu, const struct pl_buf *buf, uint64_t t)
{
    return gpu->impl->buf_poll ? gpu->impl->buf_poll(gpu, buf, t) : false;
}

size_t pl_var_type_size(enum pl_var_type type)
{
    switch (type) {
    case PL_VAR_SINT:  return sizeof(int);
    case PL_VAR_UINT:  return sizeof(unsigned int);
    case PL_VAR_FLOAT: return sizeof(float);
    default: abort();
    }
}

#define MAX_DIM 4

const char *pl_var_glsl_type_name(struct pl_var var)
{
    static const char *types[PL_VAR_TYPE_COUNT][MAX_DIM+1][MAX_DIM+1] = {
    // float vectors
    [PL_VAR_FLOAT][1][1] = "float",
    [PL_VAR_FLOAT][1][2] = "vec2",
    [PL_VAR_FLOAT][1][3] = "vec3",
    [PL_VAR_FLOAT][1][4] = "vec4",
    // float matrices
    [PL_VAR_FLOAT][2][2] = "mat2",
    [PL_VAR_FLOAT][2][3] = "mat2x3",
    [PL_VAR_FLOAT][2][4] = "mat2x4",
    [PL_VAR_FLOAT][3][2] = "mat3x2",
    [PL_VAR_FLOAT][3][3] = "mat3",
    [PL_VAR_FLOAT][3][4] = "mat3x4",
    [PL_VAR_FLOAT][4][2] = "mat4x2",
    [PL_VAR_FLOAT][4][3] = "mat4x3",
    [PL_VAR_FLOAT][4][4] = "mat4",
    // integer vectors
    [PL_VAR_SINT][1][1] = "int",
    [PL_VAR_SINT][1][2] = "ivec2",
    [PL_VAR_SINT][1][3] = "ivec3",
    [PL_VAR_SINT][1][4] = "ivec4",
    // unsigned integer vectors
    [PL_VAR_UINT][1][1] = "uint",
    [PL_VAR_UINT][1][2] = "uvec2",
    [PL_VAR_UINT][1][3] = "uvec3",
    [PL_VAR_UINT][1][4] = "uvec4",
    };

    if (var.dim_v > MAX_DIM || var.dim_m > MAX_DIM)
        return NULL;

    return types[var.type][var.dim_m][var.dim_v];
}
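// For example, {.type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 3} maps to "mat3"
// and {.type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 2} maps to "vec2"; any
// combination missing from the table (e.g. integer matrices) yields NULL.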

#define PL_VAR(TYPE, NAME, M, V)                        \
    struct pl_var pl_var_##NAME(const char *name) {     \
        return (struct pl_var) {                        \
            .name  = name,                              \
            .type  = PL_VAR_##TYPE,                     \
            .dim_m = M,                                 \
            .dim_v = V,                                 \
            .dim_a = 1,                                 \
        };                                              \
    }

PL_VAR(SINT,  int,   1, 1);
PL_VAR(UINT,  uint,  1, 1);
PL_VAR(FLOAT, float, 1, 1);
PL_VAR(FLOAT, vec2,  1, 2);
PL_VAR(FLOAT, vec3,  1, 3);
PL_VAR(FLOAT, vec4,  1, 4);
PL_VAR(FLOAT, mat2,  2, 2);
PL_VAR(FLOAT, mat3,  3, 3);
PL_VAR(FLOAT, mat4,  4, 4);

#undef PL_VAR

struct pl_var pl_var_from_fmt(const struct pl_fmt *fmt, const char *name)
{
    static const enum pl_var_type vartypes[] = {
        [PL_FMT_FLOAT] = PL_VAR_FLOAT,
        [PL_FMT_UNORM] = PL_VAR_FLOAT,
        [PL_FMT_SNORM] = PL_VAR_FLOAT,
        [PL_FMT_UINT]  = PL_VAR_UINT,
        [PL_FMT_SINT]  = PL_VAR_SINT,
    };

    pl_assert(fmt->type < PL_ARRAY_SIZE(vartypes));
    return (struct pl_var) {
        .type  = vartypes[fmt->type],
        .name  = name,
        .dim_v = fmt->num_components,
        .dim_m = 1,
        .dim_a = 1,
    };
}

struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var)
{
    size_t col_size = pl_var_type_size(var->type) * var->dim_v;
    return (struct pl_var_layout) {
        .offset = offset,
        .stride = col_size,
        .size   = col_size * var->dim_m * var->dim_a,
    };
}

struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var)
{
    size_t el_size = pl_var_type_size(var->type);

    // std140 packing rules:
    // 1. The size of generic values is their size in bytes
    // 2. The size of vectors is the vector length * the base count
    // 3. Matrices are treated like arrays of column vectors
    // 4. The size of array rows is that of the element size rounded up to
    // the nearest multiple of vec4
    // 5. All values are aligned to a multiple of their size (stride for arrays),
    // with the exception of vec3 which is aligned like vec4
    size_t stride = el_size * var->dim_v;
    size_t align = stride;
    if (var->dim_v == 3)
        align += el_size;
    if (var->dim_m * var->dim_a > 1)
        stride = align = PL_ALIGN2(stride, sizeof(float[4]));

    return (struct pl_var_layout) {
        .offset = PL_ALIGN2(offset, align),
        .stride = stride,
        .size   = stride * var->dim_m * var->dim_a,
    };
}

struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var)
{
    size_t el_size = pl_var_type_size(var->type);

    // std430 packing rules: like std140, except arrays/matrices are always
    // "tightly" packed, even arrays/matrices of vec3s
    size_t stride = el_size * var->dim_v;
    size_t align = stride;
    if (var->dim_v == 3 && var->dim_m == 1 && var->dim_a == 1)
        align += el_size;

    return (struct pl_var_layout) {
        .offset = PL_ALIGN2(offset, align),
        .stride = stride,
        .size   = stride * var->dim_m * var->dim_a,
    };
}
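// Worked example contrasting the two layouts above (illustrative values):
// for a hypothetical `vec3 v[2]` (dim_v = 3, dim_m = 1, dim_a = 2,
// el_size = 4):
//
//   pl_std140_layout: stride = align = PL_ALIGN2(12, 16) = 16, size = 32
//   pl_std430_layout: stride = align = 12,                    size = 24
//
// i.e. std140 pads every array element to a vec4 boundary, while this
// std430 implementation packs vec3 arrays tightly.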

void memcpy_layout(void *dst_p, struct pl_var_layout dst_layout,
                   const void *src_p, struct pl_var_layout src_layout)
{
    uintptr_t src = (uintptr_t) src_p + src_layout.offset;
    uintptr_t dst = (uintptr_t) dst_p + dst_layout.offset;

    if (src_layout.stride == dst_layout.stride) {
        memcpy((void *) dst, (const void *) src, src_layout.size);
        return;
    }

    size_t stride = PL_MIN(src_layout.stride, dst_layout.stride);
    uintptr_t end = src + src_layout.size;
    while (src < end) {
        memcpy((void *) dst, (const void *) src, stride);
        src += src_layout.stride;
        dst += dst_layout.stride;
    }
}
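// Usage sketch (assumed names, for illustration only): converting a variable
// from its host layout to a std140 layout while copying it into a mapped
// uniform buffer:
//
//   struct pl_var_layout host = pl_var_host_layout(0, &var);
//   struct pl_var_layout dev  = pl_std140_layout(buf_offset, &var);
//   memcpy_layout(buf_data, dev, var_data, host);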

int pl_desc_namespace(const struct pl_gpu *gpu, enum pl_desc_type type)
{
    int ret = gpu->impl->desc_namespace(gpu, type);
    pl_assert(ret >= 0 && ret < PL_DESC_TYPE_COUNT);
    return ret;
}

const char *pl_desc_access_glsl_name(enum pl_desc_access mode)
{
    switch (mode) {
    case PL_DESC_ACCESS_READWRITE: return "";
    case PL_DESC_ACCESS_READONLY:  return "readonly";
    case PL_DESC_ACCESS_WRITEONLY: return "writeonly";
    default: abort();
    }
}

const struct pl_pass *pl_pass_create(const struct pl_gpu *gpu,
                                     const struct pl_pass_params *params)
{
    pl_assert(params->glsl_shader);
    switch (params->type) {
    case PL_PASS_RASTER:
        pl_assert(params->vertex_shader);
        for (int i = 0; i < params->num_vertex_attribs; i++) {
            struct pl_vertex_attrib va = params->vertex_attribs[i];
            pl_assert(va.name);
            pl_assert(va.fmt);
            pl_assert(va.fmt->caps & PL_FMT_CAP_VERTEX);
            pl_assert(va.offset + va.fmt->texel_size <= params->vertex_stride);
        }

        const struct pl_fmt *target_fmt = params->target_dummy.params.format;
        pl_assert(target_fmt);
        pl_assert(target_fmt->caps & PL_FMT_CAP_RENDERABLE);
        pl_assert(!params->blend_params || target_fmt->caps & PL_FMT_CAP_BLENDABLE);
        break;
    case PL_PASS_COMPUTE:
        pl_assert(gpu->caps & PL_GPU_CAP_COMPUTE);
        break;
    default: abort();
    }

    for (int i = 0; i < params->num_variables; i++) {
        pl_assert(gpu->caps & PL_GPU_CAP_INPUT_VARIABLES);
        struct pl_var var = params->variables[i];
        pl_assert(var.name);
        pl_assert(pl_var_glsl_type_name(var));
    }

    for (int i = 0; i < params->num_descriptors; i++) {
        struct pl_desc desc = params->descriptors[i];
        pl_assert(desc.name);
        // TODO: enforce disjoint bindings if possible?
    }

    pl_assert(params->push_constants_size <= gpu->limits.max_pushc_size);
    pl_assert(params->push_constants_size == PL_ALIGN2(params->push_constants_size, 4));

    return gpu->impl->pass_create(gpu, params);
}

void pl_pass_destroy(const struct pl_gpu *gpu, const struct pl_pass **pass)
{
    if (!*pass)
        return;

    gpu->impl->pass_destroy(gpu, *pass);
    *pass = NULL;
}

void pl_pass_run(const struct pl_gpu *gpu, const struct pl_pass_run_params *params)
{
    const struct pl_pass *pass = params->pass;
    struct pl_pass_run_params new = *params;

    for (int i = 0; i < pass->params.num_descriptors; i++) {
        struct pl_desc desc = pass->params.descriptors[i];
        struct pl_desc_binding db = params->desc_bindings[i];
        pl_assert(db.object);
        switch (desc.type) {
        case PL_DESC_SAMPLED_TEX: {
            const struct pl_tex *tex = db.object;
            pl_assert(tex->params.sampleable);
            break;
        }
        case PL_DESC_STORAGE_IMG: {
            const struct pl_tex *tex = db.object;
            pl_assert(tex->params.storable);
            break;
        }
        case PL_DESC_BUF_UNIFORM: {
            const struct pl_buf *buf = db.object;
            pl_assert(buf->params.type == PL_BUF_UNIFORM);
            break;
        }
        case PL_DESC_BUF_STORAGE: {
            const struct pl_buf *buf = db.object;
            pl_assert(buf->params.type == PL_BUF_STORAGE);
            break;
        }
        case PL_DESC_BUF_TEXEL_UNIFORM: {
            const struct pl_buf *buf = db.object;
            pl_assert(buf->params.type == PL_BUF_TEXEL_UNIFORM);
            break;
        }
        case PL_DESC_BUF_TEXEL_STORAGE: {
            const struct pl_buf *buf = db.object;
            pl_assert(buf->params.type == PL_BUF_TEXEL_STORAGE);
            break;
        }
        default: abort();
        }
    }

    for (int i = 0; i < params->num_var_updates; i++) {
        struct pl_var_update vu = params->var_updates[i];
        pl_assert(gpu->caps & PL_GPU_CAP_INPUT_VARIABLES);
        pl_assert(vu.index >= 0 && vu.index < pass->params.num_variables);
        pl_assert(vu.data);
    }

    pl_assert(params->push_constants || !pass->params.push_constants_size);

    switch (pass->params.type) {
    case PL_PASS_RASTER: {
        pl_assert(params->vertex_data);
        switch (pass->params.vertex_type) {
        case PL_PRIM_TRIANGLE_LIST:
            pl_assert(params->vertex_count % 3 == 0);
            // fall through
        case PL_PRIM_TRIANGLE_STRIP:
        case PL_PRIM_TRIANGLE_FAN:
            pl_assert(params->vertex_count >= 3);
            break;
        }

        const struct pl_tex *tex = params->target;
        pl_assert(tex);
        pl_assert(pl_tex_params_dimension(tex->params) == 2);
        pl_assert(tex->params.format == pass->params.target_dummy.params.format);
        pl_assert(tex->params.renderable);
        struct pl_rect2d *vp = &new.viewport;
        struct pl_rect2d *sc = &new.scissors;

        // Sanitize viewport/scissors
        if (!vp->x0 && !vp->x1)
            vp->x1 = tex->params.w;
        if (!vp->y0 && !vp->y1)
            vp->y1 = tex->params.h;

        if (!sc->x0 && !sc->x1)
            sc->x1 = tex->params.w;
        if (!sc->y0 && !sc->y1)
            sc->y1 = tex->params.h;

        // Constrain the scissors to the target dimension (to sanitize the
        // underlying graphics API calls)
        sc->x0 = PL_MAX(0, PL_MIN(tex->params.w, sc->x0));
        sc->y0 = PL_MAX(0, PL_MIN(tex->params.h, sc->y0));
        sc->x1 = PL_MAX(0, PL_MIN(tex->params.w, sc->x1));
        sc->y1 = PL_MAX(0, PL_MIN(tex->params.h, sc->y1));

        // Scissors wholly outside target -> silently drop pass (also needed
        // to ensure we don't cause UB by specifying invalid scissors)
        if (!pl_rect_w(*sc) || !pl_rect_h(*sc))
            return;

        pl_assert(pl_rect_w(*vp) > 0);
        pl_assert(pl_rect_h(*vp) > 0);
        pl_assert(pl_rect_w(*sc) > 0);
        pl_assert(pl_rect_h(*sc) > 0);
        break;
    }
    case PL_PASS_COMPUTE:
        for (int i = 0; i < PL_ARRAY_SIZE(params->compute_groups); i++) {
            pl_assert(params->compute_groups[i] >= 0);
            pl_assert(params->compute_groups[i] <= gpu->limits.max_dispatch[i]);
        }
        break;
    default: abort();
    }

    if (params->target && !pass->params.load_target)
        pl_tex_invalidate(gpu, params->target);

    return gpu->impl->pass_run(gpu, &new);
}

void pl_gpu_flush(const struct pl_gpu *gpu)
{
    if (gpu->impl->gpu_flush)
        gpu->impl->gpu_flush(gpu);
}

void pl_gpu_finish(const struct pl_gpu *gpu)
{
    gpu->impl->gpu_finish(gpu);
}

// GPU-internal helpers

void pl_buf_pool_uninit(const struct pl_gpu *gpu, struct pl_buf_pool *pool)
{
    for (int i = 0; i < pool->num_buffers; i++)
        pl_buf_destroy(gpu, &pool->buffers[i]);

    talloc_free(pool->buffers);
    *pool = (struct pl_buf_pool) {0};
}

static bool pl_buf_pool_grow(const struct pl_gpu *gpu, struct pl_buf_pool *pool)
{
    const struct pl_buf *buf = pl_buf_create(gpu, &pool->current_params);
    if (!buf)
        return false;

    TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, buf);
    PL_DEBUG(gpu, "Resized buffer pool of type %u to size %d",
             pool->current_params.type, pool->num_buffers);
    return true;
}

const struct pl_buf *pl_buf_pool_get(const struct pl_gpu *gpu,
                                     struct pl_buf_pool *pool,
                                     const struct pl_buf_params *params)
{
    pl_assert(!params->initial_data);

    if (!pl_buf_params_superset(pool->current_params, *params)) {
        pl_buf_pool_uninit(gpu, pool);
        pool->current_params = *params;
    }

    // Make sure we have at least one buffer available
    if (!pool->buffers && !pl_buf_pool_grow(gpu, pool))
        return NULL;

    bool usable = !pl_buf_poll(gpu, pool->buffers[pool->index], 0);
    if (usable)
        goto done;

    if (pool->num_buffers < PL_BUF_POOL_MAX_BUFFERS) {
        if (pl_buf_pool_grow(gpu, pool))
            goto done;

        // Failed growing the buffer pool, so just error out early
        return NULL;
    }

    // Can't resize any further, so just loop until the buffer is usable
    while (pl_buf_poll(gpu, pool->buffers[pool->index], 1000000000)) // 1s
        PL_TRACE(gpu, "Blocked on buffer pool availability! (slow path)");

done: ;
    const struct pl_buf *buf = pool->buffers[pool->index++];
    pool->index %= pool->num_buffers;

    return buf;
}

bool pl_tex_upload_pbo(const struct pl_gpu *gpu, struct pl_buf_pool *pbo,
                       const struct pl_tex_transfer_params *params)
{
    if (params->buf)
        return pl_tex_upload(gpu, params);

    struct pl_buf_params bufparams = {
        .type = PL_BUF_TEX_TRANSFER,
        .size = pl_tex_transfer_size(params),
        .host_writable = true,
    };

    const struct pl_buf *buf = pl_buf_pool_get(gpu, pbo, &bufparams);
    if (!buf)
        return false;

    pl_buf_write(gpu, buf, 0, params->ptr, bufparams.size);

    struct pl_tex_transfer_params newparams = *params;
    newparams.buf = buf;
    newparams.ptr = NULL;

    return pl_tex_upload(gpu, &newparams);
}

bool pl_tex_download_pbo(const struct pl_gpu *gpu, struct pl_buf_pool *pbo,
                         const struct pl_tex_transfer_params *params)
{
    if (params->buf)
        return pl_tex_download(gpu, params);

    struct pl_buf_params bufparams = {
        .type = PL_BUF_TEX_TRANSFER,
        .size = pl_tex_transfer_size(params),
        .host_readable = true,
    };

    const struct pl_buf *buf = pl_buf_pool_get(gpu, pbo, &bufparams);
    if (!buf)
        return false;

    struct pl_tex_transfer_params newparams = *params;
    newparams.buf = buf;
    newparams.ptr = NULL;

    if (!pl_tex_download(gpu, &newparams))
        return false;

    if (pl_buf_poll(gpu, buf, 0)) {
        PL_TRACE(gpu, "pl_tex_download without buffer: blocking (slow path)");
        while (pl_buf_poll(gpu, buf, 1000000)) ; // 1 ms
    }

    return pl_buf_read(gpu, buf, 0, params->ptr, bufparams.size);
}