/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef LIBPLACEBO_GPU_H_
#define LIBPLACEBO_GPU_H_

#include <stddef.h>
#include <stdbool.h>
#include <stdint.h>

#include <libplacebo/common.h>
#include <libplacebo/cache.h>
#include <libplacebo/log.h>

PL_API_BEGIN

// This file contains the definition of an API which is designed to abstract
// away from platform-specific APIs like the various OpenGL variants, Direct3D
// and Vulkan in a common way. It is a much more limited API than those APIs,
// since it only targets the small common subset of features needed to
// implement libplacebo's rendering.
//
// NOTE: Most, but not all, parameter conditions (phrases such as "must" or
// "valid usage") are explicitly tested and result in error messages followed
// by graceful failure. Exceptions are noted where they exist.

// These are not memory managed, and should represent compile-time constants
typedef const char *pl_debug_tag;
#define PL_DEBUG_TAG (__FILE__ ":" PL_TOSTRING(__LINE__))

// Type of a shader input descriptor.
enum pl_desc_type {
    PL_DESC_INVALID = 0,
    PL_DESC_SAMPLED_TEX,    // C: pl_tex*    GLSL: combined texture sampler
                            // (`pl_tex->params.sampleable` must be set)
    PL_DESC_STORAGE_IMG,    // C: pl_tex*    GLSL: storage image
                            // (`pl_tex->params.storable` must be set)
    PL_DESC_BUF_UNIFORM,    // C: pl_buf*    GLSL: uniform buffer
                            // (`pl_buf->params.uniform` must be set)
    PL_DESC_BUF_STORAGE,    // C: pl_buf*    GLSL: storage buffer
                            // (`pl_buf->params.storable` must be set)
    PL_DESC_BUF_TEXEL_UNIFORM,// C: pl_buf*  GLSL: uniform samplerBuffer
                              // (`pl_buf->params.uniform` and `format` must be set)
    PL_DESC_BUF_TEXEL_STORAGE,// C: pl_buf*  GLSL: uniform imageBuffer
                              // (`pl_buf->params.storable` and `format` must be set)
    PL_DESC_TYPE_COUNT
};

// Structure which wraps metadata describing GLSL capabilities.
struct pl_glsl_version {
    int version;        // GLSL version (e.g. 450), for #version
    bool gles;          // GLSL ES semantics (ESSL)
    bool vulkan;        // GL_KHR_vulkan_glsl semantics

    // Compute shader support and limits. If `compute` is false, then all
    // of the remaining fields in this section are {0}.
    bool compute;
    size_t max_shmem_size;      // maximum compute shader shared memory size
    uint32_t max_group_threads; // maximum number of local threads per work group
    uint32_t max_group_size[3]; // maximum work group size per dimension

    // If nonzero, signals availability of shader subgroups. This guarantees
    // availability of all of the following extensions:
    // - GL_KHR_shader_subgroup_basic
    // - GL_KHR_shader_subgroup_vote
    // - GL_KHR_shader_subgroup_arithmetic
    // - GL_KHR_shader_subgroup_ballot
    // - GL_KHR_shader_subgroup_shuffle
    uint32_t subgroup_size;

    // Miscellaneous shader limits
    int16_t min_gather_offset;  // minimum `textureGatherOffset` offset
    int16_t max_gather_offset;  // maximum `textureGatherOffset` offset
};
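
// As an illustrative (non-normative) sketch, a renderer might gate a
// compute-based fast path on these capabilities before emitting any compute
// shaders; `use_compute_path` is a hypothetical flag, and `gpu` refers to a
// `pl_gpu` instance (defined below):
//
//   bool use_compute_path = gpu->glsl.compute &&
//                           gpu->glsl.max_group_threads >= 256 &&
//                           gpu->glsl.max_shmem_size >= 16 * 1024;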

// Backwards compatibility alias
#define pl_glsl_desc pl_glsl_version

// Structure defining the physical limits and capabilities of this GPU
// instance. If a limit is given as 0, that means that feature is unsupported.
struct pl_gpu_limits {
    // --- pl_gpu
    bool thread_safe;           // `pl_gpu` calls are thread-safe
    bool callbacks;             // supports asynchronous GPU callbacks

    // --- pl_buf
    size_t max_buf_size;        // maximum size of any buffer
    size_t max_ubo_size;        // maximum size of a `uniform` buffer
    size_t max_ssbo_size;       // maximum size of a `storable` buffer
    size_t max_vbo_size;        // maximum size of a `drawable` buffer
    size_t max_mapped_size;     // maximum size of a `host_mapped` buffer
    uint64_t max_buffer_texels; // maximum number of texels in a texel buffer
    bool host_cached;           // if true, PL_BUF_MEM_HOST buffers are cached
    size_t max_mapped_vram;     // maximum (known) size of a `host_mapped`
                                // PL_BUF_MEM_DEVICE buffer, or 0 if this
                                // combination is not supported

    // Required alignment for PL_HANDLE_HOST_PTR imports. This is provided
    // merely as a hint to the user. If the host pointer being imported is
    // misaligned, libplacebo will internally round (over-map) the region.
    size_t align_host_ptr;

    // --- pl_tex
    uint32_t max_tex_1d_dim;    // maximum width for a 1D texture
    uint32_t max_tex_2d_dim;    // maximum width/height for a 2D texture (required)
    uint32_t max_tex_3d_dim;    // maximum width/height/depth for a 3D texture
    bool blittable_1d_3d;       // supports blittable 1D/3D textures
    bool buf_transfer;          // supports `pl_tex_transfer_params.buf`

    // These don't represent hard limits but indicate performance hints for
    // optimal alignment. For best performance, the corresponding field
    // should be aligned to a multiple of these. They will always be a power
    // of two.
    size_t align_tex_xfer_pitch;    // optimal `pl_tex_transfer_params.row_pitch`
    size_t align_tex_xfer_offset;   // optimal `pl_tex_transfer_params.buf_offset`

    // --- pl_pass
    size_t max_variable_comps;  // maximum components passed in variables
    size_t max_constants;       // maximum `pl_pass_params.num_constants`
    bool array_size_constants;  // specialization constants can be used to size arrays
    size_t max_pushc_size;      // maximum `push_constants_size`
    size_t align_vertex_stride; // alignment of `pl_pass_params.vertex_stride`
    uint32_t max_dispatch[3];   // maximum dispatch size per dimension

    // Note: At least one of `max_variable_comps` or `max_ubo_size` is
    // guaranteed to be nonzero.

    // As a performance hint, the GPU may signal the number of command queues
    // it has for fragment and compute shaders, respectively. Users may use
    // this information to decide the appropriate type of shader to dispatch.
    uint32_t fragment_queues;
    uint32_t compute_queues;
};
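
// For instance (an illustrative sketch only, not a prescribed policy), a
// caller could prefer compute dispatches when the device advertises more
// compute queues than fragment queues:
//
//   bool prefer_compute = gpu->glsl.compute &&
//       gpu->limits.compute_queues > gpu->limits.fragment_queues;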

// Backwards compatibility aliases
#define max_xfer_size max_buf_size
#define align_tex_xfer_stride align_tex_xfer_pitch

// Some `pl_gpu` operations allow sharing GPU resources with external APIs -
// examples include interop with other graphics APIs such as CUDA, and also
// various hardware decoding APIs. This defines the mechanism underpinning the
// communication of such an interoperation.
typedef uint64_t pl_handle_caps;
enum pl_handle_type {
    PL_HANDLE_FD        = (1 << 0), // `int fd` for POSIX-style APIs
    PL_HANDLE_WIN32     = (1 << 1), // `HANDLE` for win32 API
    PL_HANDLE_WIN32_KMT = (1 << 2), // `HANDLE` for pre-Windows-8 win32 API
    PL_HANDLE_DMA_BUF   = (1 << 3), // `int fd` for a dma_buf fd
    PL_HANDLE_HOST_PTR  = (1 << 4), // `void *` for a host-allocated pointer
    PL_HANDLE_MTL_TEX   = (1 << 5), // `MTLTexture*` for Apple platforms
    PL_HANDLE_IOSURFACE = (1 << 6), // `IOSurfaceRef` for Apple platforms
};

struct pl_gpu_handle_caps {
    pl_handle_caps tex;  // supported handles for `pl_tex` + `pl_shared_mem`
    pl_handle_caps buf;  // supported handles for `pl_buf` + `pl_shared_mem`
    pl_handle_caps sync; // supported handles for semaphores
};

// Wrapper for the handle used to communicate a shared resource externally.
// This handle is owned by the `pl_gpu` - if a user wishes to use it in a way
// that takes over ownership (e.g. importing into some APIs), they must clone
// the handle before doing so (e.g. using `dup` for fds). It is important to
// read the external API documentation _very_ carefully as different handle
// types may be managed in different ways. (e.g. CUDA takes ownership of an fd,
// but does not take ownership of a win32 handle).
union pl_handle {
    int fd;         // PL_HANDLE_FD / PL_HANDLE_DMA_BUF
    void *handle;   // PL_HANDLE_WIN32 / PL_HANDLE_WIN32_KMT / PL_HANDLE_MTL_TEX / PL_HANDLE_IOSURFACE
    void *ptr;      // PL_HANDLE_HOST_PTR
};
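
// As a hedged illustration of the ownership advice above: when handing an
// exported fd to an API that consumes it, duplicate it first so the original
// remains owned by the `pl_gpu` (a sketch; error handling omitted, and
// `buf->shared_mem` refers to the `pl_buf` field defined further below):
//
//   int owned_fd = dup(buf->shared_mem.handle.fd);
//   // pass `owned_fd` to the consuming API; never close the original fd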

// Structure encapsulating memory that is shared between libplacebo and the
// user. This memory can be imported into external APIs using the handle.
//
// If the object a `pl_shared_mem` belongs to is destroyed (e.g. via
// `pl_buf_destroy`), the handle becomes undefined, as do the contents of the
// memory it points to, as well as any external API objects imported from it.
struct pl_shared_mem {
    union pl_handle handle;
    size_t size;   // the total size of the memory referenced by this handle
    size_t offset; // the offset of the object within the referenced memory

    // Note: `size` is optional for some APIs and handle types, in particular
    // when importing DMABUFs or D3D11 textures.

    // For PL_HANDLE_DMA_BUF, this specifies the DRM format modifier that
    // describes this resource. Note that when importing `pl_buf`, this must
    // be DRM_FORMAT_MOD_LINEAR. For importing `pl_tex`, it can be any
    // format modifier supported by the implementation.
    uint64_t drm_format_mod;

    // When importing a `pl_tex` of type PL_HANDLE_DMA_BUF, this can be used to
    // set the image stride (AKA pitch) in memory. If left as 0, defaults to
    // the image width/height.
    size_t stride_w;
    size_t stride_h;

    // When importing a `pl_tex` of type PL_HANDLE_MTL_TEX, this determines
    // which plane is imported (0 - 2).
    unsigned plane;
};
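
// Illustrative sketch of filling in a `pl_shared_mem` for a linear DMA-BUF
// import (the values are assumptions; `fd`, `total_size`, `plane_offset` and
// `stride` would come from whatever API exported the buffer):
//
//   struct pl_shared_mem shmem = {
//       .handle.fd      = fd,
//       .size           = total_size,
//       .offset         = plane_offset,
//       .drm_format_mod = DRM_FORMAT_MOD_LINEAR, // from <drm_fourcc.h>
//       .stride_w       = stride,                // image stride, as documented above
//   };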

// Structure grouping PCI bus address fields for GPU devices
struct pl_gpu_pci_address {
    uint32_t domain;
    uint32_t bus;
    uint32_t device;
    uint32_t function;
};

typedef const struct pl_fmt_t *pl_fmt;

// Abstract device context which wraps an underlying graphics context and can
// be used to dispatch rendering commands.
//
// Thread-safety: Depends on `pl_gpu_limits.thread_safe`
typedef const struct pl_gpu_t {
    pl_log log;

    struct pl_glsl_version glsl; // GLSL features supported by this GPU
    struct pl_gpu_limits limits; // physical device limits and capabilities

    // Fields relevant to external API interop. If the underlying device does
    // not support interop with other APIs, these will all be {0}.
    struct pl_gpu_handle_caps export_caps; // supported handles for exporting
    struct pl_gpu_handle_caps import_caps; // supported handles for importing
    uint8_t uuid[16];                      // underlying device UUID

    // Supported texture formats, in preference order. (If there are multiple
    // similar formats, the "better" ones come first)
    pl_fmt *formats;
    int num_formats;

    // PCI Bus address of the underlying device, to help with interop.
    // This will only be filled in if interop is supported.
    struct pl_gpu_pci_address pci;
} *pl_gpu;
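
// A minimal sketch of enumerating the reported formats (purely illustrative;
// assumes a valid `pl_gpu gpu` obtained from one of the API-specific
// backends, and <stdio.h> for printf):
//
//   for (int i = 0; i < gpu->num_formats; i++) {
//       pl_fmt fmt = gpu->formats[i];
//       printf("%s: %d components, caps 0x%x\n",
//              fmt->name, fmt->num_components, (unsigned) fmt->caps);
//   }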

// Attach a pl_cache object to this GPU instance. This cache will be
// used to cache all compiled shaders, as well as several other shader objects
// (e.g. cached 3DLUTs). Calling this with `cache = NULL` disables the cache.
//
// Note: Calling this after shaders have already been compiled will not
// retroactively add those shaders to the cache, so it's recommended to set
// this early, before creating any passes.
PL_API void pl_gpu_set_cache(pl_gpu gpu, pl_cache cache);
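
// A hedged usage sketch (assuming the `pl_cache_create` / `pl_cache_params`
// helpers from <libplacebo/cache.h>): create a cache once, attach it before
// building any passes, and free it once it is no longer attached.
//
//   pl_cache cache = pl_cache_create(pl_cache_params( .log = gpu->log ));
//   pl_gpu_set_cache(gpu, cache);
//   /* ... create passes, render ... */
//   pl_gpu_set_cache(gpu, NULL);
//   pl_cache_destroy(&cache);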

enum pl_fmt_type {
    PL_FMT_UNKNOWN = 0, // also used for inconsistent multi-component formats
    PL_FMT_UNORM,       // unsigned, normalized integer format (sampled as float)
    PL_FMT_SNORM,       // signed, normalized integer format (sampled as float)
    PL_FMT_UINT,        // unsigned integer format (sampled as integer)
    PL_FMT_SINT,        // signed integer format (sampled as integer)
    PL_FMT_FLOAT,       // (signed) float formats, any bit size
    PL_FMT_TYPE_COUNT,
};

enum pl_fmt_caps {
    PL_FMT_CAP_SAMPLEABLE    = 1 << 0,  // may be sampled from (PL_DESC_SAMPLED_TEX)
    PL_FMT_CAP_STORABLE      = 1 << 1,  // may be used as storage image (PL_DESC_STORAGE_IMG)
    PL_FMT_CAP_LINEAR        = 1 << 2,  // may be linearly sampled from (PL_TEX_SAMPLE_LINEAR)
    PL_FMT_CAP_RENDERABLE    = 1 << 3,  // may be rendered to (pl_pass_params.target_fmt)
    PL_FMT_CAP_BLENDABLE     = 1 << 4,  // may be blended to (pl_pass_params.enable_blend)
    PL_FMT_CAP_BLITTABLE     = 1 << 5,  // may be blitted from/to (pl_tex_blit)
    PL_FMT_CAP_VERTEX        = 1 << 6,  // may be used as a vertex attribute
    PL_FMT_CAP_TEXEL_UNIFORM = 1 << 7,  // may be used as a texel uniform buffer
    PL_FMT_CAP_TEXEL_STORAGE = 1 << 8,  // may be used as a texel storage buffer
    PL_FMT_CAP_HOST_READABLE = 1 << 9,  // may be used with `host_readable` textures
    PL_FMT_CAP_READWRITE     = 1 << 10, // may be used with PL_DESC_ACCESS_READWRITE

    // Notes:
    // - PL_FMT_CAP_LINEAR also implies PL_FMT_CAP_SAMPLEABLE
    // - PL_FMT_CAP_STORABLE also implies `pl_gpu.glsl.compute`
    // - PL_FMT_CAP_BLENDABLE implies PL_FMT_CAP_RENDERABLE
    // - PL_FMT_CAP_VERTEX implies that the format is non-opaque
    // - PL_FMT_CAP_HOST_READABLE implies that the format is non-opaque
};

struct pl_fmt_plane {
    // Underlying format of this particular sub-plane. This describes the
    // components, texel size and host representation for the purpose of
    // e.g. transfers, blits, and sampling.
    pl_fmt format;

    // X/Y subsampling shift factor for this plane.
    uint8_t shift_x, shift_y;
};

// Structure describing a texel/vertex format.
struct pl_fmt_t {
    const char *name;       // symbolic name for this format (e.g. rgba32f)
    uint64_t signature;     // unique but stable signature (for pass reusability)

    enum pl_fmt_type type;  // the format's data type and interpretation
    enum pl_fmt_caps caps;  // the features supported by this format
    int num_components;     // number of components for this format
    int component_depth[4]; // meaningful bits per component, texture precision
    size_t internal_size;   // internal texel size (for blit compatibility)

    // For planar formats, this provides a description of each sub-plane.
    //
    // Note on planar formats: Planar formats are always opaque and typically
    // support only a limited subset of capabilities (or none at all). Access
    // should be done via sub-planes. (See `pl_tex.planes`)
    struct pl_fmt_plane planes[4];
    int num_planes;         // or 0 for non-planar textures

    // This controls the relationship between the data as seen by the host and
    // the way it's interpreted by the texture. The host representation is
    // always tightly packed (no padding bits in between each component).
    //
    // This representation assumes little endian ordering, i.e. components
    // being ordered from LSB to MSB in memory. Note that for oddly packed
    // formats like rgb10a2 or rgb565, this is inconsistent with the naming.
    // (That is to say, rgb565 has sample order {2, 1, 0} under this convention
    // - because rgb565 treats the R channel as the *most* significant bits)
    //
    // If `opaque` is true, then there's no meaningful correspondence between
    // the two, and all of the remaining fields in this section are unset.
    //
    // If `emulated` is true, then this format doesn't actually exist on the
    // GPU as an uploadable texture format - and any apparent support is being
    // emulated (typically using compute shaders in the upload path).
    bool opaque;
    bool emulated;
    size_t texel_size;      // total size in bytes per texel
    size_t texel_align;     // texel alignment requirements (bytes)
    int host_bits[4];       // number of meaningful bits in host memory
    int sample_order[4];    // sampled index for each component, e.g.
                            // {2, 1, 0, 3} for BGRA textures

    // For sampleable formats, this bool indicates whether or not the format
    // is compatible with `textureGather()`
    bool gatherable;

    // If usable as a vertex or texel buffer format, this gives the GLSL type
    // corresponding to the data. (e.g. vec4)
    const char *glsl_type;

    // If usable as a storage image or texel storage buffer
    // (PL_FMT_CAP_STORABLE / PL_FMT_CAP_TEXEL_STORAGE), this gives the GLSL
    // texel format corresponding to the format (e.g. rgba16ui), if any. This
    // field may be NULL, in which case the format modifier may be left
    // unspecified.
    const char *glsl_format;

    // If available, this gives the fourcc associated with the host
    // representation. In particular, this is intended for use with
    // PL_HANDLE_DMA_BUF, where this field will match the DRM format from
    // <drm_fourcc.h>. May be 0, for formats without matching DRM fourcc.
    uint32_t fourcc;

    // If `fourcc` is set, this contains the list of supported drm format
    // modifiers for this format.
    const uint64_t *modifiers;
    int num_modifiers;
};

// Returns whether or not a pl_fmt's components are ordered sequentially
// in memory in the order RGBA.
PL_API bool pl_fmt_is_ordered(pl_fmt fmt);

// Returns whether or not a pl_fmt is sampled as a float (e.g. UNORM)
PL_API bool pl_fmt_is_float(pl_fmt fmt);

// Returns whether or not a pl_fmt supports a given DRM modifier.
PL_API bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier);

// Helper function to find a format with a given number of components and
// minimum effective precision per component. If `host_bits` is set, then the
// format will always be non-opaque, unpadded, ordered and have exactly this
// bit depth for each component. Finally, all `caps` must be supported.
PL_API pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components,
                          int min_depth, int host_bits, enum pl_fmt_caps caps);

// Finds a vertex format for a given configuration. The resulting vertex will
// have a component depth equal to the size of the corresponding host type.
// (e.g. PL_FMT_FLOAT will always have sizeof(float))
PL_API pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components);

// Find a format based on its name.
PL_API pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name);

// Find a format based on its fourcc.
PL_API pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc);
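
// Illustrative sketch of format selection (the requirements here are
// assumptions: a 4-component float format with at least 16 meaningful bits
// per component that can be linearly sampled):
//
//   pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 0,
//                            PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR);
//   if (!fmt) {
//       /* no suitable format; fall back to lower precision or abort */
//   }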

// A generic 'timer query' object. These can be used to measure an
// approximation of the GPU execution time of a given operation. Due to the
// highly asynchronous nature of GPUs, the actual results of any individual
// timer query may be delayed by quite a bit. As such, users should avoid
// trying to pair any particular GPU command with any particular timer query
// result, and only reuse `pl_timer` objects with identical operations. The
// results of timer queries are guaranteed to be in-order, but individual
// queries may be dropped, and some operations might not record timer results
// at all. (For example, if the underlying hardware does not support timer
// queries for a given operation type)
//
// Thread-safety: Unsafe
typedef struct pl_timer_t *pl_timer;

// Creates a new timer object. This may return NULL, for example if the
// implementation does not support timers, but since passing NULL to
// `pl_timer_destroy` and `pl_timer_query` is safe, users generally need not
// concern themselves with handling this.
PL_API pl_timer pl_timer_create(pl_gpu gpu);
PL_API void pl_timer_destroy(pl_gpu gpu, pl_timer *);

// Queries any results that have been measured since the last execution of
// `pl_timer_query`. There may be more than one result, in which case the user
// should simply call the function again to get the subsequent values. This
// function returns a value of 0 in the event that there are no more
// unprocessed results.
//
// The results are reported in nanoseconds, but the actual precision of the
// timestamp queries may be significantly lower.
//
// Note: Results do not queue up indefinitely. Generally, the implementation
// will only keep track of a small, fixed number of results internally. Make
// sure to include this function as part of your main rendering loop to process
// all of its results, or older results will be overwritten by newer ones.
PL_API uint64_t pl_timer_query(pl_gpu gpu, pl_timer);
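
// Illustrative sketch of draining timer results once per iteration of the
// render loop (the timer is assumed to have been attached to some recurring
// operation that accepts a `pl_timer`; `render_frame` and `log_gpu_time_ns`
// are hypothetical helpers):
//
//   pl_timer timer = pl_timer_create(gpu);
//   while (render_frame()) {
//       uint64_t ns;
//       while ((ns = pl_timer_query(gpu, timer)))
//           log_gpu_time_ns(ns);
//   }
//   pl_timer_destroy(gpu, &timer);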

enum pl_buf_mem_type {
    PL_BUF_MEM_AUTO = 0, // use whatever seems most appropriate
    PL_BUF_MEM_HOST,     // try allocating from host memory (RAM)
    PL_BUF_MEM_DEVICE,   // try allocating from device memory (VRAM)
    PL_BUF_MEM_TYPE_COUNT,

    // Note: This distinction only matters for discrete GPUs
};

// Structure describing a buffer.
struct pl_buf_params {
    size_t size;        // size in bytes (must be <= `pl_gpu_limits.max_buf_size`)
    bool host_writable; // contents may be updated via pl_buf_write()
    bool host_readable; // contents may be read back via pl_buf_read()
    bool host_mapped;   // create a persistent, RW mapping (pl_buf.data)

    // May be used as PL_DESC_BUF_UNIFORM or PL_DESC_BUF_TEXEL_UNIFORM.
    // Requires `size <= pl_gpu_limits.max_ubo_size`
    bool uniform;

    // May be used as PL_DESC_BUF_STORAGE or PL_DESC_BUF_TEXEL_STORAGE.
    // Requires `size <= pl_gpu_limits.max_ssbo_size`
    bool storable;

    // May be used as the source of vertex data for `pl_pass_run`.
    bool drawable;

    // Provide a hint for the memory type you want to use when allocating
    // this buffer's memory.
    //
    // Note: Restrictions may apply depending on the usage flags. In
    // particular, allocating buffers with `uniform` or `storable` enabled from
    // non-device memory will almost surely fail.
    enum pl_buf_mem_type memory_type;

    // Setting this to a format with the `PL_FMT_CAP_TEXEL_*` capability allows
    // this buffer to be used as a `PL_DESC_BUF_TEXEL_*`, when `uniform` and
    // `storable` are respectively also enabled.
    pl_fmt format;

    // At most one of `export_handle` and `import_handle` can be set for a
    // buffer.

    // Setting this indicates that the memory backing this buffer should be
    // shared with external APIs. If so, this must be exactly *one* of
    // `pl_gpu.export_caps.buf`.
    enum pl_handle_type export_handle;

    // Setting this indicates that the memory backing this buffer will be
    // imported from an external API. If so, this must be exactly *one* of
    // `pl_gpu.import_caps.buf`.
    enum pl_handle_type import_handle;

    // If the shared memory is being imported, the import handle must be
    // specified here. Otherwise, this is ignored.
    struct pl_shared_mem shared_mem;

    // If non-NULL, the buffer will be created with these contents. Otherwise,
    // the initial data is undefined. Using this does *not* require setting
    // host_writable.
    const void *initial_data;

    // Arbitrary user data. libplacebo does not use this at all.
    void *user_data;

    // Arbitrary identifying tag. Used only for debugging purposes.
    pl_debug_tag debug_tag;
};

#define pl_buf_params(...) (&(struct pl_buf_params) {   \
        .debug_tag = PL_DEBUG_TAG,                      \
        __VA_ARGS__                                     \
    })

// A generic buffer, which can be used for multiple purposes (texture transfer,
// storage buffer, uniform buffer, etc.)
//
// Note on efficiency: A pl_buf does not necessarily represent a true "buffer"
// object on the underlying graphics API. It may also refer to a sub-slice of
// a larger buffer, depending on the implementation details of the GPU. The
// bottom line is that users do not need to worry about the efficiency of using
// many small pl_buf objects. Having many small pl_bufs, even lots of few-byte
// vertex buffers, is designed to be completely fine.
//
// Thread-safety: Unsafe
typedef const struct pl_buf_t {
    struct pl_buf_params params;
    uint8_t *data; // for persistently mapped buffers, points to the first byte

    // If `params.export_handle` or `params.import_handle` is set, this
    // structure references the shared memory backing this buffer, via the
    // requested handle type.
    //
    // While this buffer is not in an "exported" state, the contents of the
    // memory are undefined. (See: `pl_buf_export`)
    struct pl_shared_mem shared_mem;
} *pl_buf;

// Create a buffer. The type of buffer depends on the parameters. The buffer
// parameters must adhere to the restrictions imposed by the pl_gpu_limits.
// Returns NULL on failure.
//
// For buffers with shared memory, the buffer is considered to be in an
// "exported" state by default, and may be used directly by the external API
// after being created (until the first libplacebo operation on the buffer).
PL_API pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params);
PL_API void pl_buf_destroy(pl_gpu gpu, pl_buf *buf);
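
// Illustrative sketch of creating and destroying a small uniform buffer with
// the `pl_buf_params` convenience macro (the size and contents here are
// assumptions):
//
//   float coeffs[16] = { /* ... */ };
//   pl_buf ubo = pl_buf_create(gpu, pl_buf_params(
//       .size          = sizeof(coeffs),
//       .uniform       = true,
//       .host_writable = true,
//       .initial_data  = coeffs,
//   ));
//   if (!ubo) {
//       /* handle failure */
//   }
//   /* ... bind as PL_DESC_BUF_UNIFORM, render ... */
//   pl_buf_destroy(gpu, &ubo);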

// This behaves like `pl_buf_create`, but if the buffer already exists and has
// incompatible parameters, it will get destroyed first. A buffer is considered
// "compatible" if it has the same buffer type and texel format, a size greater
// than or equal to the requested size, and it has a superset of the features
// the user requested. After this operation, the contents of the buffer are
// undefined.
//
// Note: Due to its unpredictability, it's not allowed to use this with
// `params->initial_data` being set. Similarly, it's not allowed on a buffer
// with `params->export_handle`, since this may invalidate the corresponding
// external API's handle. Conversely, it *is* allowed on a buffer with
// `params->host_mapped`, and the corresponding `buf->data` pointer *may*
// change as a result of doing so.
//
// Note: If the `user_data` alone changes, this does not trigger a buffer
// recreation. In theory, this can be used to detect when the buffer ended
// up being recreated.
PL_API bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params);
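
// Illustrative sketch of reusing a persistent vertex buffer across frames,
// recreating it only when the parameters no longer fit (`num_verts`,
// `vertices` and `struct my_vertex` are hypothetical; assumes the return
// value indicates whether a usable buffer is available):
//
//   static pl_buf vbo; // NULL on first use
//   size_t needed = num_verts * sizeof(struct my_vertex);
//   if (pl_buf_recreate(gpu, &vbo, pl_buf_params(
//           .size          = needed,
//           .drawable      = true,
//           .host_writable = true,
//       )))
//   {
//       pl_buf_write(gpu, vbo, 0, vertices, needed);
//   }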

// Update the contents of a buffer, starting at a given offset (must be a
// multiple of 4) and up to a given size, with the contents of *data.
//
// This function will block until the buffer is no longer in use. Use
// `pl_buf_poll` to perform non-blocking queries of buffer availability.
//
// Note: This function can incur synchronization overhead, so it shouldn't be
// used in tight loops. If you do need to loop (e.g. to perform a strided
// write), consider using host-mapped buffers, or assembling the contents in
// RAM first, before calling this function.
PL_API void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset,
                         const void *data, size_t size);

// Read back the contents of a buffer, starting at a given offset, storing the
// data into *dest. Returns whether successful.
//
// This function will block until the buffer is no longer in use. Use
// `pl_buf_poll` to perform non-blocking queries of buffer availability.
PL_API bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset,
                        void *dest, size_t size);

// Copy `size` bytes from one buffer to another, reading from and writing to
// the respective offsets.
PL_API void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset,
                        pl_buf src, size_t src_offset, size_t size);
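
// Illustrative round trip (offsets and sizes are assumptions; `buf` is
// assumed to have been created with both `host_writable` and
// `host_readable`): update the first 64 bytes, then read them back.
//
//   uint8_t src[64] = { /* ... */ }, dst[64];
//   pl_buf_write(gpu, buf, 0, src, sizeof(src));
//   if (!pl_buf_read(gpu, buf, 0, dst, sizeof(dst))) {
//       /* handle failure */
//   }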

// Initiates a buffer export operation, allowing a buffer to be accessed by an
// external API. This is only valid for buffers created with a shared memory
// handle (`params.export_handle` or `params.import_handle`). Calling this
// twice in a row is a harmless no-op. Returns whether successful.
//
// There is no corresponding "buffer import" operation; the next libplacebo
// operation that touches the buffer (e.g. pl_tex_upload, but also pl_buf_write
// and pl_buf_read) will implicitly import the buffer back to libplacebo. Users
// must ensure that all pending operations made by the external API are fully
// completed before using it in libplacebo again. (Otherwise, the behaviour
// is undefined)
//
// Please note that this function returning does not mean the memory is
// immediately available as such. In general, it will mark a buffer as "in use"
// in the same way any other buffer operation would, and it is the user's
// responsibility to wait until `pl_buf_poll` returns false before accessing
// the memory from the external API.
//
// In terms of the access performed by this operation, it is not considered a
// "read" or "write" and therefore does not technically conflict with reads or
// writes to the buffer performed by the host (via mapped memory - any use of
// `pl_buf_read` or `pl_buf_write` would defeat the purpose of the export).
// However, restrictions made by the external API may apply that prevent this.
//
// The recommended use pattern is something like this:
//
// while (loop) {
//    pl_buf buf = get_free_buffer(); // or block on pl_buf_poll
//    // write to the buffer using the external API
//    pl_tex_upload(gpu, /* ... buf ... */); // implicitly imports
//    pl_buf_export(gpu, buf);
// }
//
// i.e. perform an external API operation, then use and immediately export the
// buffer in libplacebo, and finally wait until `pl_buf_poll` is false before
// re-using it in the external API. (Or get a new buffer in the meantime)
PL_API bool pl_buf_export(pl_gpu gpu, pl_buf buf);
     626                 :            : 
     627                 :            : // Returns whether or not a buffer is currently "in use". This can either be
     628                 :            : // because of a pending read operation, a pending write operation or a pending
     629                 :            : // buffer export operation. Any access to the buffer by external APIs or via
     630                 :            : // the host pointer (for host-mapped buffers) is forbidden while a buffer is
     631                 :            : // "in use". The only exception to this rule is multiple reads, for example
     632                 :            : // reading from a buffer with `pl_tex_upload` while simultaneously reading from
     633                 :            : // it using mapped memory.
     634                 :            : //
     635                 :            : // The `timeout`, specified in nanoseconds, indicates how long to block for
     636                 :            : // before returning. If set to 0, this function will never block, and only
     637                 :            : // returns the current status of the buffer. The actual precision of the
     638                 :            : // timeout may be significantly longer than one nanosecond, and has no upper
     639                 :            : // bound. This function does not provide hard latency guarantees. This function
     640                 :            : // may also return at any time, even if the buffer is still in use. If the user
     641                 :            : // wishes to block until the buffer is definitely no longer in use, the
     642                 :            : // recommended usage is:
     643                 :            : //
     644                 :            : // while (pl_buf_poll(gpu, buf, UINT64_MAX))
     645                 :            : //      ; // do nothing
     646                 :            : //
     647                 :            : // Note: libplacebo operations on buffers are always internally synchronized,
     648                 :            : // so this is only needed for host-mapped or externally exported buffers.
     649                 :            : // However, it may be used to do non-blocking queries before calling blocking
     650                 :            : // functions such as `pl_buf_read`.
     651                 :            : //
     652                 :            : // Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly
     653                 :            : // synchronized, meaning it can safely be called on a `pl_buf` that is in use
     654                 :            : // by another thread.
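                         :            : //
                         :            : // As a sketch (assuming `gpu` and `buf` have already been obtained), a
                         :            : // non-blocking status query might look like this:
                         :            : //
                         :            : // if (!pl_buf_poll(gpu, buf, 0)) {
                         :            : //     // the buffer is idle: its mapped memory may be accessed, and
                         :            : //     // blocking calls like `pl_buf_read` will not stall on GPU work
                         :            : // }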
     655                 :            : PL_API bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout);
     656                 :            : 
     657                 :            : enum pl_tex_sample_mode {
     658                 :            :     PL_TEX_SAMPLE_NEAREST,  // nearest neighbour sampling
     659                 :            :     PL_TEX_SAMPLE_LINEAR,   // linear filtering, requires PL_FMT_CAP_LINEAR
     660                 :            :     PL_TEX_SAMPLE_MODE_COUNT,
     661                 :            : };
     662                 :            : 
     663                 :            : enum pl_tex_address_mode {
     664                 :            :     PL_TEX_ADDRESS_CLAMP,  // clamp the nearest edge texel
     665                 :            :     PL_TEX_ADDRESS_REPEAT, // repeat (tile) the texture
     666                 :            :     PL_TEX_ADDRESS_MIRROR, // repeat (mirror) the texture
     667                 :            :     PL_TEX_ADDRESS_MODE_COUNT,
     668                 :            : };
     669                 :            : 
     670                 :            : // Structure describing a texture.
     671                 :            : struct pl_tex_params {
     672                 :            :     int w, h, d;            // physical dimension; unused dimensions must be 0
     673                 :            :     pl_fmt format;
     674                 :            : 
     675                 :            :     // The following bools describe what operations can be performed. The
     676                 :            :     // corresponding pl_fmt capability must be set for every enabled
     677                 :            :     // operation type.
     678                 :            :     //
     679                 :            :     // Note: For planar formats, it is also possible to set capabilities only
     680                 :            :     // supported by sub-planes. In this case, the corresponding functionality
     681                 :            :     // will be available for the sub-plane, but not the planar texture itself.
     682                 :            :     bool sampleable;    // usable as a PL_DESC_SAMPLED_TEX
     683                 :            :     bool renderable;    // usable as a render target (pl_pass_run)
     684                 :            :                         // (must only be used with 2D textures)
     685                 :            :     bool storable;      // usable as a storage image (PL_DESC_IMG_*)
     686                 :            :     bool blit_src;      // usable as a blit source
     687                 :            :     bool blit_dst;      // usable as a blit destination
     688                 :            :     bool host_writable; // may be updated with pl_tex_upload()
     689                 :            :     bool host_readable; // may be fetched with pl_tex_download()
     690                 :            : 
     691                 :            :     // Note: For `blit_src`, `blit_dst`, the texture must either be
     692                 :            :     // 2-dimensional or `pl_gpu_limits.blittable_1d_3d` must be set.
     693                 :            : 
     694                 :            :     // At most one of `export_handle` and `import_handle` can be set for a
     695                 :            :     // texture.
     696                 :            : 
     697                 :            :     // Setting this indicates that the memory backing this texture should be
     698                 :            : // shared with external APIs. If so, this must be exactly *one* of
     699                 :            :     // `pl_gpu.export_caps.tex`.
     700                 :            :     enum pl_handle_type export_handle;
     701                 :            : 
     702                 :            :     // Setting this indicates that the memory backing this texture will be
     703                 :            :     // imported from an external API. If so, this must be exactly *one* of
     704                 :            :     // `pl_gpu.import_caps.tex`. Mutually exclusive with `initial_data`.
     705                 :            :     enum pl_handle_type import_handle;
     706                 :            : 
     707                 :            :     // If the shared memory is being imported, the import handle must be
     708                 :            :     // specified here. Otherwise, this is ignored.
     709                 :            :     struct pl_shared_mem shared_mem;
     710                 :            : 
     711                 :            :     // If non-NULL, the texture will be created with these contents (tightly
     712                 :            :     // packed). Using this does *not* require setting host_writable. Otherwise,
     713                 :            :     // the initial data is undefined. Mutually exclusive with `import_handle`.
     714                 :            :     const void *initial_data;
     715                 :            : 
     716                 :            :     // Arbitrary user data. libplacebo does not use this at all.
     717                 :            :     void *user_data;
     718                 :            : 
     719                 :            :     // Arbitrary identifying tag. Used only for debugging purposes.
     720                 :            :     pl_debug_tag debug_tag;
     721                 :            : };
     722                 :            : 
     723                 :            : #define pl_tex_params(...) (&(struct pl_tex_params) {   \
     724                 :            :         .debug_tag = PL_DEBUG_TAG,                      \
     725                 :            :         __VA_ARGS__                                     \
     726                 :            :     })
     727                 :            : 
     728                 :            : static inline int pl_tex_params_dimension(const struct pl_tex_params params)
     729                 :            : {
     730   [ +  +  +  +  :      19414 :     return params.d ? 3 : params.h ? 2 : 1;
          +  +  +  +  +  
          +  +  +  +  +  
                   +  + ]
     731                 :            : }
     732                 :            : 
     733                 :            : enum pl_sampler_type {
     734                 :            :     PL_SAMPLER_NORMAL,      // gsampler2D, gsampler3D etc.
     735                 :            :     PL_SAMPLER_RECT,        // gsampler2DRect
     736                 :            :     PL_SAMPLER_EXTERNAL,    // gsamplerExternalOES
     737                 :            :     PL_SAMPLER_TYPE_COUNT,
     738                 :            : };
     739                 :            : 
     740                 :            : // Conflates the following typical GPU API concepts:
     741                 :            : // - texture itself
     742                 :            : // - sampler state
     743                 :            : // - staging buffers for texture upload
     744                 :            : // - framebuffer objects
     745                 :            : // - wrappers for swapchain framebuffers
     746                 :            : // - synchronization needed for upload/rendering/etc.
     747                 :            : //
     748                 :            : // Essentially a pl_tex can be anything ranging from a normal texture, a wrapped
     749                 :            : // external/real framebuffer, a framebuffer object + texture pair, a mapped
     750                 :            : // texture (via pl_hwdec), or other sorts of things that can be sampled from
     751                 :            : // and/or rendered to.
     752                 :            : //
     753                 :            : // Thread-safety: Unsafe
     754                 :            : typedef const struct pl_tex_t *pl_tex;
     755                 :            : struct pl_tex_t {
     756                 :            :     struct pl_tex_params params;
     757                 :            : 
     758                 :            :     // If `params.format` is a planar format, this contains `pl_tex` handles
     759                 :            :     // encapsulating individual texture planes. Conversely, if this is a
     760                 :            :     // sub-plane of a planar texture, `parent` points to the planar texture.
     761                 :            :     //
     762                 :            :     // Note: Calling `pl_tex_destroy` on sub-planes is undefined behavior.
     763                 :            :     pl_tex planes[4];
     764                 :            :     pl_tex parent;
     765                 :            : 
     766                 :            :     // If `params.export_handle` is set, this structure references the shared
     767                 :            :     // memory backing this texture, via the requested handle type.
     768                 :            :     //
     769                 :            :     // While this texture is not in an "exported" state, the contents of the
     770                 :            :     // memory are undefined. (See: `pl_tex_export`)
     771                 :            :     //
     772                 :            :     // Note: Due to vulkan driver limitations, `shared_mem.drm_format_mod` will
     773                 :            :     // currently always be set to DRM_FORMAT_MOD_INVALID. No guarantee can be
     774                 :            :     // made about the cross-driver compatibility of textures exported this way.
     775                 :            :     struct pl_shared_mem shared_mem;
     776                 :            : 
     777                 :            :     // If `params.sampleable` is true, this indicates the correct sampler type
     778                 :            :     // to use when sampling from this texture.
     779                 :            :     enum pl_sampler_type sampler_type;
     780                 :            : };
     781                 :            : 
     782                 :            : // Create a texture (with undefined contents). Returns NULL on failure. This is
     783                 :            : // assumed to be an expensive/rare operation, and may need to perform memory
     784                 :            : // allocation or framebuffer creation.
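                         :            : //
                         :            : // As a sketch, creating and destroying a 2D texture might look like this
                         :            : // (`gpu` and a suitable sampleable, host-writable `fmt` are assumed to
                         :            : // have been queried beforehand):
                         :            : //
                         :            : // pl_tex tex = pl_tex_create(gpu, pl_tex_params(
                         :            : //     .w             = 1920,
                         :            : //     .h             = 1080,
                         :            : //     .format        = fmt,
                         :            : //     .sampleable    = true,
                         :            : //     .host_writable = true,
                         :            : // ));
                         :            : // if (!tex)
                         :            : //     return false; // creation failed
                         :            : // /* ... use the texture ... */
                         :            : // pl_tex_destroy(gpu, &tex);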
     785                 :            : PL_API pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params);
     786                 :            : PL_API void pl_tex_destroy(pl_gpu gpu, pl_tex *tex);
     787                 :            : 
     788                 :            : // This works like `pl_tex_create`, but if the texture already exists and has
     789                 :            : // incompatible texture parameters, it will get destroyed first. A texture is
     790                 :            : // considered "compatible" if it has the same texture format and sample/address
     791                 :            : // mode and it supports a superset of the features the user requested.
     792                 :            : //
     793                 :            : // Even if the texture is not recreated, calling this function will still
     794                 :            : // invalidate the contents of the texture. (Note: Because of this,
     795                 :            : // `initial_data` may not be used with `pl_tex_recreate`. Doing so is an error)
     796                 :            : //
     797                 :            : // Note: If the `user_data` alone changes, this does not trigger a texture
     798                 :            : // recreation. In theory, this can be used to detect when the texture ended
     799                 :            : // up being recreated.
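                         :            : //
                         :            : // A typical per-frame sketch (the dimensions and `fmt` are assumed to come
                         :            : // from the current frame):
                         :            : //
                         :            : // bool ok = pl_tex_recreate(gpu, &tex, pl_tex_params(
                         :            : //     .w          = frame_w,
                         :            : //     .h          = frame_h,
                         :            : //     .format     = fmt,
                         :            : //     .sampleable = true,
                         :            : //     .renderable = true,
                         :            : // ));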
     800                 :            : PL_API bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params);
     801                 :            : 
     802                 :            : // Invalidates the contents of a texture. After this, the contents are fully
     803                 :            : // undefined.
     804                 :            : PL_API void pl_tex_invalidate(pl_gpu gpu, pl_tex tex);
     805                 :            : 
     806                 :            : union pl_clear_color {
     807                 :            :     float f[4];
     808                 :            :     int32_t i[4];
     809                 :            :     uint32_t u[4];
     810                 :            : };
     811                 :            : 
     812                 :            : // Clear the dst texture with the given color (rgba). This is functionally
     813                 :            : // identical to a blit operation, which means `dst->params.blit_dst` must be
     814                 :            : // set.
     815                 :            : PL_API void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color);
     816                 :            : 
     817                 :            : // Wrapper for `pl_tex_clear_ex` which only works for floating point textures.
     818                 :            : PL_API void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]);
     819                 :            : 
     820                 :            : struct pl_tex_blit_params {
     821                 :            :     // The texture to blit from. Must have `params.blit_src` enabled.
     822                 :            :     pl_tex src;
     823                 :            : 
     824                 :            :     // The texture to blit to. Must have `params.blit_dst` enabled, and a
     825                 :            :     // format that is loosely compatible with `src`. This essentially means
     826                 :            :     // that they must have the same `internal_size`. Additionally, UINT
     827                 :            :     // textures can only be blitted to other UINT textures, and SINT textures
     828                 :            :     // can only be blitted to other SINT textures.
     829                 :            :     pl_tex dst;
     830                 :            : 
     831                 :            :     // The region of the source texture to blit. Must be within the texture
     832                 :            :     // bounds of `src`. May be flipped. (Optional)
     833                 :            :     pl_rect3d src_rc;
     834                 :            : 
     835                 :            :     // The region of the destination texture to blit into. Must be within the
     836                 :            :     // texture bounds of `dst`. May be flipped. Areas outside of `dst_rc` in
     837                 :            :     // `dst` are preserved. (Optional)
     838                 :            :     pl_rect3d dst_rc;
     839                 :            : 
     840                 :            :     // If `src_rc` and `dst_rc` have different sizes, the texture will be
     841                 :            :     // scaled using the given texture sampling mode.
     842                 :            :     enum pl_tex_sample_mode sample_mode;
     843                 :            : };
     844                 :            : 
     845                 :            : #define pl_tex_blit_params(...) (&(struct pl_tex_blit_params) { __VA_ARGS__ })
     846                 :            : 
     847                 :            : // Copy a sub-rectangle from one texture to another.
     848                 :            : PL_API void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params);
     849                 :            : 
     850                 :            : // Structure describing a texture transfer operation.
     851                 :            : struct pl_tex_transfer_params {
     852                 :            :     // Texture to transfer to/from. Depending on the type of the operation,
     853                 :            :     // this must have params.host_writable (uploads) or params.host_readable
     854                 :            :     // (downloads) set, respectively.
     855                 :            :     pl_tex tex;
     856                 :            : 
     857                 :            :     // Note: Superfluous parameters are ignored, i.e. for a 1D texture, the y
     858                 :            :     // and z fields of `rc`, as well as the corresponding pitches, are ignored.
     859                 :            :     // In all other cases, the pitch must be large enough to contain the
     860                 :            :     // corresponding dimension of `rc`, and the `rc` must be normalized and
     861                 :            :     // fully contained within the image dimensions. Missing fields in the `rc`
     862                 :            :     // are inferred from the image size. If unset, the pitch is inferred
     863                 :            :     // from `rc` (that is, it's assumed that the data is tightly packed in the
     864                 :            :     // buffer). Otherwise, `row_pitch` *must* be a multiple of
     865                 :            :     // `tex->params.format->texel_align`, and `depth_pitch` must be a multiple
     866                 :            :     // of `row_pitch`.
     867                 :            :     pl_rect3d rc;       // region of the texture to transfer
     868                 :            :     size_t row_pitch;   // the number of bytes separating image rows
     869                 :            :     size_t depth_pitch; // the number of bytes separating image planes
     870                 :            : 
     871                 :            :     // An optional timer to report the approximate duration of the texture
     872                 :            :     // transfer to. Note that this is only an approximation, since the actual
     873                 :            :     // texture transfer may happen entirely in the background (in particular,
     874                 :            :     // for implementations with asynchronous transfer capabilities). It's also
     875                 :            :     // not guaranteed that all GPUs support this.
     876                 :            :     pl_timer timer;
     877                 :            : 
     878                 :            :     // An optional callback to fire after the operation completes. If this is
     879                 :            :     // specified, then the operation is performed asynchronously. Note that
     880                 :            :     // transfers to/from buffers are always asynchronous, even without this
     881                 :            :     // field, so it's more useful for `ptr` transfers. (Though it can still be
     882                 :            :     // helpful to avoid having to manually poll buffers all the time)
     883                 :            :     //
     884                 :            :     // When this is *not* specified, uploads from `ptr` are still asynchronous
     885                 :            :     // but require a host memcpy, while downloads from `ptr` are blocking. As
     886                 :            :     // such, it's recommended to always try using asynchronous texture
     887                 :            :     // transfers wherever possible.
     888                 :            :     //
     889                 :            :     // Note: Requires `pl_gpu_limits.callbacks`
     890                 :            :     //
     891                 :            :     // Note: Callbacks are implicitly synchronized, meaning that callbacks are
     892                 :            :     // guaranteed to never execute concurrently with other callbacks. However,
     893                 :            :     // they may execute from any thread that the `pl_gpu` is used on.
     894                 :            :     void (*callback)(void *priv);
     895                 :            :     void *priv; // arbitrary user data
     896                 :            : 
     897                 :            :     // For the data source/target of a transfer operation, there are two valid
     898                 :            :     // options:
     899                 :            :     //
     900                 :            :     // 1. Transferring to/from a buffer: (requires `pl_gpu_limits.buf_transfer`)
     901                 :            :     pl_buf buf;         // buffer to use
     902                 :            :     size_t buf_offset;  // offset of data within buffer, should be a
     903                 :            :                         // multiple of `tex->params.format->texel_size`
     904                 :            :     // 2. Transferring to/from host memory directly:
     905                 :            :     void *ptr;          // address of data
     906                 :            :     bool no_import;     // always use memcpy, bypassing host ptr import
     907                 :            : 
     908                 :            :     // Note: The contents of the memory region / buffer must exactly match the
     909                 :            :     // texture format; i.e. there is no explicit conversion between formats.
     910                 :            : };
     911                 :            : 
     912                 :            : #define pl_tex_transfer_params(...) (&(struct pl_tex_transfer_params) { __VA_ARGS__ })
     913                 :            : 
     914                 :            : // Upload data to a texture. Returns whether successful.
     915                 :            : PL_API bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params);
     916                 :            : 
     917                 :            : // Download data from a texture. Returns whether successful.
     918                 :            : PL_API bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params);
     919                 :            : 
     920                 :            : // Returns whether or not a texture is currently "in use". This can either be
     921                 :            : // because of a pending read operation, a pending write operation or a pending
     922                 :            : // texture export operation. Note that this function's usefulness is extremely
     923                 :            : // limited under ordinary circumstances. In practically all cases, textures do
     924                 :            : // not need to be directly synchronized by the user, except when interfacing
     925                 :            : // with external libraries. This function should NOT, however, be used as a
     926                 :            : // crutch to avoid having to implement semaphore-based synchronization. Use
     927                 :            : // the API-specific functions such as `pl_vulkan_hold/release` for that.
     928                 :            : //
     929                 :            : // A good example of a use case in which this function is required is when
     930                 :            : // interoperating with external memory management that needs to know when an
     931                 :            : // imported texture is safe to free / reclaim internally, in which case
     932                 :            : // semaphores are insufficient because memory management is a host operation.
     933                 :            : //
     934                 :            : // The `timeout`, specified in nanoseconds, indicates how long to block for
     935                 :            : // before returning. If set to 0, this function will never block, and only
     936                 :            : // returns the current status of the texture. The actual precision of the
     937                 :            : // timeout may be significantly longer than one nanosecond, and has no upper
     938                 :            : // bound. This function does not provide hard latency guarantees. This function
     939                 :            : // may also return at any time, even if the texture is still in use. If the
     940                 :            : // user wishes to block until the texture is definitely no longer in use, the
     941                 :            : // recommended usage is:
     942                 :            : //
     943                 :            : // while (pl_tex_poll(gpu, tex, UINT64_MAX))
     944                 :            : //      ; // do nothing
     945                 :            : //
     946                 :            : // Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly
     947                 :            : // synchronized, meaning it can safely be called on a `pl_tex` that is in use
     948                 :            : // by another thread.
     949                 :            : PL_API bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout);
     950                 :            : 
     951                 :            : // Data type of a shader input variable (e.g. uniform, or UBO member)
     952                 :            : enum pl_var_type {
     953                 :            :     PL_VAR_INVALID = 0,
     954                 :            :     PL_VAR_SINT,        // C: int           GLSL: int/ivec
     955                 :            :     PL_VAR_UINT,        // C: unsigned int  GLSL: uint/uvec
     956                 :            :     PL_VAR_FLOAT,       // C: float         GLSL: float/vec/mat
     957                 :            :     PL_VAR_TYPE_COUNT
     958                 :            : };
     959                 :            : 
     960                 :            : // Returns the host size (in bytes) of a pl_var_type.
     961                 :            : PL_API size_t pl_var_type_size(enum pl_var_type type);
     962                 :            : 
     963                 :            : // Represents a shader input variable (concrete data, e.g. vector, matrix)
     964                 :            : struct pl_var {
     965                 :            :     const char *name;       // name as used in the shader
     966                 :            :     enum pl_var_type type;
     967                 :            :     // The total number of values is given by dim_v * dim_m. For example, a
     968                 :            :     // vec2 would have dim_v = 2 and dim_m = 1. A mat3x4 would have dim_v = 4
     969                 :            :     // and dim_m = 3.
     970                 :            :     int dim_v;              // vector dimension
     971                 :            :     int dim_m;              // matrix dimension (number of columns, see below)
     972                 :            :     int dim_a;              // array dimension
     973                 :            : };
     974                 :            : 
     975                 :            : // Helper functions for constructing the most common pl_vars, with names
     976                 :            : // corresponding to their corresponding GLSL built-in types.
     977                 :            : PL_API struct pl_var pl_var_float(const char *name);
     978                 :            : PL_API struct pl_var pl_var_vec2(const char *name);
     979                 :            : PL_API struct pl_var pl_var_vec3(const char *name);
     980                 :            : PL_API struct pl_var pl_var_vec4(const char *name);
     981                 :            : PL_API struct pl_var pl_var_mat2(const char *name);
     982                 :            : PL_API struct pl_var pl_var_mat2x3(const char *name);
     983                 :            : PL_API struct pl_var pl_var_mat2x4(const char *name);
     984                 :            : PL_API struct pl_var pl_var_mat3(const char *name);
     985                 :            : PL_API struct pl_var pl_var_mat3x4(const char *name);
     986                 :            : PL_API struct pl_var pl_var_mat4x2(const char *name);
     987                 :            : PL_API struct pl_var pl_var_mat4x3(const char *name);
     988                 :            : PL_API struct pl_var pl_var_mat4(const char *name);
     989                 :            : PL_API struct pl_var pl_var_int(const char *name);
     990                 :            : PL_API struct pl_var pl_var_ivec2(const char *name);
     991                 :            : PL_API struct pl_var pl_var_ivec3(const char *name);
     992                 :            : PL_API struct pl_var pl_var_ivec4(const char *name);
     993                 :            : PL_API struct pl_var pl_var_uint(const char *name);
     994                 :            : PL_API struct pl_var pl_var_uvec2(const char *name);
     995                 :            : PL_API struct pl_var pl_var_uvec3(const char *name);
     996                 :            : PL_API struct pl_var pl_var_uvec4(const char *name);
     997                 :            : 
     998                 :            : struct pl_named_var {
     999                 :            :     const char *glsl_name;
    1000                 :            :     struct pl_var var;
    1001                 :            : };
    1002                 :            : 
    1003                 :            : // The same list as above, tagged by name and terminated with a {0} entry.
    1004                 :            : PL_API extern const struct pl_named_var pl_var_glsl_types[];
    1005                 :            : 
    1006                 :            : // Efficient helper function for performing a lookup in the above array.
    1007                 :            : // Returns NULL if the variable is not legal. Note that the array dimension is
    1008                 :            : // ignored, since it's usually part of the variable name and not the type name.
    1009                 :            : PL_API const char *pl_var_glsl_type_name(struct pl_var var);
    1010                 :            : 
    1011                 :            : // Converts a pl_fmt to an "equivalent" pl_var. Equivalent in this sense means
    1012                 :            : // that the pl_var's type will be the same as the vertex's sampled type (e.g.
    1013                 :            : // PL_FMT_UNORM gets turned into PL_VAR_FLOAT).
    1014                 :            : PL_API struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name);
    1015                 :            : 
    1016                 :            : // Describes the memory layout of a variable, relative to some starting location
    1017                 :            : // (typically the offset within a uniform/storage/pushconstant buffer)
    1018                 :            : //
    1019                 :            : // Note on matrices: All GPUs expect column major matrices, for both buffers and
    1020                 :            : // input variables. Care needs to be taken to avoid trying to use e.g. a
    1021                 :            : // pl_matrix3x3 (which is row major) directly as a pl_var_update.data!
    1022                 :            : //
    1023                 :            : // In terms of the host layout, a column-major matrix (e.g. matCxR) with C
    1024                 :            : // columns and R rows is treated like an array vecR[C]. The `stride` here refers
    1025                 :            : // to the separation between these array elements, i.e. the separation between
    1026                 :            : // the individual columns.
    1027                 :            : //
    1028                 :            : // Visualization of a mat4x3:
    1029                 :            : //
    1030                 :            : //       0   1   2   3  <- columns
    1031                 :            : // 0  [ (A) (D) (G) (J) ]
    1032                 :            : // 1  [ (B) (E) (H) (K) ]
    1033                 :            : // 2  [ (C) (F) (I) (L) ]
    1034                 :            : // ^ rows
    1035                 :            : //
    1036                 :            : // Layout in GPU memory: (stride=16, size=60)
    1037                 :            : //
    1038                 :            : // [ A B C ] X <- column 0, offset +0
    1039                 :            : // [ D E F ] X <- column 1, offset +16
    1040                 :            : // [ G H I ] X <- column 2, offset +32
    1041                 :            : // [ J K L ]   <- column 3, offset +48
    1042                 :            : //
    1043                 :            : // Note the lack of padding on the last column in this example.
    1044                 :            : // In general: size <= stride * dim_m
    1045                 :            : //
    1046                 :            : // C representation: (stride=12, size=48)
    1047                 :            : //
    1048                 :            : // { { A, B, C },
    1049                 :            : //   { D, E, F },
    1050                 :            : //   { G, H, I },
    1051                 :            : //   { J, K, L } }
    1052                 :            : //
    1053                 :            : // Note on arrays: `stride` represents both the stride between elements of a
    1054                 :            : // matrix, and the stride between elements of an array. That is, there is no
    1055                 :            : // distinction between the columns of a matrix and the rows of an array. For
    1056                 :            : // example, a mat2[10] and a vec2[20] share the same pl_var_layout - the stride
    1057                 :            : // would be sizeof(vec2) and the size would be sizeof(vec2) * 2 * 10.
    1058                 :            : //
    1059                 :            : // For non-array/matrix types, `stride` is equal to `size`.
    1060                 :            : 
    1061                 :            : struct pl_var_layout {
    1062                 :            :     size_t offset; // the starting offset of the first byte
    1063                 :            :     size_t stride; // the delta between two elements of an array/matrix
    1064                 :            :     size_t size;   // the total size of the input
    1065                 :            : };
    1066                 :            : 
    1067                 :            : // Returns the host layout of an input variable as required for a
    1068                 :            : // tightly-packed, byte-aligned C data type, given a starting offset.
    1069                 :            : PL_API struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var);
    1070                 :            : 
    1071                 :            : // Returns the GLSL std140 layout of an input variable given a current buffer
    1072                 :            : // offset, as required for a buffer descriptor of type PL_DESC_BUF_UNIFORM
    1073                 :            : //
    1074                 :            : // The normal way to use this function is when calculating the size and offset
    1075                 :            : // requirements of a uniform buffer in an incremental fashion, to calculate the
    1076                 :            : // new offset of the next variable in this buffer.
    1077                 :            : PL_API struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var);
    1078                 :            : 
    1079                 :            : // Returns the GLSL std430 layout of an input variable given a current buffer
    1080                 :            : // offset, as required for a buffer descriptor of type PL_DESC_BUF_STORAGE, and
    1081                 :            : // for push constants.
    1082                 :            : PL_API struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var);
    1083                 :            : 
    1084                 :            : // Convenience definitions / friendly names for these
    1085                 :            : #define pl_buf_uniform_layout pl_std140_layout
    1086                 :            : #define pl_buf_storage_layout pl_std430_layout
    1087                 :            : #define pl_push_constant_layout pl_std430_layout
    1088                 :            : 
    1089                 :            : // Like memcpy, but copies bytes from `src` to `dst` in a manner governed by
    1090                 :            : // the stride and size of `dst_layout` as well as `src_layout`. Also takes
    1091                 :            : // into account the respective `offset`.
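                         :            : //
                         :            : // As a sketch, packing two variables into a std140 uniform buffer from
                         :            : // tightly packed host data (`ubo_data`, `host_color` and `host_mvp` are
                         :            : // assumed to be provided by the caller, with `ubo_data` large enough, and
                         :            : // `host_mvp` already in column-major order as per the note above):
                         :            : //
                         :            : // struct pl_var vars[2] = { pl_var_vec3("color"), pl_var_mat4("mvp") };
                         :            : // const void *host[2]   = { host_color, host_mvp };
                         :            : // size_t ubo_size = 0;
                         :            : // for (int i = 0; i < 2; i++) {
                         :            : //     struct pl_var_layout layout = pl_std140_layout(ubo_size, &vars[i]);
                         :            : //     memcpy_layout(ubo_data, layout, host[i],
                         :            : //                   pl_var_host_layout(0, &vars[i]));
                         :            : //     ubo_size = layout.offset + layout.size;
                         :            : // }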
    1092                 :            : PL_API void memcpy_layout(void *dst, struct pl_var_layout dst_layout,
    1093                 :            :                           const void *src, struct pl_var_layout src_layout);
    1094                 :            : 
    1095                 :            : // Represents a compile-time constant.
    1096                 :            : struct pl_constant {
    1097                 :            :     enum pl_var_type type;  // constant data type
    1098                 :            :     uint32_t id;            // GLSL `constant_id`
    1099                 :            :     size_t offset;          // byte offset in `constant_data`
    1100                 :            : };
    1101                 :            : 
    1102                 :            : // Represents a vertex attribute.
    1103                 :            : struct pl_vertex_attrib {
    1104                 :            :     const char *name;   // name as used in the shader
    1105                 :            :     pl_fmt fmt;         // data format (must have PL_FMT_CAP_VERTEX)
    1106                 :            :     size_t offset;      // byte offset into the vertex struct
    1107                 :            :     int location;       // vertex location (as used in the shader)
    1108                 :            : };
    1109                 :            : 
    1110                 :            : // Returns an abstract namespace index for a given descriptor type. This will
    1111                 :            : // always be a value >= 0 and < PL_DESC_TYPE_COUNT. Implementations can use
    1112                 :            : // this to figure out which descriptors may share the same value of `binding`.
    1113                 :            : // Bindings must only be unique for all descriptors within the same namespace.
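                         :            : //
                         :            : // For example, whether a sampled texture and a uniform buffer may share
                         :            : // the same binding number can be determined like this:
                         :            : //
                         :            : // bool can_share = pl_desc_namespace(gpu, PL_DESC_SAMPLED_TEX) !=
                         :            : //                  pl_desc_namespace(gpu, PL_DESC_BUF_UNIFORM);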
    1114                 :            : PL_API int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type);
    1115                 :            : 
    1116                 :            : // Access mode of a shader input descriptor.
    1117                 :            : enum pl_desc_access {
    1118                 :            :     PL_DESC_ACCESS_READWRITE,
    1119                 :            :     PL_DESC_ACCESS_READONLY,
    1120                 :            :     PL_DESC_ACCESS_WRITEONLY,
    1121                 :            :     PL_DESC_ACCESS_COUNT,
    1122                 :            : };
    1123                 :            : 
    1124                 :            : // Returns the GLSL syntax for a given access mode (e.g. "readonly").
    1125                 :            : PL_API const char *pl_desc_access_glsl_name(enum pl_desc_access mode);
    1126                 :            : 
    1127                 :            : // Represents a shader descriptor (e.g. texture or buffer binding)
    1128                 :            : struct pl_desc {
    1129                 :            :     const char *name;       // name as used in the shader
    1130                 :            :     enum pl_desc_type type;
    1131                 :            : 
    1132                 :            :     // The binding of this descriptor, as used in the shader. All bindings
    1133                 :            :     // within a namespace must be unique. (see: pl_desc_namespace)
    1134                 :            :     int binding;
    1135                 :            : 
    1136                 :            :     // For storage images and storage buffers, this can be used to restrict
    1137                 :            :     // the type of access that may be performed on the descriptor. Ignored for
    1138                 :            :     // the other descriptor types (uniform buffers and sampled textures are
    1139                 :            :     // always read-only).
    1140                 :            :     enum pl_desc_access access;
    1141                 :            : };
    1142                 :            : 
    1143                 :            : // Framebuffer blending mode (for raster passes)
    1144                 :            : enum pl_blend_mode {
    1145                 :            :     PL_BLEND_ZERO,
    1146                 :            :     PL_BLEND_ONE,
    1147                 :            :     PL_BLEND_SRC_ALPHA,
    1148                 :            :     PL_BLEND_ONE_MINUS_SRC_ALPHA,
    1149                 :            :     PL_BLEND_MODE_COUNT,
    1150                 :            : };
    1151                 :            : 
    1152                 :            : struct pl_blend_params {
    1153                 :            :     enum pl_blend_mode src_rgb;
    1154                 :            :     enum pl_blend_mode dst_rgb;
    1155                 :            :     enum pl_blend_mode src_alpha;
    1156                 :            :     enum pl_blend_mode dst_alpha;
    1157                 :            : };
    1158                 :            : 
    1159                 :            : #define pl_blend_params(...) (&(struct pl_blend_params) { __VA_ARGS__ })
    1160                 :            : 
    1161                 :            : // Typical alpha compositing
    1162                 :            : PL_API extern const struct pl_blend_params pl_alpha_overlay;
    1163                 :            : 
    1164                 :            : enum pl_prim_type {
    1165                 :            :     PL_PRIM_TRIANGLE_LIST,
    1166                 :            :     PL_PRIM_TRIANGLE_STRIP,
    1167                 :            :     PL_PRIM_TYPE_COUNT,
    1168                 :            : };
    1169                 :            : 
    1170                 :            : enum pl_index_format {
    1171                 :            :     PL_INDEX_UINT16 = 0,
    1172                 :            :     PL_INDEX_UINT32,
    1173                 :            :     PL_INDEX_FORMAT_COUNT,
    1174                 :            : };
    1175                 :            : 
    1176                 :            : enum pl_pass_type {
    1177                 :            :     PL_PASS_INVALID = 0,
    1178                 :            :     PL_PASS_RASTER,  // vertex+fragment shader
    1179                 :            :     PL_PASS_COMPUTE, // compute shader (requires `pl_gpu.glsl.compute`)
    1180                 :            :     PL_PASS_TYPE_COUNT,
    1181                 :            : };
    1182                 :            : 
    1183                 :            : // Description of a rendering pass. It conflates the following:
    1184                 :            : //  - GLSL shader(s) and its list of inputs
    1185                 :            : //  - target parameters (for raster passes)
    1186                 :            : struct pl_pass_params {
    1187                 :            :     enum pl_pass_type type;
    1188                 :            : 
    1189                 :            :     // Input variables.
    1190                 :            :     struct pl_var *variables;
    1191                 :            :     int num_variables;
    1192                 :            : 
    1193                 :            :     // Input descriptors.
    1194                 :            :     struct pl_desc *descriptors;
    1195                 :            :     int num_descriptors;
    1196                 :            : 
    1197                 :            :     // Compile-time specialization constants.
    1198                 :            :     struct pl_constant *constants;
    1199                 :            :     int num_constants;
    1200                 :            : 
    1201                 :            :     // Initial data for the specialization constants. Optional. If NULL,
    1202                 :            :     // specialization constants receive the values from the shader text.
    1203                 :            :     void *constant_data;
    1204                 :            : 
    1205                 :            :     // Push constant region. Must be a multiple of 4 <= limits.max_pushc_size
    1206                 :            :     size_t push_constants_size;
    1207                 :            : 
    1208                 :            :     // The shader text in GLSL. For PL_PASS_RASTER, this is interpreted
    1209                 :            :     // as a fragment shader. For PL_PASS_COMPUTE, this is interpreted as
    1210                 :            :     // a compute shader.
    1211                 :            :     const char *glsl_shader;
    1212                 :            : 
    1213                 :            :     // --- type==PL_PASS_RASTER only
    1214                 :            : 
    1215                 :            :     // Describes the interpretation and layout of the vertex data.
    1216                 :            :     enum pl_prim_type vertex_type;
    1217                 :            :     struct pl_vertex_attrib *vertex_attribs;
    1218                 :            :     int num_vertex_attribs;
    1219                 :            :     size_t vertex_stride; // must be a multiple of limits.align_vertex_stride
    1220                 :            : 
    1221                 :            :     // The vertex shader itself.
    1222                 :            :     const char *vertex_shader;
    1223                 :            : 
    1224                 :            :     // Target format. The format must support PL_FMT_CAP_RENDERABLE. The
    1225                 :            :     // resulting pass may only be used on textures that have a format with a
    1226                 :            :     // `pl_fmt.signature` compatible with this format.
    1227                 :            :     pl_fmt target_format;
    1228                 :            : 
    1229                 :            :     // Target blending mode. If this is NULL, blending is disabled. Otherwise,
    1230                 :            :     // the `target_format` must also support PL_FMT_CAP_BLENDABLE.
    1231                 :            :     const struct pl_blend_params *blend_params;
    1232                 :            : 
    1233                 :            :     // If false, the target's existing contents will be discarded before the
    1234                 :            :     // pass is run. (Semantically equivalent to calling pl_tex_invalidate
    1235                 :            :     // before every pl_pass_run, but slightly more efficient)
    1236                 :            :     //
    1237                 :            :     // Specifying `blend_params` requires `load_target` to be true.
    1238                 :            :     bool load_target;
    1239                 :            : 
    1240                 :            :     // --- Deprecated / removed fields.
    1241                 :            :     PL_DEPRECATED_IN(v6.322) const uint8_t *cached_program; // Non-functional
    1242                 :            :     PL_DEPRECATED_IN(v6.322) size_t cached_program_len;
    1243                 :            : };
    1244                 :            : 
    1245                 :            : #define pl_pass_params(...) (&(struct pl_pass_params) { __VA_ARGS__ })
    1246                 :            : 
    1247                 :            : // Conflates the following typical GPU API concepts:
    1248                 :            : // - various kinds of shaders
    1249                 :            : // - rendering pipelines
    1250                 :            : // - descriptor sets, uniforms, other bindings
    1251                 :            : // - all synchronization necessary
    1252                 :            : // - the current values of all inputs
    1253                 :            : //
    1254                 :            : // Thread-safety: Unsafe
    1255                 :            : typedef const struct pl_pass_t {
    1256                 :            :     struct pl_pass_params params;
    1257                 :            : } *pl_pass;
    1258                 :            : 
    1259                 :            : // Compile a shader and create a render pass. This is a rare/expensive
    1260                 :            : // operation and may take a significant amount of time, even if a cached
    1261                 :            : // program is used. Returns NULL on failure.
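                         :            : //
                         :            : // A minimal raster pass sketch (the GLSL sources, a 2-component float
                         :            : // vertex format `pos_fmt` and a `target_fmt` are assumed to be supplied by
                         :            : // the caller; `vertex_stride` must also respect
                         :            : // `pl_gpu_limits.align_vertex_stride`):
                         :            : //
                         :            : // struct pl_vertex_attrib attribs[] = {
                         :            : //     { .name = "pos", .fmt = pos_fmt, .offset = 0, .location = 0 },
                         :            : // };
                         :            : //
                         :            : // pl_pass pass = pl_pass_create(gpu, pl_pass_params(
                         :            : //     .type               = PL_PASS_RASTER,
                         :            : //     .vertex_type        = PL_PRIM_TRIANGLE_STRIP,
                         :            : //     .vertex_attribs     = attribs,
                         :            : //     .num_vertex_attribs = 1,
                         :            : //     .vertex_stride      = pos_fmt->texel_size,
                         :            : //     .vertex_shader      = vert_glsl,
                         :            : //     .glsl_shader        = frag_glsl,
                         :            : //     .target_format      = target_fmt,
                         :            : // ));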
    1262                 :            : PL_API pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params);
    1263                 :            : PL_API void pl_pass_destroy(pl_gpu gpu, pl_pass *pass);
    1264                 :            : 
    1265                 :            : struct pl_desc_binding {
    1266                 :            :     const void *object; // pl_* object with type corresponding to pl_desc_type
    1267                 :            : 
    1268                 :            :     // For PL_DESC_SAMPLED_TEX, this can be used to configure the sampler.
    1269                 :            :     enum pl_tex_address_mode address_mode;
    1270                 :            :     enum pl_tex_sample_mode sample_mode;
    1271                 :            : };
    1272                 :            : 
    1273                 :            : struct pl_var_update {
    1274                 :            :     int index;        // index into params.variables[]
    1275                 :            :     const void *data; // pointer to raw byte data corresponding to pl_var_host_layout()
    1276                 :            : };
    1277                 :            : 
    1278                 :            : struct pl_pass_run_params {
    1279                 :            :     pl_pass pass;
    1280                 :            : 
    1281                 :            :     // If present, the shader will be re-specialized with the new constants
    1282                 :            :     // provided. This is a significantly cheaper operation than recompiling a
    1283                 :            :     // brand new shader, but should still be avoided if possible.
    1284                 :            :     //
    1285                 :            :     // Leaving it as NULL re-uses the existing specialization values. Ignored
    1286                 :            :     // if the shader has no specialization constants. Guaranteed to be a no-op
    1287                 :            :     // if the values have not changed since the last invocation.
    1288                 :            :     void *constant_data;
    1289                 :            : 
    1290                 :            :     // This list only contains descriptors/variables which have changed
    1291                 :            :     // since the previous invocation. All non-mentioned variables implicitly
    1292                 :            :     // preserve their state from the last invocation.
    1293                 :            :     struct pl_var_update *var_updates;
    1294                 :            :     int num_var_updates;
    1295                 :            : 
    1296                 :            :     // This list contains all descriptors used by this pass. It must
    1297                 :            :     // always be filled, even if the descriptors haven't changed. The order
    1298                 :            :     // must match that of pass->params.descriptors
    1299                 :            :     struct pl_desc_binding *desc_bindings;
    1300                 :            : 
    1301                 :            :     // The push constants for this invocation. This must always be set and
    1302                 :            :     // fully defined for every invocation if params.push_constants_size > 0.
    1303                 :            :     void *push_constants;
    1304                 :            : 
    1305                 :            :     // An optional timer to report the approximate runtime of this shader pass
    1306                 :            :     // invocation to. Note that this is only an approximation, since shaders
    1307                 :            :     // may overlap their execution times and contend for GPU time.
    1308                 :            :     pl_timer timer;
    1309                 :            : 
    1310                 :            :     // --- pass->params.type==PL_PASS_RASTER only
    1311                 :            : 
    1312                 :            :     // Target must be a 2D texture, `target->params.renderable` must be true,
    1313                 :            :     // and `target->params.format->signature` must match the signature provided
    1314                 :            :     // in `pass->params.target_format`.
    1315                 :            :     //
    1316                 :            :     // If the viewport or scissors are left blank, they are inferred from
    1317                 :            :     // target->params.
    1318                 :            :     //
    1319                 :            :     // WARNING: Rendering to a `target` that is being read from by the same
    1320                 :            :     // shader is undefined behavior. In general, trying to bind the same
    1321                 :            :     // resource multiple times to the same shader is undefined behavior.
    1322                 :            :     pl_tex target;
    1323                 :            :     pl_rect2d viewport; // screen space viewport (must be normalized)
    1324                 :            :     pl_rect2d scissors; // target render scissors (must be normalized)
    1325                 :            : 
    1326                 :            :     // Number of vertices to render
    1327                 :            :     int vertex_count;
    1328                 :            : 
    1329                 :            :     // Vertex data may be provided in one of two forms:
    1330                 :            :     //
    1331                 :            :     // 1. Drawing from host memory directly
    1332                 :            :     const void *vertex_data;
    1333                 :            :     // 2. Drawing from a vertex buffer (requires `vertex_buf->params.drawable`)
    1334                 :            :     pl_buf vertex_buf;
    1335                 :            :     size_t buf_offset;
    1336                 :            : 
    1337                 :            :     // (Optional) Index data may be provided in the form given by `index_fmt`.
     1338                 :            :     // These will be used for indexed rendering. Similar to vertex data, this
    1339                 :            :     // can be provided in two forms:
    1340                 :            :     // 1. From host memory
    1341                 :            :     const void *index_data;
    1342                 :            :     enum pl_index_format index_fmt;
    1343                 :            :     // 2. From an index buffer (requires `index_buf->params.drawable`)
    1344                 :            :     pl_buf index_buf;
    1345                 :            :     size_t index_offset;
    1346                 :            :     // Note: Drawing from an index buffer requires vertex data to also be
    1347                 :            :     // present in buffer form, i.e. it's forbidden to mix `index_buf` with
     1348                 :            :     // `vertex_data` (the reverse, `index_data` with `vertex_buf`, is allowed).
    1349                 :            : 
    1350                 :            :     // --- pass->params.type==PL_PASS_COMPUTE only
    1351                 :            : 
    1352                 :            :     // Number of work groups to dispatch per dimension (X/Y/Z). Must be <= the
    1353                 :            :     // corresponding index of limits.max_dispatch
    1354                 :            :     int compute_groups[3];
    1355                 :            : };
    1356                 :            : 
    1357                 :            : #define pl_pass_run_params(...) (&(struct pl_pass_run_params) { __VA_ARGS__ })
    1358                 :            : 
    1359                 :            : // Execute a render pass.
    1360                 :            : PL_API void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params);
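                          :            : 
                          :            : // A minimal invocation sketch for a raster pass. It assumes `pass` is a
                          :            : // previously created PL_PASS_RASTER pass (set via the struct's `pass` field
                          :            : // declared further up), `fbo` is a renderable 2D texture whose format
                          :            : // signature matches `pass->params.target_format`, and `bindings` covers all
                          :            : // of `pass->params.descriptors`. Viewport and scissors are deliberately left
                          :            : // blank so they are inferred from `fbo->params` as described above.
                          :            : static inline void example_run_raster_pass(pl_gpu gpu, pl_pass pass, pl_tex fbo,
                          :            :                                             struct pl_desc_binding *bindings,
                          :            :                                             const void *vertices, int num_vertices)
                          :            : {
                          :            :     pl_pass_run(gpu, pl_pass_run_params(
                          :            :         .pass          = pass,
                          :            :         .desc_bindings = bindings,
                          :            :         .target        = fbo,
                          :            :         .vertex_count  = num_vertices,
                          :            :         .vertex_data   = vertices,   // form 1: vertices read from host memory
                          :            :     ));
                          :            : }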
    1361                 :            : 
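                          :            : // The equivalent sketch for a PL_PASS_COMPUTE pass: only the descriptor
                          :            : // bindings and the dispatch size need to be filled in. The caller must keep
                          :            : // each dimension within the corresponding entry of `limits.max_dispatch`.
                          :            : static inline void example_run_compute_pass(pl_gpu gpu, pl_pass pass,
                          :            :                                              struct pl_desc_binding *bindings,
                          :            :                                              int gx, int gy, int gz)
                          :            : {
                          :            :     pl_pass_run(gpu, pl_pass_run_params(
                          :            :         .pass           = pass,
                          :            :         .desc_bindings  = bindings,
                          :            :         .compute_groups = {gx, gy, gz},
                          :            :     ));
                          :            : }
                          :            : 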
    1362                 :            : // This is semantically a no-op, but it provides a hint that you want to flush
    1363                 :            : // any partially queued up commands and begin execution. There is normally no
    1364                 :            : // need to call this, because queued commands will always be implicitly flushed
    1365                 :            : // whenever necessary to make forward progress on commands like `pl_buf_poll`,
    1366                 :            : // or when submitting a frame to a swapchain for display. In fact, calling this
    1367                 :            : // function can negatively impact performance, because some GPUs rely on being
    1368                 :            : // able to re-order and modify queued commands in order to enable optimizations
    1369                 :            : // retroactively.
    1370                 :            : //
    1371                 :            : // The only time this might be beneficial to call explicitly is if you're doing
    1372                 :            : // lots of offline processing, i.e. you aren't rendering to a swapchain but to
     1373                 :            : // textures that you then download from. In that case you should call this
    1374                 :            : // function after each "work item" to ensure good parallelism between them.
    1375                 :            : //
    1376                 :            : // It's worth noting that this function may block if you're over-feeding the
    1377                 :            : // GPU without waiting for existing results to finish.
    1378                 :            : PL_API void pl_gpu_flush(pl_gpu gpu);
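                          :            : 
                          :            : // A sketch of the offline-processing pattern described above: flushing once
                          :            : // per "work item" so that independent items can overlap on the GPU. The
                          :            : // per-item work itself is abstracted into a caller-provided callback, since
                          :            : // it is not part of this API.
                          :            : static inline void example_offline_loop(pl_gpu gpu, int num_items,
                          :            :                                          void (*process_item)(pl_gpu gpu, int item))
                          :            : {
                          :            :     for (int i = 0; i < num_items; i++) {
                          :            :         process_item(gpu, i); // queue up all passes/transfers for this item
                          :            :         pl_gpu_flush(gpu);    // hint: begin executing this item right away
                          :            :     }
                          :            : }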
    1379                 :            : 
    1380                 :            : // This is like `pl_gpu_flush` but also blocks until the GPU is fully idle
     1381                 :            : // before returning. Using this in your rendering loop is strongly discouraged,
    1382                 :            : // and almost never the right solution. The intended use case is for deinit
     1383                 :            : // logic, where users may want to force all pending GPU operations to
    1384                 :            : // finish so they can clean up their state more easily.
    1385                 :            : //
    1386                 :            : // After this operation is called, it's guaranteed that all pending buffer
    1387                 :            : // operations are complete - i.e. `pl_buf_poll` is guaranteed to return false.
    1388                 :            : // It's also guaranteed that any outstanding timer query results are available.
    1389                 :            : //
    1390                 :            : // Note: If you only care about buffer operations, you can accomplish this more
    1391                 :            : // easily by using `pl_buf_poll` with the timeout set to `UINT64_MAX`. But if
    1392                 :            : // you have many buffers it may be more convenient to call this function
    1393                 :            : // instead. The difference is that this function will also affect e.g. renders
    1394                 :            : // to a `pl_swapchain`.
    1395                 :            : PL_API void pl_gpu_finish(pl_gpu gpu);
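                          :            : 
                          :            : // A sketch of the intended deinit-time usage: block until the GPU is idle,
                          :            : // after which everything can be destroyed without further synchronization
                          :            : // (and `pl_buf_poll` is guaranteed to return false). The actual teardown is
                          :            : // left to a caller-provided callback. The buffer-only alternative from the
                          :            : // note above is also shown, assuming the `pl_buf_poll(gpu, buf, timeout)`
                          :            : // signature declared earlier in this header.
                          :            : static inline void example_deinit(pl_gpu gpu, void (*destroy_all)(pl_gpu gpu))
                          :            : {
                          :            :     pl_gpu_finish(gpu); // wait for every pending GPU operation to complete
                          :            :     destroy_all(gpu);   // now safe to destroy buffers, textures, passes, ...
                          :            : }
                          :            : 
                          :            : static inline void example_wait_for_buffers(pl_gpu gpu, pl_buf *bufs, int num_bufs)
                          :            : {
                          :            :     for (int i = 0; i < num_bufs; i++) {
                          :            :         // Blocks until this buffer is no longer in use by the GPU
                          :            :         while (pl_buf_poll(gpu, bufs[i], UINT64_MAX)) {}
                          :            :     }
                          :            : }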
    1396                 :            : 
    1397                 :            : // Returns true if the GPU is considered to be in a "failed" state, which
    1398                 :            : // during normal operation is typically the result of things like the device
    1399                 :            : // being lost (due to e.g. power management).
    1400                 :            : //
    1401                 :            : // If this returns true, users *should* destroy and recreate the `pl_gpu`,
    1402                 :            : // including all associated resources, via the appropriate mechanism.
    1403                 :            : PL_API bool pl_gpu_is_failed(pl_gpu gpu);
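                          :            : 
                          :            : // A sketch of the recovery pattern implied above. How a replacement `pl_gpu`
                          :            : // is created depends entirely on the backend in use, so that step is left to
                          :            : // a caller-provided callback; the important part is that all resources
                          :            : // derived from the failed GPU must be destroyed and recreated along with it.
                          :            : static inline pl_gpu example_handle_failure(pl_gpu gpu,
                          :            :                                             pl_gpu (*recreate_gpu)(void))
                          :            : {
                          :            :     if (!pl_gpu_is_failed(gpu))
                          :            :         return gpu; // still healthy, keep using it
                          :            : 
                          :            :     // Device lost (or similar): tear down the old GPU and all associated
                          :            :     // resources, then start over with a freshly created one.
                          :            :     return recreate_gpu();
                          :            : }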
    1404                 :            : 
    1405                 :            : PL_API_END
    1406                 :            : 
    1407                 :            : #endif // LIBPLACEBO_GPU_H_

Generated by: LCOV version 1.16