/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef LIBPLACEBO_GPU_H_
#define LIBPLACEBO_GPU_H_

#include <stddef.h>
#include <stdbool.h>
#include <stdint.h>

#include <libplacebo/common.h>
#include <libplacebo/cache.h>
#include <libplacebo/log.h>

PL_API_BEGIN

// These are not memory managed, and should represent compile-time constants
typedef const char *pl_debug_tag;
#define PL_DEBUG_TAG (__FILE__ ":" PL_TOSTRING(__LINE__))

// Type of a shader input descriptor.
enum pl_desc_type {
    PL_DESC_INVALID = 0,
    PL_DESC_SAMPLED_TEX,      // C: pl_tex*    GLSL: combined texture sampler
                              //   (`pl_tex->params.sampleable` must be set)
    PL_DESC_STORAGE_IMG,      // C: pl_tex*    GLSL: storage image
                              //   (`pl_tex->params.storable` must be set)
    PL_DESC_BUF_UNIFORM,      // C: pl_buf*    GLSL: uniform buffer
                              //   (`pl_buf->params.uniform` must be set)
    PL_DESC_BUF_STORAGE,      // C: pl_buf*    GLSL: storage buffer
                              //   (`pl_buf->params.storable` must be set)
    PL_DESC_BUF_TEXEL_UNIFORM,// C: pl_buf*    GLSL: uniform samplerBuffer
                              //   (`pl_buf->params.uniform` and `format` must be set)
    PL_DESC_BUF_TEXEL_STORAGE,// C: pl_buf*    GLSL: uniform imageBuffer
                              //   (`pl_buf->params.storable` and `format` must be set)
    PL_DESC_TYPE_COUNT
};

// This file contains the definition of an API which is designed to abstract
// away from platform-specific APIs like the various OpenGL variants, Direct3D
// and Vulkan in a common way. It is a much more limited API than those APIs,
// since it only targets the small common subset of features needed to
// implement libplacebo's rendering.
//
// NOTE: Most, but not all, parameter conditions (phrases such as "must" or
// "valid usage") are explicitly tested and result in error messages followed
// by graceful failure. Exceptions are noted where they exist.

// Structure which wraps metadata describing GLSL capabilities.
struct pl_glsl_version {
    int version;    // GLSL version (e.g. 450), for #version
    bool gles;      // GLSL ES semantics (ESSL)
    bool vulkan;    // GL_KHR_vulkan_glsl semantics

    // Compute shader support and limits. If `compute` is false, then all
    // of the remaining fields in this section are {0}.
    bool compute;
    size_t max_shmem_size;      // maximum compute shader shared memory size
    uint32_t max_group_threads; // maximum number of local threads per work group
    uint32_t max_group_size[3]; // maximum work group size per dimension

    // If nonzero, signals availability of shader subgroups. This guarantees
    // availability of all of the following extensions:
    //  - GL_KHR_shader_subgroup_basic
    //  - GL_KHR_shader_subgroup_vote
    //  - GL_KHR_shader_subgroup_arithmetic
    //  - GL_KHR_shader_subgroup_ballot
    //  - GL_KHR_shader_subgroup_shuffle
    uint32_t subgroup_size;

    // Miscellaneous shader limits
    int16_t min_gather_offset;  // minimum `textureGatherOffset` offset
    int16_t max_gather_offset;  // maximum `textureGatherOffset` offset
};

// Backwards compatibility alias
#define pl_glsl_desc pl_glsl_version

// Structure defining the physical limits and capabilities of this GPU
// instance. If a limit is given as 0, that means that feature is unsupported.
struct pl_gpu_limits {
    // --- pl_gpu
    bool thread_safe;           // `pl_gpu` calls are thread-safe
    bool callbacks;             // supports asynchronous GPU callbacks

    // --- pl_buf
    size_t max_buf_size;        // maximum size of any buffer
    size_t max_ubo_size;        // maximum size of a `uniform` buffer
    size_t max_ssbo_size;       // maximum size of a `storable` buffer
    size_t max_vbo_size;        // maximum size of a `drawable` buffer
    size_t max_mapped_size;     // maximum size of a `host_mapped` buffer
    uint64_t max_buffer_texels; // maximum number of texels in a texel buffer
    bool host_cached;           // if true, PL_BUF_MEM_HOST buffers are cached
    size_t max_mapped_vram;     // maximum (known) size of a `host_mapped`
                                // PL_BUF_MEM_DEVICE buffer, or 0 if this
                                // combination is not supported

    // Required alignment for PL_HANDLE_HOST_PTR imports. This is provided
    // merely as a hint to the user. If the host pointer being imported is
    // misaligned, libplacebo will internally round (over-map) the region.
    size_t align_host_ptr;

    // --- pl_tex
    uint32_t max_tex_1d_dim;    // maximum width for a 1D texture
    uint32_t max_tex_2d_dim;    // maximum width/height for a 2D texture (required)
    uint32_t max_tex_3d_dim;    // maximum width/height/depth for a 3D texture
    bool blittable_1d_3d;       // supports blittable 1D/3D textures
    bool buf_transfer;          // supports `pl_tex_transfer_params.buf`

    // These don't represent hard limits but indicate performance hints for
    // optimal alignment. For best performance, the corresponding field
    // should be aligned to a multiple of these. They will always be a power
    // of two.
    size_t align_tex_xfer_pitch;    // optimal `pl_tex_transfer_params.row_pitch`
    size_t align_tex_xfer_offset;   // optimal `pl_tex_transfer_params.buf_offset`

    // --- pl_pass
    size_t max_variable_comps;  // maximum components passed in variables
    size_t max_constants;       // maximum `pl_pass_params.num_constants`
    bool array_size_constants;  // push constants can be used to size arrays
    size_t max_pushc_size;      // maximum `push_constants_size`
    size_t align_vertex_stride; // alignment of `pl_pass_params.vertex_stride`
    uint32_t max_dispatch[3];   // maximum dispatch size per dimension

    // Note: At least one of `max_variable_comps` or `max_ubo_size` is
    // guaranteed to be nonzero.

    // As a performance hint, the GPU may signal the number of command queues
    // it has for fragment and compute shaders, respectively. Users may use
    // this information to decide the appropriate type of shader to dispatch.
    uint32_t fragment_queues;
    uint32_t compute_queues;
};
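
// As an illustration (not part of the API), a caller wanting a large storage
// buffer might consult these limits up front, e.g.:
//
//   if (wanted_size <= gpu->limits.max_ssbo_size) {
//       // safe to request a `storable` buffer of this size
//   }
//
// Here, `gpu` and `wanted_size` are assumed to be defined by the caller.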

// Backwards compatibility aliases
#define max_xfer_size max_buf_size
#define align_tex_xfer_stride align_tex_xfer_pitch

// Some `pl_gpu` operations allow sharing GPU resources with external APIs -
// examples include interop with other graphics APIs such as CUDA, and also
// various hardware decoding APIs. This defines the mechanisms underpinning
// such interoperation.
typedef uint64_t pl_handle_caps;
enum pl_handle_type {
    PL_HANDLE_FD        = (1 << 0), // `int fd` for POSIX-style APIs
    PL_HANDLE_WIN32     = (1 << 1), // `HANDLE` for win32 API
    PL_HANDLE_WIN32_KMT = (1 << 2), // `HANDLE` for pre-Windows-8 win32 API
    PL_HANDLE_DMA_BUF   = (1 << 3), // `int fd` for a dma_buf fd
    PL_HANDLE_HOST_PTR  = (1 << 4), // `void *` for a host-allocated pointer
    PL_HANDLE_MTL_TEX   = (1 << 5), // `MTLTexture*` for Apple platforms
    PL_HANDLE_IOSURFACE = (1 << 6), // `IOSurfaceRef` for Apple platforms
};

struct pl_gpu_handle_caps {
    pl_handle_caps tex;  // supported handles for `pl_tex` + `pl_shared_mem`
    pl_handle_caps buf;  // supported handles for `pl_buf` + `pl_shared_mem`
    pl_handle_caps sync; // supported handles for semaphores
};

// Wrapper for the handle used to communicate a shared resource externally.
// This handle is owned by the `pl_gpu` - if a user wishes to use it in a way
// that takes over ownership (e.g. importing into some APIs), they must clone
// the handle before doing so (e.g. using `dup` for fds). It is important to
// read the external API documentation _very_ carefully as different handle
// types may be managed in different ways. (e.g. CUDA takes ownership of an fd,
// but does not take ownership of a win32 handle).
union pl_handle {
    int fd;       // PL_HANDLE_FD / PL_HANDLE_DMA_BUF
    void *handle; // PL_HANDLE_WIN32 / PL_HANDLE_WIN32_KMT / PL_HANDLE_MTL_TEX / PL_HANDLE_IOSURFACE
    void *ptr;    // PL_HANDLE_HOST_PTR
};

// Structure encapsulating memory that is shared between libplacebo and the
// user. This memory can be imported into external APIs using the handle.
//
// If the object a `pl_shared_mem` belongs to is destroyed (e.g. via
// `pl_buf_destroy`), the handle becomes undefined, as do the contents of the
// memory it points to, as well as any external API objects imported from it.
struct pl_shared_mem {
    union pl_handle handle;
    size_t size;   // the total size of the memory referenced by this handle
    size_t offset; // the offset of the object within the referenced memory

    // Note: `size` is optional for some APIs and handle types, in particular
    // when importing DMABUFs or D3D11 textures.

    // For PL_HANDLE_DMA_BUF, this specifies the DRM format modifier that
    // describes this resource. Note that when importing `pl_buf`, this must
    // be DRM_FORMAT_MOD_LINEAR. For importing `pl_tex`, it can be any
    // format modifier supported by the implementation.
    uint64_t drm_format_mod;

    // When importing a `pl_tex` of type PL_HANDLE_DMA_BUF, this can be used to
    // set the image stride (AKA pitch) in memory. If left as 0, defaults to
    // the image width/height.
    size_t stride_w;
    size_t stride_h;

    // When importing a `pl_tex` of type PL_HANDLE_MTL_TEX, this determines
    // which plane is imported (0 - 2).
    unsigned plane;
};

// Structure grouping PCI bus address fields for GPU devices
struct pl_gpu_pci_address {
    uint32_t domain;
    uint32_t bus;
    uint32_t device;
    uint32_t function;
};

typedef const struct pl_fmt_t *pl_fmt;

// Abstract device context which wraps an underlying graphics context and can
// be used to dispatch rendering commands.
//
// Thread-safety: Depends on `pl_gpu_limits.thread_safe`
typedef const struct pl_gpu_t {
    pl_log log;

    struct pl_glsl_version glsl; // GLSL features supported by this GPU
    struct pl_gpu_limits limits; // physical device limits and capabilities

    // Fields relevant to external API interop. If the underlying device does
    // not support interop with other APIs, these will all be {0}.
    struct pl_gpu_handle_caps export_caps; // supported handles for exporting
    struct pl_gpu_handle_caps import_caps; // supported handles for importing
    uint8_t uuid[16];                      // underlying device UUID

    // Supported texture formats, in preference order. (If there are multiple
    // similar formats, the "better" ones come first)
    pl_fmt *formats;
    int num_formats;

    // PCI Bus address of the underlying device, to help with interop.
    // This will only be filled in if interop is supported.
    struct pl_gpu_pci_address pci;
} *pl_gpu;

// Attach a pl_cache object to this GPU instance. This cache will be
// used to cache all compiled shaders, as well as several other shader objects
// (e.g. cached 3DLUTs). Calling this with `cache = NULL` disables the cache.
//
// Note: Calling this after shaders have already been compiled will not
// retroactively add those shaders to the cache, so it's recommended to set
// this early, before creating any passes.
PL_API void pl_gpu_set_cache(pl_gpu gpu, pl_cache cache);
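
// A minimal sketch of the intended usage, assuming a `pl_log` instance `log`
// already exists (see <libplacebo/cache.h> for the cache API):
//
//   pl_cache cache = pl_cache_create(pl_cache_params( .log = log ));
//   pl_gpu_set_cache(gpu, cache);
//   // ... create render passes, render ...
//   pl_gpu_set_cache(gpu, NULL);
//   pl_cache_destroy(&cache);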

enum pl_fmt_type {
    PL_FMT_UNKNOWN = 0, // also used for inconsistent multi-component formats
    PL_FMT_UNORM,       // unsigned, normalized integer format (sampled as float)
    PL_FMT_SNORM,       // signed, normalized integer format (sampled as float)
    PL_FMT_UINT,        // unsigned integer format (sampled as integer)
    PL_FMT_SINT,        // signed integer format (sampled as integer)
    PL_FMT_FLOAT,       // (signed) float formats, any bit size
    PL_FMT_TYPE_COUNT,
};

enum pl_fmt_caps {
    PL_FMT_CAP_SAMPLEABLE    = 1 << 0,  // may be sampled from (PL_DESC_SAMPLED_TEX)
    PL_FMT_CAP_STORABLE      = 1 << 1,  // may be used as storage image (PL_DESC_STORAGE_IMG)
    PL_FMT_CAP_LINEAR        = 1 << 2,  // may be linearly sampled from (PL_TEX_SAMPLE_LINEAR)
    PL_FMT_CAP_RENDERABLE    = 1 << 3,  // may be rendered to (pl_pass_params.target_fmt)
    PL_FMT_CAP_BLENDABLE     = 1 << 4,  // may be blended to (pl_pass_params.enable_blend)
    PL_FMT_CAP_BLITTABLE     = 1 << 5,  // may be blitted from/to (pl_tex_blit)
    PL_FMT_CAP_VERTEX        = 1 << 6,  // may be used as a vertex attribute
    PL_FMT_CAP_TEXEL_UNIFORM = 1 << 7,  // may be used as a texel uniform buffer
    PL_FMT_CAP_TEXEL_STORAGE = 1 << 8,  // may be used as a texel storage buffer
    PL_FMT_CAP_HOST_READABLE = 1 << 9,  // may be used with `host_readable` textures
    PL_FMT_CAP_READWRITE     = 1 << 10, // may be used with PL_DESC_ACCESS_READWRITE

    // Notes:
    //  - PL_FMT_CAP_LINEAR also implies PL_FMT_CAP_SAMPLEABLE
    //  - PL_FMT_CAP_STORABLE also implies `pl_gpu.glsl.compute`
    //  - PL_FMT_CAP_BLENDABLE implies PL_FMT_CAP_RENDERABLE
    //  - PL_FMT_CAP_VERTEX implies that the format is non-opaque
    //  - PL_FMT_CAP_HOST_READABLE implies that the format is non-opaque
};

struct pl_fmt_plane {
    // Underlying format of this particular sub-plane. This describes the
    // components, texel size and host representation for the purpose of
    // e.g. transfers, blits, and sampling.
    pl_fmt format;

    // X/Y subsampling shift factor for this plane.
    uint8_t shift_x, shift_y;
};

// Structure describing a texel/vertex format.
struct pl_fmt_t {
    const char *name;       // symbolic name for this format (e.g. rgba32f)
    uint64_t signature;     // unique but stable signature (for pass reusability)

    enum pl_fmt_type type;  // the format's data type and interpretation
    enum pl_fmt_caps caps;  // the features supported by this format
    int num_components;     // number of components for this format
    int component_depth[4]; // meaningful bits per component, texture precision
    size_t internal_size;   // internal texel size (for blit compatibility)

    // For planar formats, this provides a description of each sub-plane.
    //
    // Note on planar formats: Planar formats are always opaque and typically
    // support only a limited subset of capabilities (or none at all). Access
    // should be done via sub-planes. (See `pl_tex.planes`)
    struct pl_fmt_plane planes[4];
    int num_planes;         // or 0 for non-planar textures

    // This controls the relationship between the data as seen by the host and
    // the way it's interpreted by the texture. The host representation is
    // always tightly packed (no padding bits in between each component).
    //
    // This representation assumes little endian ordering, i.e. components
    // being ordered from LSB to MSB in memory. Note that for oddly packed
    // formats like rgb10a2 or rgb565, this is inconsistent with the naming.
    // (That is to say, rgb565 has sample order {2, 1, 0} under this convention
    // - because rgb565 treats the R channel as the *most* significant bits)
    //
    // If `opaque` is true, then there's no meaningful correspondence between
    // the two, and all of the remaining fields in this section are unset.
    //
    // If `emulated` is true, then this format doesn't actually exist on the
    // GPU as an uploadable texture format - and any apparent support is being
    // emulated (typically using compute shaders in the upload path).
    bool opaque;
    bool emulated;
    size_t texel_size;   // total size in bytes per texel
    size_t texel_align;  // texel alignment requirements (bytes)
    int host_bits[4];    // number of meaningful bits in host memory
    int sample_order[4]; // sampled index for each component, e.g.
                         // {2, 1, 0, 3} for BGRA textures

    // For sampleable formats, this bool indicates whether or not the format
    // is compatible with `textureGather()`
    bool gatherable;

    // If usable as a vertex or texel buffer format, this gives the GLSL type
    // corresponding to the data. (e.g. vec4)
    const char *glsl_type;

    // If usable as a storage image or texel storage buffer
    // (PL_FMT_CAP_STORABLE / PL_FMT_CAP_TEXEL_STORAGE), this gives the GLSL
    // texel format corresponding to the format (e.g. rgba16ui), if any. This
    // field may be NULL, in which case the format modifier may be left
    // unspecified.
    const char *glsl_format;

    // If available, this gives the fourcc associated with the host
    // representation. In particular, this is intended for use with
    // PL_HANDLE_DMA_BUF, where this field will match the DRM format from
    // <drm_fourcc.h>. May be 0, for formats without matching DRM fourcc.
    uint32_t fourcc;

    // If `fourcc` is set, this contains the list of supported drm format
    // modifiers for this format.
    const uint64_t *modifiers;
    int num_modifiers;
};

// Returns whether or not a pl_fmt's components are ordered sequentially
// in memory in the order RGBA.
PL_API bool pl_fmt_is_ordered(pl_fmt fmt);

// Returns whether or not a pl_fmt is sampled as a float (e.g. UNORM)
PL_API bool pl_fmt_is_float(pl_fmt fmt);

// Returns whether or not a pl_fmt supports a given DRM modifier.
PL_API bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier);

// Helper function to find a format with a given number of components and
// minimum effective precision per component. If `host_bits` is set, then the
// format will always be non-opaque, unpadded, ordered and have exactly this
// bit depth for each component. Finally, all `caps` must be supported.
PL_API pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components,
                          int min_depth, int host_bits, enum pl_fmt_caps caps);

// Finds a vertex format for a given configuration. The resulting vertex will
// have a component depth equivalent to the sizeof() of the corresponding host
// type. (e.g. PL_FMT_FLOAT will always have sizeof(float))
PL_API pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components);

// Find a format based on its name.
PL_API pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name);

// Find a format based on its fourcc.
PL_API pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc);
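
// For illustration, finding a linearly sampleable 8-bit RGBA format might
// look like this (sketch; error handling is up to the caller):
//
//   pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 4, 8, 8,
//                            PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR);
//   if (!fmt)
//       return; // no suitable format on this GPU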

// A generic 'timer query' object. These can be used to measure an
// approximation of the GPU execution time of a given operation. Due to the
// highly asynchronous nature of GPUs, the actual results of any individual
// timer query may be delayed by quite a bit. As such, users should avoid
// trying to pair any particular GPU command with any particular timer query
// result, and only reuse `pl_timer` objects with identical operations. The
// results of timer queries are guaranteed to be in-order, but individual
// queries may be dropped, and some operations might not record timer results
// at all. (For example, if the underlying hardware does not support timer
// queries for a given operation type)
//
// Thread-safety: Unsafe
typedef struct pl_timer_t *pl_timer;

// Creates a new timer object. This may return NULL, for example if the
// implementation does not support timers, but since passing NULL to
// `pl_timer_destroy` and `pl_timer_query` is safe, users generally need not
// concern themselves with handling this.
PL_API pl_timer pl_timer_create(pl_gpu gpu);
PL_API void pl_timer_destroy(pl_gpu gpu, pl_timer *);

// Queries any results that have been measured since the last execution of
// `pl_timer_query`. There may be more than one result, in which case the user
// should simply call the function again to get the subsequent values. This
// function returns a value of 0 in the event that there are no more
// unprocessed results.
//
// The results are reported in nanoseconds, but the actual precision of the
// timestamp queries may be significantly lower.
//
// Note: Results do not queue up indefinitely. Generally, the implementation
// will only keep track of a small, fixed number of results internally. Make
// sure to include this function as part of your main rendering loop to process
// all of its results, or older results will be overwritten by newer ones.
PL_API uint64_t pl_timer_query(pl_gpu gpu, pl_timer);
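
// A sketch of the intended render loop integration; `update_average` is a
// hypothetical user-side helper, not part of this API:
//
//   pl_timer timer = pl_timer_create(gpu);
//   while (rendering) {
//       // ... dispatch GPU work with `timer` attached ...
//       uint64_t ns;
//       while ((ns = pl_timer_query(gpu, timer)))
//           avg_ns = update_average(avg_ns, ns);
//   }
//   pl_timer_destroy(gpu, &timer);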

enum pl_buf_mem_type {
    PL_BUF_MEM_AUTO = 0, // use whatever seems most appropriate
    PL_BUF_MEM_HOST,     // try allocating from host memory (RAM)
    PL_BUF_MEM_DEVICE,   // try allocating from device memory (VRAM)
    PL_BUF_MEM_TYPE_COUNT,

    // Note: This distinction only matters for discrete GPUs
};

// Structure describing a buffer.
struct pl_buf_params {
    size_t size;        // size in bytes (must be <= `pl_gpu_limits.max_buf_size`)
    bool host_writable; // contents may be updated via pl_buf_write()
    bool host_readable; // contents may be read back via pl_buf_read()
    bool host_mapped;   // create a persistent, RW mapping (pl_buf.data)

    // May be used as PL_DESC_BUF_UNIFORM or PL_DESC_BUF_TEXEL_UNIFORM.
    // Requires `size <= pl_gpu_limits.max_ubo_size`
    bool uniform;

    // May be used as PL_DESC_BUF_STORAGE or PL_DESC_BUF_TEXEL_STORAGE.
    // Requires `size <= pl_gpu_limits.max_ssbo_size`
    bool storable;

    // May be used as the source of vertex data for `pl_pass_run`.
    bool drawable;

    // Provide a hint for the memory type you want to use when allocating
    // this buffer's memory.
    //
    // Note: Restrictions may apply depending on the usage flags. In
    // particular, allocating buffers with `uniform` or `storable` enabled from
    // non-device memory will almost surely fail.
    enum pl_buf_mem_type memory_type;

    // Setting this to a format with the `PL_FMT_CAP_TEXEL_*` capability allows
    // this buffer to be used as a `PL_DESC_BUF_TEXEL_*`, when `uniform` and
    // `storable` are respectively also enabled.
    pl_fmt format;

    // At most one of `export_handle` and `import_handle` can be set for a
    // buffer.

    // Setting this indicates that the memory backing this buffer should be
    // shared with external APIs. If so, this must be exactly *one* of
    // `pl_gpu.export_caps.buf`.
    enum pl_handle_type export_handle;

    // Setting this indicates that the memory backing this buffer will be
    // imported from an external API. If so, this must be exactly *one* of
    // `pl_gpu.import_caps.buf`.
    enum pl_handle_type import_handle;

    // If the shared memory is being imported, the import handle must be
    // specified here. Otherwise, this is ignored.
    struct pl_shared_mem shared_mem;

    // If non-NULL, the buffer will be created with these contents. Otherwise,
    // the initial data is undefined. Using this does *not* require setting
    // host_writable.
    const void *initial_data;

    // Arbitrary user data. libplacebo does not use this at all.
    void *user_data;

    // Arbitrary identifying tag. Used only for debugging purposes.
    pl_debug_tag debug_tag;
};

#define pl_buf_params(...) (&(struct pl_buf_params) { \
        .debug_tag = PL_DEBUG_TAG,                    \
        __VA_ARGS__                                   \
    })

// A generic buffer, which can be used for multiple purposes (texture transfer,
// storage buffer, uniform buffer, etc.)
//
// Note on efficiency: A pl_buf does not necessarily represent a true "buffer"
// object on the underlying graphics API. It may also refer to a sub-slice of
// a larger buffer, depending on the implementation details of the GPU. The
// bottom line is that users do not need to worry about the efficiency of using
// many small pl_buf objects. Having many small pl_bufs, even lots of few-byte
// vertex buffers, is designed to be completely fine.
//
// Thread-safety: Unsafe
typedef const struct pl_buf_t {
    struct pl_buf_params params;
    uint8_t *data; // for persistently mapped buffers, points to the first byte

    // If `params.handle_type` is set, this structure references the shared
    // memory backing this buffer, via the requested handle type.
    //
    // While this buffer is not in an "exported" state, the contents of the
    // memory are undefined. (See: `pl_buf_export`)
    struct pl_shared_mem shared_mem;
} *pl_buf;

// Create a buffer. The type of buffer depends on the parameters. The buffer
// parameters must adhere to the restrictions imposed by the pl_gpu_limits.
// Returns NULL on failure.
//
// For buffers with shared memory, the buffer is considered to be in an
// "exported" state by default, and may be used directly by the external API
// after being created (until the first libplacebo operation on the buffer).
PL_API pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params);
PL_API void pl_buf_destroy(pl_gpu gpu, pl_buf *buf);
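
// For instance, a small uniform buffer with initial contents might be created
// like this (sketch; `my_uniforms` is assumed to be a caller-side struct):
//
//   pl_buf buf = pl_buf_create(gpu, pl_buf_params(
//       .size         = sizeof(my_uniforms),
//       .uniform      = true,
//       .initial_data = &my_uniforms,
//   ));
//   if (!buf)
//       return; // handle failure
//   // ... use the buffer ...
//   pl_buf_destroy(gpu, &buf);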

// This behaves like `pl_buf_create`, but if the buffer already exists and has
// incompatible parameters, it will get destroyed first. A buffer is considered
// "compatible" if it has the same buffer type and texel format, a size greater
// than or equal to the requested size, and it has a superset of the features
// the user requested. After this operation, the contents of the buffer are
// undefined.
//
// Note: Due to its unpredictability, it's not allowed to use this with
// `params->initial_data` being set. Similarly, it's not allowed on a buffer
// with `params->export_handle`, since this may invalidate the corresponding
// external API's handle. Conversely, it *is* allowed on a buffer with
// `params->host_mapped`, and the corresponding `buf->data` pointer *may*
// change as a result of doing so.
//
// Note: If the `user_data` alone changes, this does not trigger a buffer
// recreation. In theory, this can be used to detect when the buffer ended
// up being recreated.
PL_API bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params);

// Update the contents of a buffer, starting at a given offset (must be a
// multiple of 4) and up to a given size, with the contents of *data.
//
// This function will block until the buffer is no longer in use. Use
// `pl_buf_poll` to perform non-blocking queries of buffer availability.
//
// Note: This function can incur synchronization overhead, so it shouldn't be
// used in tight loops. If you do need to loop (e.g. to perform a strided
// write), consider using host-mapped buffers, or assembling the data in host
// RAM first, before calling this function.
PL_API void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset,
                         const void *data, size_t size);

// Read back the contents of a buffer, starting at a given offset, storing the
// data into *dest. Returns whether successful.
//
// This function will block until the buffer is no longer in use. Use
// `pl_buf_poll` to perform non-blocking queries of buffer availability.
PL_API bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset,
                        void *dest, size_t size);

// Copy `size` bytes from one buffer to another, reading from and writing to
// the respective offsets.
PL_API void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset,
                        pl_buf src, size_t src_offset, size_t size);
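
// A sketched round trip through a host-accessible buffer (`src_data` and
// `dst_data` are assumed caller-side arrays of at least 4096 bytes):
//
//   pl_buf buf = pl_buf_create(gpu, pl_buf_params(
//       .size          = 4096,
//       .host_writable = true,
//       .host_readable = true,
//   ));
//   pl_buf_write(gpu, buf, 0, src_data, 4096);
//   // ... GPU operations involving `buf` ...
//   pl_buf_read(gpu, buf, 0, dst_data, 4096); // blocks until idle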

// Initiates a buffer export operation, allowing a buffer to be accessed by an
// external API. This is only valid for buffers with `params.handle_type`.
// Calling this twice in a row is a harmless no-op. Returns whether successful.
//
// There is no corresponding "buffer import" operation; the next libplacebo
// operation that touches the buffer (e.g. pl_tex_upload, but also pl_buf_write
// and pl_buf_read) will implicitly import the buffer back to libplacebo. Users
// must ensure that all pending operations made by the external API are fully
// completed before using it in libplacebo again. (Otherwise, the behaviour
// is undefined)
//
// Please note that this function returning does not mean the memory is
// immediately available as such. In general, it will mark a buffer as "in use"
// in the same way any other buffer operation would, and it is the user's
// responsibility to wait until `pl_buf_poll` returns false before accessing
// the memory from the external API.
//
// In terms of the access performed by this operation, it is not considered a
// "read" or "write" and therefore does not technically conflict with reads or
// writes to the buffer performed by the host (via mapped memory - any use of
// `pl_buf_read` or `pl_buf_write` would defeat the purpose of the export).
// However, restrictions made by the external API may apply that prevent this.
//
// The recommended use pattern is something like this:
//
//   while (loop) {
//       pl_buf buf = get_free_buffer(); // or block on pl_buf_poll
//       // write to the buffer using the external API
//       pl_tex_upload(gpu, /* ... buf ... */); // implicitly imports
//       pl_buf_export(gpu, buf);
//   }
//
// i.e. perform an external API operation, then use and immediately export the
// buffer in libplacebo, and finally wait until `pl_buf_poll` is false before
// re-using it in the external API. (Or get a new buffer in the meantime)
PL_API bool pl_buf_export(pl_gpu gpu, pl_buf buf);

// Returns whether or not a buffer is currently "in use". This can either be
// because of a pending read operation, a pending write operation or a pending
// buffer export operation. Any access to the buffer by external APIs or via
// the host pointer (for host-mapped buffers) is forbidden while a buffer is
// "in use". The only exception to this rule is multiple reads, for example
// reading from a buffer with `pl_tex_upload` while simultaneously reading from
// it using mapped memory.
//
// The `timeout`, specified in nanoseconds, indicates how long to block for
// before returning. If set to 0, this function will never block, and only
// returns the current status of the buffer. The actual precision of the
// timeout may be significantly longer than one nanosecond, and has no upper
// bound. This function does not provide hard latency guarantees. This function
// may also return at any time, even if the buffer is still in use. If the user
// wishes to block until the buffer is definitely no longer in use, the
// recommended usage is:
//
//   while (pl_buf_poll(gpu, buf, UINT64_MAX))
//       ; // do nothing
//
// Note: libplacebo operations on buffers are always internally synchronized,
// so this is only needed for host-mapped or externally exported buffers.
// However, it may be used to do non-blocking queries before calling blocking
// functions such as `pl_buf_read`.
//
// Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly
// synchronized, meaning it can safely be called on a `pl_buf` that is in use
// by another thread.
PL_API bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout);

enum pl_tex_sample_mode {
    PL_TEX_SAMPLE_NEAREST, // nearest neighbour sampling
    PL_TEX_SAMPLE_LINEAR,  // linear filtering, requires PL_FMT_CAP_LINEAR
    PL_TEX_SAMPLE_MODE_COUNT,
};

enum pl_tex_address_mode {
    PL_TEX_ADDRESS_CLAMP,  // clamp the nearest edge texel
    PL_TEX_ADDRESS_REPEAT, // repeat (tile) the texture
    PL_TEX_ADDRESS_MIRROR, // repeat (mirror) the texture
    PL_TEX_ADDRESS_MODE_COUNT,
};

// Structure describing a texture.
struct pl_tex_params {
    int w, h, d; // physical dimension; unused dimensions must be 0
    pl_fmt format;

    // The following bools describe what operations can be performed. The
    // corresponding pl_fmt capability must be set for every enabled
    // operation type.
    //
    // Note: For planar formats, it is also possible to set capabilities only
    // supported by sub-planes. In this case, the corresponding functionality
    // will be available for the sub-plane, but not the planar texture itself.
    bool sampleable;    // usable as a PL_DESC_SAMPLED_TEX
    bool renderable;    // usable as a render target (pl_pass_run)
                        // (must only be used with 2D textures)
    bool storable;      // usable as a storage image (PL_DESC_IMG_*)
    bool blit_src;      // usable as a blit source
    bool blit_dst;      // usable as a blit destination
    bool host_writable; // may be updated with pl_tex_upload()
    bool host_readable; // may be fetched with pl_tex_download()

    // Note: For `blit_src`, `blit_dst`, the texture must either be
    // 2-dimensional or `pl_gpu_limits.blittable_1d_3d` must be set.

    // At most one of `export_handle` and `import_handle` can be set for a
    // texture.

    // Setting this indicates that the memory backing this texture should be
    // shared with external APIs. If so, this must be exactly *one* of
    // `pl_gpu.export_caps.tex`.
    enum pl_handle_type export_handle;

    // Setting this indicates that the memory backing this texture will be
    // imported from an external API. If so, this must be exactly *one* of
    // `pl_gpu.import_caps.tex`. Mutually exclusive with `initial_data`.
    enum pl_handle_type import_handle;

    // If the shared memory is being imported, the import handle must be
    // specified here. Otherwise, this is ignored.
    struct pl_shared_mem shared_mem;

    // If non-NULL, the texture will be created with these contents (tightly
    // packed). Using this does *not* require setting host_writable. Otherwise,
    // the initial data is undefined. Mutually exclusive with `import_handle`.
    const void *initial_data;

    // Arbitrary user data. libplacebo does not use this at all.
    void *user_data;

    // Arbitrary identifying tag. Used only for debugging purposes.
    pl_debug_tag debug_tag;
};

#define pl_tex_params(...) (&(struct pl_tex_params) { \
        .debug_tag = PL_DEBUG_TAG,                    \
        __VA_ARGS__                                   \
    })

static inline int pl_tex_params_dimension(const struct pl_tex_params params)
{
    return params.d ? 3 : params.h ? 2 : 1;
}

enum pl_sampler_type {
    PL_SAMPLER_NORMAL,   // gsampler2D, gsampler3D etc.
    PL_SAMPLER_RECT,     // gsampler2DRect
    PL_SAMPLER_EXTERNAL, // gsamplerExternalOES
    PL_SAMPLER_TYPE_COUNT,
};

// Conflates the following typical GPU API concepts:
// - texture itself
// - sampler state
// - staging buffers for texture upload
// - framebuffer objects
// - wrappers for swapchain framebuffers
// - synchronization needed for upload/rendering/etc.
//
// Essentially a pl_tex can be anything ranging from a normal texture, a wrapped
// external/real framebuffer, a framebuffer object + texture pair, a mapped
// texture (via pl_hwdec), or other sorts of things that can be sampled from
// and/or rendered to.
//
// Thread-safety: Unsafe
typedef const struct pl_tex_t *pl_tex;
struct pl_tex_t {
    struct pl_tex_params params;

    // If `params.format` is a planar format, this contains `pl_tex` handles
    // encapsulating individual texture planes. Conversely, if this is a
    // sub-plane of a planar texture, `parent` points to the planar texture.
    //
    // Note: Calling `pl_tex_destroy` on sub-planes is undefined behavior.
    pl_tex planes[4];
    pl_tex parent;

    // If `params.export_handle` is set, this structure references the shared
    // memory backing this texture, via the requested handle type.
    //
    // While this texture is not in an "exported" state, the contents of the
    // memory are undefined. (See: `pl_tex_export`)
    //
    // Note: Due to vulkan driver limitations, `shared_mem.drm_format_mod` will
    // currently always be set to DRM_FORMAT_MOD_INVALID. No guarantee can be
    // made about the cross-driver compatibility of textures exported this way.
    struct pl_shared_mem shared_mem;

    // If `params.sampleable` is true, this indicates the correct sampler type
    // to use when sampling from this texture.
    enum pl_sampler_type sampler_type;
};

// Create a texture (with undefined contents). Returns NULL on failure. This is
// assumed to be an expensive/rare operation, and may need to perform memory
// allocation or framebuffer creation.
PL_API pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params);
PL_API void pl_tex_destroy(pl_gpu gpu, pl_tex *tex);
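
// For example, a 2D sampleable + renderable texture might be created like
// this (sketch, assuming `fmt` was found via `pl_find_fmt` with at least
// PL_FMT_CAP_SAMPLEABLE and PL_FMT_CAP_RENDERABLE):
//
//   pl_tex tex = pl_tex_create(gpu, pl_tex_params(
//       .w          = 1920,
//       .h          = 1080,
//       .format     = fmt,
//       .sampleable = true,
//       .renderable = true,
//   ));
//   if (!tex)
//       return; // handle failure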

// This works like `pl_tex_create`, but if the texture already exists and has
// incompatible texture parameters, it will get destroyed first. A texture is
// considered "compatible" if it has the same texture format and sample/address
// mode and it supports a superset of the features the user requested.
//
// Even if the texture is not recreated, calling this function will still
// invalidate the contents of the texture. (Note: Because of this,
// `initial_data` may not be used with `pl_tex_recreate`. Doing so is an error)
//
// Note: If the `user_data` alone changes, this does not trigger a texture
// recreation. In theory, this can be used to detect when the texture ended
// up being recreated.
PL_API bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params);

// Invalidates the contents of a texture. After this, the contents are fully
// undefined.
PL_API void pl_tex_invalidate(pl_gpu gpu, pl_tex tex);

union pl_clear_color {
    float f[4];
    int32_t i[4];
    uint32_t u[4];
};

// Clear the dst texture with the given color (rgba). This is functionally
// identical to a blit operation, which means `dst->params.blit_dst` must be
// set.
PL_API void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color);

// Wrapper for `pl_tex_clear_ex` which only works for floating point textures.
PL_API void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]);

struct pl_tex_blit_params {
    // The texture to blit from. Must have `params.blit_src` enabled.
    pl_tex src;

    // The texture to blit to. Must have `params.blit_dst` enabled, and a
    // format that is loosely compatible with `src`. This essentially means
    // that they must have the same `internal_size`. Additionally, UINT
    // textures can only be blitted to other UINT textures, and SINT textures
    // can only be blitted to other SINT textures.
    pl_tex dst;

    // The region of the source texture to blit. Must be within the texture
    // bounds of `src`. May be flipped. (Optional)
    pl_rect3d src_rc;

    // The region of the destination texture to blit into. Must be within the
    // texture bounds of `dst`. May be flipped. Areas outside of `dst_rc` in
    // `dst` are preserved. (Optional)
    pl_rect3d dst_rc;

    // If `src_rc` and `dst_rc` have different sizes, the texture will be
    // scaled using the given texture sampling mode.
    enum pl_tex_sample_mode sample_mode;
};

#define pl_tex_blit_params(...) (&(struct pl_tex_blit_params) { __VA_ARGS__ })

// Copy a sub-rectangle from one texture to another.
PL_API void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params);
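
// A sketched scaling blit between two 2D textures, where `src` has `blit_src`
// and `dst` has `blit_dst` set; leaving the optional rects unset covers the
// full texture dimensions:
//
//   pl_tex_blit(gpu, pl_tex_blit_params(
//       .src         = src,
//       .dst         = dst,
//       .sample_mode = PL_TEX_SAMPLE_LINEAR, // format needs PL_FMT_CAP_LINEAR
//   ));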

// Structure describing a texture transfer operation.
struct pl_tex_transfer_params {
    // Texture to transfer to/from. Depending on the type of the operation,
    // this must have params.host_writable (uploads) or params.host_readable
    // (downloads) set, respectively.
    pl_tex tex;

    // Note: Superfluous parameters are ignored, i.e. for a 1D texture, the y
    // and z fields of `rc`, as well as the corresponding pitches, are ignored.
    // In all other cases, the pitch must be large enough to contain the
    // corresponding dimension of `rc`, and the `rc` must be normalized and
    // fully contained within the image dimensions. Missing fields in the `rc`
    // are inferred from the image size. If unset, the pitch is inferred
    // from `rc` (that is, it's assumed that the data is tightly packed in the
    // buffer). Otherwise, `row_pitch` *must* be a multiple of
    // `tex->params.format->texel_align`, and `depth_pitch` must be a multiple
    // of `row_pitch`.
    pl_rect3d rc;       // region of the texture to transfer
    size_t row_pitch;   // the number of bytes separating image rows
    size_t depth_pitch; // the number of bytes separating image planes

    // An optional timer to report the approximate duration of the texture
    // transfer to. Note that this is only an approximation, since the actual
    // texture transfer may happen entirely in the background (in particular,
    // for implementations with asynchronous transfer capabilities). It's also
    // not guaranteed that all GPUs support this.
    pl_timer timer;

    // An optional callback to fire after the operation completes. If this is
    // specified, then the operation is performed asynchronously. Note that
    // transfers to/from buffers are always asynchronous, even without this
    // field, so it's more useful for `ptr` transfers. (Though it can still be
    // helpful to avoid having to manually poll buffers all the time)
    //
    // When this is *not* specified, uploads from `ptr` are still asynchronous
    // but require a host memcpy, while downloads from `ptr` are blocking. As
    // such, it's recommended to always try using asynchronous texture
    // transfers wherever possible.
    //
    // Note: Requires `pl_gpu_limits.callbacks`
    //
    // Note: Callbacks are implicitly synchronized, meaning that callbacks are
    // guaranteed to never execute concurrently with other callbacks. However,
    // they may execute from any thread that the `pl_gpu` is used on.
    void (*callback)(void *priv);
    void *priv; // arbitrary user data

    // For the data source/target of a transfer operation, there are two valid
    // options:
    //
    // 1. Transferring to/from a buffer: (requires `pl_gpu_limits.buf_transfer`)
    pl_buf buf;        // buffer to use
    size_t buf_offset; // offset of data within buffer, should be a
                       // multiple of `tex->params.format->texel_size`
    // 2. Transferring to/from host memory directly:
    void *ptr;      // address of data
    bool no_import; // always use memcpy, bypassing host ptr import

    // Note: The contents of the memory region / buffer must exactly match the
    // texture format; i.e. there is no explicit conversion between formats.
};

#define pl_tex_transfer_params(...) (&(struct pl_tex_transfer_params) { __VA_ARGS__ })

// Upload data to a texture. Returns whether successful.
PL_API bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params);

// Download data from a texture. Returns whether successful.
PL_API bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params);
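
// For illustration, a tightly packed upload from host memory might look like
// this (sketch; `tex` must have `host_writable` set and `pixels` is assumed
// to match the texture's format and dimensions):
//
//   bool ok = pl_tex_upload(gpu, pl_tex_transfer_params(
//       .tex = tex,
//       .ptr = pixels,
//       // `rc` and pitches left unset: whole texture, tightly packed
//   ));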

// Returns whether or not a texture is currently "in use". This can either be
// because of a pending read operation, a pending write operation or a pending
// texture export operation. Note that this function's usefulness is extremely
// limited under ordinary circumstances. In practically all cases, textures do
// not need to be directly synchronized by the user, except when interfacing
// with external libraries. This function should NOT, however, be used as a
// crutch to avoid having to implement semaphore-based synchronization. Use
// the API-specific functions such as `pl_vulkan_hold/release` for that.
//
// A good example of a use case in which this function is required is when
// interoperating with external memory management that needs to know when an
// imported texture is safe to free / reclaim internally, in which case
// semaphores are insufficient because memory management is a host operation.
//
// The `timeout`, specified in nanoseconds, indicates how long to block for
// before returning. If set to 0, this function will never block, and only
// returns the current status of the texture. The actual precision of the
// timeout may be significantly longer than one nanosecond, and has no upper
// bound. This function does not provide hard latency guarantees. This function
// may also return at any time, even if the texture is still in use. If the
// user wishes to block until the texture is definitely no longer in use, the
// recommended usage is:
//
//   while (pl_tex_poll(gpu, tex, UINT64_MAX))
//       ; // do nothing
//
// Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly
// synchronized, meaning it can safely be called on a `pl_tex` that is in use
// by another thread.
PL_API bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout);

// Data type of a shader input variable (e.g. uniform, or UBO member)
enum pl_var_type {
    PL_VAR_INVALID = 0,
    PL_VAR_SINT,  // C: int           GLSL: int/ivec
    PL_VAR_UINT,  // C: unsigned int  GLSL: uint/uvec
    PL_VAR_FLOAT, // C: float         GLSL: float/vec/mat
    PL_VAR_TYPE_COUNT
};

// Returns the host size (in bytes) of a pl_var_type.
PL_API size_t pl_var_type_size(enum pl_var_type type);

// Represents a shader input variable (concrete data, e.g. vector, matrix)
struct pl_var {
    const char *name; // name as used in the shader
    enum pl_var_type type;
    // The total number of values is given by dim_v * dim_m. For example, a
    // vec2 would have dim_v = 2 and dim_m = 1. A mat3x4 would have dim_v = 4
    // and dim_m = 3.
    int dim_v; // vector dimension
    int dim_m; // matrix dimension (number of columns, see below)
    int dim_a; // array dimension
};

// Helper functions for constructing the most common pl_vars, with names
// matching their GLSL built-in types.
PL_API struct pl_var pl_var_float(const char *name);
PL_API struct pl_var pl_var_vec2(const char *name);
PL_API struct pl_var pl_var_vec3(const char *name);
PL_API struct pl_var pl_var_vec4(const char *name);
PL_API struct pl_var pl_var_mat2(const char *name);
PL_API struct pl_var pl_var_mat2x3(const char *name);
PL_API struct pl_var pl_var_mat2x4(const char *name);
PL_API struct pl_var pl_var_mat3(const char *name);
PL_API struct pl_var pl_var_mat3x4(const char *name);
PL_API struct pl_var pl_var_mat4x2(const char *name);
PL_API struct pl_var pl_var_mat4x3(const char *name);
PL_API struct pl_var pl_var_mat4(const char *name);
PL_API struct pl_var pl_var_int(const char *name);
PL_API struct pl_var pl_var_ivec2(const char *name);
PL_API struct pl_var pl_var_ivec3(const char *name);
PL_API struct pl_var pl_var_ivec4(const char *name);
PL_API struct pl_var pl_var_uint(const char *name);
PL_API struct pl_var pl_var_uvec2(const char *name);
PL_API struct pl_var pl_var_uvec3(const char *name);
PL_API struct pl_var pl_var_uvec4(const char *name);

struct pl_named_var {
    const char *glsl_name;
    struct pl_var var;
};

// The same list as above, tagged by name and terminated with a {0} entry.
PL_API extern const struct pl_named_var pl_var_glsl_types[];

// Efficient helper function for performing a lookup in the above array.
// Returns NULL if the variable is not legal. Note that the array dimension is
// ignored, since it's usually part of the variable name and not the type name.
PL_API const char *pl_var_glsl_type_name(struct pl_var var);

// Converts a pl_fmt to an "equivalent" pl_var. Equivalent in this sense means
// that the pl_var's type will be the same as the vertex's sampled type (e.g.
// PL_FMT_UNORM gets turned into PL_VAR_FLOAT).
PL_API struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name);

// Describes the memory layout of a variable, relative to some starting location
// (typically the offset within a uniform/storage/pushconstant buffer)
//
// Note on matrices: All GPUs expect column major matrices, for both buffers and
// input variables. Care needs to be taken to avoid trying to use e.g. a
// pl_matrix3x3 (which is row major) directly as a pl_var_update.data!
//
// In terms of the host layout, a column-major matrix (e.g. matCxR) with C
// columns and R rows is treated like an array vecR[C]. The `stride` here refers
// to the separation between these array elements, i.e. the separation between
// the individual columns.
//
// Visualization of a mat4x3:
//
//       0 1 2 3  <- columns
//   0 [ (A) (D) (G) (J) ]
//   1 [ (B) (E) (H) (K) ]
//   2 [ (C) (F) (I) (L) ]
//   ^ rows
//
// Layout in GPU memory: (stride=16, size=60)
//
//   [ A B C ] X <- column 0, offset +0
//   [ D E F ] X <- column 1, offset +16
//   [ G H I ] X <- column 2, offset +32
//   [ J K L ]   <- column 3, offset +48
//
// Note the lack of padding on the last column in this example.
// In general: size <= stride * dim_m
//
// C representation: (stride=12, size=48)
//
//   { { A, B, C },
//     { D, E, F },
//     { G, H, I },
//     { J, K, L } }
//
// Note on arrays: `stride` represents both the stride between elements of a
// matrix, and the stride between elements of an array. That is, there is no
// distinction between the columns of a matrix and the rows of an array. For
// example, a mat2[10] and a vec2[20] share the same pl_var_layout - the stride
// would be sizeof(vec2) and the size would be sizeof(vec2) * 2 * 10.
//
// For non-array/matrix types, `stride` is equal to `size`.

struct pl_var_layout {
    size_t offset; // the starting offset of the first byte
    size_t stride; // the delta between two elements of an array/matrix
    size_t size;   // the total size of the input
};

// Returns the host layout of an input variable as required for a
// tightly-packed, byte-aligned C data type, given a starting offset.
PL_API struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var);

// Returns the GLSL std140 layout of an input variable given a current buffer
// offset, as required for a buffer descriptor of type PL_DESC_BUF_UNIFORM
//
// The normal way to use this function is when calculating the size and offset
// requirements of a uniform buffer in an incremental fashion, to calculate the
// new offset of the next variable in this buffer.
PL_API struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var);

// Returns the GLSL std430 layout of an input variable given a current buffer
// offset, as required for a buffer descriptor of type PL_DESC_BUF_STORAGE, and
// for push constants.
PL_API struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var);

// Convenience definitions / friendly names for these
#define pl_buf_uniform_layout pl_std140_layout
#define pl_buf_storage_layout pl_std430_layout
#define pl_push_constant_layout pl_std430_layout
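
// An incremental std140 size/offset calculation might be sketched as:
//
//   struct pl_var vars[] = { pl_var_mat4("mvp"), pl_var_vec4("color") };
//   size_t offset = 0;
//   for (int i = 0; i < 2; i++) {
//       struct pl_var_layout layout = pl_std140_layout(offset, &vars[i]);
//       offset = layout.offset + layout.size;
//   }
//   // `offset` now holds the required UBO size for these two variables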
1088 : :
1089 : : // Like memcpy, but copies bytes from `src` to `dst` in a manner governed by
1090 : : // the stride and size of `dst_layout` as well as `src_layout`. Also takes
1091 : : // into account the respective `offset`.
1092 : : PL_API void memcpy_layout(void *dst, struct pl_var_layout dst_layout,
1093 : : const void *src, struct pl_var_layout src_layout);
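: :
: : // For instance, to pack a tightly-laid-out host variable into its std140
: : // position inside a uniform buffer staging area (illustrative sketch;
: : // `ubo_data`, `host_data`, `buf_offset` and `var` are hypothetical):
: : //
: : //   struct pl_var_layout src = pl_var_host_layout(0, &var);
: : //   struct pl_var_layout dst = pl_std140_layout(buf_offset, &var);
: : //   memcpy_layout(ubo_data, dst, host_data, src);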
1094 : :
1095 : : // Represents a compile-time constant.
1096 : : struct pl_constant {
1097 : : enum pl_var_type type; // constant data type
1098 : : uint32_t id; // GLSL `constant_id`
1099 : : size_t offset; // byte offset in `constant_data`
1100 : : };
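: :
: : // Example of a single specialization constant (illustrative sketch; the
: : // name and value are hypothetical):
: :
: : //   static const uint32_t blur_radius = 4;
: : //   struct pl_constant constant = {
: : //       .type   = PL_VAR_UINT,
: : //       .id     = 0, // matches `layout(constant_id = 0)` in the shader
: : //       .offset = 0, // byte offset of `blur_radius` in `constant_data`
: : //   };
: : //   // in pl_pass_params: .constants = &constant, .num_constants = 1,
: : //   //                    .constant_data = (void *) &blur_radius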
1101 : :
1102 : : // Represents a vertex attribute.
1103 : : struct pl_vertex_attrib {
1104 : : const char *name; // name as used in the shader
1105 : : pl_fmt fmt; // data format (must have PL_FMT_CAP_VERTEX)
1106 : : size_t offset; // byte offset into the vertex struct
1107 : : int location; // vertex location (as used in the shader)
1108 : : };
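: :
: : // For example, a typical interleaved vertex struct and its matching
: : // attributes might look like this (illustrative sketch):
: :
: : //   struct vertex { float pos[2]; float uv[2]; };
: : //   struct pl_vertex_attrib attribs[] = {
: : //       { .name = "pos", .location = 0, .fmt = fmt_vec2,
: : //         .offset = offsetof(struct vertex, pos) },
: : //       { .name = "uv",  .location = 1, .fmt = fmt_vec2,
: : //         .offset = offsetof(struct vertex, uv) },
: : //   };
: : //   // where `fmt_vec2` is e.g. pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2)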
1109 : :
1110 : : // Returns an abstract namespace index for a given descriptor type. This will
1111 : : // always be a value >= 0 and < PL_DESC_TYPE_COUNT. Implementations can use
1112 : : // this to figure out which descriptors may share the same value of `binding`.
1113 : : // Bindings must only be unique for all descriptors within the same namespace.
1114 : : PL_API int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type);
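: :
: : // For example, two descriptors of different types may share the same
: : // `binding` if (and only if) their namespaces differ (sketch):
: :
: : //   bool can_share = pl_desc_namespace(gpu, PL_DESC_SAMPLED_TEX) !=
: : //                    pl_desc_namespace(gpu, PL_DESC_BUF_UNIFORM);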
1115 : :
1116 : : // Access mode of a shader input descriptor.
1117 : : enum pl_desc_access {
1118 : : PL_DESC_ACCESS_READWRITE,
1119 : : PL_DESC_ACCESS_READONLY,
1120 : : PL_DESC_ACCESS_WRITEONLY,
1121 : : PL_DESC_ACCESS_COUNT,
1122 : : };
1123 : :
1124 : : // Returns the GLSL syntax for a given access mode (e.g. "readonly").
1125 : : PL_API const char *pl_desc_access_glsl_name(enum pl_desc_access mode);
1126 : :
1127 : : // Represents a shader descriptor (e.g. texture or buffer binding)
1128 : : struct pl_desc {
1129 : : const char *name; // name as used in the shader
1130 : : enum pl_desc_type type;
1131 : :
1132 : : // The binding of this descriptor, as used in the shader. All bindings
1133 : : // within a namespace must be unique. (see: pl_desc_namespace)
1134 : : int binding;
1135 : :
1136 : : // For storage images and storage buffers, this can be used to restrict
1137 : : // the type of access that may be performed on the descriptor. Ignored for
1138 : : // the other descriptor types (uniform buffers and sampled textures are
1139 : : // always read-only).
1140 : : enum pl_desc_access access;
1141 : : };
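: :
: : // Example descriptor for a sampled texture (illustrative sketch; the name
: : // is hypothetical):
: :
: : //   struct pl_desc desc = {
: : //       .name    = "src_tex",
: : //       .type    = PL_DESC_SAMPLED_TEX,
: : //       .binding = 0, // unique within its namespace (see pl_desc_namespace)
: : //   };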
1142 : :
1143 : : // Framebuffer blending mode (for raster passes)
1144 : : enum pl_blend_mode {
1145 : : PL_BLEND_ZERO,
1146 : : PL_BLEND_ONE,
1147 : : PL_BLEND_SRC_ALPHA,
1148 : : PL_BLEND_ONE_MINUS_SRC_ALPHA,
1149 : : PL_BLEND_MODE_COUNT,
1150 : : };
1151 : :
1152 : : struct pl_blend_params {
1153 : : enum pl_blend_mode src_rgb;
1154 : : enum pl_blend_mode dst_rgb;
1155 : : enum pl_blend_mode src_alpha;
1156 : : enum pl_blend_mode dst_alpha;
1157 : : };
1158 : :
1159 : : #define pl_blend_params(...) (&(struct pl_blend_params) { __VA_ARGS__ })
1160 : :
1161 : : // Typical alpha compositing
1162 : : PL_API extern const struct pl_blend_params pl_alpha_overlay;
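: :
: : // For illustration, spelling out typical alpha compositing by hand might
: : // look like this (a sketch; see the library source for the authoritative
: : // definition of `pl_alpha_overlay`):
: :
: : //   const struct pl_blend_params *blend = pl_blend_params(
: : //       .src_rgb   = PL_BLEND_SRC_ALPHA,
: : //       .dst_rgb   = PL_BLEND_ONE_MINUS_SRC_ALPHA,
: : //       .src_alpha = PL_BLEND_ONE,
: : //       .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA,
: : //   );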
1163 : :
1164 : : enum pl_prim_type {
1165 : : PL_PRIM_TRIANGLE_LIST,
1166 : : PL_PRIM_TRIANGLE_STRIP,
1167 : : PL_PRIM_TYPE_COUNT,
1168 : : };
1169 : :
1170 : : enum pl_index_format {
1171 : : PL_INDEX_UINT16 = 0,
1172 : : PL_INDEX_UINT32,
1173 : : PL_INDEX_FORMAT_COUNT,
1174 : : };
1175 : :
1176 : : enum pl_pass_type {
1177 : : PL_PASS_INVALID = 0,
1178 : : PL_PASS_RASTER, // vertex+fragment shader
1179 : : PL_PASS_COMPUTE, // compute shader (requires `pl_gpu.glsl.compute`)
1180 : : PL_PASS_TYPE_COUNT,
1181 : : };
1182 : :
1183 : : // Description of a rendering pass. It conflates the following:
1184 : : // - GLSL shader(s) and their list of inputs
1185 : : // - target parameters (for raster passes)
1186 : : struct pl_pass_params {
1187 : : enum pl_pass_type type;
1188 : :
1189 : : // Input variables.
1190 : : struct pl_var *variables;
1191 : : int num_variables;
1192 : :
1193 : : // Input descriptors.
1194 : : struct pl_desc *descriptors;
1195 : : int num_descriptors;
1196 : :
1197 : : // Compile-time specialization constants.
1198 : : struct pl_constant *constants;
1199 : : int num_constants;
1200 : :
1201 : : // Initial data for the specialization constants. Optional. If NULL,
1202 : : // specialization constants receive the values from the shader text.
1203 : : void *constant_data;
1204 : :
1205 : : // Push constant region. Must be a multiple of 4 and <= limits.max_pushc_size
1206 : : size_t push_constants_size;
1207 : :
1208 : : // The shader text in GLSL. For PL_PASS_RASTER, this is interpreted
1209 : : // as a fragment shader. For PL_PASS_COMPUTE, this is interpreted as
1210 : : // a compute shader.
1211 : : const char *glsl_shader;
1212 : :
1213 : : // --- type==PL_PASS_RASTER only
1214 : :
1215 : : // Describes the interpretation and layout of the vertex data.
1216 : : enum pl_prim_type vertex_type;
1217 : : struct pl_vertex_attrib *vertex_attribs;
1218 : : int num_vertex_attribs;
1219 : : size_t vertex_stride; // must be a multiple of limits.align_vertex_stride
1220 : :
1221 : : // The vertex shader itself.
1222 : : const char *vertex_shader;
1223 : :
1224 : : // Target format. The format must support PL_FMT_CAP_RENDERABLE. The
1225 : : // resulting pass may only be used on textures that have a format with a
1226 : : // `pl_fmt.signature` compatible with this format.
1227 : : pl_fmt target_format;
1228 : :
1229 : : // Target blending mode. If this is NULL, blending is disabled. Otherwise,
1230 : : // the `target_format` must also support PL_FMT_CAP_BLENDABLE.
1231 : : const struct pl_blend_params *blend_params;
1232 : :
1233 : : // If false, the target's existing contents will be discarded before the
1234 : : // pass is run. (Semantically equivalent to calling pl_tex_invalidate
1235 : : // before every pl_pass_run, but slightly more efficient)
1236 : : //
1237 : : // Specifying `blend_params` requires `load_target` to be true.
1238 : : bool load_target;
1239 : :
1240 : : // --- Deprecated / removed fields.
1241 : : PL_DEPRECATED_IN(v6.322) const uint8_t *cached_program; // Non-functional
1242 : : PL_DEPRECATED_IN(v6.322) size_t cached_program_len;
1243 : : };
1244 : :
1245 : : #define pl_pass_params(...) (&(struct pl_pass_params) { __VA_ARGS__ })
1246 : :
1247 : : // Conflates the following typical GPU API concepts:
1248 : : // - various kinds of shaders
1249 : : // - rendering pipelines
1250 : : // - descriptor sets, uniforms, other bindings
1251 : : // - all synchronization necessary
1252 : : // - the current values of all inputs
1253 : : //
1254 : : // Thread-safety: Unsafe
1255 : : typedef const struct pl_pass_t {
1256 : : struct pl_pass_params params;
1257 : : } *pl_pass;
1258 : :
1259 : : // Compile a shader and create a render pass. This is a rare/expensive
1260 : : // operation and may take a significant amount of time, even if a cached
1261 : : // program is used. Returns NULL on failure.
1262 : : PL_API pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params);
1263 : : PL_API void pl_pass_destroy(pl_gpu gpu, pl_pass *pass);
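: :
: : // Example of creating a minimal compute pass (illustrative sketch;
: : // `compute_shader_text` is a hypothetical GLSL compute shader writing to a
: : // storage image named "img"):
: :
: : //   struct pl_desc desc = {
: : //       .name    = "img",
: : //       .type    = PL_DESC_STORAGE_IMG,
: : //       .binding = 0,
: : //       .access  = PL_DESC_ACCESS_WRITEONLY,
: : //   };
: : //
: : //   pl_pass pass = pl_pass_create(gpu, pl_pass_params(
: : //       .type            = PL_PASS_COMPUTE,
: : //       .glsl_shader     = compute_shader_text,
: : //       .descriptors     = &desc,
: : //       .num_descriptors = 1,
: : //   ));
: : //   // ... run the pass (see pl_pass_run below) ...
: : //   pl_pass_destroy(gpu, &pass); // also resets `pass` to NULL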
1264 : :
1265 : : struct pl_desc_binding {
1266 : : const void *object; // pl_* object with type corresponding to pl_desc_type
1267 : :
1268 : : // For PL_DESC_SAMPLED_TEX, this can be used to configure the sampler.
1269 : : enum pl_tex_address_mode address_mode;
1270 : : enum pl_tex_sample_mode sample_mode;
1271 : : };
1272 : :
1273 : : struct pl_var_update {
1274 : : int index; // index into params.variables[]
1275 : : const void *data; // pointer to raw byte data corresponding to pl_var_host_layout()
1276 : : };
1277 : :
1278 : : struct pl_pass_run_params {
1279 : : pl_pass pass;
1280 : :
1281 : : // If present, the shader will be re-specialized with the new constants
1282 : : // provided. This is a significantly cheaper operation than recompiling a
1283 : : // brand new shader, but should still be avoided if possible.
1284 : : //
1285 : : // Leaving it as NULL re-uses the existing specialization values. Ignored
1286 : : // if the shader has no specialization constants. Guaranteed to be a no-op
1287 : : // if the values have not changed since the last invocation.
1288 : : void *constant_data;
1289 : :
1290 : : // This list only contains descriptors/variables which have changed
1291 : : // since the previous invocation. All non-mentioned variables implicitly
1292 : : // preserve their state from the last invocation.
1293 : : struct pl_var_update *var_updates;
1294 : : int num_var_updates;
1295 : :
1296 : : // This list contains all descriptors used by this pass. It must
1297 : : // always be filled, even if the descriptors haven't changed. The order
1298 : : // must match that of pass->params.descriptors.
1299 : : struct pl_desc_binding *desc_bindings;
1300 : :
1301 : : // The push constants for this invocation. This must always be set and
1302 : : // fully defined for every invocation if params.push_constants_size > 0.
1303 : : void *push_constants;
1304 : :
1305 : : // An optional timer to report the approximate runtime of this shader pass
1306 : : // invocation to. Note that this is only an approximation, since shaders
1307 : : // may overlap their execution times and contend for GPU time.
1308 : : pl_timer timer;
1309 : :
1310 : : // --- pass->params.type==PL_PASS_RASTER only
1311 : :
1312 : : // Target must be a 2D texture, `target->params.renderable` must be true,
1313 : : // and `target->params.format->signature` must match the signature provided
1314 : : // in `pass->params.target_format`.
1315 : : //
1316 : : // If the viewport or scissors are left blank, they are inferred from
1317 : : // target->params.
1318 : : //
1319 : : // WARNING: Rendering to a target that is being read from by the same
1320 : : // shader is undefined behavior. In general, trying to bind the same
1321 : : // resource multiple times to the same shader is undefined behavior.
1322 : : pl_tex target;
1323 : : pl_rect2d viewport; // screen space viewport (must be normalized)
1324 : : pl_rect2d scissors; // target render scissors (must be normalized)
1325 : :
1326 : : // Number of vertices to render
1327 : : int vertex_count;
1328 : :
1329 : : // Vertex data may be provided in one of two forms:
1330 : : //
1331 : : // 1. Drawing from host memory directly
1332 : : const void *vertex_data;
1333 : : // 2. Drawing from a vertex buffer (requires `vertex_buf->params.drawable`)
1334 : : pl_buf vertex_buf;
1335 : : size_t buf_offset;
1336 : :
1337 : : // (Optional) Index data may be provided in the form given by `index_fmt`.
1338 : : // These will be used for indexed rendering. Similar to vertex data, this
1339 : : // can be provided in two forms:
1340 : : // 1. From host memory
1341 : : const void *index_data;
1342 : : enum pl_index_format index_fmt;
1343 : : // 2. From an index buffer (requires `index_buf->params.drawable`)
1344 : : pl_buf index_buf;
1345 : : size_t index_offset;
1346 : : // Note: Drawing from an index buffer requires vertex data to also be
1347 : : // present in buffer form, i.e. it's forbidden to mix `index_buf` with
1348 : : // `vertex_data` (though vice versa is allowed).
1349 : :
1350 : : // --- pass->params.type==PL_PASS_COMPUTE only
1351 : :
1352 : : // Number of work groups to dispatch per dimension (X/Y/Z). Must be <= the
1353 : : // corresponding index of limits.max_dispatch
1354 : : int compute_groups[3];
1355 : : };
1356 : :
1357 : : #define pl_pass_run_params(...) (&(struct pl_pass_run_params) { __VA_ARGS__ })
1358 : :
1359 : : // Execute a render pass.
1360 : : PL_API void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params);
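: :
: : // Example invocation of the compute pass sketched above (illustrative;
: : // `tex` is a hypothetical storable 2D pl_tex, and `groups_x`/`groups_y`
: : // are hypothetical dispatch sizes):
: :
: : //   struct pl_desc_binding binding = { .object = tex };
: : //   pl_pass_run(gpu, pl_pass_run_params(
: : //       .pass           = pass,
: : //       .desc_bindings  = &binding,
: : //       .compute_groups = {groups_x, groups_y, 1},
: : //   ));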
1361 : :
1362 : : // This is semantically a no-op, but it provides a hint that you want to flush
1363 : : // any partially queued up commands and begin execution. There is normally no
1364 : : // need to call this, because queued commands will always be implicitly flushed
1365 : : // whenever necessary to make forward progress on commands like `pl_buf_poll`,
1366 : : // or when submitting a frame to a swapchain for display. In fact, calling this
1367 : : // function can negatively impact performance, because some GPUs rely on being
1368 : : // able to re-order and modify queued commands in order to enable optimizations
1369 : : // retroactively.
1370 : : //
1371 : : // The only time it might be beneficial to call this explicitly is if you're doing
1372 : : // lots of offline processing, i.e. you aren't rendering to a swapchain but to
1373 : : // textures that you download from again. In that case you should call this
1374 : : // function after each "work item" to ensure good parallelism between them.
1375 : : //
1376 : : // It's worth noting that this function may block if you're over-feeding the
1377 : : // GPU without waiting for existing results to finish.
1378 : : PL_API void pl_gpu_flush(pl_gpu gpu);
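: :
: : // Sketch of the offline-processing pattern described above (the
: : // `process_frame` helper and `frames` array are hypothetical):
: :
: : //   for (int i = 0; i < num_frames; i++) {
: : //       process_frame(gpu, &frames[i]); // queues passes, downloads, etc.
: : //       pl_gpu_flush(gpu); // kick off execution of this work item
: : //   }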
1379 : :
1380 : : // This is like `pl_gpu_flush` but also blocks until the GPU is fully idle
1381 : : // before returning. Using this in your rendering loop is strongly discouraged,
1382 : : // and almost never the right solution. The intended use case is for deinit
1383 : : // logic, where users may want to force all pending GPU operations to
1384 : : // finish so they can clean up their state more easily.
1385 : : //
1386 : : // After this operation is called, it's guaranteed that all pending buffer
1387 : : // operations are complete - i.e. `pl_buf_poll` is guaranteed to return false.
1388 : : // It's also guaranteed that any outstanding timer query results are available.
1389 : : //
1390 : : // Note: If you only care about buffer operations, you can accomplish this more
1391 : : // easily by using `pl_buf_poll` with the timeout set to `UINT64_MAX`. But if
1392 : : // you have many buffers it may be more convenient to call this function
1393 : : // instead. The difference is that this function will also affect e.g. renders
1394 : : // to a `pl_swapchain`.
1395 : : PL_API void pl_gpu_finish(pl_gpu gpu);
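: :
: : // e.g. waiting on just a single buffer instead (sketch; `buf` hypothetical):
: :
: : //   while (pl_buf_poll(gpu, buf, UINT64_MAX))
: : //       ; // still in use, keep waiting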
1396 : :
1397 : : // Returns true if the GPU is considered to be in a "failed" state, which
1398 : : // during normal operation is typically the result of things like the device
1399 : : // being lost (due to e.g. power management).
1400 : : //
1401 : : // If this returns true, users *should* destroy and recreate the `pl_gpu`,
1402 : : // including all associated resources, via the appropriate mechanism.
1403 : : PL_API bool pl_gpu_is_failed(pl_gpu gpu);
1404 : :
1405 : : PL_API_END
1406 : :
1407 : : #endif // LIBPLACEBO_GPU_H_