Branch data Line data Source code
1 : : /*
2 : : * This file is part of libplacebo.
3 : : *
4 : : * libplacebo is free software; you can redistribute it and/or
5 : : * modify it under the terms of the GNU Lesser General Public
6 : : * License as published by the Free Software Foundation; either
7 : : * version 2.1 of the License, or (at your option) any later version.
8 : : *
9 : : * libplacebo is distributed in the hope that it will be useful,
10 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : : * GNU Lesser General Public License for more details.
13 : : *
14 : : * You should have received a copy of the GNU Lesser General Public
15 : : * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16 : : */
17 : :
18 : : #include <math.h>
19 : : #include "shaders.h"
20 : :
21 : : #include <libplacebo/shaders/dithering.h>
22 : :
// Default dithering parameters (PL_DITHER_DEFAULTS); pl_shader_dither()
// substitutes these through PL_DEF() when it is called without params.
23 : : const struct pl_dither_params pl_dither_default_params = { PL_DITHER_DEFAULTS };
24 : :
// Per-object dither state: `lut` is handed to sh_lut() as the backing object
// for the dither matrix and is released again in sh_dither_uninit().
25 : : struct sh_dither_obj {
26 : : pl_shader_obj lut;
27 : : };
28 : :
29 : 5 : static void sh_dither_uninit(pl_gpu gpu, void *ptr)
30 : : {
31 : : struct sh_dither_obj *obj = ptr;
32 : 5 : pl_shader_obj_destroy(&obj->lut);
33 : 5 : *obj = (struct sh_dither_obj) {0};
34 : 5 : }
35 : :
36 : 12 : static void fill_dither_matrix(void *data, const struct sh_lut_params *params)
37 : : {
38 [ + - + - : 12 : pl_assert(params->width > 0 && params->height > 0 && params->comps == 1);
- + ]
39 : :
40 : 12 : const struct pl_dither_params *dpar = params->priv;
41 [ + + - - ]: 12 : switch (dpar->method) {
42 : 4 : case PL_DITHER_ORDERED_LUT:
43 [ - + ]: 4 : pl_assert(params->width == params->height);
44 : 4 : pl_generate_bayer_matrix(data, params->width);
45 : 4 : return;
46 : :
47 : 8 : case PL_DITHER_BLUE_NOISE:
48 [ - + ]: 8 : pl_assert(params->width == params->height);
49 : 8 : pl_generate_blue_noise(data, params->width);
50 : 8 : return;
51 : :
52 : : case PL_DITHER_ORDERED_FIXED:
53 : : case PL_DITHER_WHITE_NOISE:
54 : : case PL_DITHER_METHOD_COUNT:
55 : : return;
56 : : }
57 : :
58 : 0 : pl_unreachable();
59 : : }
60 : :
61 : 143 : static bool dither_method_is_lut(enum pl_dither_method method)
62 : : {
63 [ + - + ]: 143 : switch (method) {
64 : : case PL_DITHER_BLUE_NOISE:
65 : : case PL_DITHER_ORDERED_LUT:
66 : : return true;
67 : 8 : case PL_DITHER_ORDERED_FIXED:
68 : : case PL_DITHER_WHITE_NOISE:
69 : 8 : return false;
70 : : case PL_DITHER_METHOD_COUNT:
71 : : break;
72 : : }
73 : :
74 : 0 : pl_unreachable();
75 : : }
76 : :
77 : : static inline float approx_gamma(enum pl_color_transfer trc)
78 : : {
79 : : switch (trc) {
80 : : case PL_COLOR_TRC_UNKNOWN: return 1.0f;
81 : : case PL_COLOR_TRC_LINEAR: return 1.0f;
82 : : case PL_COLOR_TRC_PRO_PHOTO:return 1.8f;
83 : : case PL_COLOR_TRC_GAMMA18: return 1.8f;
84 : : case PL_COLOR_TRC_GAMMA20: return 2.0f;
85 : : case PL_COLOR_TRC_GAMMA24: return 2.4f;
86 : : case PL_COLOR_TRC_GAMMA26: return 2.6f;
87 : : case PL_COLOR_TRC_ST428: return 2.6f;
88 : : case PL_COLOR_TRC_GAMMA28: return 2.8f;
89 : :
90 : : case PL_COLOR_TRC_SRGB:
91 : : case PL_COLOR_TRC_BT_1886:
92 : : case PL_COLOR_TRC_GAMMA22:
93 : : return 2.2f;
94 : :
95 : : case PL_COLOR_TRC_PQ:
96 : : case PL_COLOR_TRC_HLG:
97 : : case PL_COLOR_TRC_V_LOG:
98 : : case PL_COLOR_TRC_S_LOG1:
99 : : case PL_COLOR_TRC_S_LOG2:
100 : : return 2.0f; // TODO: handle this better
101 : :
102 : : case PL_COLOR_TRC_COUNT: break;
103 : : }
104 : :
105 : 0 : pl_unreachable();
106 : : }
107 : :
108 : 143 : void pl_shader_dither(pl_shader sh, int new_depth,
109 : : pl_shader_obj *dither_state,
110 : : const struct pl_dither_params *params)
111 : : {
112 [ + - ]: 143 : if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0))
113 : : return;
114 : :
115 [ - + ]: 143 : if (new_depth <= 0 || new_depth > 256) {
116 : 0 : PL_WARN(sh, "Invalid dither depth: %d.. ignoring", new_depth);
117 : 0 : return;
118 : : }
119 : :
120 : 143 : sh_describef(sh, "dithering (%d bits)", new_depth);
121 : 143 : GLSL("// pl_shader_dither \n"
122 : : "{ \n"
123 : : "float bias; \n");
124 : :
125 [ + + ]: 143 : params = PL_DEF(params, &pl_dither_default_params);
126 [ - + ]: 143 : if (params->lut_size < 0 || params->lut_size > 8) {
127 : 0 : SH_FAIL(sh, "Invalid `lut_size` specified: %d", params->lut_size);
128 : 0 : return;
129 : : }
130 : :
131 : 143 : enum pl_dither_method method = params->method;
132 : : ident_t lut = NULL_IDENT;
133 : : int lut_size = 0;
134 : :
135 [ + + ]: 143 : if (dither_method_is_lut(method)) {
136 [ - + ]: 135 : if (!dither_state) {
137 : 0 : PL_WARN(sh, "LUT-based dither method specified but no dither state "
138 : : "object given, falling back to non-LUT based methods.");
139 : 1 : goto fallback;
140 : : }
141 : :
142 : : struct sh_dither_obj *obj;
143 : 135 : obj = SH_OBJ(sh, dither_state, PL_SHADER_OBJ_DITHER,
144 : : struct sh_dither_obj, sh_dither_uninit);
145 [ - + ]: 135 : if (!obj)
146 : 0 : goto fallback;
147 : :
148 : : bool cache = method == PL_DITHER_BLUE_NOISE;
149 [ + - ]: 135 : lut_size = 1 << PL_DEF(params->lut_size, pl_dither_default_params.lut_size);
150 [ + + ]: 135 : lut = sh_lut(sh, sh_lut_params(
151 : : .object = &obj->lut,
152 : : .var_type = PL_VAR_FLOAT,
153 : : .width = lut_size,
154 : : .height = lut_size,
155 : : .comps = 1,
156 : : .fill = fill_dither_matrix,
157 : : .signature = (CACHE_KEY_DITHER ^ method) * lut_size,
158 : : .cache = cache ? SH_CACHE(sh) : NULL,
159 : : .priv = (void *) params,
160 : : ));
161 [ + + ]: 135 : if (!lut)
162 : 1 : goto fallback;
163 : : }
164 : :
165 : 142 : goto done;
166 : :
167 : : fallback:
168 : : method = PL_DITHER_ORDERED_FIXED;
169 : : // fall through
170 : :
171 : 143 : done: ;
172 : :
173 : : int size = 0;
174 [ + + ]: 143 : if (lut) {
175 : : size = lut_size;
176 [ + + ]: 9 : } else if (method == PL_DITHER_ORDERED_FIXED) {
177 : : size = 16; // hard-coded size
178 : : }
179 : :
180 [ + - ]: 134 : if (size) {
181 : : // Transform the screen position to the cyclic range [0,1)
182 : 139 : GLSL("vec2 pos = fract(gl_FragCoord.xy * 1.0/"$"); \n", SH_FLOAT(size));
183 : :
184 [ + + ]: 139 : if (params->temporal) {
185 : 4 : int phase = SH_PARAMS(sh).index % 8;
186 : 4 : float r = phase * (M_PI / 2); // rotate
187 [ + - ]: 4 : float m = phase < 4 ? 1 : -1; // mirror
188 : 4 : float mat[2][2] = {
189 : 4 : {cos(r), -sin(r) },
190 : 4 : {sin(r) * m, cos(r) * m},
191 : : };
192 : :
193 : 4 : ident_t rot = sh_var(sh, (struct pl_shader_var) {
194 : 4 : .var = pl_var_mat2("dither_rot"),
195 : : .data = &mat[0][0],
196 : : .dynamic = true,
197 : : });
198 : 4 : GLSL("pos = fract("$" * pos + vec2(1.0));\n", rot);
199 : : }
200 : : }
201 : :
202 [ + + + - : 143 : switch (method) {
- ]
203 : 4 : case PL_DITHER_WHITE_NOISE: {
204 : 4 : ident_t prng = sh_prng(sh, params->temporal, NULL);
205 : 4 : GLSL("bias = "$".x;\n", prng);
206 : : break;
207 : : }
208 : :
209 : : case PL_DITHER_ORDERED_FIXED:
210 : : // Bitwise ordered dither using only 32-bit uints
211 : 5 : GLSL("uvec2 xy = uvec2(pos * 16.0) %% 16u; \n"
212 : : // Bitwise merge (morton number)
213 : : "xy.x = xy.x ^ xy.y; \n"
214 : : "xy = (xy | xy << 2) & uvec2(0x33333333); \n"
215 : : "xy = (xy | xy << 1) & uvec2(0x55555555); \n"
216 : : // Bitwise inversion
217 : : "uint b = xy.x + (xy.y << 1); \n"
218 : : "b = (b * 0x0802u & 0x22110u) | \n"
219 : : " (b * 0x8020u & 0x88440u); \n"
220 : : "b = 0x10101u * b; \n"
221 : : "b = (b >> 16) & 0xFFu; \n"
222 : : // Generate bias value
223 : : "bias = float(b) * 1.0/256.0; \n");
224 : 5 : break;
225 : :
226 : 134 : case PL_DITHER_BLUE_NOISE:
227 : : case PL_DITHER_ORDERED_LUT:
228 [ - + ]: 134 : pl_assert(lut);
229 : 134 : GLSL("bias = "$"(ivec2(pos * "$"));\n", lut, SH_FLOAT(lut_size));
230 : 134 : break;
231 : :
232 : : case PL_DITHER_METHOD_COUNT:
233 : 0 : pl_unreachable();
234 : : }
235 : :
236 : : // Scale factor for dither rounding
237 : 143 : GLSL("const float scale = %llu.0; \n", (1LLU << new_depth) - 1);
238 : :
239 [ + - ]: 143 : const float gamma = approx_gamma(params->transfer);
240 [ + + ]: 143 : if (gamma != 1.0f && new_depth <= 4) {
241 : 28 : GLSL("const float gamma = "$"; \n"
242 : : "vec4 color_lin = pow(color, vec4(gamma)); \n",
243 : : SH_FLOAT(gamma));
244 : :
245 [ - + ]: 28 : if (new_depth == 1) {
246 : : // Special case for bit depth 1 dithering, in this case we can just
247 : : // ignore the low/high rounding because we know we are always
248 : : // dithering between 0.0 and 1.0.
249 : 0 : GLSL("const vec4 low = vec4(0.0); \n"
250 : : "const vec4 high = vec4(1.0); \n"
251 : : "vec4 offset = color_lin; \n");
252 : : } else {
253 : : // Linearize the low, high and current color values
254 : 28 : GLSL("vec4 low = floor(color * scale) / scale; \n"
255 : : "vec4 high = ceil(color * scale) / scale; \n"
256 : : "vec4 low_lin = pow(low, vec4(gamma)); \n"
257 : : "vec4 high_lin = pow(high, vec4(gamma)); \n"
258 : : "vec4 range = high_lin - low_lin; \n"
259 : : "vec4 offset = (color_lin - low_lin) / \n"
260 : : " max(range, 1e-6); \n");
261 : : }
262 : :
263 : : // Mix in the correct ratio corresponding to the offset and bias
264 : 28 : GLSL("color = mix(low, high, greaterThan(offset, vec4(bias))); \n");
265 : : } else {
266 : : // Approximate each gamma segment as a straight line, this simplifies
267 : : // the process of dithering down to a single scale and (biased) round.
268 : 115 : GLSL("color = scale * color + vec4(bias); \n"
269 : : "color = floor(color) * (1.0 / scale); \n");
270 : : }
271 : :
272 : 143 : GLSL("} \n");
273 : : }
274 : :
275 : : /* Error diffusion code is taken from mpv, original copyright (c) 2019 Bin Jin
276 : : *
277 : : * mpv is free software; you can redistribute it and/or
278 : : * modify it under the terms of the GNU Lesser General Public
279 : : * License as published by the Free Software Foundation; either
280 : : * version 2.1 of the License, or (at your option) any later version.
281 : : *
282 : : * mpv is distributed in the hope that it will be useful,
283 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
284 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
285 : : * GNU Lesser General Public License for more details.
286 : : *
287 : : * You should have received a copy of the GNU Lesser General Public
288 : : * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
289 : : */
290 : :
291 : : // After a (y, x) -> (y, x + y * shift) mapping, find the right most column that
292 : : // will be affected by the current column.
293 : 20 : static int compute_rightmost_shifted_column(const struct pl_error_diffusion_kernel *k)
294 : : {
295 : : int ret = 0;
296 [ + + ]: 80 : for (int y = 0; y <= PL_EDF_MAX_DY; y++) {
297 [ + + ]: 360 : for (int x = PL_EDF_MIN_DX; x <= PL_EDF_MAX_DX; x++) {
298 [ + + ]: 300 : if (k->pattern[y][x - PL_EDF_MIN_DX] != 0) {
299 : 132 : int shifted_x = x + y * k->shift;
300 : :
301 : : // The shift mapping guarantees current column (or left of it)
302 : : // won't be affected by error diffusion.
303 [ - + ]: 132 : assert(shifted_x > 0);
304 : :
305 : 132 : ret = PL_MAX(ret, shifted_x);
306 : : }
307 : : }
308 : : }
309 : 20 : return ret;
310 : : }
311 : :
312 : 0 : size_t pl_error_diffusion_shmem_req(const struct pl_error_diffusion_kernel *kernel,
313 : : int height)
314 : : {
315 : : // We add PL_EDF_MAX_DY empty lines on the bottom to handle errors
316 : : // propagated out from bottom side.
317 : 0 : int rows = height + PL_EDF_MAX_DY;
318 : 0 : int shifted_columns = compute_rightmost_shifted_column(kernel) + 1;
319 : :
320 : : // The shared memory is an array of size rows*shifted_columns. Each element
321 : : // is a single uint for three RGB component.
322 : 0 : return rows * shifted_columns * sizeof(uint32_t);
323 : : }
324 : :
325 : 20 : bool pl_shader_error_diffusion(pl_shader sh, const struct pl_error_diffusion_params *params)
326 : : {
327 : 20 : const int width = params->input_tex->params.w, height = params->input_tex->params.h;
328 : 20 : const struct pl_glsl_version glsl = sh_glsl(sh);
329 : : const struct pl_error_diffusion_kernel *kernel =
330 [ + - ]: 20 : PL_DEF(params->kernel, &pl_error_diffusion_sierra_lite);
331 : :
332 [ - + ]: 20 : pl_assert(params->output_tex->params.w == width);
333 [ - + ]: 20 : pl_assert(params->output_tex->params.h == height);
334 [ - + ]: 20 : if (!sh_require(sh, PL_SHADER_SIG_NONE, width, height))
335 : : return false;
336 : :
337 [ - + ]: 20 : if (params->new_depth <= 0 || params->new_depth > 256) {
338 : 0 : PL_WARN(sh, "Invalid dither depth: %d.. ignoring", params->new_depth);
339 : 0 : return false;
340 : : }
341 : :
342 : : // The parallel error diffusion works by applying the shift mapping first.
343 : : // Taking the Floyd and Steinberg algorithm for example. After applying
344 : : // the (y, x) -> (y, x + y * shift) mapping (with shift=2), all errors are
345 : : // propagated into the next few columns, which makes parallel processing on
346 : : // the same column possible.
347 : : //
348 : : // X 7/16 X 7/16
349 : : // 3/16 5/16 1/16 ==> 0 0 3/16 5/16 1/16
350 : :
351 : : // Figuring out the size of rectangle containing all shifted pixels.
352 : : // The rectangle height is not changed.
353 : 20 : int shifted_width = width + (height - 1) * kernel->shift;
354 : :
355 : : // We process all pixels from the shifted rectangles column by column, with
356 : : // a single global work group of size |block_size|.
357 : : // Figuring out how many block are required to process all pixels. We need
358 : : // this explicitly to make the number of barrier() calls match.
359 : 20 : int block_size = PL_MIN(glsl.max_group_threads, height);
360 : 20 : int blocks = PL_DIV_UP(height * shifted_width, block_size);
361 : :
362 : : // If we figure out how many of the next columns will be affected while the
363 : : // current columns is being processed. We can store errors of only a few
364 : : // columns in the shared memory. Using a ring buffer will further save the
365 : : // cost while iterating to next column.
366 : : //
367 : 20 : int ring_buffer_rows = height + PL_EDF_MAX_DY;
368 : 20 : int ring_buffer_columns = compute_rightmost_shifted_column(kernel) + 1;
369 : 20 : ident_t ring_buffer_size = sh_const(sh, (struct pl_shader_const) {
370 : : .type = PL_VAR_UINT,
371 : : .name = "ring_buffer_size",
372 : 20 : .data = &(unsigned) { ring_buffer_rows * ring_buffer_columns },
373 : : .compile_time = true,
374 : : });
375 : :
376 : : // Compute shared memory requirements and try enabling compute shader.
377 : 20 : size_t shmem_req = ring_buffer_rows * ring_buffer_columns * sizeof(uint32_t);
378 [ - + ]: 20 : if (!sh_try_compute(sh, block_size, 1, false, shmem_req)) {
379 : 0 : PL_ERR(sh, "Cannot execute error diffusion kernel: too old GPU or "
380 : : "insufficient compute shader memory!");
381 : 0 : return false;
382 : : }
383 : :
384 : 20 : ident_t in_tex = sh_desc(sh, (struct pl_shader_desc) {
385 : 20 : .binding.object = params->input_tex,
386 : : .desc = {
387 : : .name = "input_tex",
388 : : .type = PL_DESC_SAMPLED_TEX,
389 : : },
390 : : });
391 : :
392 : 20 : ident_t out_img = sh_desc(sh, (struct pl_shader_desc) {
393 : 20 : .binding.object = params->output_tex,
394 : : .desc = {
395 : : .name = "output_tex",
396 : : .type = PL_DESC_STORAGE_IMG,
397 : : .access = PL_DESC_ACCESS_WRITEONLY,
398 : : },
399 : : });
400 : :
401 : 20 : sh->output = PL_SHADER_SIG_NONE;
402 : 20 : sh_describef(sh, "error diffusion (%s, %d bits)",
403 : 20 : kernel->name, params->new_depth);
404 : :
405 : : // Defines the ring buffer in shared memory.
406 : 20 : GLSLH("shared uint err_rgb8["$"]; \n", ring_buffer_size);
407 : 20 : GLSL("// pl_shader_error_diffusion \n"
408 : : // Safeguard against accidental over-execution
409 : : "if (gl_WorkGroupID != uvec3(0)) \n"
410 : : " return; \n"
411 : : // Initialize the ring buffer.
412 : : "for (uint i = gl_LocalInvocationIndex; i < "$"; i+=gl_WorkGroupSize.x)\n"
413 : : " err_rgb8[i] = 0u; \n"
414 : :
415 : : // Main block loop, add barrier here to have previous block all
416 : : // processed before starting the processing of the next.
417 : : "for (uint block_id = 0; block_id < "$"; block_id++) { \n"
418 : : "barrier(); \n"
419 : : // Compute the coordinate of the pixel we are currently processing,
420 : : // both before and after the shift mapping.
421 : : "uint id = block_id * gl_WorkGroupSize.x + gl_LocalInvocationIndex; \n"
422 : : "const uint height = "$"; \n"
423 : : "int y = int(id %% height), x_shifted = int(id / height); \n"
424 : : "int x = x_shifted - y * %d; \n"
425 : : // Proceed only if we are processing a valid pixel.
426 : : "if (x >= 0 && x < "$") { \n"
427 : : // The index that the current pixel have on the ring buffer.
428 : : "uint idx = uint(x_shifted * "$" + y) %% "$"; \n"
429 : : // Fetch the current pixel.
430 : : "vec4 pix_orig = texelFetch("$", ivec2(x, y), 0); \n"
431 : : "vec3 pix = pix_orig.rgb; \n",
432 : : ring_buffer_size,
433 : : SH_UINT(blocks),
434 : : SH_UINT(height),
435 : : kernel->shift,
436 : : SH_INT(width),
437 : : SH_INT(ring_buffer_rows),
438 : : ring_buffer_size,
439 : : in_tex);
440 : :
441 : : // The dithering will quantize pixel value into multiples of 1/dither_quant.
442 : 20 : int dither_quant = (1 << params->new_depth) - 1;
443 : :
444 : : // We encode errors in RGB components into a single 32-bit unsigned integer.
445 : : // The error we propagate from the current pixel is in range of
446 : : // [-0.5 / dither_quant, 0.5 / dither_quant]. While not quite obvious, the
447 : : // sum of all errors been propagated into a pixel is also in the same range.
448 : : // It's possible to map errors in this range into [-127, 127], and use an
449 : : // unsigned 8-bit integer to store it (using standard two's complement).
450 : : // The three 8-bit unsigned integers can then be encoded into a single
451 : : // 32-bit unsigned integer, with two 4-bit padding to prevent addition
452 : : // operation overflows affecting other component. There are at most 12
453 : : // addition operations on each pixel, so 4-bit padding should be enough.
454 : : // The overflow from R component will be discarded.
455 : : //
456 : : // The following figure is how the encoding looks like.
457 : : //
458 : : // +------------------------------------+
459 : : // |RRRRRRRR|0000|GGGGGGGG|0000|BBBBBBBB|
460 : : // +------------------------------------+
461 : : //
462 : :
463 : : // The bitshift position for R and G component.
464 : : const int bitshift_r = 24, bitshift_g = 12;
465 : : // The multiplier we use to map [-0.5, 0.5] to [-127, 127].
466 : : const int uint8_mul = 127 * 2;
467 : :
468 : 20 : GLSL(// Add the error previously propagated into current pixel, and clear
469 : : // it in the ring buffer.
470 : : "uint err_u32 = err_rgb8[idx] + %uu; \n"
471 : : "pix = pix * %d.0 + vec3(int((err_u32 >> %d) & 0xFFu) - 128, \n"
472 : : " int((err_u32 >> %d) & 0xFFu) - 128, \n"
473 : : " int( err_u32 & 0xFFu) - 128) / %d.0; \n"
474 : : "err_rgb8[idx] = 0u; \n"
475 : : // Write the dithered pixel.
476 : : "vec3 dithered = round(pix); \n"
477 : : "imageStore("$", ivec2(x, y), vec4(dithered / %d.0, pix_orig.a)); \n"
478 : : // Prepare for error propagation pass
479 : : "vec3 err_divided = (pix - dithered) * %d.0 / %d.0; \n"
480 : : "ivec3 tmp; \n",
481 : : (128u << bitshift_r) | (128u << bitshift_g) | 128u,
482 : : dither_quant, bitshift_r, bitshift_g, uint8_mul,
483 : : out_img, dither_quant,
484 : : uint8_mul, kernel->divisor);
485 : :
486 : : // Group error propagation with same weight factor together, in order to
487 : : // reduce the number of annoying error encoding.
488 [ + + ]: 436 : for (int dividend = 1; dividend <= kernel->divisor; dividend++) {
489 : : bool err_assigned = false;
490 : :
491 [ + + ]: 1664 : for (int y = 0; y <= PL_EDF_MAX_DY; y++) {
492 [ + + ]: 7488 : for (int x = PL_EDF_MIN_DX; x <= PL_EDF_MAX_DX; x++) {
493 [ + + ]: 6240 : if (kernel->pattern[y][x - PL_EDF_MIN_DX] != dividend)
494 : 6108 : continue;
495 : :
496 [ + + ]: 132 : if (!err_assigned) {
497 : : err_assigned = true;
498 : :
499 : 58 : GLSL("tmp = ivec3(round(err_divided * %d.0)); \n"
500 : : "err_u32 = (uint(tmp.r & 0xFF) << %d) | \n"
501 : : " (uint(tmp.g & 0xFF) << %d) | \n"
502 : : " uint(tmp.b & 0xFF); \n",
503 : : dividend,
504 : : bitshift_r, bitshift_g);
505 : : }
506 : :
507 : 132 : int shifted_x = x + y * kernel->shift;
508 : :
509 : : // Unlike the right border, errors propagated out from left
510 : : // border will remain in the ring buffer. This will produce
511 : : // visible artifacts near the left border, especially for
512 : : // shift=3 kernels.
513 [ + + ]: 132 : if (x < 0)
514 : 36 : GLSL("if (x >= %d) \n", -x);
515 : :
516 : : // Calculate the new position in the ring buffer to propagate
517 : : // the error into.
518 : 132 : int ring_buffer_delta = shifted_x * ring_buffer_rows + y;
519 : 132 : GLSL("atomicAdd(err_rgb8[(idx + %du) %% "$"], err_u32); \n",
520 : : ring_buffer_delta, ring_buffer_size);
521 : : }
522 : : }
523 : : }
524 : :
525 : 20 : GLSL("}} \n"); // end of main loop + valid pixel conditional
526 : 20 : return true;
527 : : }
|