Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
diffuse.c
Go to the documentation of this file.
1/*
2 This file is part of the Ansel project.
3 Copyright (C) 2021-2023, 2025-2026 Aurélien PIERRE.
4 Copyright (C) 2021 Chris Elston.
5 Copyright (C) 2021 Hubert Kowalski.
6 Copyright (C) 2021 luzpaz.
7 Copyright (C) 2021-2022 Pascal Obry.
8 Copyright (C) 2021-2022 quovadit.
9 Copyright (C) 2021 Ralf Brown.
10 Copyright (C) 2021-2022 Sakari Kapanen.
11 Copyright (C) 2021 Victor Forsiuk.
12 Copyright (C) 2022 Diederik Ter Rahe.
13 Copyright (C) 2022 Hanno Schwalm.
14 Copyright (C) 2022 Martin Bařinka.
15 Copyright (C) 2022 Philipp Lutz.
16 Copyright (C) 2023, 2025 Guillaume Stutin.
17 Copyright (C) 2023 Luca Zulberti.
18 Copyright (C) 2024 Alynx Zhou.
19
20 Ansel is free software: you can redistribute it and/or modify
21 it under the terms of the GNU General Public License as published by
22 the Free Software Foundation, either version 3 of the License, or
23 (at your option) any later version.
24
25 Ansel is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 GNU General Public License for more details.
29
30 You should have received a copy of the GNU General Public License
31 along with Ansel. If not, see <http://www.gnu.org/licenses/>.
32*/
33
34#ifdef HAVE_CONFIG_H
35#include "config.h"
36#endif
37#include "bauhaus/bauhaus.h"
38#include "common/bspline.h"
39#include "common/darktable.h"
40#include "common/dwt.h"
41#include "common/gaussian.h"
42#include "common/image.h"
43#include "common/imagebuf.h"
44#include "common/iop_profile.h"
45#include "common/opencl.h"
46#include "control/control.h"
47#include "develop/develop.h"
48#include "develop/imageop_gui.h"
52#include "develop/tiling.h"
53#include "dtgtk/button.h"
54#include "dtgtk/drawingarea.h"
55#include "dtgtk/expander.h"
56#include "dtgtk/paint.h"
57
58#include "gui/gtk.h"
59#include "gui/presets.h"
60#include "iop/iop_api.h"
61
62// Set to one to output intermediate image steps as PFM in /tmp
63#define DEBUG_DUMP_PFM 0
64
65// Diffuse v3 adds a new parameter that allows more aggressive "sharpening"
66// (and mathematically-accurate multiscale handling...)
67// on coarse scales, ensuring each HF details band is normalized to the same
68// energy. This makes the `radius span` parameter much more impactful.
69#define DIFFUSE_V3 0
70
71#if DIFFUSE_V3
73#else
75#endif
76
77#define MAX_NUM_SCALES 10
79{
80 // global parameters
81 int iterations; // $MIN: 0 $MAX: 500 $DEFAULT: 1 $DESCRIPTION: "iterations"
82 float sharpness; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "sharpness"
83 int radius; // $MIN: 0 $MAX: 2048 $DEFAULT: 8 $DESCRIPTION: "radius span"
84 float regularization; // $MIN: 0. $MAX: 6. $DEFAULT: 0. $DESCRIPTION: "edge sensitivity"
85 float variance_threshold; // $MIN: -2. $MAX: 2. $DEFAULT: 0. $DESCRIPTION: "edge threshold"
86
87 float anisotropy_first; // $MIN: -10. $MAX: 10. $DEFAULT: 0. $DESCRIPTION: "1st order anisotropy"
88 float anisotropy_second; // $MIN: -10. $MAX: 10. $DEFAULT: 0. $DESCRIPTION: "2nd order anisotropy"
89 float anisotropy_third; // $MIN: -10. $MAX: 10. $DEFAULT: 0. $DESCRIPTION: "3rd order anisotropy"
90 float anisotropy_fourth; // $MIN: -10. $MAX: 10. $DEFAULT: 0. $DESCRIPTION: "4th order anisotropy"
91
92 float threshold; // $MIN: 0. $MAX: 8. $DEFAULT: 0. $DESCRIPTION: "luminance masking threshold"
93
94 float first; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "1st order speed"
95 float second; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "2nd order speed"
96 float third; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "3rd order speed"
97 float fourth; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "4th order speed"
98
99 // v2
100 int radius_center; // $MIN: 0 $MAX: 1024 $DEFAULT: 0 $DESCRIPTION: "central radius"
101
102 // new versions add params mandatorily at the end, so we can memcpy old parameters at the beginning
103
104 // v3 : Ansel 1.0
105 // bool normalize_band_energy; // $DEFAULT: FALSE $DESCRIPTION: "normalize coarse scales"
106 // When disabled, this will boost coarse scale sharpening by a lot.
107 // There is no reason to enable it for new edits,
108 // it's there to keep compatiblity with old edits.
109
111
112
118
131
132
133// only copy params struct to avoid a commit_params()
135
138{
139 memcpy(piece->data, params, self->params_size);
140 piece->cache_output_on_ram = TRUE;
141}
142
143
144typedef enum dt_isotropy_t
145{
146 DT_ISOTROPY_ISOTROPE = 0, // diffuse in all directions with same intensity
147 DT_ISOTROPY_ISOPHOTE = 1, // diffuse more in the isophote direction (orthogonal to gradient)
148 DT_ISOTROPY_GRADIENT = 2 // diffuse more in the gradient direction
150
151
153static inline dt_isotropy_t check_isotropy_mode(const float anisotropy)
154{
155 // user param is negative, positive or zero. The sign encodes the direction of diffusion, the magnitude encodes the ratio of anisotropy
156 // ultimately, the anisotropy factor needs to be positive before going into the exponential
157 if(anisotropy == 0.f)
159 else if(anisotropy > 0.f)
161 else
162 return DT_ISOTROPY_GRADIENT; // if(anisotropy > 0.f)
163}
164
165
166const char *name()
167{
168 return _("diffuse or _sharpen");
169}
170
171const char *aliases()
172{
173 return _("diffusion|deconvolution|blur|sharpening");
174}
175
176const char **description(struct dt_iop_module_t *self)
177{
178 return dt_iop_set_description(self,
179 _("simulate directional diffusion of light with heat transfer model\n"
180 "to apply an iterative edge-oriented blur,\n"
181 "inpaint damaged parts of the image,"
182 "or to remove blur with blind deconvolution."),
183 _("corrective and creative"),
184 _("linear, RGB, scene-referred"),
185 _("linear, RGB"),
186 _("linear, RGB, scene-referred"));
187}
188
190{
191 return IOP_GROUP_SHARPNESS;
192}
193
198
200{
201 return IOP_CS_RGB;
202}
203
204int legacy_params(dt_iop_module_t *self, const void *const old_params, const int old_version, void *new_params,
205 const int new_version)
206{
207 if(old_version == 1 && new_version == 2)
208 {
209 typedef struct dt_iop_diffuse_params_v1_t
210 {
211 // global parameters
212 int iterations;
213 float sharpness;
214 int radius;
215 float regularization;
216 float variance_threshold;
217
218 float anisotropy_first;
219 float anisotropy_second;
220 float anisotropy_third;
221 float anisotropy_fourth;
222
223 float threshold;
224
225 float first;
226 float second;
227 float third;
228 float fourth;
229 } dt_iop_diffuse_params_v1_t;
230
231 dt_iop_diffuse_params_v1_t *o = (dt_iop_diffuse_params_v1_t *)old_params;
234
235 *n = *d; // start with a fresh copy of default parameters
236
237 // copy common parameters
238 memcpy(n, o, sizeof(dt_iop_diffuse_params_v1_t));
239
240 // init only new parameters
241 n->radius_center = 0;
242
243#if !DIFFUSE_V3
244 // When version 3 will be out, we need to handle v1 -> v2 -> v3 conversion,
245 // so don't return just yet.
246 return 0;
247#endif
248 }
249
250#if DIFFUSE_V3
251 if(old_version == 2 && new_version == 3)
252 {
253 typedef struct dt_iop_diffuse_params_v2_t
254 {
255 // global parameters
256 int iterations; // $MIN: 0 $MAX: 500 $DEFAULT: 1 $DESCRIPTION: "iterations"
257 float sharpness; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "sharpness"
258 int radius; // $MIN: 0 $MAX: 2048 $DEFAULT: 8 $DESCRIPTION: "radius span"
259 float regularization; // $MIN: 0. $MAX: 8. $DEFAULT: 0. $DESCRIPTION: "edge sensitivity"
260 float variance_threshold; // $MIN: -3. $MAX: 3. $DEFAULT: 0. $DESCRIPTION: "edge threshold"
261
262 float anisotropy_first; // $MIN: -100. $MAX: 100. $DEFAULT: 0. $DESCRIPTION: "1st order anisotropy"
263 float anisotropy_second; // $MIN: -100. $MAX: 100. $DEFAULT: 0. $DESCRIPTION: "2nd order anisotropy"
264 float anisotropy_third; // $MIN: -100. $MAX: 100. $DEFAULT: 0. $DESCRIPTION: "3rd order anisotropy"
265 float anisotropy_fourth; // $MIN: -100. $MAX: 100. $DEFAULT: 0. $DESCRIPTION: "4th order anisotropy"
266
267 float threshold; // $MIN: 0. $MAX: 8. $DEFAULT: 0. $DESCRIPTION: "luminance masking threshold"
268
269 float first; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "1st order speed"
270 float second; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "2nd order speed"
271 float third; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "3rd order speed"
272 float fourth; // $MIN: -1. $MAX: 1. $DEFAULT: 0. $DESCRIPTION: "4th order speed"
273
274 // v2
275 int radius_center; // $MIN: 0 $MAX: 1024 $DEFAULT: 0 $DESCRIPTION: "central radius"
276
277 } dt_iop_diffuse_params_v2_t;
278
279 dt_iop_diffuse_params_v2_t *o = (dt_iop_diffuse_params_v2_t *)old_params;
282
283 *n = *d; // start with a fresh copy of default parameters
284
285 // copy common parameters
286 memcpy(n, o, sizeof(dt_iop_diffuse_params_v2_t));
287
288 // init only new parameters
289 n->normalize_band_energy = 1; // legacy compatiblity
290
291 return 0;
292 }
293#endif
294
295 return 1;
296}
297
299{
301 memset(&p, 0, sizeof(p));
302 p.radius_center = 0;
303
304 // deblurring presets
305 p.sharpness = 0.0f;
306 p.threshold = 0.0f;
307 p.variance_threshold = +0.f;
308 p.regularization = 1.f;
309
310 p.anisotropy_first = +2.f;
311 p.anisotropy_second = 0.f;
312 p.anisotropy_third = +2.f;
313 p.anisotropy_fourth = 0.f;
314
315 p.first = -0.25f;
316 p.second = +0.125f;
317 p.third = -0.125f;
318 p.fourth = +0.0625f;
319
320 p.radius = 8;
321 p.iterations = 8;
322 dt_gui_presets_add_generic(_("lens deblur: soft"), self->op, self->version(), &p, sizeof(p), 1,
324
325 p.radius = 10;
326 p.iterations = 16;
327 dt_gui_presets_add_generic(_("lens deblur: medium"), self->op, self->version(), &p, sizeof(p), 1,
329
330 p.radius = 12;
331 p.iterations = 24;
332 dt_gui_presets_add_generic(_("lens deblur: hard"), self->op, self->version(), &p, sizeof(p), 1,
334
335 p.iterations = 10;
336 p.radius = 512;
337 p.sharpness = 0.f;
338 p.variance_threshold = 0.f;
339 p.regularization = 2.5f;
340
341 p.first = -0.20f;
342 p.second = +0.10f;
343 p.third = -0.20f;
344 p.fourth = +0.10f;
345
346 p.anisotropy_first = 2.f;
347 p.anisotropy_second = 0.f;
348 p.anisotropy_third = 2.f;
349 p.anisotropy_fourth = 0.f;
350
351 dt_gui_presets_add_generic(_("dehaze"), self->op, self->version(), &p, sizeof(p), 1,
353
354 p.iterations = 32;
355 p.sharpness = 0.f;
356 p.threshold = 0.f;
357 p.variance_threshold = -0.f;
358 p.regularization = 4.f;
359
360 p.anisotropy_first = +2.f;
361 p.anisotropy_second = 0.f;
362 p.anisotropy_third = +2.f;
363 p.anisotropy_fourth = 0.f;
364
365 p.radius = 1;
366 p.radius_center = 2;
367
368 p.first = +0.06f;
369 p.second = 0.f;
370 p.third = +0.06f;
371 p.fourth = 0.f;
372 dt_gui_presets_add_generic(_("denoise: fine"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);
373
374 p.radius = 3;
375 p.radius_center = 4;
376
377 p.first = +0.05f;
378 p.second = 0.f;
379 p.third = +0.05f;
380 p.fourth = 0.f;
381 dt_gui_presets_add_generic(_("denoise: medium"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);
382
383 p.radius = 6;
384 p.radius_center = 8;
385
386 p.first = +0.04f;
387 p.second = 0.f;
388 p.third = +0.04f;
389 p.fourth = 0.f;
390 dt_gui_presets_add_generic(_("denoise: coarse"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);
391
392 p.radius_center = 0;
393
394 p.iterations = 2;
395 p.radius = 32;
396 p.sharpness = 0.0f;
397 p.threshold = 0.0f;
398 p.variance_threshold = 0.f;
399 p.regularization = 4.f;
400
401 p.anisotropy_first = +4.f;
402 p.anisotropy_second = +4.f;
403 p.anisotropy_third = +4.f;
404 p.anisotropy_fourth = +4.f;
405
406 p.first = +1.f;
407 p.second = +1.f;
408 p.third = +1.f;
409 p.fourth = +1.f;
410 dt_gui_presets_add_generic(_("surface blur"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);
411
412 p.iterations = 1;
413 p.radius = 32;
414 p.sharpness = 0.0f;
415 p.threshold = 0.0f;
416 p.variance_threshold = 0.f;
417 p.regularization = 0.f;
418
419 p.anisotropy_first = 0.f;
420 p.anisotropy_second = 0.f;
421 p.anisotropy_third = 0.f;
422 p.anisotropy_fourth = 0.f;
423
424 p.first = +0.5f;
425 p.second = +0.5f;
426 p.third = +0.5f;
427 p.fourth = +0.5f;
428 dt_gui_presets_add_generic(_("bloom"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);
429
430 p.iterations = 1;
431 p.radius = 4;
432 p.sharpness = 0.0f;
433 p.threshold = 0.0f;
434 p.variance_threshold = 0.f;
435 p.regularization = 1.f;
436
437 p.anisotropy_first = +1.f;
438 p.anisotropy_second = +1.f;
439 p.anisotropy_third = +1.f;
440 p.anisotropy_fourth = +1.f;
441
442 p.first = -0.25f;
443 p.second = -0.25f;
444 p.third = -0.25f;
445 p.fourth = -0.25f;
446 dt_gui_presets_add_generic(_("sharpen demosaicing (no AA filter)"), self->op, self->version(), &p, sizeof(p), 1,
448
449 p.radius = 8;
450 dt_gui_presets_add_generic(_("sharpen demosaicing (AA filter)"), self->op, self->version(), &p, sizeof(p), 1,
452
453 p.iterations = 4;
454 p.radius = 64;
455 p.sharpness = 0.0f;
456 p.threshold = 0.0f;
457 p.variance_threshold = 0.f;
458 p.regularization = 2.f;
459
460 p.anisotropy_first = 0.f;
461 p.anisotropy_second = 0.f;
462 p.anisotropy_third = +4.f;
463 p.anisotropy_fourth = +4.f;
464
465 p.first = 0.f;
466 p.second = 0.f;
467 p.third = +0.5f;
468 p.fourth = +0.5f;
469 dt_gui_presets_add_generic(_("simulate watercolor"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);
470
471 p.iterations = 50;
472 p.radius = 64;
473 p.sharpness = 0.0f;
474 p.threshold = 0.0f;
475 p.variance_threshold = 0.f;
476 p.regularization = 4.f;
477
478 p.anisotropy_first = -5.f;
479 p.anisotropy_second = -5.f;
480 p.anisotropy_third = -5.f;
481 p.anisotropy_fourth = -5.f;
482
483 p.first = -1.f;
484 p.second = -1.f;
485 p.third = -1.f;
486 p.fourth = -1.f;
487 dt_gui_presets_add_generic(_("simulate line drawing"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);
488
489 // local contrast
490 p.sharpness = 0.0f;
491 p.threshold = 0.0f;
492 p.variance_threshold = 0.f;
493
494 p.anisotropy_first = -2.5f;
495 p.anisotropy_second = 0.f;
496 p.anisotropy_third = 0.f;
497 p.anisotropy_fourth = -2.5f;
498
499 p.first = -0.50f;
500 p.second = 0.f;
501 p.third = 0.f;
502 p.fourth = -0.50f;
503
504 p.iterations = 10;
505 p.radius = 333;
506 p.radius_center = 512;
507 p.regularization = 0.1f;
508 dt_gui_presets_add_generic(_("add local contrast"), self->op, self->version(), &p, sizeof(p), 1,
510
511 p.iterations = 32;
512 p.radius = 4;
513 p.radius_center = 0;
514 p.sharpness = 0.0f;
515 p.threshold = 1.41f;
516 p.variance_threshold = 0.f;
517 p.regularization = 0.f;
518
519 p.anisotropy_first = +0.f;
520 p.anisotropy_second = +0.f;
521 p.anisotropy_third = +0.f;
522 p.anisotropy_fourth = +2.f;
523
524 p.first = +0.0f;
525 p.second = +0.0f;
526 p.third = +0.0f;
527 p.fourth = +0.5f;
528 dt_gui_presets_add_generic(_("inpaint highlights"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);
529
530 // fast presets for slow hardware
531 p.radius_center = 0;
532 p.radius = 128;
533 p.sharpness = 0.0f;
534 p.threshold = 0.0f;
535 p.variance_threshold = 0.f;
536 p.regularization = 0.f;
537
538 p.anisotropy_first = 0.f;
539 p.anisotropy_second = 0.f;
540 p.anisotropy_third = 5.f;
541 p.anisotropy_fourth = 0.f;
542
543 p.first = 0.f;
544 p.second = 0.f;
545 p.third = -0.50f;
546 p.fourth = 0.f;
547
548 p.iterations = 1;
549 dt_gui_presets_add_generic(_("fast sharpness"), self->op, self->version(), &p, sizeof(p), 1,
551
552 p.radius_center = 512;
553 p.radius = 512;
554 p.sharpness = 0.0f;
555 p.threshold = 0.0f;
556 p.variance_threshold = 0.f;
557 p.regularization = 0.f;
558
559
560 p.anisotropy_first = 0.f;
561 p.anisotropy_second = 0.f;
562 p.anisotropy_third = 5.f;
563 p.anisotropy_fourth = 0.f;
564
565 p.first = 0.f;
566 p.second = 0.f;
567 p.third = -0.50f;
568 p.fourth = 0.f;
569
570 p.iterations = 1;
571 dt_gui_presets_add_generic(_("fast local contrast"), self->op, self->version(), &p, sizeof(p), 1,
573}
574
575void tiling_callback(struct dt_iop_module_t *self, const struct dt_dev_pixelpipe_t *pipe, const struct dt_dev_pixelpipe_iop_t *piece, struct dt_develop_tiling_t *tiling)
576{
577 const dt_iop_roi_t *const roi_in = &piece->roi_in;
579
580 const float zoom = dt_dev_get_module_scale(pipe, roi_in);
581 const float final_radius = (data->radius + data->radius_center) * 2.f / zoom;
582 const int diffusion_scales = num_steps_to_reach_equivalent_sigma(B_SPLINE_SIGMA, final_radius);
583 const int scales = CLAMP(diffusion_scales, 1, MAX_NUM_SCALES);
584 const int max_filter_radius = (1 << scales);
585
586 // Account for the exact full-frame buffers kept alive by the CPU/OpenCL paths:
587 // one borrowed input, one output, two temp ping-pong buffers, two low-pass ping-pong
588 // buffers, one stored detail buffer per wavelet scale, and one 8-bit mask.
589 tiling->factor = 6.0625f + scales;
590 tiling->factor_cl = 6.0625f + scales;
591
592 tiling->maxbuf = 1.0f;
593 tiling->maxbuf_cl = 1.0f;
594 tiling->overhead = 0;
595 tiling->overlap = max_filter_radius;
596 tiling->xalign = 1;
597 tiling->yalign = 1;
598 return;
599}
600
602static inline void init_reconstruct(float *const restrict reconstructed, const size_t width,
603 const size_t height)
604{
605// init the reconstructed buffer with non-clipped and partially clipped pixels
606 __OMP_PARALLEL_FOR_SIMD__(aligned(reconstructed:64))
607 for(size_t k = 0; k < height * width * 4; k++) reconstructed[k] = 0.f;
608
609}
610
611
612// Discretization parameters for the Partial Derivative Equation solver
613#define H 1 // spatial step
614#define KAPPA 0.25f // 0.25 if h = 1, 1 if h = 2
615
616
617static inline __attribute__((always_inline)) void find_gradients(const dt_aligned_pixel_simd_t pixels[9],
618 dt_aligned_pixel_simd_t xy[2])
619{
620 // Compute the gradient with centered finite differences in a 3x3 stencil
621 // warning : x is vertical, y is horizontal
622 const dt_aligned_pixel_simd_t half = dt_simd_set1(0.5f);
623 xy[0] = (pixels[7] - pixels[1]) * half;
624 xy[1] = (pixels[5] - pixels[3]) * half;
625}
626
627static inline __attribute__((always_inline)) void find_laplacians(const dt_aligned_pixel_simd_t pixels[9],
628 dt_aligned_pixel_simd_t xy[2])
629{
630 // Compute the laplacian with centered finite differences in a 3x3 stencil
631 // warning : x is vertical, y is horizontal
632 const dt_aligned_pixel_simd_t two = dt_simd_set1(2.f);
633 xy[0] = (pixels[7] + pixels[1]) - two * pixels[4];
634 xy[1] = (pixels[5] + pixels[3]) - two * pixels[4];
635}
636
637
638static inline __attribute__((always_inline)) void rotation_matrix_isophote(
639 const dt_aligned_pixel_simd_t c2, const dt_aligned_pixel_simd_t cos_theta_sin_theta,
640 const dt_aligned_pixel_simd_t cos_theta2, const dt_aligned_pixel_simd_t sin_theta2,
641 dt_aligned_pixel_simd_t a[2][2])
642{
643 // Write the coefficients of a square symmetrical matrice of rotation of the gradient :
644 // [[ a11, a12 ],
645 // [ a12, a22 ]]
646 // taken from https://www.researchgate.net/publication/220663968
647 // c dampens the gradient direction
648 a[0][0] = cos_theta2 + c2 * sin_theta2;
649 a[1][1] = c2 * cos_theta2 + sin_theta2;
650 a[0][1] = a[1][0] = (c2 - dt_simd_set1(1.f)) * cos_theta_sin_theta;
651}
652
653static inline __attribute__((always_inline)) void rotation_matrix_gradient(
654 const dt_aligned_pixel_simd_t c2, const dt_aligned_pixel_simd_t cos_theta_sin_theta,
655 const dt_aligned_pixel_simd_t cos_theta2, const dt_aligned_pixel_simd_t sin_theta2,
656 dt_aligned_pixel_simd_t a[2][2])
657{
658 // Write the coefficients of a square symmetrical matrice of rotation of the gradient :
659 // [[ a11, a12 ],
660 // [ a12, a22 ]]
661 // based on https://www.researchgate.net/publication/220663968 and inverted
662 // c dampens the isophote direction
663 a[0][0] = c2 * cos_theta2 + sin_theta2;
664 a[1][1] = cos_theta2 + c2 * sin_theta2;
665 a[0][1] = a[1][0] = (dt_simd_set1(1.f) - c2) * cos_theta_sin_theta;
666}
667
668
669static inline __attribute__((always_inline)) void build_matrix(const dt_aligned_pixel_simd_t a[2][2],
670 dt_aligned_pixel_simd_t kernel[9])
671{
672 const dt_aligned_pixel_simd_t half = dt_simd_set1(0.5f);
673 const dt_aligned_pixel_simd_t minus_two = dt_simd_set1(-2.f);
674 const dt_aligned_pixel_simd_t b11 = a[0][1] * half;
675 const dt_aligned_pixel_simd_t b13 = -b11;
676 const dt_aligned_pixel_simd_t b22 = minus_two * (a[0][0] + a[1][1]);
677
678 // build the kernel of rotated anisotropic laplacian
679 // from https://www.researchgate.net/publication/220663968 :
680 // [ [ a12 / 2, a22, -a12 / 2 ],
681 // [ a11, -2 (a11 + a22), a11 ],
682 // [ -a12 / 2, a22, a12 / 2 ] ]
683 // N.B. we have flipped the signs of the a12 terms
684 // compared to the paper. There's probably a mismatch
685 // of coordinate convention between the paper and the
686 // original derivation of this convolution mask
687 // (Witkin 1991, https://doi.org/10.1145/127719.122750).
688 kernel[0] = b11;
689 kernel[1] = a[1][1];
690 kernel[2] = b13;
691 kernel[3] = a[0][0];
692 kernel[4] = b22;
693 kernel[5] = a[0][0];
694 kernel[6] = b13;
695 kernel[7] = a[1][1];
696 kernel[8] = b11;
697}
698
699static inline __attribute__((always_inline)) void isotrope_laplacian(dt_aligned_pixel_simd_t kernel[9])
700{
701 // see in https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Second-order-isotropic-finite-differences
702 // for references (Oono & Puri)
703 const dt_aligned_pixel_simd_t corner = dt_simd_set1(0.25f);
704 const dt_aligned_pixel_simd_t edge = dt_simd_set1(0.5f);
705 const dt_aligned_pixel_simd_t center = dt_simd_set1(-3.f);
706 kernel[0] = corner;
707 kernel[1] = edge;
708 kernel[2] = corner;
709 kernel[3] = edge;
710 kernel[4] = center;
711 kernel[5] = edge;
712 kernel[6] = corner;
713 kernel[7] = edge;
714 kernel[8] = corner;
715}
716
717static inline __attribute__((always_inline)) void compute_kernel(
718 const dt_aligned_pixel_simd_t c2, const dt_aligned_pixel_simd_t cos_theta_sin_theta,
719 const dt_aligned_pixel_simd_t cos_theta2, const dt_aligned_pixel_simd_t sin_theta2,
720 const dt_isotropy_t isotropy_type, dt_aligned_pixel_simd_t kernel[9])
721{
722 // Build the matrix of rotation with anisotropy
723
724 switch(isotropy_type)
725 {
727 default:
728 {
729 isotrope_laplacian(kernel);
730 break;
731 }
733 {
734 dt_aligned_pixel_simd_t a[2][2] = { { dt_simd_set1(0.f) } };
735 rotation_matrix_isophote(c2, cos_theta_sin_theta, cos_theta2, sin_theta2, a);
736 build_matrix(a, kernel);
737 break;
738 }
740 {
741 dt_aligned_pixel_simd_t a[2][2] = { { dt_simd_set1(0.f) } };
742 rotation_matrix_gradient(c2, cos_theta_sin_theta, cos_theta2, sin_theta2, a);
743 build_matrix(a, kernel);
744 break;
745 }
746 }
747}
748
750static inline void heat_PDE_diffusion(const float *const restrict high_freq, const float *const restrict low_freq,
751 const uint8_t *const restrict mask, const int has_mask,
752 float *const restrict output, const size_t width,
753 const size_t height, const dt_aligned_pixel_simd_t anisotropy,
754 const dt_isotropy_t isotropy_type[4],
755 const float variance_threshold, const int mult,
756 const float normalized_regularization,
757 const dt_aligned_pixel_simd_t ABCD, const float strength,
758 const int use_nontemporal)
759{
760 // Simultaneous inpainting for image structure and texture using anisotropic heat transfer model
761 // https://www.researchgate.net/publication/220663968
762 // modified as follow :
763 // * apply it in a multi-scale wavelet setup : we basically solve it twice, on the wavelets LF and HF layers.
764 // * replace the manual texture direction/distance selection by an automatic detection similar to the structure one,
765 // * generalize the framework for isotropic diffusion and anisotropic weighted on the isophote direction
766 // * add an HF-band energy regularization to better avoid edges.
767 // The sharpness setting mimics the contrast equalizer effect by simply multiplying the HF by some gain.
768
769 float *const restrict out = DT_IS_ALIGNED(output);
770 const float *const restrict LF = DT_IS_ALIGNED(low_freq);
771 const float *const restrict HF = DT_IS_ALIGNED(high_freq);
772 const dt_aligned_pixel_simd_t zero = dt_simd_set1(0.f);
773 const dt_aligned_pixel_simd_t flt_min = dt_simd_set1(1e-8f);
774 const dt_aligned_pixel_simd_t variance_threshold_v = dt_simd_set1(variance_threshold);
775 const dt_aligned_pixel_simd_t normalized_regularization_v = dt_simd_set1(normalized_regularization);
776 const dt_aligned_pixel_simd_t strength_v = dt_simd_set1(strength);
777
779 for(size_t row = 0; row < height; ++row)
780 {
781 // interleave the order in which we process the rows so that we minimize cache misses
782 const size_t i = dwt_interleave_rows(row, height, mult);
783 // compute the 'above' and 'below' coordinates, clamping them to the image, once for the entire row
784 const size_t i_neighbours[3]
785 = { MAX((int)(i - mult * H), (int)0) * width, // x - mult
786 i * width, // x
787 MIN((int)(i + mult * H), (int)height - 1) * width }; // x + mult
788 for(size_t j = 0; j < width; ++j)
789 {
790 const size_t idx = (i * width + j);
791 const size_t index = idx * 4;
792 const uint8_t opacity = (has_mask) ? mask[idx] : 1;
793
794 if(opacity)
795 {
796 // non-local neighbours coordinates
797 const size_t j_neighbours[3]
798 = { MAX((int)(j - mult * H), (int)0), // y - mult
799 j, // y
800 MIN((int)(j + mult * H), (int)width - 1) }; // y + mult
801
802 // fetch non-local pixels and store them locally and contiguously
803 dt_aligned_pixel_simd_t neighbour_pixel_HF[9];
804 dt_aligned_pixel_simd_t neighbour_pixel_LF[9];
805 dt_aligned_pixel_simd_t energy = zero;
806
807 for(size_t ii = 0; ii < 3; ii++)
808 for(size_t jj = 0; jj < 3; jj++)
809 {
810 const size_t neighbor = 4 * (i_neighbours[ii] + j_neighbours[jj]);
811 const dt_aligned_pixel_simd_t hf_value = dt_load_simd_aligned(HF + neighbor);
812 const dt_aligned_pixel_simd_t lf_value = dt_load_simd_aligned(LF + neighbor);
813 neighbour_pixel_HF[3 * ii + jj] = hf_value;
814 neighbour_pixel_LF[3 * ii + jj] = lf_value;
815 // Clamp LF to a strictly positive floor to avoid divide-by-zero in
816 // the HF/LF energy estimate without branching per channel.
817 const dt_aligned_pixel_simd_t safe_lf = dt_simd_max_zero(lf_value - flt_min) + flt_min;
818 const dt_aligned_pixel_simd_t ratio = hf_value / safe_lf;
819 energy += ratio * ratio;
820 }
821
822 // normalized_regularization already folds together the user
823 // regularization, the 3x3-support averaging factor, the physical blur
824 // radius carried by the current wavelet band and its scale normalization.
825 energy = dt_simd_max_zero(variance_threshold_v + energy * normalized_regularization_v - flt_min) + flt_min;
826
827 // build the local anisotropic convolution filters for gradients and laplacians
828 dt_aligned_pixel_simd_t lf_gradient[2], hf_gradient[2]; // x, y for each channel
829 find_gradients(neighbour_pixel_LF, lf_gradient);
830 find_gradients(neighbour_pixel_HF, hf_gradient);
831
832 // c² in https://www.researchgate.net/publication/220663968
833 dt_aligned_pixel_simd_t c2[4];
834 dt_aligned_pixel_simd_t grad_x = lf_gradient[0];
835 dt_aligned_pixel_simd_t grad_y = lf_gradient[1];
836 dt_aligned_pixel_simd_t c2_first = zero;
837 dt_aligned_pixel_simd_t c2_third = zero;
838 dt_aligned_pixel_simd_t cos_theta_grad_sq = zero;
839 dt_aligned_pixel_simd_t sin_theta_grad_sq = zero;
840 dt_aligned_pixel_simd_t cos_theta_sin_theta_grad = zero;
842 {
843 const float magnitude_grad = dt_fast_hypotf(grad_x[c], grad_y[c]);
844 c2_first[c] = -magnitude_grad * anisotropy[0];
845 c2_third[c] = -magnitude_grad * anisotropy[2];
846 // Compute cos/sin(arg(grad)) with a branchless normalization, forcing
847 // arg(grad)=0 when magnitude is zero.
848 const float nonzero = (magnitude_grad != 0.f);
849 const float inv_mag = 1.f / (magnitude_grad + (1.f - nonzero));
850 grad_x[c] = grad_x[c] * inv_mag + (1.f - nonzero); // cos(0)
851 grad_y[c] = grad_y[c] * inv_mag; // sin(0)
852 // Warning : now gradient = { cos(arg(grad)) , sin(arg(grad)) }
853 cos_theta_grad_sq[c] = sqf(grad_x[c]);
854 sin_theta_grad_sq[c] = sqf(grad_y[c]);
855 cos_theta_sin_theta_grad[c] = grad_x[c] * grad_y[c];
856 }
857
858 c2[0] = c2_first;
859 c2[2] = c2_third;
860 dt_aligned_pixel_simd_t lapl_x = hf_gradient[0];
861 dt_aligned_pixel_simd_t lapl_y = hf_gradient[1];
862 dt_aligned_pixel_simd_t c2_second = zero;
863 dt_aligned_pixel_simd_t c2_fourth = zero;
864 dt_aligned_pixel_simd_t cos_theta_lapl_sq = zero;
865 dt_aligned_pixel_simd_t sin_theta_lapl_sq = zero;
866 dt_aligned_pixel_simd_t cos_theta_sin_theta_lapl = zero;
868 {
869 const float magnitude_lapl = dt_fast_hypotf(lapl_x[c], lapl_y[c]);
870 c2_second[c] = -magnitude_lapl * anisotropy[1];
871 c2_fourth[c] = -magnitude_lapl * anisotropy[3];
872 // Compute cos/sin(arg(lapl)) with a branchless normalization, forcing
873 // arg(lapl)=0 when magnitude is zero.
874 const float nonzero = (magnitude_lapl != 0.f);
875 const float inv_mag = 1.f / (magnitude_lapl + (1.f - nonzero));
876 lapl_x[c] = lapl_x[c] * inv_mag + (1.f - nonzero); // cos(0)
877 lapl_y[c] = lapl_y[c] * inv_mag; // sin(0)
878 // Warning : now laplacian = { cos(arg(lapl)) , sin(arg(lapl)) }
879 cos_theta_lapl_sq[c] = sqf(lapl_x[c]);
880 sin_theta_lapl_sq[c] = sqf(lapl_y[c]);
881 cos_theta_sin_theta_lapl[c] = lapl_x[c] * lapl_y[c];
882 }
883 c2[1] = c2_second;
884 c2[3] = c2_fourth;
885
886 // elements of c2 need to be expf(mag*anistropy), but we haven't applied the expf() yet. Do that now.
887 for(size_t k = 0; k < 4; k++)
888 for_each_channel(c) c2[k][c] = dt_fast_expf(c2[k][c]);
889
890 dt_aligned_pixel_simd_t kern_first[9], kern_second[9], kern_third[9], kern_fourth[9];
891 compute_kernel(c2[0], cos_theta_sin_theta_grad, cos_theta_grad_sq, sin_theta_grad_sq, isotropy_type[0],
892 kern_first);
893 compute_kernel(c2[1], cos_theta_sin_theta_lapl, cos_theta_lapl_sq, sin_theta_lapl_sq, isotropy_type[1],
894 kern_second);
895 compute_kernel(c2[2], cos_theta_sin_theta_grad, cos_theta_grad_sq, sin_theta_grad_sq, isotropy_type[2],
896 kern_third);
897 compute_kernel(c2[3], cos_theta_sin_theta_lapl, cos_theta_lapl_sq, sin_theta_lapl_sq, isotropy_type[3],
898 kern_fourth);
899
900 dt_aligned_pixel_simd_t derivatives[4] = { zero, zero, zero, zero };
901 // Convolve filters and accumulate the local HF band energy over the
902 // current 3x3 support. This is not a statistical variance estimator:
903 // HF is a band-pass residual, so we normalize each sample by the
904 // corresponding LF value before squaring it, then normalize the summed
905 // ratio by the physical kernel-variance increment of the current
906 // wavelet band.
907 for(size_t k = 0; k < 9; k++)
908 {
909 derivatives[0] = kern_first[k] * neighbour_pixel_LF[k] + derivatives[0];
910 derivatives[1] = kern_second[k] * neighbour_pixel_LF[k] + derivatives[1];
911 derivatives[2] = kern_third[k] * neighbour_pixel_HF[k] + derivatives[2];
912 derivatives[3] = kern_fourth[k] * neighbour_pixel_HF[k] + derivatives[3];
913 }
914
915 // compute the update
916 dt_aligned_pixel_simd_t update = derivatives[0] * ABCD[0];
917 update = derivatives[1] * ABCD[1] + update;
918 update = derivatives[2] * ABCD[2] + update;
919 update = derivatives[3] * ABCD[3] + update;
920 const dt_aligned_pixel_simd_t acc = neighbour_pixel_HF[4] * strength_v + update / energy;
921
922 if(use_nontemporal)
923 dt_store_simd_nontemporal(out + index, dt_simd_max_zero(acc + neighbour_pixel_LF[4]));
924 else
925 dt_store_simd_aligned(out + index, dt_simd_max_zero(acc + neighbour_pixel_LF[4]));
926 }
927 else
928 {
929 // only copy input to output, do nothing
930 if(use_nontemporal)
931 dt_store_simd_nontemporal(out + index, dt_simd_max_zero(dt_load_simd_aligned(HF + index)
932 + dt_load_simd_aligned(LF + index)));
933 else
934 dt_store_simd_aligned(out + index, dt_simd_max_zero(dt_load_simd_aligned(HF + index)
935 + dt_load_simd_aligned(LF + index)));
936 }
937 }
938 }
939
940
941 if(use_nontemporal)
942 dt_omploop_sfence(); // ensure the final nontemporal writeback completes before the caller reads out
943}
944
945static inline float compute_anisotropy_factor(const float user_param)
946{
947 // compute the inverse of the K param in c evaluation from
948 // https://www.researchgate.net/publication/220663968
949 // but in a perceptually-even way, for better GUI interaction
950 return sqf(user_param);
951}
952
953#if DEBUG_DUMP_PFM
955static void dump_PFM(const char *filename, const float* out, const uint32_t w, const uint32_t h)
956{
957 FILE *f = g_fopen(filename, "wb");
958 fprintf(f, "PF\n%d %d\n-1.0\n", w, h);
959 for(int j = h - 1 ; j >= 0 ; j--)
960 for(int i = 0 ; i < w ; i++)
961 for(int c = 0 ; c < 3 ; c++)
962 fwrite(out + (j * w + i) * 4 + c, 1, sizeof(float), f);
963 fclose(f);
964}
965#endif
966
968static inline int wavelets_process(const float *const restrict in, float *const restrict reconstructed,
969 const uint8_t *const restrict mask, const size_t width,
970 const size_t height, const dt_iop_diffuse_data_t *const data,
971 const float zoom, const int scales,
972 const int has_mask,
973 float *const restrict HF[MAX_NUM_SCALES],
974 float *const restrict LF_odd,
975 float *const restrict LF_even)
976{
977 const dt_aligned_pixel_simd_t anisotropy
982
983 const dt_isotropy_t DT_ALIGNED_PIXEL isotropy_type[4]
988
989 const float regularization = powf(10.f, data->regularization) - 1.f;
990 const float variance_threshold = powf(10.f, data->variance_threshold);
991
992 // À trous decimated wavelet decompose
993 // there is a paper from a guy we know that explains it : https://jo.dreggn.org/home/2010_atrous.pdf
994 // the wavelets decomposition here is the same as the equalizer/atrous module,
995 float *restrict residual; // will store the temp buffer containing the last step of blur
996 // allocate a one-row temporary buffer for the decomposition
997 size_t padded_size;
998 float *const tempbuf = dt_pixelpipe_cache_alloc_perthread_float(4 * width, &padded_size); //TODO: alloc in caller
999 if(IS_NULL_PTR(tempbuf)) return 1;
1000
1001 for(int s = 0; s < scales; ++s)
1002 {
1003 const int mult = 1 << s;
1004
1005 const float *restrict buffer_in;
1006 float *restrict buffer_out;
1007
1008 if(s == 0)
1009 {
1010 buffer_in = in;
1011 buffer_out = LF_odd;
1012 }
1013 else if(s % 2 != 0)
1014 {
1015 buffer_in = LF_odd;
1016 buffer_out = LF_even;
1017 }
1018 else
1019 {
1020 buffer_in = LF_even;
1021 buffer_out = LF_odd;
1022 }
1023
1024 decompose_2D_Bspline(buffer_in, HF[s], buffer_out, width, height, mult, tempbuf, padded_size);
1025
1026 residual = buffer_out;
1027
1028#if DEBUG_DUMP_PFM
1029 char name[64];
1030 sprintf(name, "/tmp/scale-input-%i.pfm", s);
1031 dump_PFM(name, buffer_in, width, height);
1032
1033 sprintf(name, "/tmp/scale-blur-%i.pfm", s);
1034 dump_PFM(name, buffer_out, width, height);
1035#endif
1036 }
1038
1039 // will store the temp buffer NOT containing the last step of blur
1040 float *restrict temp = (residual == LF_even) ? LF_odd : LF_even;
1041 int count = 0;
1042
1043 for(int s = scales - 1; s > -1; --s)
1044 {
1045 const int mult = 1 << s;
1046 const float current_radius = equivalent_sigma_at_step(B_SPLINE_SIGMA, s);
1047 const float real_radius = current_radius * zoom;
1048
1049#if DIFFUSE_V3
1050 const float normalized_regularization =
1051 (data->normalize_band_energy)
1052 ? regularization * sqf(real_radius) / 9.f
1053 : regularization / 9.f;
1054#else
1055 const float normalized_regularization = regularization / 9.f * sqf(real_radius);
1056#endif
1057
1058 const float norm = expf(-sqf(real_radius - (float)data->radius_center) / sqf(data->radius));
1059
1060 const dt_aligned_pixel_simd_t ABCD = { data->first * KAPPA * norm,
1061 data->second * KAPPA * norm,
1062 data->third * KAPPA * norm,
1063 data->fourth * KAPPA * norm };
1064 const float strength = data->sharpness * norm + 1.f;
1065
1066 const float *restrict buffer_in;
1067 float *restrict buffer_out;
1068
1069 if(count == 0)
1070 {
1071 buffer_in = residual;
1072 buffer_out = temp;
1073 }
1074 else if(count % 2 != 0)
1075 {
1076 buffer_in = temp;
1077 buffer_out = residual;
1078 }
1079 else
1080 {
1081 buffer_in = residual;
1082 buffer_out = temp;
1083 }
1084
1085 if(s == 0) buffer_out = reconstructed;
1086
1087 heat_PDE_diffusion(HF[s], buffer_in, mask, has_mask, buffer_out, width, height,
1088 anisotropy, isotropy_type, variance_threshold, mult,
1089 normalized_regularization, ABCD, strength, (s == 0));
1090
1091 count++;
1092 }
1093
1094 return 0;
1095}
1096
1097
1099static inline void build_mask(const float *const restrict input, uint8_t *const restrict mask,
1100 const float threshold, const size_t width, const size_t height)
1101{
1102 __OMP_PARALLEL_FOR_SIMD__(aligned(mask, input : 64))
1103 for(size_t k = 0; k < height * width * 4; k += 4)
1104 {
1105 // TRUE if any channel is above threshold
1106 mask[k / 4] = (input[k] > threshold || input[k + 1] > threshold || input[k + 2] > threshold);
1107 }
1108
1109}
1110
1112static inline void inpaint_mask(float *const restrict inpainted, const float *const restrict original,
1113 const uint8_t *const restrict mask, const size_t width,
1114 const size_t height)
1115{
1116 // init the reconstruction with noise inside the masked areas
1118 for(size_t k = 0; k < height * width * 4; k += 4)
1119 {
1120 if(mask[k / 4])
1121 {
1122 const uint32_t i = k / width;
1123 const uint32_t j = k - i;
1124 uint32_t DT_ALIGNED_ARRAY state[4]
1125 = { splitmix32(j + 1), splitmix32((uint64_t)(j + 1) * (i + 3)),
1126 splitmix32(1337), splitmix32(666) };
1131
1132 for_four_channels(c, aligned(inpainted, original, state:64))
1133 inpainted[k + c] = fabsf(gaussian_noise(original[k + c], original[k + c], i % 2 || j % 2, state));
1134 }
1135 else
1136 {
1137 for_four_channels(c, aligned(original, inpainted:64))
1138 inpainted[k + c] = original[k + c];
1139 }
1140 }
1141
1142}
1143
1146 const void *const restrict ivoid, void *const restrict ovoid)
1147{
1148 const dt_iop_roi_t *const roi_in = &piece->roi_in;
1149 const dt_iop_roi_t *const roi_out = &piece->roi_out;
1150 const dt_iop_diffuse_data_t *const data = (dt_iop_diffuse_data_t *)piece->data;
1151
1152 float *restrict in = DT_IS_ALIGNED((float *const restrict)ivoid);
1153 float *const restrict out = DT_IS_ALIGNED((float *const restrict)ovoid);
1154
1155 float *const restrict temp1 = dt_pixelpipe_cache_alloc_align_float((size_t)roi_out->width * roi_out->height * 4, pipe);
1156 float *const restrict temp2 = dt_pixelpipe_cache_alloc_align_float((size_t)roi_out->width * roi_out->height * 4, pipe);
1157
1158 float *restrict temp_in = NULL;
1159 float *restrict temp_out = NULL;
1160 int err = 0;
1161
1162 uint8_t *const restrict mask = dt_pixelpipe_cache_alloc_align(
1163 sizeof(uint8_t) * roi_out->width * roi_out->height,
1164 pipe);
1165
1166 const float zoom = dt_dev_get_module_scale(pipe, roi_in);
1167 const float final_radius = (data->radius + data->radius_center) * 2.f / zoom;
1168 // No legacy iteration remap is applied here anymore. The current solver uses
1169 // the historical a-trous band order and kernel-variance increments exactly,
1170 // so any extra factor would be content-dependent and belong to pixel math,
1171 // not to the user parameter itself.
1172 const int iterations = MAX((int)ceilf((float)data->iterations), 1);
1173 const int diffusion_scales = num_steps_to_reach_equivalent_sigma(B_SPLINE_SIGMA, final_radius);
1174 const int scales = CLAMP(diffusion_scales, 1, MAX_NUM_SCALES);
1175
1176 gboolean out_of_memory = (IS_NULL_PTR(temp1)) || (IS_NULL_PTR(temp2));
1177 // One full-resolution buffer per stored wavelet band.
1178 float *restrict HF[MAX_NUM_SCALES] = { NULL };
1179 for(int s = 0; s < scales; s++)
1180 {
1181 HF[s] = dt_pixelpipe_cache_alloc_align_float(roi_out->width * roi_out->height * 4, pipe);
1182 if(!HF[s]) out_of_memory = TRUE;
1183 }
1184 // Two ping-pong low-pass buffers reused by the decomposition/synthesis.
1185 float *const restrict LF_odd = dt_pixelpipe_cache_alloc_align_float(roi_out->width * roi_out->height * 4, pipe);
1186 float *const restrict LF_even = dt_pixelpipe_cache_alloc_align_float(roi_out->width * roi_out->height * 4, pipe);
1187
1188 // PAUSE !
1189 // check that all buffers exist before processing,
1190 // because we use a lot of memory here.
1191 if(IS_NULL_PTR(mask) || IS_NULL_PTR(temp1) || IS_NULL_PTR(temp2) || IS_NULL_PTR(LF_odd) || IS_NULL_PTR(LF_even) || out_of_memory)
1192 {
1193 err = 1;
1194 goto error;
1195 }
1196
1197 const int has_mask = (data->threshold > 0.f);
1198
1199 if(has_mask)
1200 {
1201 // build a boolean mask, TRUE where image is above threshold, FALSE otherwise
1202 build_mask(in, mask, data->threshold, roi_out->width, roi_out->height);
1203
1204 // init the inpainting area with noise
1205 inpaint_mask(temp1, in, mask, roi_out->width, roi_out->height);
1206
1207 in = temp1;
1208 }
1209
1210 for(int it = 0; it < iterations; it++)
1211 {
1212 if(it == 0)
1213 {
1214 temp_in = in;
1215 temp_out = temp2;
1216 }
1217 else if(it % 2 == 0)
1218 {
1219 temp_in = temp1;
1220 temp_out = temp2;
1221 }
1222 else
1223 {
1224 temp_in = temp2;
1225 temp_out = temp1;
1226 }
1227
1228 if(it == (int)iterations - 1)
1229 temp_out = out;
1230
1231 if(wavelets_process(temp_in, temp_out, mask, roi_out->width, roi_out->height,
1232 data, zoom, scales, has_mask, HF, LF_odd, LF_even))
1233 {
1234 err = 1;
1235 goto error;
1236 }
1237 }
1238
1239error:
1245 for(int s = 0; s < scales; s++)
1246 if(HF[s]) dt_pixelpipe_cache_free_align(HF[s]);
1247 return err;
1248}
1249
1250#if HAVE_OPENCL
1251static inline cl_int wavelets_process_cl(const int devid, cl_mem in, cl_mem reconstructed, cl_mem mask,
1252 const size_t sizes[3], const int width, const int height,
1253 const dt_iop_diffuse_data_t *const data,
1255 const float zoom, const int scales,
1256 const int has_mask,
1257 cl_mem HF[MAX_NUM_SCALES],
1258 cl_mem LF_odd,
1259 cl_mem LF_even)
1260{
1261 cl_int err = -999;
1262
1263 const dt_aligned_pixel_simd_t anisotropy
1268
1269 /*
1270 fprintf(stdout, "anisotropy : %f ; %f ; %f ; %f \n",
1271 anisotropy[0], anisotropy[1], anisotropy[2], anisotropy[3]);
1272 */
1273
1274 const dt_isotropy_t DT_ALIGNED_PIXEL isotropy_type[4]
1279
1280 /*
1281 fprintf(stdout, "type : %d ; %d ; %d ; %d \n",
1282 isotropy_type[0], isotropy_type[1], isotropy_type[2], isotropy_type[3]);
1283 */
1284
1285 const float regularization = powf(10.f, data->regularization) - 1.f;
1286 const float variance_threshold = powf(10.f, data->variance_threshold);
1287 // Same a-trous decomposition as the CPU path, mirrored in OpenCL.
1288 cl_mem residual;
1289
1290 for(int s = 0; s < scales; ++s)
1291 {
1292 const int mult = 1 << s;
1293
1294 cl_mem buffer_in;
1295 cl_mem buffer_out;
1296
1297 if(s == 0)
1298 {
1299 buffer_in = in;
1300 buffer_out = LF_odd;
1301 }
1302 else if(s % 2 != 0)
1303 {
1304 buffer_in = LF_odd;
1305 buffer_out = LF_even;
1306 }
1307 else
1308 {
1309 buffer_in = LF_even;
1310 buffer_out = LF_odd;
1311 }
1312
1313 // Compute wavelets low-frequency scales
1314 const int clamp_lf = 1;
1315 int hblocksize;
1316 dt_opencl_local_buffer_t hlocopt = (dt_opencl_local_buffer_t){ .xoffset = 2 * mult, .xfactor = 1,
1317 .yoffset = 0, .yfactor = 1,
1318 .cellsize = 4 * sizeof(float), .overhead = 0,
1319 .sizex = 1 << 16, .sizey = 1 };
1321 hblocksize = hlocopt.sizex;
1322 else
1323 hblocksize = 1;
1324
1325 // Keep the same separable order as the CPU path: vertical pass first,
1326 // store its intermediate into HF[s], then horizontal pass builds LF.
1327 int vblocksize;
1328 dt_opencl_local_buffer_t vlocopt = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1,
1329 .yoffset = 2 * mult, .yfactor = 1,
1330 .cellsize = 4 * sizeof(float), .overhead = 0,
1331 .sizex = 1, .sizey = 1 << 16 };
1333 vblocksize = vlocopt.sizey;
1334 else
1335 vblocksize = 1;
1336
1337 if(vblocksize > 1)
1338 {
1339 const size_t vertical_sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUP(height, vblocksize), 1 };
1340 const size_t vertical_local[3] = { 1, vblocksize, 1 };
1341 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 0, sizeof(cl_mem), (void *)&buffer_in);
1342 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 1, sizeof(cl_mem), (void *)&HF[s]);
1343 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 2, sizeof(int), (void *)&width);
1344 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 3, sizeof(int), (void *)&height);
1345 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 4, sizeof(int), (void *)&mult);
1346 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 5, sizeof(int), (void *)&clamp_lf);
1348 (vblocksize + 4 * mult) * 4 * sizeof(float), NULL);
1350 vertical_sizes, vertical_local);
1351 }
1352 else
1353 {
1354 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 0, sizeof(cl_mem), (void *)&buffer_in);
1355 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 1, sizeof(cl_mem), (void *)&HF[s]);
1356 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 2, sizeof(int), (void *)&width);
1357 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 3, sizeof(int), (void *)&height);
1358 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 4, sizeof(int), (void *)&mult);
1359 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 5, sizeof(int), (void *)&clamp_lf);
1361 }
1362 if(err != CL_SUCCESS) return err;
1363
1364 if(hblocksize > 1)
1365 {
1366 const size_t horizontal_sizes[3] = { ROUNDUP(width, hblocksize), ROUNDUPDHT(height, devid), 1 };
1367 const size_t horizontal_local[3] = { hblocksize, 1, 1 };
1368 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 0, sizeof(cl_mem), (void *)&HF[s]);
1369 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 1, sizeof(cl_mem), (void *)&buffer_out);
1370 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 2, sizeof(int), (void *)&width);
1371 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 3, sizeof(int), (void *)&height);
1372 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 4, sizeof(int), (void *)&mult);
1373 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 5, sizeof(int), (void *)&clamp_lf);
1375 (hblocksize + 4 * mult) * 4 * sizeof(float), NULL);
1377 horizontal_sizes, horizontal_local);
1378 }
1379 else
1380 {
1381 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 0, sizeof(cl_mem), (void *)&HF[s]);
1382 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 1, sizeof(cl_mem), (void *)&buffer_out);
1383 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 2, sizeof(int), (void *)&width);
1384 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 3, sizeof(int), (void *)&height);
1385 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 4, sizeof(int), (void *)&mult);
1386 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 5, sizeof(int), (void *)&clamp_lf);
1388 }
1389 if(err != CL_SUCCESS) return err;
1390
1391 // Compute wavelets high-frequency scales and backup the maximum of texture over the RGB channels
1392 // Note : HF = detail - LF
1393 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 0, sizeof(cl_mem), (void *)&buffer_in);
1394 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 1, sizeof(cl_mem), (void *)&buffer_out);
1395 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 2, sizeof(cl_mem), (void *)&HF[s]);
1396 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 3, sizeof(int), (void *)&width);
1397 dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 4, sizeof(int), (void *)&height);
1399 if(err != CL_SUCCESS) return err;
1400
1401 residual = buffer_out;
1402 }
1403
1404 // Ping-pong low-pass buffer not currently holding the coarsest residual.
1405 cl_mem temp = (residual == LF_even) ? LF_odd : LF_even;
1406 int count = 0;
1407
1408 for(int s = scales - 1; s > -1; --s)
1409 {
1410 const int mult = 1 << s;
1411 const float current_radius = equivalent_sigma_at_step(B_SPLINE_SIGMA, s);
1412 const float real_radius = current_radius * zoom;
1413
1414#if DIFFUSE_V3
1415 const float normalized_regularization =
1416 (data->normalize_band_energy)
1417 ? regularization * sqf(real_radius) / 9.f
1418 : regularization / 9.f;
1419#else
1420 const float normalized_regularization = regularization / 9.f * sqf(real_radius);
1421#endif
1422
1423 const float norm = expf(-sqf(real_radius - (float)data->radius_center) / sqf(data->radius));
1424
1425 const dt_aligned_pixel_simd_t ABCD = { data->first * KAPPA * norm,
1426 data->second * KAPPA * norm,
1427 data->third * KAPPA * norm,
1428 data->fourth * KAPPA * norm };
1429 const float strength = data->sharpness * norm + 1.f;
1430
1431 cl_mem buffer_in;
1432 cl_mem buffer_out;
1433
1434 if(count == 0)
1435 {
1436 buffer_in = residual;
1437 buffer_out = temp;
1438 }
1439 else if(count % 2 != 0)
1440 {
1441 buffer_in = temp;
1442 buffer_out = residual;
1443 }
1444 else
1445 {
1446 buffer_in = residual;
1447 buffer_out = temp;
1448 }
1449
1450 if(s == 0) buffer_out = reconstructed;
1451
1452 // Compute wavelets low-frequency scales
1453 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 0, sizeof(cl_mem), (void *)&HF[s]);
1454 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 1, sizeof(cl_mem), (void *)&buffer_in);
1455 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 2, sizeof(cl_mem), (void *)&mask);
1456 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 3, sizeof(int), (void *)&has_mask);
1457 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 4, sizeof(cl_mem), (void *)&buffer_out);
1458 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 5, sizeof(int), (void *)&width);
1459 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 6, sizeof(int), (void *)&height);
1460 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 7, 4 * sizeof(float), (void *)&anisotropy);
1461 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 8, 4 * sizeof(dt_isotropy_t), (void *)&isotropy_type);
1462 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 9, sizeof(float), (void *)&normalized_regularization);
1463 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 10, sizeof(float), (void *)&variance_threshold);
1464 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 11, sizeof(int), (void *)&mult);
1465 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 12, 4 * sizeof(float), (void *)&ABCD);
1466 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 13, sizeof(float), (void *)&strength);
1467 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_diffuse_pde, sizes);
1468 if(err != CL_SUCCESS) return err;
1469
1470 count++;
1471 }
1472
1473 return err;
1474}
1475
1476int process_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out)
1477{
1478 const dt_iop_roi_t *const roi_in = &piece->roi_in;
1479 const dt_iop_roi_t *const roi_out = &piece->roi_out;
1480 const dt_iop_diffuse_data_t *const data = (dt_iop_diffuse_data_t *)piece->data;
1482
1483 int out_of_memory = FALSE;
1484
1485 cl_int err = -999;
1486
1487 const int devid = pipe->devid;
1488 const int width = roi_in->width;
1489 const int height = roi_in->height;
1490
1491 size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1492
1493 cl_mem in = dev_in;
1494
1495 cl_mem temp1 = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);
1496 cl_mem temp2 = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);
1497
1498 cl_mem temp_in = NULL;
1499 cl_mem temp_out = NULL;
1500
1501 cl_mem mask = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(uint8_t));
1502
1503 const float zoom = dt_dev_get_module_scale(pipe, roi_in);
1504 const float final_radius = (data->radius + data->radius_center) * 2.f / zoom;
1505 // See the CPU path above: iterations stay in user space because the current
1506 // solver already matches the historical a-trous band ordering and kernel
1507 // variance increments. There is no content-independent remap left to apply.
1508 const int iterations = MAX((int)ceilf((float)data->iterations), 1);
1509 const int diffusion_scales = num_steps_to_reach_equivalent_sigma(B_SPLINE_SIGMA, final_radius);
1510 const int scales = CLAMP(diffusion_scales, 1, MAX_NUM_SCALES);
1511 // One device buffer per stored wavelet band.
1512 cl_mem HF[MAX_NUM_SCALES] = { NULL };
1513 for(int s = 0; s < scales; s++)
1514 {
1515 HF[s] = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);
1516 if(!HF[s]) out_of_memory = TRUE;
1517 }
1518 // Two low-pass ping-pong buffers reused across all scales.
1519 cl_mem LF_even = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);
1520 cl_mem LF_odd = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);
1521
1522 // PAUSE !
1523 // check that all buffers exist before processing,
1524 // because we use a lot of memory here.
1525 if(IS_NULL_PTR(mask) || IS_NULL_PTR(temp1) || IS_NULL_PTR(temp2) || IS_NULL_PTR(LF_odd) || IS_NULL_PTR(LF_even) || out_of_memory)
1526 {
1527 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
1528 goto error;
1529 }
1530
1531 const int has_mask = (data->threshold > 0.f);
1532
1533 if(has_mask)
1534 {
1535 // build a boolean mask, TRUE where image is above threshold, FALSE otherwise
1536 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 0, sizeof(cl_mem), (void *)&in);
1537 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 1, sizeof(cl_mem), (void *)&mask);
1538 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 2, sizeof(float), (void *)&data->threshold);
1539 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 3, sizeof(int), (void *)&roi_out->width);
1540 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 4, sizeof(int), (void *)&roi_out->height);
1542 if(err != CL_SUCCESS) goto error;
1543
1544 // init the inpainting area with noise
1545 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 0, sizeof(cl_mem), (void *)&temp1);
1546 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 1, sizeof(cl_mem), (void *)&in);
1547 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 2, sizeof(cl_mem), (void *)&mask);
1548 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 3, sizeof(int), (void *)&roi_out->width);
1549 dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 4, sizeof(int), (void *)&roi_out->height);
1551 if(err != CL_SUCCESS) goto error;
1552
1553 in = temp1;
1554 }
1555
1556 for(int it = 0; it < iterations; it++)
1557 {
1558 if(it == 0)
1559 {
1560 temp_in = in;
1561 temp_out = temp2;
1562 }
1563 else if(it % 2 == 0)
1564 {
1565 temp_in = temp1;
1566 temp_out = temp2;
1567 }
1568 else
1569 {
1570 temp_in = temp2;
1571 temp_out = temp1;
1572 }
1573
1574 if(it == (int)iterations - 1) temp_out = dev_out;
1575 err = wavelets_process_cl(devid, temp_in, temp_out, mask, sizes, width, height,
1576 data, gd, zoom, scales, has_mask, HF, LF_odd, LF_even);
1577 if(err != CL_SUCCESS) goto error;
1578 }
1579
1580 // cleanup and exit on success
1586 for(int s = 0; s < scales; s++) dt_opencl_release_mem_object(HF[s]);
1587 return TRUE;
1588
1589error:
1595 for(int s = 0; s < scales; s++) dt_opencl_release_mem_object(HF[s]);
1596
1597 dt_print(DT_DEBUG_OPENCL, "[opencl_diffuse] couldn't enqueue kernel! %d\n", err);
1598 return FALSE;
1599}
1600
1602{
1603 const int program = 33; // diffuse.cl in programs.conf
1605
1606 module->data = gd;
1607 gd->kernel_diffuse_build_mask = dt_opencl_create_kernel(program, "build_mask");
1608 gd->kernel_diffuse_inpaint_mask = dt_opencl_create_kernel(program, "inpaint_mask");
1609 gd->kernel_diffuse_pde = dt_opencl_create_kernel(program, "diffuse_pde");
1610
1611 const int wavelets = 35; // bspline.cl, from programs.conf
1612 gd->kernel_filmic_bspline_horizontal = dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_horizontal");
1613 gd->kernel_filmic_bspline_vertical = dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_vertical");
1614 gd->kernel_filmic_bspline_horizontal_local = dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_horizontal_local");
1615 gd->kernel_filmic_bspline_vertical_local = dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_vertical_local");
1616 gd->kernel_filmic_wavelets_detail = dt_opencl_create_kernel(wavelets, "wavelets_detail_level");
1617}
1618
1619
1634#endif
1635
1636
1637void gui_init(struct dt_iop_module_t *self)
1638{
1640 self->widget = gtk_box_new(GTK_ORIENTATION_VERTICAL, DT_GUI_BOX_SPACING);
1641
1642 gtk_box_pack_start(GTK_BOX(self->widget), dt_ui_section_label_new(_("properties")), FALSE, FALSE, 0);
1643
1644 g->iterations = dt_bauhaus_slider_from_params(self, "iterations");
1645 dt_bauhaus_slider_set_soft_range(g->iterations, 1., 128);
1646 gtk_widget_set_tooltip_text(g->iterations,
1647 _("more iterations make the effect stronger but the module slower.\n"
1648 "this is analogous to giving more time to the diffusion reaction.\n"
1649 "if you plan on sharpening or inpainting, \n"
1650 "more iterations help reconstruction."));
1651
1652 g->radius_center = dt_bauhaus_slider_from_params(self, "radius_center");
1653 dt_bauhaus_slider_set_soft_range(g->radius_center, 0., 512.);
1654 dt_bauhaus_slider_set_format(g->radius_center, " px");
1655 gtk_widget_set_tooltip_text(
1656 g->radius_center, _("main scale of the diffusion.\n"
1657 "zero makes diffusion act on the finest details more heavily.\n"
1658 "non-zero defines the size of the details to diffuse heavily.\n"
1659 "for deblurring and denoising, set to zero.\n"
1660 "increase to act on local contrast instead."));
1661
1662 g->radius = dt_bauhaus_slider_from_params(self, "radius");
1663 dt_bauhaus_slider_set_soft_range(g->radius, 1., 512.);
1664 dt_bauhaus_slider_set_format(g->radius, " px");
1665 gtk_widget_set_tooltip_text(
1666 g->radius, _("width of the diffusion around the central radius.\n"
1667 "high values diffuse on a large band of radii.\n"
1668 "low values diffuse closer to the central radius.\n"
1669 "if you plan on deblurring, \n"
1670 "the radius should be around the width of your lens blur."));
1671
1672 GtkWidget *label_speed = dt_ui_section_label_new(_("speed (sharpen \342\206\224 diffuse)"));
1673 gtk_box_pack_start(GTK_BOX(self->widget), label_speed, FALSE, FALSE, 0);
1674
1675 g->first = dt_bauhaus_slider_from_params(self, "first");
1677 dt_bauhaus_slider_set_format(g->first, "%");
1678 gtk_widget_set_tooltip_text(g->first, _("diffusion speed of low-frequency wavelet layers\n"
1679 "in the direction of 1st order anisotropy (set below).\n\n"
1680 "negative values sharpen, \n"
1681 "positive values diffuse and blur, \n"
1682 "zero does nothing."));
1683
1684 g->second = dt_bauhaus_slider_from_params(self, "second");
1685 dt_bauhaus_slider_set_digits(g->second, 4);
1686 dt_bauhaus_slider_set_format(g->second, "%");
1687 gtk_widget_set_tooltip_text(g->second, _("diffusion speed of low-frequency wavelet layers\n"
1688 "in the direction of 2nd order anisotropy (set below).\n\n"
1689 "negative values sharpen, \n"
1690 "positive values diffuse and blur, \n"
1691 "zero does nothing."));
1692
1693 g->third = dt_bauhaus_slider_from_params(self, "third");
1695 dt_bauhaus_slider_set_format(g->third, "%");
1696 gtk_widget_set_tooltip_text(g->third, _("diffusion speed of high-frequency wavelet layers\n"
1697 "in the direction of 3rd order anisotropy (set below).\n\n"
1698 "negative values sharpen, \n"
1699 "positive values diffuse and blur, \n"
1700 "zero does nothing."));
1701
1702 g->fourth = dt_bauhaus_slider_from_params(self, "fourth");
1703 dt_bauhaus_slider_set_digits(g->fourth, 4);
1704 dt_bauhaus_slider_set_format(g->fourth, "%");
1705 gtk_widget_set_tooltip_text(g->fourth, _("diffusion speed of high-frequency wavelet layers\n"
1706 "in the direction of 4th order anisotropy (set below).\n\n"
1707 "negative values sharpen, \n"
1708 "positive values diffuse and blur, \n"
1709 "zero does nothing."));
1710
1711 GtkWidget *label_direction = dt_ui_section_label_new(_("direction"));
1712 gtk_box_pack_start(GTK_BOX(self->widget), label_direction, FALSE, FALSE, 0);
1713
1714 g->anisotropy_first = dt_bauhaus_slider_from_params(self, "anisotropy_first");
1715 dt_bauhaus_slider_set_digits(g->anisotropy_first, 4);
1716 dt_bauhaus_slider_set_format(g->anisotropy_first, "%");
1717 gtk_widget_set_tooltip_text(g->anisotropy_first, _("direction of 1st order speed (set above).\n\n"
1718 "negative values follow gradients more closely, \n"
1719 "positive values rather avoid edges (isophotes), \n"
1720 "zero affects both equally (isotropic)."));
1721
1722 g->anisotropy_second = dt_bauhaus_slider_from_params(self, "anisotropy_second");
1723 dt_bauhaus_slider_set_digits(g->anisotropy_second, 4);
1724 dt_bauhaus_slider_set_format(g->anisotropy_second, "%");
1725 gtk_widget_set_tooltip_text(g->anisotropy_second,_("direction of 2nd order speed (set above).\n\n"
1726 "negative values follow gradients more closely, \n"
1727 "positive values rather avoid edges (isophotes), \n"
1728 "zero affects both equally (isotropic)."));
1729
1730 g->anisotropy_third = dt_bauhaus_slider_from_params(self, "anisotropy_third");
1731 dt_bauhaus_slider_set_digits(g->anisotropy_third, 4);
1732 dt_bauhaus_slider_set_format(g->anisotropy_third, "%");
1733 gtk_widget_set_tooltip_text(g->anisotropy_third,_("direction of 3rd order speed (set above).\n\n"
1734 "negative values follow gradients more closely, \n"
1735 "positive values rather avoid edges (isophotes), \n"
1736 "zero affects both equally (isotropic)."));
1737
1738 g->anisotropy_fourth = dt_bauhaus_slider_from_params(self, "anisotropy_fourth");
1739 dt_bauhaus_slider_set_digits(g->anisotropy_fourth, 4);
1740 dt_bauhaus_slider_set_format(g->anisotropy_fourth, "%");
1741 gtk_widget_set_tooltip_text(g->anisotropy_fourth,_("direction of 4th order speed (set above).\n\n"
1742 "negative values follow gradients more closely, \n"
1743 "positive values rather avoid edges (isophotes), \n"
1744 "zero affects both equally (isotropic)."));
1745
1746 gtk_box_pack_start(GTK_BOX(self->widget), dt_ui_section_label_new(_("edge management")), FALSE, FALSE, 0);
1747
1748 g->sharpness = dt_bauhaus_slider_from_params(self, "sharpness");
1749 dt_bauhaus_slider_set_format(g->sharpness, "%");
1750 gtk_widget_set_tooltip_text(g->sharpness,
1751 _("increase or decrease the sharpness of the highest frequencies.\n"
1752 "can be used to keep details after blooming,\n"
1753 "for standalone sharpening set speed to negative values."));
1754
1755 g->regularization = dt_bauhaus_slider_from_params(self, "regularization");
1756 gtk_widget_set_tooltip_text(g->regularization,
1757 _("define the sensitivity of the variance penalty for edges.\n"
1758 "increase to exclude more edges from diffusion,\n"
1759 "if fringes or halos appear."));
1760
1761 g->variance_threshold = dt_bauhaus_slider_from_params(self, "variance_threshold");
1762 gtk_widget_set_tooltip_text(g->variance_threshold,
1763 _("define the variance threshold between edge amplification and penalty.\n"
1764 "decrease if you want pixels on smooth surfaces get a boost,\n"
1765 "increase if you see noise appear on smooth surfaces or\n"
1766 "if dark areas seem oversharpened compared to bright areas."));
1767
1768
1769 gtk_box_pack_start(GTK_BOX(self->widget), dt_ui_section_label_new(_("diffusion spatiality")), FALSE, FALSE, 0);
1770
1771 g->threshold = dt_bauhaus_slider_from_params(self, "threshold");
1772 dt_bauhaus_slider_set_format(g->threshold, "%");
1773 dt_bauhaus_slider_set_digits(g->threshold, 2);
1774 gtk_widget_set_tooltip_text(g->threshold,
1775 _("luminance threshold for the mask.\n"
1776 "0. disables the luminance masking and applies the module on the whole image.\n"
1777 "any higher value excludes pixels with luminance lower than the threshold.\n"
1778 "this can be used to inpaint highlights."));
1779}
1780// clang-format off
1781// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
1782// vim: shiftwidth=2 expandtab tabstop=2 cindent
1783// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
1784// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
void dt_bauhaus_slider_set_soft_range(GtkWidget *widget, float soft_min, float soft_max)
Definition bauhaus.c:1647
void dt_bauhaus_slider_set_digits(GtkWidget *widget, int val)
Definition bauhaus.c:3534
void dt_bauhaus_slider_set_format(GtkWidget *widget, const char *format)
Definition bauhaus.c:3598
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
@ DEVELOP_BLEND_CS_RGB_SCENE
Definition blend.h:60
#define B_SPLINE_SIGMA
Definition bspline.h:34
static unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
Definition bspline.h:61
static float equivalent_sigma_at_step(const float sigma, const unsigned int s)
Definition bspline.h:48
static void decompose_2D_Bspline(const float *const restrict in, float *const restrict HF, float *const restrict LF, const size_t width, const size_t height, const int mult, float *const tempbuf, size_t padded_size)
Definition bspline.h:347
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
return vector dt_simd_set1(valid ?(scaling+NORM_MIN) :NORM_MIN)
@ IOP_CS_RGB
const dt_aligned_pixel_t f
const float threshold
const dt_colormatrix_t dt_aligned_pixel_t out
dt_store_simd_aligned(out, dt_mat3x4_mul_vec4(vin, dt_colormatrix_row_to_simd(matrix, 0), dt_colormatrix_row_to_simd(matrix, 1), dt_colormatrix_row_to_simd(matrix, 2)))
static const int row
static float strength(float value, float strength)
Definition colorzones.c:420
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1542
#define DT_ALIGNED_PIXEL
Definition darktable.h:389
@ DT_DEBUG_OPENCL
Definition darktable.h:722
#define DT_ALIGNED_ARRAY
Definition darktable.h:388
#define DT_IS_ALIGNED(x)
Definition darktable.h:371
#define for_each_channel(_var,...)
Definition darktable.h:662
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
Definition darktable.h:524
#define dt_pixelpipe_cache_alloc_align(size, pipe)
Definition darktable.h:437
#define dt_free(ptr)
Definition darktable.h:456
#define DT_MODULE_INTROSPECTION(MODVER, PARAMSTYPE)
Definition darktable.h:151
#define __OMP_DECLARE_SIMD__(...)
Definition darktable.h:263
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define dt_pixelpipe_cache_alloc_align_float(pixels, pipe)
Definition darktable.h:442
#define __DT_CLONE_TARGETS__
Definition darktable.h:367
#define for_four_channels(_var,...)
Definition darktable.h:664
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define __OMP_PARALLEL_FOR_SIMD__(...)
Definition darktable.h:259
#define dt_pixelpipe_cache_alloc_perthread_float(n, padded_size)
Definition darktable.h:1030
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
static unsigned int splitmix32(const unsigned long seed)
static float xoshiro128plus(uint state[4])
void dt_iop_params_t
Definition dev_history.h:41
const char ** description(struct dt_iop_module_t *self)
Definition diffuse.c:176
int default_group()
Definition diffuse.c:189
static __DT_CLONE_TARGETS__ int wavelets_process(const float *const restrict in, float *const restrict reconstructed, const uint8_t *const restrict mask, const size_t width, const size_t height, const dt_iop_diffuse_data_t *const data, const float zoom, const int scales, const int has_mask, float *const restrict HF[10], float *const restrict LF_odd, float *const restrict LF_even)
Definition diffuse.c:968
__DT_CLONE_TARGETS__ int process(dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, const void *const restrict ivoid, void *const restrict ovoid)
Definition diffuse.c:1145
void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *params, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)
Definition diffuse.c:136
dt_isotropy_t
Definition diffuse.c:145
@ DT_ISOTROPY_ISOPHOTE
Definition diffuse.c:147
@ DT_ISOTROPY_ISOTROPE
Definition diffuse.c:146
@ DT_ISOTROPY_GRADIENT
Definition diffuse.c:148
static float compute_anisotropy_factor(const float user_param)
Definition diffuse.c:945
const char * aliases()
Definition diffuse.c:171
static __DT_CLONE_TARGETS__ void build_mask(const float *const restrict input, uint8_t *const restrict mask, const float threshold, const size_t width, const size_t height)
Definition diffuse.c:1099
static __DT_CLONE_TARGETS__ void heat_PDE_diffusion(const float *const restrict high_freq, const float *const restrict low_freq, const uint8_t *const restrict mask, const int has_mask, float *const restrict output, const size_t width, const size_t height, const dt_aligned_pixel_simd_t anisotropy, const dt_isotropy_t isotropy_type[4], const float variance_threshold, const int mult, const float normalized_regularization, const dt_aligned_pixel_simd_t ABCD, const float strength, const int use_nontemporal)
Definition diffuse.c:750
const char * name()
Definition diffuse.c:166
void gui_init(struct dt_iop_module_t *self)
Definition diffuse.c:1637
void tiling_callback(struct dt_iop_module_t *self, const struct dt_dev_pixelpipe_t *pipe, const struct dt_dev_pixelpipe_iop_t *piece, struct dt_develop_tiling_t *tiling)
Definition diffuse.c:575
void cleanup_global(dt_iop_module_so_t *module)
Definition diffuse.c:1620
static cl_int wavelets_process_cl(const int devid, cl_mem in, cl_mem reconstructed, cl_mem mask, const size_t sizes[3], const int width, const int height, const dt_iop_diffuse_data_t *const data, dt_iop_diffuse_global_data_t *const gd, const float zoom, const int scales, const int has_mask, cl_mem HF[10], cl_mem LF_odd, cl_mem LF_even)
Definition diffuse.c:1251
int default_colorspace(dt_iop_module_t *self, dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece)
Definition diffuse.c:199
int flags()
Definition diffuse.c:194
void init_presets(dt_iop_module_so_t *self)
Definition diffuse.c:298
#define H
Definition diffuse.c:613
#define MAX_NUM_SCALES
Definition diffuse.c:77
static __DT_CLONE_TARGETS__ void init_reconstruct(float *const restrict reconstructed, const size_t width, const size_t height)
Definition diffuse.c:602
#define KAPPA
Definition diffuse.c:614
static dt_isotropy_t check_isotropy_mode(const float anisotropy)
Definition diffuse.c:153
void init_global(dt_iop_module_so_t *module)
Definition diffuse.c:1601
int process_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out)
Definition diffuse.c:1476
int legacy_params(dt_iop_module_t *self, const void *const old_params, const int old_version, void *new_params, const int new_version)
Definition diffuse.c:204
static __DT_CLONE_TARGETS__ void inpaint_mask(float *const restrict inpainted, const float *const restrict original, const uint8_t *const restrict mask, const size_t width, const size_t height)
Definition diffuse.c:1112
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:93
static GtkWidget * dt_ui_section_label_new(const gchar *str)
Definition gtk.h:451
#define DT_GUI_BOX_SPACING
Definition gtk.h:109
void dt_gui_presets_add_generic(const char *name, dt_dev_operation_t op, const int32_t version, const void *params, const int32_t params_size, const int32_t enabled, const dt_develop_blend_colorspace_t blend_cst)
const char ** dt_iop_set_description(dt_iop_module_t *module, const char *main_text, const char *purpose, const char *input, const char *process, const char *output)
Definition imageop.c:3141
float dt_dev_get_module_scale(const dt_dev_pixelpipe_t *const pipe, const dt_iop_roi_t *const roi_in)
Definition imageop.c:131
#define dt_omploop_sfence()
Definition imageop.h:702
@ IOP_FLAGS_INCLUDE_IN_STYLES
Definition imageop.h:166
@ IOP_FLAGS_SUPPORTS_BLENDING
Definition imageop.h:167
@ IOP_FLAGS_ALLOW_TILING
Definition imageop.h:169
@ IOP_GROUP_SHARPNESS
Definition imageop.h:141
#define IOP_GUI_ALLOC(module)
Definition imageop.h:599
GtkWidget * dt_bauhaus_slider_from_params(dt_iop_module_t *self, const char *param)
Definition imageop_gui.c:77
void *const ovoid
static float kernel(const float *x, const float *y)
#define ABCD(A, B, C, D)
float *const restrict const size_t k
int dt_opencl_local_buffer_opt(const int devid, const int kernel, dt_opencl_local_buffer_t *factors)
Definition opencl.c:3156
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:2136
void * dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
Definition opencl.c:2471
int dt_opencl_create_kernel(const int prog, const char *name)
Definition opencl.c:2030
void dt_opencl_free_kernel(const int kernel)
Definition opencl.c:2073
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:2127
int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.c:2142
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2383
#define ROUNDUP(a, n)
Definition opencl.h:78
#define ROUNDUPDHT(a, b)
Definition opencl.h:82
#define ROUNDUPDWD(a, b)
Definition opencl.h:81
struct _GtkWidget GtkWidget
Definition splash.h:29
const float uint32_t state[4]
unsigned __int64 uint64_t
Definition strptime.c:75
struct dt_iop_module_t *void * data
GtkWidget * anisotropy_third
Definition diffuse.c:116
GtkWidget * anisotropy_first
Definition diffuse.c:116
GtkWidget * regularization
Definition diffuse.c:115
GtkWidget * regularization_first
Definition diffuse.c:116
GtkWidget * anisotropy_second
Definition diffuse.c:116
GtkWidget * radius_center
Definition diffuse.c:115
GtkWidget * variance_threshold
Definition diffuse.c:116
GtkWidget * anisotropy_fourth
Definition diffuse.c:116
GModule *dt_dev_operation_t op
Definition imageop.h:230
dt_iop_global_data_t * data
Definition imageop.h:233
dt_iop_params_t * default_params
Definition imageop.h:307
GtkWidget * widget
Definition imageop.h:337
dt_iop_global_data_t * global_data
Definition imageop.h:314
int32_t params_size
Definition imageop.h:309
Region of interest passed through the pixelpipe.
Definition imageop.h:72
#define MIN(a, b)
Definition thinplate.c:32
#define MAX(a, b)
Definition thinplate.c:29