Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
heal.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2017 Edgardo Hoszowski.
4 Copyright (C) 2019 Andreas Schneider.
5 Copyright (C) 2020 Hubert Kowalski.
6 Copyright (C) 2020-2022 Pascal Obry.
7 Copyright (C) 2021 parafin.
8 Copyright (C) 2021-2022 Ralf Brown.
9 Copyright (C) 2022 Hanno Schwalm.
10 Copyright (C) 2022 Martin Bařinka.
11 Copyright (C) 2024 Alynx Zhou.
12 Copyright (C) 2026 Aurélien PIERRE.
13
14 darktable is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18
19 darktable is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with darktable. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28#include "common/darktable.h"
29#include "control/control.h"
30#include "develop/imageop.h"
32#include "heal.h"
33
34/* Based on the original source code of GIMP's Healing Tool, by Jean-Yves Couleaud
35 *
36 * http://www.gimp.org/
37 *
38 * */
39
40/* NOTES
41 *
42 * The method used here is similar to the lighting invariant correction
43 * method but slightly different: we do not divide the RGB components,
44 * but subtract them I2 = I0 - I1, where I0 is the sample image to be
45 * corrected, I1 is the reference pattern. Then we solve DeltaI=0
46 * (Laplace) with I2 Dirichlet conditions at the borders of the
47 * mask. The solver is a red/black checker Gauss-Seidel with over-relaxation.
48 * It could benefit from a multi-grid evaluation of an initial solution
49 * before the main iteration loop.
50 *
51 * I reduced the convergence criteria to 0.1% (0.001) as we are
52 * dealing here with RGB integer components, more is overkill.
53 *
54 * Jean-Yves Couleaud cjyves@free.fr
55 */
56
57
58// Subtract bottom from top and store in result as a float; separate 'red' and 'black' pixels into
59// two contiguous regions
60static void _heal_sub(const float *const top_buffer, const float *const bottom_buffer,
61 float *const restrict red_buffer, float *const restrict black_buffer,
62 const size_t width, const size_t height)
63{
64 // how many red or black pixels per line? For consistency, we need the larger of the two, so round up
65 const size_t res_stride = 4 * ((width + 1) / 2);
67 for(size_t row = 0; row < height; row++)
68 {
69 const int parity = row & 1;
70 const size_t row_start = (row+1) * res_stride;
71 float *const buf1 = parity ? red_buffer + row_start : black_buffer + row_start;
72 float *const buf2 = parity ? black_buffer + row_start : red_buffer + row_start;
73 // handle the pixels of the row pairwise, one red and one black at a time
74 for(size_t col = 0; col < width/2; col++)
75 {
76 const size_t idx = 4 * (row * width + 2*col);
78 {
79 buf1[4*col + c] = top_buffer[idx + c] - bottom_buffer[idx + c];
80 buf2[4*col + c] = top_buffer[idx+4 + c] - bottom_buffer[idx+4 + c];
81 }
82 }
83 if(width & 1)
84 {
85 // Handle the left-over pixel when the total width is odd. Its color will always be the same as the
86 // left-most pixel, so it goes into buf1
87 const size_t res_idx = (width-1)/2;
88 const size_t idx = 4 * (row * width + (width-1));
90 {
91 buf1[4*res_idx + c] = top_buffer[idx + c] - bottom_buffer[idx + c];
92 buf2[4*res_idx + c] = 0.0f;
93 }
94 }
95 }
96 // clear the top and bottom rows, used for padding
97 memset(red_buffer, 0, res_stride * sizeof(float));
98 memset(red_buffer + (height+1)*res_stride, 0, res_stride * sizeof(float));
99 memset(black_buffer, 0, res_stride * sizeof(float));
100 memset(black_buffer + (height+1)*res_stride, 0, res_stride * sizeof(float));
101}
102
103// Add first to second and store in result, re-interleaving the 'red' and 'black' pixels
104static void _heal_add(const float *const restrict red_buffer, const float *const black_buffer,
105 const float *const restrict second_buffer, float *const restrict result_buffer,
106 const size_t width, const size_t height)
107{
108 // how many red or black pixels per line? For consistency, we need the larger of the two, so round up, then
109 // add one to ensure a padding pixel on the right
110 const size_t res_stride = 4 * ((width + 1) / 2);
112 for(size_t row = 0; row < height; row++)
113 {
114 const int parity = row & 1;
115 const size_t row_start = (row+1) * res_stride;
116 const float *const restrict buf1 = parity ? red_buffer + row_start : black_buffer + row_start;
117 const float *const restrict buf2 = parity ? black_buffer + row_start : red_buffer + row_start;
118 // handle the pixels of the row pairwise, one red and one black at a time
119 for(size_t col = 0; col < width/2; col++)
120 {
121 const size_t idx = 4 * (row * width + 2*col);
123 {
124 result_buffer[idx + c] = buf1[4*col + c] + second_buffer[idx + c];
125 result_buffer[idx + 4 + c] = buf2[4*col + c] + second_buffer[idx + 4 + c];
126 }
127 }
128 if(width & 1)
129 {
130 // handle the left-over pixel when the total width is odd
131 const size_t res_idx = (width-1)/2;
132 const size_t idx = 4 * (row * width + (width-1));
134 result_buffer[idx + c] = buf1[4*res_idx + c] + second_buffer[idx + c];
135 }
136 }
137}
138
139// define a custom reduction operation to handle a 3-vector of floats
140// we can't return an array from a function, so wrap the array type in a struct
142#ifdef _OPENMP
143static inline _aligned_pixel _add_float4(_aligned_pixel acc, _aligned_pixel newval)
144{
145 for_each_channel(c) acc.v[c] += newval.v[c];
146 return acc;
147}
148#pragma omp declare reduction(vsum:_aligned_pixel:omp_out=_add_float4(omp_out,omp_in)) \
149 initializer(omp_priv = { { 0.0f, 0.0f, 0.0f, 0.0f } })
150#endif
151
152// Perform one iteration of Gauss-Seidel, and return the sum squared residual.
153static float _heal_laplace_iteration(float *const restrict active_pixels,
154 const float *const restrict neighbor_pixels,
155 const size_t height, const size_t width, const unsigned *const restrict runs,
156 const size_t num_runs, const size_t start_parity, const float w)
157{
158 _aligned_pixel err = { { 0.f } };
159
160 // on each iteration, we adjust each cell of the current color by a weighted fraction of the difference between
161 // it and the sum of the adjacenct cells of the opposite color. Because we've split the cells by color, this
162 // leads to a somewhat different computation of neighbors.
163 // Rearranged, as provided to this function:
164 // r00 r01 r02 ... b00 b01 b02 ...
165 // r10 r11 r12 b10 b11 b12
166 // r20 r21 r22 b20 b21 b22
167 // ... ...
168 // Original layout, from which we need to get the neighbors:
169 // r00 b00 r01 b01 r02 b02 ...
170 // b10 r10 b11 r11 b12 r12
171 // r20 b20 r21 b21 r22 b22
172 // b30 r30 b31 r31 b32 r32
173 // ...
174 // As can be seen, the 'above' and 'below' neighbors of r(i)(j) are always b(i-1)(j) and b(i+1)(j). The
175 // left and right neighbors depend on which color the row starts with: if red, they are b(i)(j-1) and b(i)(j);
176 // if black, they are b(i)(j) and b(i)(j+1). All of the above holds when colors are swapped.
177#if !(defined(__apple_build_version__) && __apple_build_version__ < 11030000) //makes Xcode 11.3.1 compiler crash
178__OMP_PARALLEL_FOR__(reduction(vsum : err)) /* _OPENMP */
179#endif
180 for(size_t i = 0; i < num_runs; i++)
181 {
182 const size_t idx = runs[2*i];
183 const unsigned count = runs[2*i+1];
184 const size_t index = (size_t)4 * idx;
185 const size_t row = idx / width;
186 float a = 4.0f; // four neighboring pixels except at the edges
187 if(row == 1) a -= 1.0f; // we added a padding row at top and bottom
188 if(row == height) a -= 1.0f;
189 const size_t vert_offset = 4 * width;
190 const size_t lroffset = 4 * (start_parity ^ (row & 1)); // how many floats to offset the left/right neighbors
191 if(count == 1)
192 {
193 const size_t col = idx % width;
194 float aa = a;
195 dt_aligned_pixel_t left = { 0.0f };
196 dt_aligned_pixel_t right = { 0.0f };
197 if(col > 0 || lroffset) // first pixel in original stamp?
198 for_each_channel(c) left[c] = neighbor_pixels[index - 4 + lroffset + c];
199 else
200 aa -= 1.0f;
201 if(col + 1 < width || lroffset == 0) // last pixel in original stamp?
202 for_each_channel(c) right[c] = neighbor_pixels[index + lroffset + c];
203 else
204 aa -= 1.0f;
205
207 for_each_channel(c, aligned(active_pixels, neighbor_pixels))
208 {
209 diff[c] = w * ((aa * active_pixels[index+c])
210 - (neighbor_pixels[index - vert_offset + c] + neighbor_pixels[index + vert_offset + c]
211 + left[c] + right[c]));
212 active_pixels[index + c] -= diff[c];
213 err.v[c] += (diff[c] * diff[c]);
214 }
215 continue;
216 }
218 copy_pixel(left, neighbor_pixels + index - 4 + lroffset);
219 for(size_t j = 0; j < count; j++)
220 {
221 const size_t pixidx = index + 4*j;
223 dt_aligned_pixel_t right;
224 for_each_channel(c, aligned(active_pixels,neighbor_pixels))
225 {
226 right[c] = neighbor_pixels[pixidx + lroffset + c];
227 diff[c] = w * (a * active_pixels[pixidx+c]
228 - (neighbor_pixels[pixidx - vert_offset + c] + neighbor_pixels[pixidx + vert_offset + c]
229 + left[c] + right[c]));
230 active_pixels[pixidx + c] -= diff[c];
231 err.v[c] += (diff[c] * diff[c]);
232 left[c] = right[c];
233 }
234 }
235 }
236 return err.v[0] + err.v[1] + err.v[2];
237}
238
239// convert alternating pixels of one row of the opacity mask into a set of runs of opaque pixels of the
240// form (start_index, count), and return the updated number of runs
241static size_t _collect_color_runs(const float *const restrict mask, const size_t start_index,
242 size_t start, const size_t width,
243 unsigned *const restrict runs, size_t count, size_t *nmask)
244{
245 size_t masked = 0;
246 // handle the first and last pixels of the row specially so that we don't need checks on every pixel in the
247 // adaptation loop (which will be run hundreds of times for any stamp large enough that execution time is
248 // non-negligible)
249 if(start == 0 && mask[start])
250 {
251 runs[2*count] = start_index;
252 runs[2*count+1] = 1;
253 count++;
254 masked++;
255 start += 2; // we've processed the first pixel
256 }
257 gboolean in_run = FALSE;
258 unsigned run_start = 0;
259 size_t col;
260 for(col = start; col < width; col += 2)
261 {
262 if(mask[col])
263 {
264 masked++;
265 if(!in_run)
266 {
267 run_start = col;
268 in_run = TRUE;
269 }
270 }
271 else if(in_run)
272 {
273 runs[2*count] = start_index + run_start / 2;
274 runs[2*count + 1] = (col - run_start) / 2;
275 count++;
276 in_run = FALSE;
277 }
278 }
279 if(in_run) // finish off a run that doesn't have a zero pixel to its right
280 {
281 runs[2*count] = start_index + run_start / 2;
282 const unsigned runlen = (col - run_start) / 2;
283 runs[2*count + 1] = runlen;
284 if(runlen > 1 && col > width)
285 {
286 // split off the final pixel into its own run
287 runs[2*count + 1]--;
288 runs[2*count + 2] = runs[2*count] + runs[2*count+1];
289 runs[2*count + 3] = 1;
290 count++;
291 }
292 count++;
293 }
294 *nmask += masked;
295 return count;
296}
297
298// convert one row of the opacity mask into a set of runs of opaque pixels of the form (start_index, count)
299static void collect_runs(const int start, const float *const restrict mask, const size_t width, const size_t height,
300 const size_t subwidth, unsigned *const restrict runs, size_t *count, size_t *nmask)
301{
302 for(size_t row = 0; row < height; row++)
303 {
304 const int parity = start ^ (row & 1);
305 const size_t index = (row + 1) * subwidth;
306 const size_t mask_index = row * width;
307 *count = _collect_color_runs(mask + mask_index, index, parity, width, runs, *count, nmask);
308 }
309}
310
311// Solve the laplace equation for pixels and store the result in-place.
312static void _heal_laplace_loop(float *const restrict red_pixels, float *const restrict black_pixels,
313 const size_t width, const size_t height,
314 const float *const restrict mask, const int max_iter)
315{
316 // we start by converting the opacity mask into runs of nonzero positions, handling the 'red' and 'black'
317 // checkerboarded pixels separately
318 // the worst case is when consecutive red pixels alternate between being in the mask and out (same for black),
319 // in which case we will need exactly as many values in the run-length encoding as pixels; any other
320 // arrangement will yield fewer runs. For any mask a user would have the patience to draw, there will only be
321 // a handful of runs in each row of pixels.
322 // Note that using `unsigned` instead of size_t, the stamp is limited to ~8 gigapixels (the main image can be larger)
323 const size_t subwidth = (width+1)/2; // round up to be able to handle odd widths
324 unsigned *const restrict red_runs = dt_pixelpipe_cache_alloc_align_cache(
325 sizeof(unsigned) * subwidth * (height + 2),
326 0);
327 unsigned *const restrict black_runs = dt_pixelpipe_cache_alloc_align_cache(
328 sizeof(unsigned) * subwidth * (height + 2),
329 0);
330 if(IS_NULL_PTR(red_runs) || IS_NULL_PTR(black_runs))
331 {
332 fprintf(stderr, "_heal_laplace_loop: error allocating memory for healing\n");
333 goto cleanup;
334 }
335
336 size_t num_red = 0;
337 size_t num_black = 0;
338 size_t nmask_red = 0;
339 size_t nmask_black = 0;
340
341#ifdef _OPENMP
342#pragma omp parallel sections
343#endif
344 {
345 collect_runs(1, mask, width, height, subwidth, red_runs, &num_red, &nmask_red);
346 #ifdef _OPENMP
347 #pragma omp section
348 #endif
349 collect_runs(0, mask, width, height, subwidth, black_runs, &num_black, &nmask_black);
350 }
351 const size_t nmask = nmask_red + nmask_black;
352
353 /* Empirically optimal over-relaxation factor. (Benchmarked on
354 * round brushes, at least. I don't know whether aspect ratio
355 * affects it.)
356 */
357 const float w = ((2.0f - 1.0f / (0.1575f * sqrtf(nmask) + 0.8f)) * .25f);
358
359 const float epsilon = (0.1 / 255);
360 const float err_exit = epsilon * epsilon * w * w;
361
362 /* Gauss-Seidel with successive over-relaxation */
363 for(int iter = 0; iter < max_iter; iter++)
364 {
365 // process red/black cells separately
366 float err = _heal_laplace_iteration(black_pixels, red_pixels, height, subwidth, black_runs, num_black, 1, w);
367 err += _heal_laplace_iteration(red_pixels, black_pixels, height, subwidth, red_runs, num_red, 0, w);
368
369 if(err < err_exit) break;
370 }
371
372cleanup:
375}
376
377
378/* Original Algorithm Design:
379 *
380 * T. Georgiev, "Photoshop Healing Brush: a Tool for Seamless Cloning
381 * http://www.tgeorgiev.net/Photoshop_Healing.pdf
382 */
383void dt_heal(const float *const src_buffer, float *dest_buffer, const float *const mask_buffer, const int width,
384 const int height, const int ch, const int max_iter)
385{
386 if(ch != 4)
387 {
388 fprintf(stderr,"dt_heal: full-color image required\n");
389 return;
390 }
391 const size_t subwidth = 4 * ((width+1)/2); // round up to be able to handle odd widths
392 float *const restrict red_buffer = dt_pixelpipe_cache_alloc_align_float_cache(subwidth * (height + 2), 0);
393 float *const restrict black_buffer = dt_pixelpipe_cache_alloc_align_float_cache(subwidth * (height + 2), 0);
394 if(IS_NULL_PTR(red_buffer) || IS_NULL_PTR(black_buffer))
395 {
396 fprintf(stderr, "dt_heal: error allocating memory for healing\n");
397 goto cleanup;
398 }
399
400 /* subtract pattern from image and store the result split by 'red' and 'black' positions */
401 _heal_sub(dest_buffer, src_buffer, red_buffer, black_buffer, width, height);
402
403 _heal_laplace_loop(red_buffer, black_buffer, width, height, mask_buffer, max_iter);
404
405 /* add solution to original image and store in dest */
406 _heal_add(red_buffer, black_buffer, src_buffer, dest_buffer, width, height);
407
408cleanup:
410 dt_pixelpipe_cache_free_align(black_buffer);
411}
412
413#ifdef HAVE_OPENCL
414
421
423{
424 if(IS_NULL_PTR(g)) return;
425
426 dt_free(g);
427}
428
430{
431
433 if(IS_NULL_PTR(p)) return NULL;
434
436 p->devid = devid;
437
438 return p;
439}
440
442{
443 if(IS_NULL_PTR(p)) return;
444 dt_free(p);
445}
446
447cl_int dt_heal_cl(heal_params_cl_t *p, cl_mem dev_src, cl_mem dev_dest, const float *const mask_buffer,
448 const int width, const int height, const int max_iter)
449{
450 cl_int err = CL_SUCCESS;
451
452 const int ch = 4;
453
454 float *src_buffer = NULL;
455 float *dest_buffer = NULL;
456
457 src_buffer = dt_pixelpipe_cache_alloc_align_float_cache((size_t)ch * width * height, 0);
458 if(IS_NULL_PTR(src_buffer))
459 {
460 fprintf(stderr, "dt_heal_cl: error allocating memory for healing\n");
462 goto cleanup;
463 }
464
465 dest_buffer = dt_pixelpipe_cache_alloc_align_float_cache((size_t)ch * width * height, 0);
466 if(IS_NULL_PTR(dest_buffer))
467 {
468 fprintf(stderr, "dt_heal_cl: error allocating memory for healing\n");
470 goto cleanup;
471 }
472
473 err = dt_opencl_read_buffer_from_device(p->devid, (void *)src_buffer, dev_src, 0,
474 (size_t)width * height * ch * sizeof(float), CL_TRUE);
475 if(err != CL_SUCCESS)
476 {
477 goto cleanup;
478 }
479
480 err = dt_opencl_read_buffer_from_device(p->devid, (void *)dest_buffer, dev_dest, 0,
481 (size_t)width * height * ch * sizeof(float), CL_TRUE);
482 if(err != CL_SUCCESS)
483 {
484 goto cleanup;
485 }
486
487 // I couldn't make it run fast on opencl (the reduction takes forever), so just call the cpu version
488 dt_heal(src_buffer, dest_buffer, mask_buffer, width, height, ch, max_iter);
489
490 err = dt_opencl_write_buffer_to_device(p->devid, dest_buffer, dev_dest, 0, sizeof(float) * width * height * ch, CL_TRUE);
491 if(err != CL_SUCCESS)
492 {
493 goto cleanup;
494 }
495
496cleanup:
499
500 return err;
501}
502
503#endif
504// clang-format off
505// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
506// vim: shiftwidth=2 expandtab tabstop=2 cindent
507// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
508// clang-format on
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
void cleanup(dt_imageio_module_format_t *self)
Definition avif.c:164
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
static const int row
darktable_t darktable
Definition darktable.c:181
static void copy_pixel(float *const __restrict__ out, const float *const __restrict__ in)
Definition darktable.h:688
#define for_each_channel(_var,...)
Definition darktable.h:662
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:447
#define dt_pixelpipe_cache_alloc_align_cache(size, id)
Definition darktable.h:433
#define dt_free(ptr)
Definition darktable.h:456
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
void dt_heal_free_cl_global(dt_heal_cl_global_t *g)
Definition heal.c:422
static size_t _collect_color_runs(const float *const restrict mask, const size_t start_index, size_t start, const size_t width, unsigned *const restrict runs, size_t count, size_t *nmask)
Definition heal.c:241
void dt_heal_free_cl(heal_params_cl_t *p)
Definition heal.c:441
void dt_heal(const float *const src_buffer, float *dest_buffer, const float *const mask_buffer, const int width, const int height, const int ch, const int max_iter)
Definition heal.c:383
static void collect_runs(const int start, const float *const restrict mask, const size_t width, const size_t height, const size_t subwidth, unsigned *const restrict runs, size_t *count, size_t *nmask)
Definition heal.c:299
dt_heal_cl_global_t * dt_heal_init_cl_global()
Definition heal.c:415
cl_int dt_heal_cl(heal_params_cl_t *p, cl_mem dev_src, cl_mem dev_dest, const float *const mask_buffer, const int width, const int height, const int max_iter)
Definition heal.c:447
heal_params_cl_t * dt_heal_init_cl(const int devid)
Definition heal.c:429
static void _heal_add(const float *const restrict red_buffer, const float *const black_buffer, const float *const restrict second_buffer, float *const restrict result_buffer, const size_t width, const size_t height)
Definition heal.c:104
static void _heal_laplace_loop(float *const restrict red_pixels, float *const restrict black_pixels, const size_t width, const size_t height, const float *const restrict mask, const int max_iter)
Definition heal.c:312
static void _heal_sub(const float *const top_buffer, const float *const bottom_buffer, float *const restrict red_buffer, float *const restrict black_buffer, const size_t width, const size_t height)
Definition heal.c:60
static float _heal_laplace_iteration(float *const restrict active_pixels, const float *const restrict neighbor_pixels, const size_t height, const size_t width, const unsigned *const restrict runs, const size_t num_runs, const size_t start_parity, const float w)
Definition heal.c:153
float *const restrict const size_t const size_t ch
float dt_aligned_pixel_t[4]
int dt_opencl_write_buffer_to_device(const int devid, void *host, void *device, const size_t offset, const size_t size, const int blocking)
Definition opencl.c:2320
int dt_opencl_read_buffer_from_device(const int devid, void *host, void *device, const size_t offset, const size_t size, const int blocking)
Definition opencl.c:2309
#define DT_OPENCL_SYSMEM_ALLOCATION
Definition opencl.h:58
dt_aligned_pixel_t v
Definition eaw.c:200
struct dt_opencl_t * opencl
Definition darktable.h:785
struct dt_heal_cl_global_t * heal
Definition opencl.h:269
dt_heal_cl_global_t * global
Definition heal.h:40