Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
dwt.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2017 Edgardo Hoszowski.
4 Copyright (C) 2019, 2021 Andreas Schneider.
5 Copyright (C) 2019, 2025-2026 Aurélien PIERRE.
6 Copyright (C) 2019 luzpaz.
7 Copyright (C) 2020 Heiko Bauke.
8 Copyright (C) 2020-2021 Hubert Kowalski.
9 Copyright (C) 2020 Pascal Obry.
10 Copyright (C) 2020-2021 Ralf Brown.
11 Copyright (C) 2022 Hanno Schwalm.
12 Copyright (C) 2022 Martin Bařinka.
13
14 darktable is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18
19 darktable is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with darktable. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28#include "common/darktable.h"
29#include "common/imagebuf.h"
30#include "control/control.h"
31#include "develop/imageop.h"
32#include "dwt.h"
33
34/* Based on the original source code of GIMP's Wavelet Decompose plugin, by Marco Rossini
35 *
36 * http://registry.gimp.org/node/11742
37 *
38*/
39
40dwt_params_t *dt_dwt_init(float *image, const int width, const int height, const int ch, const int scales,
41 const int return_layer, const int merge_from_scale, void *user_data,
42 const float preview_scale, const int use_sse)
43{
44 dwt_params_t *p = (dwt_params_t *)malloc(sizeof(dwt_params_t));
45 if(IS_NULL_PTR(p)) return NULL;
46
47 p->image = image;
48 p->ch = ch;
49 p->width = width;
50 p->height = height;
51 p->scales = scales;
52 p->return_layer = return_layer;
53 p->merge_from_scale = merge_from_scale;
54 p->user_data = user_data;
55 p->preview_scale = preview_scale;
56 p->use_sse = use_sse;
57
58 return p;
59}
60
62{
63 if(IS_NULL_PTR(p)) return;
64
65 dt_free(p);
66}
67
69static int _get_max_scale(const int width, const int height, const float preview_scale)
70{
71 int maxscale = 0;
72
73 // smallest edge must be higher than or equal to 2^scales
74 unsigned int size = MIN(width, height);
75 float size_tmp = ((size >>= 1) * preview_scale);
76 while(size_tmp > 0.f)
77 {
78 size_tmp = ((size >>= 1) * preview_scale);
79 maxscale++;
80 }
81
82 // avoid rounding issues...
83 size = MIN(width, height);
84 while((maxscale > 0) && ((1 << maxscale) * preview_scale >= size)) maxscale--;
85
86 return maxscale;
87}
88
90{
91 return _get_max_scale(p->width / p->preview_scale, p->height / p->preview_scale, p->preview_scale);
92}
93
95static int _first_scale_visible(const int num_scales, const float preview_scale)
96{
97 int first_scale = 0;
98
99 for(unsigned int lev = 0; lev < num_scales; lev++)
100 {
101 int sc = 1 << lev;
102 sc *= preview_scale;
103 if(sc > 0)
104 {
105 first_scale = lev + 1;
106 break;
107 }
108 }
109
110 return first_scale;
111}
112
114{
115 return _first_scale_visible(p->scales, p->preview_scale);
116}
117
118static inline __attribute__((always_inline)) void dwt_get_image_layer(float *const layer, dwt_params_t *const p)
119{
120 if(p->image != layer) memcpy(p->image, layer, sizeof(float) * p->width * p->height * p->ch);
121}
122
123// first, "vertical" pass of wavelet decomposition
125static void dwt_decompose_vert(float *const restrict out, const float *const restrict in,
126 const size_t height, const size_t width, const size_t lev)
127{
128 const size_t vscale = MIN(1 << lev, height-1);
130 for(int rowid = 0; rowid < height ; rowid++)
131 {
132 const size_t row = dwt_interleave_rows(rowid,height,vscale);
133 // perform a weighted sum of the current pixel row with the rows 'scale' pixels above and below
134 // if either of those is beyond the edge of the image, we use reflection to get a value for averaging,
135 // i.e. we move as many rows in from the edge as we would have been beyond the edge
136 // for the top edge, this means we can simply use the absolute value of row-vscale; for the bottom edge,
137 // we need to reflect around height
138 const size_t rowstart = (size_t)4 * row * width;
139 const size_t above_row = (row > vscale) ? row - vscale : vscale - row;
140 const size_t below_row = (row + vscale < height) ? (row + vscale) : 2*(height-1) - (row + vscale);
141 const float* const restrict center = in + rowstart;
142 const float* const restrict above = in + 4 * above_row * width;
143 const float* const restrict below = in + 4 * below_row * width;
144 float* const restrict temprow = out + rowstart;
145 for (size_t col = 0; col < 4*width; col += 4)
146 {
147 for_each_channel(c,aligned(center, above, below, temprow : 16))
148 {
149 temprow[col + c] = 2.f * center[col+c] + above[col+c] + below[col+c];
150 }
151 }
152 }
153}
154
155// second, horizontal pass of wavelet decomposition; generates 'coarse' into the output buffer and overwrites
156// the input buffer with 'details'
158static void dwt_decompose_horiz(float *const restrict out, float *const restrict in, float *const temp,
159 const size_t height, const size_t width, const size_t lev)
160{
161 const int hscale = MIN(1 << lev, width); //(int because we need a signed difference below)
163 for(int row = 0; row < height ; row++)
164 {
165 // perform a weighted sum of the current pixel with the ones 'scale' pixels to the left and right, using
166 // reflection to get a value if either of those positions is out of bounds, i.e. we move as many columns
167 // in from the edge as we would have been beyond the edge to avoid an additional pass, we also rescale the
168 // final sum and split the original input into 'coarse' and 'details' by subtracting the scaled sum from
169 // the original input.
170 const size_t rowindex = (size_t)4 * (row * width);
171 float* const restrict temprow = temp + width * dt_get_thread_num() * 4;
172 float* const restrict details = in + rowindex;
173 float* const restrict coarse = out + rowindex;
174
175 for (int col = 0; col < width - hscale; col++)
176 {
177 const size_t leftpos = (size_t)4*abs(col-hscale); // the abs() handles reflection at the left edge
178 const size_t rightpos = (size_t)4*(col+hscale);
179 for_each_channel(c,aligned(temprow, details, coarse : 16))
180 {
181 const float left = coarse[leftpos+c];
182 const float right = coarse[rightpos+c];
183 // add up left/center/right, and renormalize by dividing by the total weight of all numbers added together
184 const float hat = (2.f * coarse[4*col+c] + left + right) / 16.f;
185 // the normalized value is our 'coarse' result; 'details' is the difference between original input and 'coarse'
186 temprow[4*col+c] = hat;
187 details[4*col+c] -= hat;
188 }
189 }
190 // handle reflection at right edge
191 for (int col = width - hscale; col < width; col++)
192 {
193 const size_t leftpos = (size_t)4 * abs(col-hscale); // still need to handle reflection, if hscale>=width/2
194 const size_t rightpos = (size_t)4 * (2*width - 2 - (col+hscale));
195 for_each_channel(c,aligned(temprow, details, coarse : 16))
196 {
197 const float left = coarse[leftpos+c];
198 const float right = coarse[rightpos+c];
199 // add up left/center/right, and renormalize by dividing by the total weight of all numbers added together
200 const float hat = (2.f * coarse[4*col+c] + left + right) / 16.f;
201 // the normalized value is our 'coarse' result; 'details' is the difference between original input and 'coarse'
202 temprow[4*col+c] = hat;
203 details[4*col+c] -= hat;
204 }
205 }
206 // now that we're done with the row of pixels, we can overwrite the intermediate result from the
207 // first pass with the final decomposition
208 memcpy(coarse, temprow, sizeof(float) * 4 * width);
209 }
210}
211
212// split input into 'coarse' and 'details'; put 'details' back into the input buffer
213static inline __attribute__((always_inline)) void dwt_decompose_layer(float *const restrict out, float *const restrict in, float *const temp, const int lev,
214 const dwt_params_t *const p)
215{
216 dwt_decompose_vert(out, in, p->height, p->width, lev);
217 dwt_decompose_horiz(out, in, temp, p->height, p->width, lev);
218 return;
219}
220
221/* actual decomposing algorithm */
223static int dwt_wavelet_decompose(float *img, dwt_params_t *const p, _dwt_layer_func layer_func)
224{
225 float *temp = NULL;
226 float *layers = NULL;
227 float *merged_layers = NULL;
228 float *buffer[2] = { 0, 0 };
229 int bcontinue = 1;
230 int err = 0;
231 const size_t size = (size_t)p->width * p->height * p->ch;
232
233 assert(p->ch == 4);
234
235 if(layer_func && layer_func(img, p, 0) != 0) return 1;
236
237 if(p->scales <= 0) goto cleanup;
238
239 /* image buffers */
240 buffer[0] = img;
241 /* temporary storage */
243 // buffer to reconstruct the image
244 layers = dt_pixelpipe_cache_alloc_align_float_cache((size_t)4 * p->width * p->height, 0);
245 // scratch buffer for decomposition
247
248 if(buffer[1] == NULL || IS_NULL_PTR(layers) || IS_NULL_PTR(temp))
249 {
250 printf("not enough memory for wavelet decomposition");
251 err = 1;
252 goto cleanup;
253 }
254 dt_iop_image_fill(layers,0.0f,p->width,p->height,p->ch);
255
256 if(p->merge_from_scale > 0)
257 {
258 merged_layers = dt_pixelpipe_cache_alloc_align_float_cache((size_t)p->width * p->height * p->ch, 0);
259 if(IS_NULL_PTR(merged_layers))
260 {
261 printf("not enough memory for wavelet decomposition");
262 err = 1;
263 goto cleanup;
264 }
265 dt_iop_image_fill(merged_layers,0.0f,p->width,p->height,p->ch);
266 }
267
268 // iterate over wavelet scales
269 unsigned int hpass = 0;
270 for(unsigned int lev = 0; lev < p->scales && bcontinue; lev++)
271 {
272 unsigned int lpass = (1 - (lev & 1));
273
274 dwt_decompose_layer(buffer[lpass], buffer[hpass], temp, lev, p);
275
276 // no merge scales or we didn't reach the merge scale from yet
277 if(p->merge_from_scale == 0 || p->merge_from_scale > lev + 1)
278 {
279 // allow to process this detail scale
280 if(layer_func && layer_func(buffer[hpass], p, lev + 1) != 0)
281 {
282 err = 1;
283 goto cleanup;
284 }
285
286 // user wants to preview this detail scale
287 if(p->return_layer == lev + 1)
288 {
289 // return this detail scale
290 dwt_get_image_layer(buffer[hpass], p);
291
292 bcontinue = 0;
293 }
294 // user wants the entire reconstructed image
295 else if(p->return_layer == 0)
296 {
297 // add this detail scale to the final image
298 dt_iop_image_add_image(layers, buffer[hpass], p->width, p->height, p->ch);
299 }
300 }
301 // we are on the merge scales range
302 else
303 {
304 // add this detail scale to the merged ones
305 dt_iop_image_add_image(merged_layers, buffer[hpass], p->width, p->height, p->ch);
306
307 // allow to process this merged scale
308 if(layer_func && layer_func(merged_layers, p, lev + 1) != 0)
309 {
310 err = 1;
311 goto cleanup;
312 }
313
314 // user wants to preview this merged scale
315 if(p->return_layer == lev + 1)
316 {
317 // return this merged scale
318 dwt_get_image_layer(merged_layers, p);
319
320 bcontinue = 0;
321 }
322 }
323
324 hpass = lpass;
325 }
326
327 // all scales have been processed
328 if(bcontinue)
329 {
330 // allow to process residual image
331 if(layer_func && layer_func(buffer[hpass], p, p->scales + 1) != 0)
332 {
333 err = 1;
334 goto cleanup;
335 }
336
337 // user wants to preview residual image
338 if(p->return_layer == p->scales + 1)
339 {
340 // return residual image
341 dwt_get_image_layer(buffer[hpass], p);
342 }
343 // return reconstructed image
344 else if(p->return_layer == 0)
345 {
346 // some of the detail scales are on the merged layers
347 if(p->merge_from_scale > 0)
348 {
349 // add merged layers to final image
350 dt_iop_image_add_image(layers, merged_layers, p->width, p->height, p->ch);
351 }
352
353 // add residual image to final image
354 dt_iop_image_add_image(layers, buffer[hpass], p->width, p->height, p->ch);
355
356 // allow to process reconstructed image
357 if(layer_func && layer_func(layers, p, p->scales + 2) != 0)
358 {
359 err = 1;
360 goto cleanup;
361 }
362
363 // return reconstructed image
364 dwt_get_image_layer(layers, p);
365 }
366 }
367
368cleanup:
371 dt_pixelpipe_cache_free_align(merged_layers);
373 return err;
374}
375
376/* this function prepares for decomposing, which is done in the function dwt_wavelet_decompose() */
378{
379 // this is a zoom scale, not a wavelet scale
380 if(p->preview_scale <= 0.f) p->preview_scale = 1.f;
381
382 // if a single scale is requested it cannot be grather than the residual
383 if(p->return_layer > p->scales + 1)
384 {
385 p->return_layer = p->scales + 1;
386 }
387
388 const int max_scale = dwt_get_max_scale(p);
389
390 // if requested scales is grather than max scales adjust it
391 if(p->scales > max_scale)
392 {
393 // residual should be returned
394 if(p->return_layer > p->scales) p->return_layer = max_scale + 1;
395 // a scale should be returned, it cannot be grather than max scales
396 else if(p->return_layer > max_scale)
397 p->return_layer = max_scale;
398
399 p->scales = max_scale;
400 }
401
402 // call the actual decompose
403 return dwt_wavelet_decompose(p->image, p, layer_func);
404}
405
406// first, "vertical" pass of wavelet decomposition
408static void dwt_denoise_vert_1ch(float *const restrict out, const float *const restrict in,
409 const size_t height, const size_t width, const size_t lev)
410{
411 const int vscale = MIN(1 << lev, height);
413 for(int rowid = 0; rowid < height ; rowid++)
414 {
415 const int row = dwt_interleave_rows(rowid,height,vscale);
416 // perform a weighted sum of the current pixel row with the rows 'scale' pixels above and below
417 // if either of those is beyond the edge of the image, we use reflection to get a value for averaging,
418 // i.e. we move as many rows in from the edge as we would have been beyond the edge
419 // for the top edge, this means we can simply use the absolute value of row-vscale; for the bottom edge,
420 // we need to reflect around height
421 const size_t rowstart = (size_t)row * width;
422 const size_t below_row = (row + vscale < height) ? (row + vscale) : 2*(height-1) - (row + vscale);
423 const float *const restrict center = in + rowstart;
424 const float *const restrict above = in + abs(row - vscale) * width;
425 const float *const restrict below = in + below_row * width;
426 float* const restrict outrow = out + rowstart;
428 for (int col= 0; col < width; col++)
429 {
430 outrow[col] = 2.f * center[col] + above[col] + below[col];
431 }
432 }
433}
434
435// second, horizontal pass of wavelet decomposition; generates 'coarse' into the output buffer and overwrites
436// the input buffer with 'details'
438static void dwt_denoise_horiz_1ch(float *const restrict out, float *const restrict in,
439 float *const restrict accum, const size_t height, const size_t width,
440 const size_t lev, const float thold, const int last)
441{
442 const int hscale = MIN(1 << lev, width);
444 for(int row = 0; row < height ; row++)
445 {
446 // perform a weighted sum of the current pixel with the ones 'scale' pixels to the left and right, using
447 // reflection to get a value if either of those positions is out of bounds, i.e. we move as many columns
448 // in from the edge as we would have been beyond the edge to avoid an additional pass, we also rescale the
449 // final sum and split the original input into 'coarse' and 'details' by subtracting the scaled sum from
450 // the original input.
451 const size_t rowindex = (size_t)row * width;
452 float *const restrict details = in + rowindex;
453 float *const restrict coarse = out + rowindex;
454 float *const restrict accum_row = accum + rowindex;
455 // handle reflection at left edge
457 for (int col = 0; col < hscale; col++)
458 {
459 // add up left/center/right, and renormalize by dividing by the total weight of all numbers added together
460 const float hat = (2.f * coarse[col] + coarse[hscale-col] + coarse[col+hscale]) / 16.f;
461 // the normalized value is our 'coarse' result; 'diff' is the difference between original input and 'coarse'
462 // (which would ordinarily be stored as the details scale, but we don't need it any further)
463 const float diff = details[col] - hat;
464 details[col] = hat; // done with original input, so we can overwrite it with 'coarse'
465 // GCC8 won't vectorize if we use the following line, but it turns out that just adding the two conditional
466 // alternatives produces exactly the same result, and *that* does get vectorized
467 //const float excess = diff < 0.0 ? MIN(diff + thold, 0.0f) : MAX(diff - thold, 0.0f);
468 accum_row[col] += MAX(diff - thold,0.0f) + MIN(diff + thold, 0.0f);
469 }
471 for (int col = hscale; col < width - hscale; col++)
472 {
473 // add up left/center/right, and renormalize by dividing by the total weight of all numbers added together
474 const float hat = (2.f * coarse[col] + coarse[col-hscale] + coarse[col+hscale]) / 16.f;
475 // the normalized value is our 'coarse' result; 'diff' is the difference between original input and 'coarse'
476 // (which would ordinarily be stored as the details scale, but we don't need it any further)
477 const float diff = details[col] - hat;
478 details[col] = hat; // done with original input, so we can overwrite it with 'coarse'
479 // GCC8 won't vectorize if we use the following line, but it turns out that just adding the two conditional
480 // alternatives produces exactly the same result, and *that* does get vectorized
481 //const float excess = diff < 0.0 ? MIN(diff + thold, 0.0f) : MAX(diff - thold, 0.0f);
482 accum_row[col] += MAX(diff - thold,0.0f) + MIN(diff + thold, 0.0f);
483 }
484 // handle reflection at right edge
486 for (int col = width - hscale; col < width; col++)
487 {
488 const float right = coarse[2*width - 2 - (col+hscale)];
489 // add up left/center/right, and renormalize by dividing by the total weight of all numbers added together
490 const float hat = (2.f * coarse[col] + coarse[col-hscale] + right) / 16.f;
491 // the normalized value is our 'coarse' result; 'diff' is the difference between original input and 'coarse'
492 // (which would ordinarily be stored as the details scale, but we don't need it any further)
493 const float diff = details[col] - hat;
494 details[col] = hat; // done with original input, so we can overwrite it with 'coarse'
495 accum_row[col] += MAX(diff - thold,0.0f) + MIN(diff + thold, 0.0f);
496 }
497 if (last)
498 {
499 // add the details to the residue to create the final denoised result
500 for (int col = 0; col < width; col++)
501 {
502 details[col] += accum_row[col];
503 }
504 }
505 }
506}
507
508/* this function denoises an image by decomposing it into the specified number of wavelet scales and
509 * recomposing the result from just the portion of each scale which exceeds the magnitude of the given
510 * threshold for that scale.
511 */
513int dwt_denoise(float *const img, const int width, const int height, const int bands, const float *const noise)
514{
515 float *const details = dt_pixelpipe_cache_alloc_align_float_cache((size_t)2 * width * height, 0);
516 if(IS_NULL_PTR(details)) return 1;
517 float *const interm = details + width * height; // temporary storage for use during each pass
518
519 // zero the accumulator
520 dt_iop_image_fill(details, 0.0f, width, height, 1);
521
522 for(int lev = 0; lev < bands; lev++)
523 {
524 const int last = (lev+1) == bands;
525
526 // "vertical" pass, averages pixels with those 'scale' rows above and below and puts result in 'interm'
527 dwt_denoise_vert_1ch(interm, img, height, width, lev);
528 // horizontal filtering pass, averages pixels in 'interm' with those 'scale' rows to the left and right
529 // accumulates the portion of the detail scale that is above the noise threshold into 'details'; this
530 // will be added to the residue left in 'img' on the last iteration
531 dwt_denoise_horiz_1ch(interm, img, details, height, width, lev, noise[lev], last);
532 }
534 return 0;
535}
536
537#ifdef HAVE_OPENCL
539{
541
542 const int program = 20; // dwt.cl, from programs.conf
543 g->kernel_dwt_add_img_to_layer = dt_opencl_create_kernel(program, "dwt_add_img_to_layer");
544 g->kernel_dwt_subtract_layer = dt_opencl_create_kernel(program, "dwt_subtract_layer");
545 g->kernel_dwt_hat_transform_col = dt_opencl_create_kernel(program, "dwt_hat_transform_col");
546 g->kernel_dwt_hat_transform_row = dt_opencl_create_kernel(program, "dwt_hat_transform_row");
547 g->kernel_dwt_init_buffer = dt_opencl_create_kernel(program, "dwt_init_buffer");
548 return g;
549}
550
552{
553 if(IS_NULL_PTR(g)) return;
554
555 // destroy kernels
556 dt_opencl_free_kernel(g->kernel_dwt_add_img_to_layer);
557 dt_opencl_free_kernel(g->kernel_dwt_subtract_layer);
558 dt_opencl_free_kernel(g->kernel_dwt_hat_transform_col);
559 dt_opencl_free_kernel(g->kernel_dwt_hat_transform_row);
560 dt_opencl_free_kernel(g->kernel_dwt_init_buffer);
561
562 dt_free(g);
563}
564
565dwt_params_cl_t *dt_dwt_init_cl(const int devid, cl_mem image, const int width, const int height, const int scales,
566 const int return_layer, const int merge_from_scale, void *user_data,
567 const float preview_scale)
568{
569 dwt_params_cl_t *p = (dwt_params_cl_t *)malloc(sizeof(dwt_params_cl_t));
570 if(IS_NULL_PTR(p)) return NULL;
571
573 p->devid = devid;
574 p->image = image;
575 p->ch = 4;
576 p->width = width;
577 p->height = height;
578 p->scales = scales;
579 p->return_layer = return_layer;
580 p->merge_from_scale = merge_from_scale;
581 p->user_data = user_data;
582 p->preview_scale = preview_scale;
583
584 return p;
585}
586
588{
589 if(IS_NULL_PTR(p)) return;
590 dt_free(p);
591}
592
594{
595 return _get_max_scale(p->width / p->preview_scale, p->height / p->preview_scale, p->preview_scale);
596}
597
599{
600 return _first_scale_visible(p->scales, p->preview_scale);
601}
602
603static cl_int dwt_subtract_layer_cl(cl_mem bl, cl_mem bh, dwt_params_cl_t *const p)
604{
605 cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
606
607 const int devid = p->devid;
608 const int kernel = p->global->kernel_dwt_subtract_layer;
609
610 size_t sizes[] = { ROUNDUPDWD(p->width, devid), ROUNDUPDHT(p->height, devid), 1 };
611
612 const float lpass_mult = (1.f / 16.f);
613 const int width = p->width;
614 const int height = p->height;
615
616 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&bl);
617 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&bh);
618 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&(width));
619 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&(height));
620 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), (void *)&lpass_mult);
621 err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
622
623 return err;
624}
625
626static cl_int dwt_add_layer_cl(cl_mem img, cl_mem layers, dwt_params_cl_t *const p, const int n_scale)
627{
628 cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
629
630 const int devid = p->devid;
631 const int kernel = p->global->kernel_dwt_add_img_to_layer;
632
633 size_t sizes[] = { ROUNDUPDWD(p->width, devid), ROUNDUPDHT(p->height, devid), 1 };
634
635 const int width = p->width;
636 const int height = p->height;
637
638 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&img);
639 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&layers);
640 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&(width));
641 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&(height));
642 err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
643
644 return err;
645}
646
647static cl_int dwt_get_image_layer_cl(cl_mem layer, dwt_params_cl_t *const p)
648{
649 cl_int err = CL_SUCCESS;
650
651 if(p->image != layer)
652 err = dt_opencl_enqueue_copy_buffer_to_buffer(p->devid, layer, p->image, 0, 0,
653 (size_t)p->width * p->height * p->ch * sizeof(float));
654
655 return err;
656}
657
658static cl_int dwt_wavelet_decompose_cl(cl_mem img, dwt_params_cl_t *const p, _dwt_layer_func_cl layer_func)
659{
660 cl_int err = CL_SUCCESS;
661
662 const int devid = p->devid;
663
664 cl_mem temp = NULL;
665 cl_mem layers = NULL;
666 cl_mem merged_layers = NULL;
667 unsigned int lpass, hpass;
668 cl_mem buffer[2] = { 0, 0 };
669 int bcontinue = 1;
670
671 if(layer_func)
672 {
673 err = layer_func(img, p, 0);
674 if(err != CL_SUCCESS) goto cleanup;
675 }
676
677 if(p->scales <= 0) goto cleanup;
678
679 /* image buffers */
680 buffer[0] = img;
681 /* temporary storage */
682 buffer[1] = dt_opencl_alloc_device_buffer(devid, sizeof(float) * p->ch * p->width * p->height);
683 if(buffer[1] == NULL)
684 {
685 printf("not enough memory for wavelet decomposition");
686 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
687 goto cleanup;
688 }
689
690 // buffer to reconstruct the image
691 layers = dt_opencl_alloc_device_buffer(devid, sizeof(float) * p->ch * p->width * p->height);
692 if(IS_NULL_PTR(layers))
693 {
694 printf("not enough memory for wavelet decomposition");
695 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
696 goto cleanup;
697 }
698 // init layer buffer
699 {
700 const int kernel = p->global->kernel_dwt_init_buffer;
701
702 size_t sizes[] = { ROUNDUPDWD(p->width, devid), ROUNDUPDHT(p->height, devid), 1 };
703 const int width = p->width;
704 const int height = p->height;
705
706 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&layers);
707 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), (void *)&(width));
708 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&(height));
709 err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
710 if(err != CL_SUCCESS) goto cleanup;
711 }
712
713 if(p->merge_from_scale > 0)
714 {
715 merged_layers = dt_opencl_alloc_device_buffer(devid, sizeof(float) * p->ch * p->width * p->height);
716 if(IS_NULL_PTR(merged_layers))
717 {
718 printf("not enough memory for wavelet decomposition");
719 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
720 goto cleanup;
721 }
722 // init reconstruct buffer
723 {
724 const int kernel = p->global->kernel_dwt_init_buffer;
725
726 size_t sizes[] = { ROUNDUPDWD(p->width, devid), ROUNDUPDHT(p->height, devid), 1 };
727 const int width = p->width;
728 const int height = p->height;
729
730 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&merged_layers);
731 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), (void *)&(width));
732 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&(height));
733 err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
734 if(err != CL_SUCCESS) goto cleanup;
735 }
736 }
737
738 // iterate over wavelet scales
739 lpass = 1;
740 hpass = 0;
741 for(unsigned int lev = 0; lev < p->scales && bcontinue; lev++)
742 {
743 lpass = (1 - (lev & 1));
744
745 // when (*layer_func) uses too much memory I get a -4 error, so alloc and free for each scale
746 // setup a temp buffer
747 temp = dt_opencl_alloc_device_buffer(devid, sizeof(float) * p->ch * p->width * p->height);
748 if(IS_NULL_PTR(temp))
749 {
750 printf("not enough memory for wavelet decomposition");
751 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
752 goto cleanup;
753 }
754
755 // hat transform by row
756 {
757 const int kernel = p->global->kernel_dwt_hat_transform_row;
758
759 int sc = 1 << lev;
760 sc = (int)(sc * p->preview_scale);
761 if(sc > p->width) sc = p->width;
762
763 size_t sizes[] = { ROUNDUPDWD(p->width, devid), ROUNDUPDHT(p->height, devid), 1 };
764
765 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&temp);
766 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&(buffer[hpass]));
767 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&(p->width));
768 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&(p->height));
769 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(int), (void *)&sc);
770 err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
771 if(err != CL_SUCCESS) goto cleanup;
772 }
773
774 // hat transform by col
775 {
776 const int kernel = p->global->kernel_dwt_hat_transform_col;
777
778 int sc = 1 << lev;
779 sc = (int)(sc * p->preview_scale);
780 if(sc > p->height) sc = p->height;
781 const float lpass_mult = (1.f / 16.f);
782
783 size_t sizes[] = { ROUNDUPDWD(p->width, devid), ROUNDUPDHT(p->height, devid), 1 };
784
785 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&temp);
786 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), (void *)&(p->width));
787 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&(p->height));
788 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&sc);
789 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), (void *)&(buffer[lpass]));
790 dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), (void *)&lpass_mult);
791 err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
792 if(err != CL_SUCCESS) goto cleanup;
793 }
794
795 if(temp)
796 {
798 temp = NULL;
799 }
800
801 err = dwt_subtract_layer_cl(buffer[lpass], buffer[hpass], p);
802 if(err != CL_SUCCESS) goto cleanup;
803
804 // no merge scales or we didn't reach the merge scale from yet
805 if(p->merge_from_scale == 0 || p->merge_from_scale > lev + 1)
806 {
807 // allow to process this detail scale
808 if(layer_func)
809 {
810 err = layer_func(buffer[hpass], p, lev + 1);
811 if(err != CL_SUCCESS) goto cleanup;
812 }
813
814 // user wants to preview this detail scale
815 if(p->return_layer == lev + 1)
816 {
817 // return this detail scale
818 err = dwt_get_image_layer_cl(buffer[hpass], p);
819 if(err != CL_SUCCESS) goto cleanup;
820
821 bcontinue = 0;
822 }
823 // user wants the entire reconstructed image
824 else if(p->return_layer == 0)
825 {
826 // add this detail scale to the final image
827 err = dwt_add_layer_cl(buffer[hpass], layers, p, lev + 1);
828 if(err != CL_SUCCESS) goto cleanup;
829 }
830 }
831 // we are on the merge scales range
832 else
833 {
834 // add this detail scale to the merged ones
835 err = dwt_add_layer_cl(buffer[hpass], merged_layers, p, lev + 1);
836 if(err != CL_SUCCESS) goto cleanup;
837
838 // allow to process this merged scale
839 if(layer_func)
840 {
841 err = layer_func(merged_layers, p, lev + 1);
842 if(err != CL_SUCCESS) goto cleanup;
843 }
844
845 // user wants to preview this merged scale
846 if(p->return_layer == lev + 1)
847 {
848 // return this merged scale
849 err = dwt_get_image_layer_cl(merged_layers, p);
850 if(err != CL_SUCCESS) goto cleanup;
851
852 bcontinue = 0;
853 }
854 }
855
856 hpass = lpass;
857 }
858
859 // all scales have been processed
860 if(bcontinue)
861 {
862 // allow to process residual image
863 if(layer_func)
864 {
865 err = layer_func(buffer[hpass], p, p->scales + 1);
866 if(err != CL_SUCCESS) goto cleanup;
867 }
868
869 // user wants to preview residual image
870 if(p->return_layer == p->scales + 1)
871 {
872 // return residual image
873 err = dwt_get_image_layer_cl(buffer[hpass], p);
874 if(err != CL_SUCCESS) goto cleanup;
875 }
876 // return reconstructed image
877 else if(p->return_layer == 0)
878 {
879 // some of the detail scales are on the merged layers
880 if(p->merge_from_scale > 0)
881 {
882 // add merged layers to final image
883 err = dwt_add_layer_cl(merged_layers, layers, p, p->scales + 1);
884 if(err != CL_SUCCESS) goto cleanup;
885 }
886
887 // add residual image to final image
888 err = dwt_add_layer_cl(buffer[hpass], layers, p, p->scales + 1);
889 if(err != CL_SUCCESS) goto cleanup;
890
891 // allow to process reconstructed image
892 if(layer_func)
893 {
894 err = layer_func(layers, p, p->scales + 2);
895 if(err != CL_SUCCESS) goto cleanup;
896 }
897
898 // return reconstructed image
899 err = dwt_get_image_layer_cl(layers, p);
900 if(err != CL_SUCCESS) goto cleanup;
901 }
902 }
903
904cleanup:
906 dt_opencl_release_mem_object(merged_layers);
909
910 return err;
911}
912
914{
915 cl_int err = CL_SUCCESS;
916
917 // this is a zoom scale, not a wavelet scale
918 if(p->preview_scale <= 0.f) p->preview_scale = 1.f;
919
920 // if a single scale is requested it cannot be grather than the residual
921 if(p->return_layer > p->scales + 1)
922 {
923 p->return_layer = p->scales + 1;
924 }
925
926 const int max_scale = dwt_get_max_scale_cl(p);
927
928 // if requested scales is grather than max scales adjust it
929 if(p->scales > max_scale)
930 {
931 // residual should be returned
932 if(p->return_layer > p->scales) p->return_layer = max_scale + 1;
933 // a scale should be returned, it cannot be grather than max scales
934 else if(p->return_layer > max_scale)
935 p->return_layer = max_scale;
936
937 p->scales = max_scale;
938 }
939
940 // call the actual decompose
941 err = dwt_wavelet_decompose_cl(p->image, p, layer_func);
942
943 return err;
944}
945
946#endif
947// clang-format off
948// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
949// vim: shiftwidth=2 expandtab tabstop=2 cindent
950// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
951// clang-format on
void cleanup(dt_imageio_module_format_t *self)
Definition avif.c:164
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
const dt_colormatrix_t dt_aligned_pixel_t out
static const int row
darktable_t darktable
Definition darktable.c:181
#define __OMP_SIMD__(...)
Definition darktable.h:262
#define for_each_channel(_var,...)
Definition darktable.h:662
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:447
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
Definition darktable.h:524
static int dt_get_thread_num()
Definition darktable.h:291
#define dt_free(ptr)
Definition darktable.h:456
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __DT_CLONE_TARGETS__
Definition darktable.h:367
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
static __DT_CLONE_TARGETS__ void dwt_decompose_horiz(float *const restrict out, float *const restrict in, float *const temp, const size_t height, const size_t width, const size_t lev)
Definition dwt.c:158
static __DT_CLONE_TARGETS__ int dwt_wavelet_decompose(float *img, dwt_params_t *const p, _dwt_layer_func layer_func)
Definition dwt.c:223
__DT_CLONE_TARGETS__ int dwt_denoise(float *const img, const int width, const int height, const int bands, const float *const noise)
Definition dwt.c:513
dt_dwt_cl_global_t * dt_dwt_init_cl_global()
Definition dwt.c:538
int dt_dwt_first_scale_visible_cl(dwt_params_cl_t *p)
Definition dwt.c:598
static cl_int dwt_wavelet_decompose_cl(cl_mem img, dwt_params_cl_t *const p, _dwt_layer_func_cl layer_func)
Definition dwt.c:658
int dwt_decompose(dwt_params_t *p, _dwt_layer_func layer_func)
Definition dwt.c:377
int dwt_get_max_scale(dwt_params_t *p)
Definition dwt.c:89
void dt_dwt_free(dwt_params_t *p)
Definition dwt.c:61
dwt_params_cl_t * dt_dwt_init_cl(const int devid, cl_mem image, const int width, const int height, const int scales, const int return_layer, const int merge_from_scale, void *user_data, const float preview_scale)
Definition dwt.c:565
static __DT_CLONE_TARGETS__ void dwt_denoise_horiz_1ch(float *const restrict out, float *const restrict in, float *const restrict accum, const size_t height, const size_t width, const size_t lev, const float thold, const int last)
Definition dwt.c:438
static __DT_CLONE_TARGETS__ void dwt_decompose_vert(float *const restrict out, const float *const restrict in, const size_t height, const size_t width, const size_t lev)
Definition dwt.c:125
static __DT_CLONE_TARGETS__ void dwt_denoise_vert_1ch(float *const restrict out, const float *const restrict in, const size_t height, const size_t width, const size_t lev)
Definition dwt.c:408
void dt_dwt_free_cl_global(dt_dwt_cl_global_t *g)
Definition dwt.c:551
void dt_dwt_free_cl(dwt_params_cl_t *p)
Definition dwt.c:587
dwt_params_t * dt_dwt_init(float *image, const int width, const int height, const int ch, const int scales, const int return_layer, const int merge_from_scale, void *user_data, const float preview_scale, const int use_sse)
Definition dwt.c:40
static __DT_CLONE_TARGETS__ int _get_max_scale(const int width, const int height, const float preview_scale)
Definition dwt.c:69
cl_int dwt_decompose_cl(dwt_params_cl_t *p, _dwt_layer_func_cl layer_func)
Definition dwt.c:913
int dwt_get_max_scale_cl(dwt_params_cl_t *p)
Definition dwt.c:593
static __DT_CLONE_TARGETS__ int _first_scale_visible(const int num_scales, const float preview_scale)
Definition dwt.c:95
static cl_int dwt_get_image_layer_cl(cl_mem layer, dwt_params_cl_t *const p)
Definition dwt.c:647
int dt_dwt_first_scale_visible(dwt_params_t *p)
Definition dwt.c:113
static cl_int dwt_add_layer_cl(cl_mem img, cl_mem layers, dwt_params_cl_t *const p, const int n_scale)
Definition dwt.c:626
static cl_int dwt_subtract_layer_cl(cl_mem bl, cl_mem bh, dwt_params_cl_t *const p)
Definition dwt.c:603
int() _dwt_layer_func(float *layer, dwt_params_t *const p, const int scale)
Definition dwt.h:42
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:93
cl_int() _dwt_layer_func_cl(cl_mem layer, dwt_params_cl_t *const p, const int scale)
Definition dwt.h:133
__DT_CLONE_TARGETS__ void dt_iop_image_add_image(float *const buf, const float *const other_image, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:276
__DT_CLONE_TARGETS__ void dt_iop_image_fill(float *const buf, const float fill_value, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:214
static float kernel(const float *x, const float *y)
float *const restrict const size_t const size_t ch
size_t size
Definition mipmap_cache.c:3
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:2136
void * dt_opencl_alloc_device_buffer(const int devid, const size_t size)
Definition opencl.c:2544
int dt_opencl_create_kernel(const int prog, const char *name)
Definition opencl.c:2030
void dt_opencl_free_kernel(const int kernel)
Definition opencl.c:2073
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:2127
int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid, cl_mem src_buffer, cl_mem dst_buffer, size_t srcoffset, size_t dstoffset, size_t size)
Definition opencl.c:2296
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2383
#define ROUNDUPDHT(a, b)
Definition opencl.h:82
#define ROUNDUPDWD(a, b)
Definition opencl.h:81
const float noise
int32_t num_openmp_threads
Definition darktable.h:758
struct dt_opencl_t * opencl
Definition darktable.h:785
int kernel_dwt_add_img_to_layer
Definition dwt.h:111
struct dt_dwt_cl_global_t * dwt
Definition opencl.h:266
dt_dwt_cl_global_t * global
Definition dwt.h:120
float * image
Definition dwt.h:29
#define MIN(a, b)
Definition thinplate.c:32
#define MAX(a, b)
Definition thinplate.c:29