Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
bilateral.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2012-2013 johannes hanika.
4 Copyright (C) 2012 Moritz Lipp.
5 Copyright (C) 2012-2014, 2016 Tobias Ellinghaus.
6 Copyright (C) 2012, 2014, 2016 Ulrich Pegelow.
7 Copyright (C) 2015-2016 Roman Lebedev.
8 Copyright (C) 2019 Andreas Schneider.
9 Copyright (C) 2019, 2025-2026 Aurélien PIERRE.
10 Copyright (C) 2019-2021 Pascal Obry.
11 Copyright (C) 2020-2021 Hubert Kowalski.
12 Copyright (C) 2020-2021 Ralf Brown.
13 Copyright (C) 2021 Hanno Schwalm.
14 Copyright (C) 2022 Martin Bařinka.
15 Copyright (C) 2022 Miloš Komarčević.
16
17 darktable is free software: you can redistribute it and/or modify
18 it under the terms of the GNU General Public License as published by
19 the Free Software Foundation, either version 3 of the License, or
20 (at your option) any later version.
21
22 darktable is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 GNU General Public License for more details.
26
27 You should have received a copy of the GNU General Public License
28 along with darktable. If not, see <http://www.gnu.org/licenses/>.
29*/
30
31#include "common/bilateral.h"
32#include "common/darktable.h" // dt_print, dt_alloc_align, dt_free_align
33#include "common/math.h" // for CLAMPS, roundf
34#include <glib.h> // for MIN, MAX
35#include <stdlib.h> // for size_t, free, malloc, NULL
36#include <string.h> // for memset
37#include <stdio.h> // fprintf
38
39// These limits clamp away insane memory requirements. They should reasonably faithfully represent the full
40// precision though, so tiling will help reduce the memory footprint and export will look the same as darkroom
41// mode (only 1mpix there).
42#define DT_COMMON_BILATERAL_MAX_RES_S 3000
43#define DT_COMMON_BILATERAL_MAX_RES_R 50
44
45void dt_bilateral_grid_size(dt_bilateral_t *b, const int width, const int height, const float L_range,
46 float sigma_s, const float sigma_r)
47{
48 // Callers adjust sigma_s to account for image scaling to make the bilateral filter scale-invariant. As a
49 // result, if the user sets a small enough value for sigma, we can get sigma_s substantially below 1.0.
50 // Values < 1 generate a bilateral grid with spatial dimensions larger than the (scaled) image pixel
51 // dimensions; for sigma_s < 0.5, there is at least one unused grid point between any two used points, and
52 // thus the gaussian blur will have little effect. So we force sigma_s to be at least 0.5 to avoid an
53 // excessively large grid.
54 if (sigma_s < 0.5) sigma_s = 0.5;
55
56 // compute an initial grid size, clamping away insanely large grids
57 float _x = CLAMPS((int)roundf(width / sigma_s), 4, DT_COMMON_BILATERAL_MAX_RES_S);
58 float _y = CLAMPS((int)roundf(height / sigma_s), 4, DT_COMMON_BILATERAL_MAX_RES_S);
59 float _z = CLAMPS((int)roundf(L_range / sigma_r), 4, DT_COMMON_BILATERAL_MAX_RES_R);
60 // If we clamped the X or Y dimensions, the sigma_s for that dimension changes. Since we need to use the
61 // same value in both dimensions, compute the effective sigma_s for the grid.
62 b->sigma_s = MAX(height / _y, width / _x);
63 b->sigma_r = L_range / _z;
64 // Compute the grid size in light of the actual adjusted values for sigma_s and sigma_r
65 b->size_x = (int)ceilf(width / b->sigma_s) + 1;
66 b->size_y = (int)ceilf(height / b->sigma_s) + 1;
67 b->size_z = (int)ceilf(L_range / b->sigma_r) + 1;
68#if 0
69 if (b->sigma_s != sigma_s) fprintf(stderr, "[bilateral] clamped sigma_s (%g -> %g)!\n",sigma_s,b->sigma_s);
70 if (b->sigma_r != sigma_r) fprintf(stderr, "[bilateral] clamped sigma_r (%g -> %g)!\n",sigma_r,b->sigma_r);
71#endif
72}
73
74size_t dt_bilateral_memory_use(const int width, // width of input image
75 const int height, // height of input image
76 const float sigma_s, // spatial sigma (blur pixel coords)
77 const float sigma_r) // range sigma (blur luma values)
78{
81 size_t grid_size = b.size_x * b.size_y * b.size_z;
82#ifdef HAVE_OPENCL
83 // OpenCL path needs two buffers
84 return 2 * grid_size * sizeof(float);
85#else
86 return (grid_size + 3 * darktable.num_openmp_threads * b.size_x * b.size_z) * sizeof(float);
87#endif /* HAVE_OPENCL */
88}
89
90#ifndef HAVE_OPENCL
91// for the CPU path this is just an alias as no additional temp buffer is needed
92// when compiling with OpenCL, version in bilateralcl.c takes precedence
93size_t dt_bilateral_memory_use2(const int width,
94 const int height,
95 const float sigma_s,
96 const float sigma_r)
97{
99}
100#endif /* !HAVE_OPENCL */
101
102size_t dt_bilateral_singlebuffer_size(const int width, // width of input image
103 const int height, // height of input image
104 const float sigma_s, // spatial sigma (blur pixel coords)
105 const float sigma_r) // range sigma (blur luma values)
106{
109 size_t grid_size = b.size_x * b.size_y * b.size_z;
110 return (grid_size + 3 * darktable.num_openmp_threads * b.size_x * b.size_z) * sizeof(float);
111}
112
113#ifndef HAVE_OPENCL
114// for the CPU path this is just an alias as no additional temp buffer is needed
115// when compiling with OpenCL, version in bilateralcl.c takes precedence
117 const int height,
118 const float sigma_s,
119 const float sigma_r)
120{
122}
123#endif /* !HAVE_OPENCL */
124
125static inline __attribute__((always_inline)) size_t image_to_grid(const dt_bilateral_t *const b, const int i, const int j, const float L,
126 float *xf, float *yf, float *zf)
127{
128 float x = CLAMPS(i / b->sigma_s, 0, b->size_x - 1);
129 float y = CLAMPS(j / b->sigma_s, 0, b->size_y - 1);
130 float z = CLAMPS(L / b->sigma_r, 0, b->size_z - 1);
131 const int xi = MIN((int)x, b->size_x - 2);
132 const int yi = MIN((int)y, b->size_y - 2);
133 const int zi = MIN((int)z, b->size_z - 2);
134 *xf = x - xi;
135 *yf = y - yi;
136 *zf = z - zi;
137 return ((xi + yi * b->size_x) * b->size_z) + zi;
138}
139
140static inline __attribute__((always_inline)) size_t image_to_relgrid(const dt_bilateral_t *const b, const int i, const float L, float *xf, float *zf)
141{
142 float x = CLAMPS(i / b->sigma_s, 0, b->size_x - 1);
143 float z = CLAMPS(L / b->sigma_r, 0, b->size_z - 1);
144 const int xi = MIN((int)x, b->size_x - 2);
145 const int zi = MIN((int)z, b->size_z - 2);
146 *xf = x - xi;
147 *zf = z - zi;
148 return (xi * b->size_z) + zi;
149}
150
151dt_bilateral_t *dt_bilateral_init(const int width, // width of input image
152 const int height, // height of input image
153 const float sigma_s, // spatial sigma (blur pixel coords)
154 const float sigma_r) // range sigma (blur luma values)
155{
156 dt_bilateral_t *b = (dt_bilateral_t *)malloc(sizeof(dt_bilateral_t));
157 if(IS_NULL_PTR(b)) return NULL;
159 b->width = width;
160 b->height = height;
161 b->numslices = darktable.num_openmp_threads;
162 b->sliceheight = (height + b->numslices - 1) / b->numslices;
163 b->slicerows = (b->size_y + b->numslices - 1) / b->numslices + 2;
164 b->buf = dt_pixelpipe_cache_alloc_align_float_cache(b->size_x * b->size_z * b->numslices * b->slicerows, 0);
165 if(b->buf) memset(b->buf, 0, sizeof(float) * b->size_x * b->size_z * b->numslices * b->slicerows);
166 if (IS_NULL_PTR(b->buf))
167 {
168 fprintf(stderr,"[bilateral] unable to allocate buffer for %" G_GSIZE_FORMAT "x%" G_GSIZE_FORMAT "x%" G_GSIZE_FORMAT " grid\n",b->size_x,b->size_y,b->size_z);
169 dt_free(b);
170 return NULL;
171 }
172 dt_print(DT_DEBUG_DEV, "[bilateral] created grid [%" G_GSIZE_FORMAT " %" G_GSIZE_FORMAT " %" G_GSIZE_FORMAT "] with sigma (%f %f) (%f %f)\n",
173 b->size_x, b->size_y, b->size_z, b->sigma_s, sigma_s, b->sigma_r, sigma_r);
174 return b;
175}
177void dt_bilateral_splat(const dt_bilateral_t *b, const float *const in)
178{
179 const int ox = b->size_z;
180 const int oy = b->size_x * b->size_z;
181 const int oz = 1;
182 const float sigma_s = b->sigma_s * b->sigma_s;
183 float *const buf = b->buf;
184
185 if (IS_NULL_PTR(buf)) return;
186 // splat into downsampled grid
187 const int nthreads = darktable.num_openmp_threads;
188 const size_t offsets[8] =
189 {
190 0,
191 ox,
192 oy,
193 ox + oy,
194 oz,
195 oz + ox,
196 oz + oy,
197 oz + oy + ox
198 };
200 for(int slice = 0; slice < b->numslices; slice++)
201 {
202 const int firstrow = slice * b->sliceheight;
203 const int lastrow = MIN((slice+1)*b->sliceheight,b->height);
204 // compute the first row of the final grid which this slice splats, and subtract that from the first
205 // row the current thread should use to get an offset
206 const int slice_offset = slice * b->slicerows - (int)(firstrow / b->sigma_s);
207 // now iterate over the rows of the current horizontal slice
208 for(int j = firstrow; j < lastrow; j++)
209 {
210 float y = CLAMPS(j / b->sigma_s, 0, b->size_y - 1);
211 const int yi = MIN((int)y, b->size_y - 2);
212 const float yf = y - yi;
213 const size_t base = (size_t)(yi + slice_offset) * oy;
214 for(int i = 0; i < b->width; i++)
215 {
216 size_t index = 4 * (j * b->width + i);
217 float xf, zf;
218 const float L = in[index];
219 // nearest neighbour splatting:
220 const size_t grid_index = base + image_to_relgrid(b, i, L, &xf, &zf);
221 // sum up payload here
222 const dt_aligned_pixel_t contrib =
223 {
224 (1.0f - xf) * (1.0f - yf) * 100.0f / sigma_s, // precompute the contributions along the first two dimensions
225 xf * (1.0f - yf) * 100.0f / sigma_s,
226 (1.0f - xf) * yf * 100.0f / sigma_s,
227 xf * yf * 100.0f / sigma_s
228 };
229 __OMP_SIMD__(aligned(buf:64))
230 for(int k = 0; k < 4; k++)
231 {
232 buf[grid_index + offsets[k]] += (contrib[k] * (1.0f - zf));
233 buf[grid_index + offsets[k+4]] += (contrib[k] * zf);
234 }
235 }
236 }
237 }
238
239 // merge the per-thread results into the final result
240 for (int slice = 1 ; slice < nthreads; slice++)
241 {
242 // compute the first row of the final grid which this slice splats
243 const int destrow = (int)(slice * b->sliceheight / b->sigma_s);
244 float *dest = buf + destrow * oy;
245 // now iterate over the grid rows splatted for this slice
246 for(int j = slice * b->slicerows; j < (slice+1)*b->slicerows; j++)
247 {
248 float *src = buf + j * oy;
249 for(int i = 0; i < oy; i++)
250 {
251 dest[i] += src[i];
252 }
253 dest += oy;
254 // clear elements in the part of the buffer which holds the final result now that we've read the partial result,
255 // since we'll be adding to those locations later
256 if (j < b->size_y)
257 memset(buf + j*oy, '\0', sizeof(float) * oy);
258 }
259 }
260}
262static void blur_line_z(float *buf, const int offset1, const int offset2, const int offset3, const int size1,
263 const int size2, const int size3)
264{
265 const float w1 = 4.f / 16.f;
266 const float w2 = 2.f / 16.f;
268 for(int k = 0; k < size1; k++)
269 {
270 size_t index = (size_t)k * offset1;
271 for(int j = 0; j < size2; j++)
272 {
273 float tmp1 = buf[index];
274 buf[index] = w1 * buf[index + offset3] + w2 * buf[index + 2 * offset3];
275 index += offset3;
276 float tmp2 = buf[index];
277 buf[index] = w1 * (buf[index + offset3] - tmp1) + w2 * buf[index + 2 * offset3];
278 index += offset3;
279 for(int i = 2; i < size3 - 2; i++)
280 {
281 const float tmp3 = buf[index];
282 buf[index] = +w1 * (buf[index + offset3] - tmp2) + w2 * (buf[index + 2 * offset3] - tmp1);
283 index += offset3;
284 tmp1 = tmp2;
285 tmp2 = tmp3;
286 }
287 const float tmp3 = buf[index];
288 buf[index] = w1 * (buf[index + offset3] - tmp2) - w2 * tmp1;
289 index += offset3;
290 buf[index] = -w1 * tmp3 - w2 * tmp2;
291 index += offset3;
292 index += offset2 - offset3 * size3;
293 }
294 }
295}
297static void blur_line(float *buf, const int offset1, const int offset2, const int offset3, const int size1,
298 const int size2, const int size3)
299{
300 const float w0 = 6.f / 16.f;
301 const float w1 = 4.f / 16.f;
302 const float w2 = 1.f / 16.f;
304 for(int k = 0; k < size1; k++)
305 {
306 size_t index = (size_t)k * offset1;
307 for(int j = 0; j < size2; j++)
308 {
309 float tmp1 = buf[index];
310 buf[index] = buf[index] * w0 + w1 * buf[index + offset3] + w2 * buf[index + 2 * offset3];
311 index += offset3;
312 float tmp2 = buf[index];
313 buf[index] = buf[index] * w0 + w1 * (buf[index + offset3] + tmp1) + w2 * buf[index + 2 * offset3];
314 index += offset3;
315 for(int i = 2; i < size3 - 2; i++)
316 {
317 const float tmp3 = buf[index];
318 buf[index]
319 = buf[index] * w0 + w1 * (buf[index + offset3] + tmp2) + w2 * (buf[index + 2 * offset3] + tmp1);
320 index += offset3;
321 tmp1 = tmp2;
322 tmp2 = tmp3;
323 }
324 const float tmp3 = buf[index];
325 buf[index] = buf[index] * w0 + w1 * (buf[index + offset3] + tmp2) + w2 * tmp1;
326 index += offset3;
327 buf[index] = buf[index] * w0 + w1 * tmp3 + w2 * tmp2;
328 index += offset3;
329 index += offset2 - offset3 * size3;
330 }
331 }
332}
333
334
336{
337 if (IS_NULL_PTR(b) || IS_NULL_PTR(b->buf))
338 return;
339 const int ox = b->size_z;
340 const int oy = b->size_x * b->size_z;
341 const int oz = 1;
342 // gaussian up to 3 sigma
343 blur_line(b->buf, oz, oy, ox, b->size_z, b->size_y, b->size_x);
344 // gaussian up to 3 sigma
345 blur_line(b->buf, oz, ox, oy, b->size_z, b->size_x, b->size_y);
346 // -2 derivative of the gaussian up to 3 sigma: x*exp(-x*x)
347 blur_line_z(b->buf, ox, oy, oz, b->size_x, b->size_y, b->size_z);
348}
350void dt_bilateral_slice(const dt_bilateral_t *const b, const float *const in, float *out, const float detail)
351{
352 // detail: 0 is leave as is, -1 is bilateral filtered, +1 is contrast boost
353 const float norm = -detail * b->sigma_r * 0.04f;
354 const int ox = b->size_z;
355 const int oy = b->size_x * b->size_z;
356 const int oz = 1;
357 float *const buf = b->buf;
358 const int width = b->width;
359 const int height = b->height;
360
361 if (IS_NULL_PTR(buf)) return;
362 __OMP_PARALLEL_FOR__(collapse(2))
363 for(int j = 0; j < height; j++)
364 {
365 for(int i = 0; i < width; i++)
366 {
367 size_t index = 4 * (j * width + i);
368 float xf, yf, zf;
369 const float L = in[index];
370 // trilinear lookup:
371 const size_t gi = image_to_grid(b, i, j, L, &xf, &yf, &zf);
372 const float Lout = fmaxf( 0.0f, L
373 + norm * (buf[gi] * (1.0f - xf) * (1.0f - yf) * (1.0f - zf)
374 + buf[gi + ox] * (xf) * (1.0f - yf) * (1.0f - zf)
375 + buf[gi + oy] * (1.0f - xf) * (yf) * (1.0f - zf)
376 + buf[gi + ox + oy] * (xf) * (yf) * (1.0f - zf)
377 + buf[gi + oz] * (1.0f - xf) * (1.0f - yf) * (zf)
378 + buf[gi + ox + oz] * (xf) * (1.0f - yf) * (zf)
379 + buf[gi + oy + oz] * (1.0f - xf) * (yf) * (zf)
380 + buf[gi + ox + oy + oz] * (xf) * (yf) * (zf)));
381 out[index] = Lout;
382 // and copy color and mask
383 out[index + 1] = in[index + 1];
384 out[index + 2] = in[index + 2];
385 out[index + 3] = in[index + 3];
386 }
387 }
388}
390void dt_bilateral_slice_to_output(const dt_bilateral_t *const b, const float *const in, float *out,
391 const float detail)
392{
393 // detail: 0 is leave as is, -1 is bilateral filtered, +1 is contrast boost
394 const float norm = -detail * b->sigma_r * 0.04f;
395 const int ox = b->size_z;
396 const int oy = b->size_x * b->size_z;
397 const int oz = 1;
398 float *const buf = b->buf;
399 const int width = b->width;
400 const int height = b->height;
401
402 if (IS_NULL_PTR(buf)) return;
403 __OMP_PARALLEL_FOR__(collapse(2))
404 for(int j = 0; j < height; j++)
405 {
406 for(int i = 0; i < width; i++)
407 {
408 size_t index = 4 * (j * width + i);
409 float xf, yf, zf;
410 const float L = in[index];
411 // trilinear lookup:
412 const size_t gi = image_to_grid(b, i, j, L, &xf, &yf, &zf);
413 const float Lout = norm * (buf[gi] * (1.0f - xf) * (1.0f - yf) * (1.0f - zf)
414 + buf[gi + ox] * (xf) * (1.0f - yf) * (1.0f - zf)
415 + buf[gi + oy] * (1.0f - xf) * (yf) * (1.0f - zf)
416 + buf[gi + ox + oy] * (xf) * (yf) * (1.0f - zf)
417 + buf[gi + oz] * (1.0f - xf) * (1.0f - yf) * (zf)
418 + buf[gi + ox + oz] * (xf) * (1.0f - yf) * (zf)
419 + buf[gi + oy + oz] * (1.0f - xf) * (yf) * (zf)
420 + buf[gi + ox + oy + oz] * (xf) * (yf) * (zf));
421 out[index] = MAX(0.0f, out[index] + Lout);
422 }
423 }
424}
425
427{
428 if(IS_NULL_PTR(b)) return;
430 dt_free(b);
431}
432
433#undef DT_COMMON_BILATERAL_MAX_RES_S
434#undef DT_COMMON_BILATERAL_MAX_RES_R
435
436// clang-format off
437// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
438// vim: shiftwidth=2 expandtab tabstop=2 cindent
439// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
440// clang-format on
void dt_bilateral_free(dt_bilateral_t *b)
Definition bilateral.c:426
static __DT_CLONE_TARGETS__ void blur_line(float *buf, const int offset1, const int offset2, const int offset3, const int size1, const int size2, const int size3)
Definition bilateral.c:297
static __DT_CLONE_TARGETS__ void blur_line_z(float *buf, const int offset1, const int offset2, const int offset3, const int size1, const int size2, const int size3)
Definition bilateral.c:262
__DT_CLONE_TARGETS__ void dt_bilateral_splat(const dt_bilateral_t *b, const float *const in)
Definition bilateral.c:177
void dt_bilateral_grid_size(dt_bilateral_t *b, const int width, const int height, const float L_range, float sigma_s, const float sigma_r)
Definition bilateral.c:45
size_t dt_bilateral_memory_use(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateral.c:74
#define DT_COMMON_BILATERAL_MAX_RES_S
Definition bilateral.c:42
dt_bilateral_t * dt_bilateral_init(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateral.c:151
#define DT_COMMON_BILATERAL_MAX_RES_R
Definition bilateral.c:43
__DT_CLONE_TARGETS__ void dt_bilateral_slice_to_output(const dt_bilateral_t *const b, const float *const in, float *out, const float detail)
Definition bilateral.c:390
size_t dt_bilateral_singlebuffer_size(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateral.c:102
__DT_CLONE_TARGETS__ void dt_bilateral_slice(const dt_bilateral_t *const b, const float *const in, float *out, const float detail)
Definition bilateral.c:350
void dt_bilateral_blur(const dt_bilateral_t *b)
Definition bilateral.c:335
int width
Definition bilateral.h:1
size_t dt_bilateral_memory_use2(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateralcl.c:64
size_t size_y
Definition bilateral.h:0
float sigma_s
Definition bilateral.h:3
size_t dt_bilateral_singlebuffer_size2(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateralcl.c:74
int height
Definition bilateral.h:1
float sigma_r
Definition bilateral.h:3
static void image_to_grid(const dt_iop_colorreconstruct_bilateral_t *const b, const float i, const float j, const float L, float *x, float *y, float *z)
const dt_colormatrix_t dt_aligned_pixel_t out
darktable_t darktable
Definition darktable.c:181
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1542
#define __OMP_SIMD__(...)
Definition darktable.h:262
@ DT_DEBUG_DEV
Definition darktable.h:717
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:447
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
Definition darktable.h:524
#define dt_free(ptr)
Definition darktable.h:456
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __DT_CLONE_TARGETS__
Definition darktable.h:367
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
static const float x
#define w2
Definition lmmse.c:60
#define w1
Definition lmmse.c:59
float *const restrict const size_t k
#define CLAMPS(A, L, H)
Definition math.h:76
float dt_aligned_pixel_t[4]
int32_t num_openmp_threads
Definition darktable.h:758
#define MIN(a, b)
Definition thinplate.c:32
#define MAX(a, b)
Definition thinplate.c:29