Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
fast_guided_filter.h
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2019-2020, 2025-2026 Aurélien PIERRE.
4 Copyright (C) 2019-2021 Pascal Obry.
5 Copyright (C) 2020-2021 Ralf Brown.
6 Copyright (C) 2020 rawfiner.
7 Copyright (C) 2020 Roman Lebedev.
8 Copyright (C) 2022 Martin Bařinka.
9 Copyright (C) 2022 Sakari Kapanen.
10 Copyright (C) 2023 Luca Zulberti.
11
12 darktable is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 darktable is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with darktable. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#pragma once
27
28#include <assert.h>
29#include <math.h>
30#include <stdlib.h>
31#include <stdio.h>
32#include <string.h>
33#include <time.h>
34
35#include "common/box_filters.h"
36#include "common/darktable.h"
37#include "common/imagebuf.h"
38#include "control/control.h"
39
40
41/* NOTE: this code complies with the optimizations in "common/extra_optimizations.h".
42 * Consider including that at the beginning of a *.c file where you use this
43 * header (provided the rest of the code complies).
44 **/
45
46
47#define MIN_FLOAT exp2f(-16.0f)
48
49
55
56
57/***
58 * DOCUMENTATION
59 *
60 * Fast Iterative Guided filter for surface blur
61 *
62 * This is a fast vectorized implementation of guided filter for grey images optimized for
63 * the special case where the guiding and the guided image are the same, which is useful
64 * for edge-aware surface blur.
65 *
66 * Since the guided filter is a linear application, we can safely downscale
67 * the guiding and the guided image by a factor of 4, using a bilinear interpolation,
68 * compute the guidance at this scale, then upscale back to the original size
69 * and get a free 10x speed-up.
70 *
71 * Then, the vectorization adds another substantial speed-up. Overall, it brings a x50 to x200
72 * speed-up compared to the guided_filter.h lib. Of course, it requires every buffer to be
73 * 64-bits aligned.
74 *
75 * On top of the default guided filter, several pre- and post-processing options are provided :
76 *
77 * - mask quantization : perform a posterization of the guiding image in log2 space to
78 * help the guiding to produce smoother areas,
79 *
80 * - blending : perform a regular (linear) blending of a and b parameters after the
81 * variance analysis (aka the by-the-book guided filter), or a geometric mean of the filter output (by-the-book)
82 * and the original image, which produces a pleasing trade-off.
83 *
84 * - iterations : apply the guided filtering recursively, with kernel size increasing by sqrt(2)
85 * between each iteration, to diffuse the filter and soften edges transitions.
86 *
87 * Reference :
88 * Kaiming He, Jian Sun, Microsoft : https://arxiv.org/abs/1505.00996
89 **/
90
91
92 #ifdef _OPENMP
93#pragma omp declare simd
94#endif
95static inline float fast_clamp(const float value, const float bottom, const float top)
96{
97 // vectorizable clamping between bottom and top values
98 return fmaxf(fminf(value, top), bottom);
99}
100
101
103static inline void interpolate_bilinear(const float *const restrict in, const size_t width_in, const size_t height_in,
104 float *const restrict out, const size_t width_out, const size_t height_out,
105 const size_t ch)
106{
107 // Fast vectorized bilinear interpolation on ch channels
108#ifdef _OPENMP
109#pragma omp parallel for collapse(2) default(none) \
110 dt_omp_firstprivate(in, out, width_out, height_out, width_in, height_in, ch) \
111 schedule(simd:static)
112#endif
113 for(size_t i = 0; i < height_out; i++)
114 {
115 for(size_t j = 0; j < width_out; j++)
116 {
117 // Relative coordinates of the pixel in output space
118 const float x_out = (float)j /(float)width_out;
119 const float y_out = (float)i /(float)height_out;
120
121 // Corresponding absolute coordinates of the pixel in input space
122 const float x_in = x_out * (float)width_in;
123 const float y_in = y_out * (float)height_in;
124
125 // Nearest neighbours coordinates in input space
126 size_t x_prev = (size_t)floorf(x_in);
127 size_t x_next = x_prev + 1;
128 size_t y_prev = (size_t)floorf(y_in);
129 size_t y_next = y_prev + 1;
130
131 x_prev = (x_prev < width_in) ? x_prev : width_in - 1;
132 x_next = (x_next < width_in) ? x_next : width_in - 1;
133 y_prev = (y_prev < height_in) ? y_prev : height_in - 1;
134 y_next = (y_next < height_in) ? y_next : height_in - 1;
135
136 // Nearest pixels in input array (nodes in grid)
137 const size_t Y_prev = y_prev * width_in;
138 const size_t Y_next = y_next * width_in;
139 const float *const Q_NW = (float *)in + (Y_prev + x_prev) * ch;
140 const float *const Q_NE = (float *)in + (Y_prev + x_next) * ch;
141 const float *const Q_SE = (float *)in + (Y_next + x_next) * ch;
142 const float *const Q_SW = (float *)in + (Y_next + x_prev) * ch;
143
144 // Spatial differences between nodes
145 const float Dy_next = (float)y_next - y_in;
146 const float Dy_prev = 1.f - Dy_next; // because next - prev = 1
147 const float Dx_next = (float)x_next - x_in;
148 const float Dx_prev = 1.f - Dx_next; // because next - prev = 1
149
150 // Interpolate over ch layers
151 float *const pixel_out = (float *)out + (i * width_out + j) * ch;
152
153//#pragma unroll //LLVM warns it can't unroll -- presumably because 'ch' is not a constant
154 for(size_t c = 0; c < ch; c++)
155 {
156 pixel_out[c] = Dy_prev * (Q_SW[c] * Dx_next + Q_SE[c] * Dx_prev) +
157 Dy_next * (Q_NW[c] * Dx_next + Q_NE[c] * Dx_prev);
158 }
159 }
160 }
161}
162
163
165static inline int variance_analyse(const float *const restrict guide, // I
166 const float *const restrict mask, //p
167 float *const restrict ab,
168 const size_t width, const size_t height,
169 const int radius, const float feathering)
170{
171 // Compute a box average (filter) on a grey image over a window of size 2*radius + 1
172 // then get the variance of the guide and covariance with its mask
173 // output a and b, the linear blending params
174 // p, the mask is the quantised guide I
175
176 const size_t Ndim = width * height;
177 const size_t Ndimch = Ndim * 4;
178
179 /*
180 * input is array of struct : { { guide , mask, guide * guide, guide * mask } }
181 */
182 float *const restrict input = dt_pixelpipe_cache_alloc_align_float_cache(Ndimch, 0);
183 if(input == NULL) return 1;
184
185 // Pre-multiply guide and mask and pack all inputs into an array of 4x1 SIMD struct
186#ifdef _OPENMP
187#pragma omp parallel for default(none) \
188 dt_omp_firstprivate(guide, mask, Ndim, radius, input) \
189 schedule(simd:static)
190#endif
191 for(size_t k = 0; k < Ndim; k++)
192 {
193 const size_t index = k * 4;
194 input[index] = guide[k];
195 input[index + 1] = mask[k];
196 input[index + 2] = guide[k] * guide[k];
197 input[index + 3] = guide[k] * mask[k];
198 }
199
200 // blur the guide and mask as a four-channel image to exploit data locality and SIMD
201 if(dt_box_mean(input, height, width, 4, radius, 1) != 0)
202 {
204 return 1;
205 }
206
207 // blend the result and store in output buffer
208#ifdef _OPENMP
209#pragma omp parallel for default(none) \
210 dt_omp_firstprivate(ab, input, width, height, feathering) \
211 schedule(static)
212#endif
213 for(size_t idx = 0; idx < width*height; idx++)
214 {
215 const float d = fmaxf((input[4*idx+2] - input[4*idx+0] * input[4*idx+0]) + feathering, 1e-15f); // avoid division by 0.
216 const float a = (input[4*idx+3] - input[4*idx+0] * input[4*idx+1]) / d;
217 const float b = input[4*idx+1] - a * input[4*idx+0];
218 ab[2*idx] = a;
219 ab[2*idx+1] = b;
220 }
221
223 return 0;
224}
225
226
228static inline void apply_linear_blending(float *const restrict image,
229 const float *const restrict ab,
230 const size_t num_elem)
231{
232#ifdef _OPENMP
233#pragma omp parallel for simd default(none) \
234dt_omp_firstprivate(image, ab, num_elem) \
235schedule(simd:static) aligned(image, ab:64)
236#endif
237 for(size_t k = 0; k < num_elem; k++)
238 {
239 // Note : image[k] is positive at the outside of the luminance mask
240 image[k] = fmaxf(image[k] * ab[k * 2] + ab[k * 2 + 1], MIN_FLOAT);
241 }
242}
243
244
246static inline void apply_linear_blending_w_geomean(float *const restrict image,
247 const float *const restrict ab,
248 const size_t num_elem)
249{
250#ifdef _OPENMP
251#pragma omp parallel for simd default(none) \
252dt_omp_firstprivate(image, ab, num_elem) \
253schedule(simd:static) aligned(image, ab:64)
254#endif
255 for(size_t k = 0; k < num_elem; k++)
256 {
257 // Note : image[k] is positive at the outside of the luminance mask
258 image[k] = sqrtf(image[k] * fmaxf(image[k] * ab[k * 2] + ab[k * 2 + 1], MIN_FLOAT));
259 }
260}
261
262
264static inline void quantize(const float *const restrict image,
265 float *const restrict out,
266 const size_t num_elem,
267 const float sampling, const float clip_min, const float clip_max)
268{
269 // Quantize in exposure levels evenly spaced in log by sampling
270
271 if(sampling == 0.0f)
272 {
273 // No-op
274 dt_iop_image_copy(out, image, num_elem);
275 }
276 else if(sampling == 1.0f)
277 {
278 // fast track
279#ifdef _OPENMP
280#pragma omp parallel for simd default(none) \
281dt_omp_firstprivate(image, out, num_elem, sampling, clip_min, clip_max) \
282schedule(simd:static) aligned(image, out:64)
283#endif
284 for(size_t k = 0; k < num_elem; k++)
285 out[k] = fast_clamp(exp2f(floorf(log2f(image[k]))), clip_min, clip_max);
286 }
287
288 else
289 {
290 // slow track
291#ifdef _OPENMP
292#pragma omp parallel for simd default(none) \
293dt_omp_firstprivate(image, out, num_elem, sampling, clip_min, clip_max) \
294schedule(simd:static) aligned(image, out:64)
295#endif
296 for(size_t k = 0; k < num_elem; k++)
297 out[k] = fast_clamp(exp2f(floorf(log2f(image[k]) / sampling) * sampling), clip_min, clip_max);
298 }
299}
300
301
303static inline int fast_surface_blur(float *const restrict image,
304 const size_t width, const size_t height,
305 const int radius, float feathering, const int iterations,
306 const dt_iop_guided_filter_blending_t filter, const float scale,
307 const float quantization, const float quantize_min, const float quantize_max)
308{
309 // Works in-place on a grey image
310
311 // A down-scaling of 4 seems empirically safe and consistent no matter the image zoom level
312 // see reference paper above for proof.
313 const float scaling = 4.0f;
314 const int ds_radius = (radius < 4) ? 1 : radius / scaling;
315
316 const size_t ds_height = height / scaling;
317 const size_t ds_width = width / scaling;
318
319 const size_t num_elem_ds = ds_width * ds_height;
320 const size_t num_elem = width * height;
321
322 float *const restrict ds_image = dt_pixelpipe_cache_alloc_align_float_cache(dt_round_size_sse(num_elem_ds), 0);
323 float *const restrict ds_mask = dt_pixelpipe_cache_alloc_align_float_cache(dt_round_size_sse(num_elem_ds), 0);
324 float *const restrict ds_ab = dt_pixelpipe_cache_alloc_align_float_cache(dt_round_size_sse(num_elem_ds * 2), 0);
325 float *const restrict ab = dt_pixelpipe_cache_alloc_align_float_cache(dt_round_size_sse(num_elem * 2), 0);
326
327 if(!ds_image || !ds_mask || !ds_ab || !ab)
328 {
329 dt_control_log(_("fast guided filter failed to allocate memory, check your RAM settings"));
334 return 1;
335 }
336
337 // Downsample the image for speed-up
338 interpolate_bilinear(image, width, height, ds_image, ds_width, ds_height, 1);
339
340 // Iterations of filter models the diffusion, sort of
341 for(int i = 0; i < iterations; ++i)
342 {
343 // (Re)build the mask from the quantized image to help guiding
344 quantize(ds_image, ds_mask, ds_width * ds_height, quantization, quantize_min, quantize_max);
345
346 // Perform the patch-wise variance analyse to get
347 // the a and b parameters for the linear blending s.t. mask = a * I + b
348 if(variance_analyse(ds_mask, ds_image, ds_ab, ds_width, ds_height, ds_radius, feathering) != 0)
349 {
354 return 1;
355 }
356
357 // Compute the patch-wise average of parameters a and b
358 if(dt_box_mean(ds_ab, ds_height, ds_width, 2, ds_radius, 1) != 0)
359 {
364 return 1;
365 }
366
367 if(i != iterations - 1)
368 {
369 // Process the intermediate filtered image
370 apply_linear_blending(ds_image, ds_ab, num_elem_ds);
371 }
372 }
373
374 // Upsample the blending parameters a and b
375 interpolate_bilinear(ds_ab, ds_width, ds_height, ab, width, height, 2);
376
377 // Finally, blend the guided image
378 if(filter == DT_GF_BLENDING_LINEAR)
379 apply_linear_blending(image, ab, num_elem);
380 else if(filter == DT_GF_BLENDING_GEOMEAN)
381 apply_linear_blending_w_geomean(image, ab, num_elem);
382
387 return 0;
388}
389
390// clang-format off
391// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
392// vim: shiftwidth=2 expandtab tabstop=2 cindent
393// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
394// clang-format on
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
int dt_box_mean(float *const buf, const size_t height, const size_t width, const int ch, const int radius, const unsigned iterations)
Definition box_filters.c:1235
static const float scaling
Definition chromatic_adaptation.h:299
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
const float d
Definition colorspaces_inline_conversions.h:931
const float b
Definition colorspaces_inline_conversions.h:1326
const float a
Definition colorspaces_inline_conversions.h:1292
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
const float top
Definition colorspaces_inline_conversions.h:672
void dt_control_log(const char *msg,...)
Definition control.c:530
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:371
static size_t dt_round_size_sse(const size_t size)
Definition darktable.h:327
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:377
#define __DT_CLONE_TARGETS__
Definition darktable.h:291
static const dt_aligned_pixel_simd_t value
Definition darktable.h:501
static __DT_CLONE_TARGETS__ int variance_analyse(const float *const restrict guide, const float *const restrict mask, float *const restrict ab, const size_t width, const size_t height, const int radius, const float feathering)
Definition fast_guided_filter.h:165
static __DT_CLONE_TARGETS__ int fast_surface_blur(float *const restrict image, const size_t width, const size_t height, const int radius, float feathering, const int iterations, const dt_iop_guided_filter_blending_t filter, const float scale, const float quantization, const float quantize_min, const float quantize_max)
Definition fast_guided_filter.h:303
dt_iop_guided_filter_blending_t
Definition fast_guided_filter.h:51
@ DT_GF_BLENDING_LINEAR
Definition fast_guided_filter.h:52
@ DT_GF_BLENDING_GEOMEAN
Definition fast_guided_filter.h:53
static __DT_CLONE_TARGETS__ void quantize(const float *const restrict image, float *const restrict out, const size_t num_elem, const float sampling, const float clip_min, const float clip_max)
Definition fast_guided_filter.h:264
#define MIN_FLOAT
Definition fast_guided_filter.h:47
static __DT_CLONE_TARGETS__ void apply_linear_blending_w_geomean(float *const restrict image, const float *const restrict ab, const size_t num_elem)
Definition fast_guided_filter.h:246
static __DT_CLONE_TARGETS__ void apply_linear_blending(float *const restrict image, const float *const restrict ab, const size_t num_elem)
Definition fast_guided_filter.h:228
static float fast_clamp(const float value, const float bottom, const float top)
Definition fast_guided_filter.h:95
static __DT_CLONE_TARGETS__ void interpolate_bilinear(const float *const restrict in, const size_t width_in, const size_t height_in, float *const restrict out, const size_t width_out, const size_t height_out, const size_t ch)
Definition fast_guided_filter.h:103
__DT_CLONE_TARGETS__ void dt_iop_image_copy(float *const __restrict__ out, const float *const __restrict__ in, const size_t nfloats)
Definition imagebuf.c:138