Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
bspline.h
Go to the documentation of this file.
1/*
2 This file is part of the Ansel project.
3 Copyright (C) 2021-2023, 2025 Aurélien PIERRE.
4 Copyright (C) 2021 Pascal Obry.
5 Copyright (C) 2021 Ralf Brown.
6 Copyright (C) 2022 Martin Bařinka.
7 Copyright (C) 2023 Luca Zulberti.
8
9 Ansel is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13
14 Ansel is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with Ansel. If not, see <http://www.gnu.org/licenses/>.
21*/
22#pragma once
23
24#include "common/darktable.h"
25#include "common/dwt.h"
27#include "math.h"
28
29// B spline filter
30#define BSPLINE_FSIZE 5
31
32// The B spline best approximate a Gaussian of standard deviation :
33// see https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/
34#define B_SPLINE_SIGMA 1.0553651328015339f
35
36static inline float normalize_laplacian(const float sigma)
37{
38 // Normalize the wavelet scale to approximate a laplacian
39 // see https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Scaling-coefficient
40 return 2.f / sqf(sigma);
41}
42
43// Normalization scaling of the wavelet to approximate a laplacian
44// from the function above for sigma = B_SPLINE_SIGMA as a constant
45#define B_SPLINE_TO_LAPLACIAN 3.182727439285017f
46#define B_SPLINE_TO_LAPLACIAN_2 10.129753952777762f // square
47
48static inline float equivalent_sigma_at_step(const float sigma, const unsigned int s)
49{
50 // If we stack several gaussian blurs of standard deviation sigma on top of each other,
51 // this is the equivalent standard deviation we get at the end (after s steps)
52 // First step is s = 0
53 // see
54 // https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Multi-scale-iterative-scheme
55 if(s == 0)
56 return sigma;
57 else
58 return sqrtf(sqf(equivalent_sigma_at_step(sigma, s - 1)) + sqf(exp2f((float)s) * sigma));
59}
60
61static inline unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
62{
63 // The inverse of the above : compute the number of scales needed to reach the desired equivalent sigma_final
64 // after sequential blurs of constant sigma_filter
65 unsigned int s = 0;
66 float radius = sigma_filter;
67 while(radius < sigma_final)
68 {
69 ++s;
70 radius = sqrtf(sqf(radius) + sqf((float)(1 << s) * sigma_filter));
71 }
72 return s + 1;
73}
74
75static inline size_t decimated_bspline_size(const size_t size)
76{
77 return (size - 1u) / 2u + 1u;
78}
79
80
81#ifdef _OPENMP
82#pragma omp declare simd aligned(buf, indices, result:64)
83#endif
84static inline void sparse_scalar_product(const dt_aligned_pixel_t buf, const size_t indices[BSPLINE_FSIZE],
85 dt_aligned_pixel_t result, const gboolean clip_negatives)
86{
87 // scalar product of 2 3x5 vectors stored as RGB planes and B-spline filter,
88 // e.g. RRRRR - GGGGG - BBBBB
89 static const float filter[BSPLINE_FSIZE] = { 1.0f / 16.0f,
90 4.0f / 16.0f,
91 6.0f / 16.0f,
92 4.0f / 16.0f,
93 1.0f / 16.0f };
94
95 if(clip_negatives)
96 {
97 for_each_channel(c, aligned(buf,indices,result))
98 {
99 result[c] = MAX(0.0f, filter[0] * buf[indices[0] + c] +
100 filter[1] * buf[indices[1] + c] +
101 filter[2] * buf[indices[2] + c] +
102 filter[3] * buf[indices[3] + c] +
103 filter[4] * buf[indices[4] + c]);
104 }
105 }
106 else
107 {
108 for_each_channel(c, aligned(buf,indices,result))
109 {
110 result[c] = filter[0] * buf[indices[0] + c] +
111 filter[1] * buf[indices[1] + c] +
112 filter[2] * buf[indices[2] + c] +
113 filter[3] * buf[indices[3] + c] +
114 filter[4] * buf[indices[4] + c];
115 }
116 }
117}
118
119#ifdef _OPENMP
120#pragma omp declare simd aligned(in, temp)
121#endif
122static inline void _bspline_vertical_pass(const float *const restrict in, float *const restrict temp,
123 size_t row, size_t width, size_t height, int mult, const gboolean clip_negatives)
124{
125 size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];
126 // compute the index offsets of the pixels of interest; since the offsets are the same for the entire row,
127 // we only need to do this once and can then process the entire row
128 indices[0] = 4 * width * MAX((int)row - 2 * mult, 0);
129 indices[1] = 4 * width * MAX((int)row - mult, 0);
130 indices[2] = 4 * width * row;
131 indices[3] = 4 * width * MIN(row + mult, height-1);
132 indices[4] = 4 * width * MIN(row + 2 * mult, height-1);
133 for(size_t j = 0; j < width; j++)
134 {
135 // Compute the vertical blur of the current pixel and store it in the temp buffer for the row
136 sparse_scalar_product(in + j * 4, indices, temp + j * 4, clip_negatives);
137 }
138}
139
140#ifdef _OPENMP
141#pragma omp declare simd aligned(temp, out)
142#endif
143static inline void _bspline_horizontal(const float *const restrict temp, float *const restrict out,
144 size_t col, size_t width, int mult, const gboolean clip_negatives)
145{
146 // Compute the array indices of the pixels of interest; since the offsets will change near the ends of
147 // the row, we need to recompute for each pixel
148 size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];
149 indices[0] = 4 * MAX((int)col - 2 * mult, 0);
150 indices[1] = 4 * MAX((int)col - mult, 0);
151 indices[2] = 4 * col;
152 indices[3] = 4 * MIN(col + mult, width-1);
153 indices[4] = 4 * MIN(col + 2 * mult, width-1);
154 // Compute the horizontal blur of the already vertically-blurred pixel and store the result at the proper
155 // row/column location in the output buffer
156 sparse_scalar_product(temp, indices, out, clip_negatives);
157}
158
159#ifdef _OPENMP
160#pragma omp declare simd aligned(temp, out)
161#endif
162static inline void _bspline_horizontal_decimated(const float *const restrict temp, float *const restrict out,
163 const size_t col, const size_t width,
164 const gboolean clip_negatives)
165{
166 // The vertical pass has already been evaluated on the fine grid. We now only
167 // sample the horizontal convolution on the even columns kept by the decimated
168 // spline pyramid.
169 const size_t center = col * 2u;
170 size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];
171 indices[0] = 4 * MAX((int)center - 2, 0);
172 indices[1] = 4 * MAX((int)center - 1, 0);
173 indices[2] = 4 * center;
174 indices[3] = 4 * MIN(center + 1, width - 1);
175 indices[4] = 4 * MIN(center + 2, width - 1);
176 sparse_scalar_product(temp, indices, out, clip_negatives);
177}
178
179#ifdef _OPENMP
180#pragma omp declare simd aligned(in, out:64) aligned(tempbuf:16)
181#endif
182inline static void reduce_2D_Bspline(const float *const restrict in, float *const restrict out,
183 const size_t width, const size_t height,
184 float *const restrict tempbuf, const size_t padded_size,
185 const gboolean clip_negatives)
186{
187 const size_t coarse_width = decimated_bspline_size(width);
188 const size_t coarse_height = decimated_bspline_size(height);
189 const gboolean use_replicated_boundary = (coarse_width > 2u && coarse_height > 2u);
190 static const float filter[BSPLINE_FSIZE] = { 1.0f / 16.0f,
191 4.0f / 16.0f,
192 6.0f / 16.0f,
193 4.0f / 16.0f,
194 1.0f / 16.0f };
195 (void)tempbuf;
196 (void)padded_size;
197
198#ifdef _OPENMP
199#pragma omp parallel for default(none) \
200 dt_omp_firstprivate(width, height, coarse_width, coarse_height, padded_size, clip_negatives, use_replicated_boundary) \
201 dt_omp_sharedconst(in, out, tempbuf, filter) \
202 schedule(static)
203#endif
204 for(size_t row = 0; row < coarse_height; ++row)
205 {
206 for(size_t col = 0; col < coarse_width; ++col)
207 {
208 dt_aligned_pixel_t accum = { 0.f };
209 const size_t sample_row = use_replicated_boundary ? CLAMP((int)row, 1, (int)coarse_height - 2) : row;
210 const size_t sample_col = use_replicated_boundary ? CLAMP((int)col, 1, (int)coarse_width - 2) : col;
211 const size_t center_row = sample_row * 2u;
212 const size_t center_col = sample_col * 2u;
213
214 // Evaluate the decimated 5x5 cardinal B-spline on the current grid. This
215 // is the Gaussian-pyramid reduce stage used to build the hybrid decimated
216 // wavelet stack.
217 for(int jj = -2; jj <= 2; ++jj)
218 {
219 const size_t yy = CLAMP((int)center_row + jj, 0, (int)height - 1);
220 for(int ii = -2; ii <= 2; ++ii)
221 {
222 const size_t xx = CLAMP((int)center_col + ii, 0, (int)width - 1);
223 const float weight = filter[ii + 2] * filter[jj + 2];
224 const size_t index = 4 * (yy * width + xx);
226 accum[c] += weight * in[index + c];
227 }
228 }
229
230 const size_t out_index = 4 * (row * coarse_width + col);
231 if(clip_negatives)
232 {
234 out[out_index + c] = MAX(accum[c], 0.f);
235 }
236 else
237 {
238 copy_pixel_nontemporal(out + out_index, accum);
239 }
240 }
241 }
242}
243
244#ifdef _OPENMP
245#pragma omp declare simd aligned(in, out:64)
246#endif
247inline static void expand_2D_Bspline(const float *const restrict in, float *const restrict out,
248 const size_t width, const size_t height,
249 const gboolean clip_negatives)
250{
251 const size_t coarse_width = decimated_bspline_size(width);
252 const size_t coarse_height = decimated_bspline_size(height);
253 const gboolean use_replicated_boundary = (width > 2u && height > 2u && coarse_width > 1u && coarse_height > 1u);
254 static const float filter[BSPLINE_FSIZE] = { 1.0f / 16.0f,
255 4.0f / 16.0f,
256 6.0f / 16.0f,
257 4.0f / 16.0f,
258 1.0f / 16.0f };
259
260#ifdef _OPENMP
261#pragma omp parallel for default(none) \
262 dt_omp_firstprivate(width, height, coarse_width, coarse_height, clip_negatives, use_replicated_boundary) \
263 dt_omp_sharedconst(in, out, filter) \
264 schedule(static) \
265 collapse(2)
266#endif
267 for(size_t row = 0; row < height; ++row)
268 for(size_t col = 0; col < width; ++col)
269 {
270 size_t sample_row = row;
271 size_t sample_col = col;
272 if(use_replicated_boundary)
273 {
274 const size_t max_row = (height & 1u) ? height - 2u : height - 3u;
275 const size_t max_col = (width & 1u) ? width - 2u : width - 3u;
276 sample_row = CLAMP((int)row, 1, (int)max_row);
277 sample_col = CLAMP((int)col, 1, (int)max_col);
278 }
279
280 const size_t center_row = sample_row >> 1;
281 const size_t center_col = sample_col >> 1;
282 dt_aligned_pixel_t accum = { 0.f };
283
284 // Rebuild the fine sample with the same parity-dependent Gaussian expand
285 // used by the local-laplacian pyramid, but on 4-channel spline data.
286 switch((sample_col & 1u) + 2u * (sample_row & 1u))
287 {
288 case 0:
289 {
290 for(int jj = -1; jj <= 1; ++jj)
291 for(int ii = -1; ii <= 1; ++ii)
292 {
293 const size_t yy = center_row + jj;
294 const size_t xx = center_col + ii;
295 const float weight = 4.f * filter[2 * (jj + 1)] * filter[2 * (ii + 1)];
296 const size_t index = 4 * (yy * coarse_width + xx);
298 accum[c] += weight * in[index + c];
299 }
300 break;
301 }
302 case 1:
303 {
304 for(int jj = -1; jj <= 1; ++jj)
305 for(int ii = 0; ii <= 1; ++ii)
306 {
307 const size_t yy = center_row + jj;
308 const size_t xx = center_col + ii;
309 const float weight = 4.f * filter[2 * (jj + 1)] * filter[2 * ii + 1];
310 const size_t index = 4 * (yy * coarse_width + xx);
312 accum[c] += weight * in[index + c];
313 }
314 break;
315 }
316 case 2:
317 {
318 for(int jj = 0; jj <= 1; ++jj)
319 for(int ii = -1; ii <= 1; ++ii)
320 {
321 const size_t yy = center_row + jj;
322 const size_t xx = center_col + ii;
323 const float weight = 4.f * filter[2 * jj + 1] * filter[2 * (ii + 1)];
324 const size_t index = 4 * (yy * coarse_width + xx);
326 accum[c] += weight * in[index + c];
327 }
328 break;
329 }
330 default:
331 {
332 for(int jj = 0; jj <= 1; ++jj)
333 for(int ii = 0; ii <= 1; ++ii)
334 {
335 const size_t yy = center_row + jj;
336 const size_t xx = center_col + ii;
337 const float weight = 4.f * filter[2 * jj + 1] * filter[2 * ii + 1];
338 const size_t index = 4 * (yy * coarse_width + xx);
340 accum[c] += weight * in[index + c];
341 }
342 break;
343 }
344 }
345
346 const size_t out_index = 4 * (row * width + col);
347 if(clip_negatives)
348 {
350 out[out_index + c] = MAX(accum[c], 0.f);
351 }
352 else
353 {
354 copy_pixel_nontemporal(out + out_index, accum);
355 }
356 }
357}
358
359#ifdef _OPENMP
360#pragma omp declare simd aligned(in, out:64) aligned(tempbuf:16)
361#endif
362inline static void blur_2D_Bspline(const float *const restrict in, float *const restrict out,
363 float *const restrict tempbuf,
364 const size_t width, const size_t height, const int mult, const gboolean clip_negatives)
365{
366 // À-trous B-spline interpolation/blur shifted by mult
367 #ifdef _OPENMP
368 #pragma omp parallel for default(none) \
369 dt_omp_firstprivate(width, height, mult) \
370 dt_omp_sharedconst(out, in, tempbuf, clip_negatives) \
371 schedule(static)
372 #endif
373 for(size_t row = 0; row < height; row++)
374 {
375 // get a thread-private one-row temporary buffer
376 float *const temp = tempbuf + 4 * width * dt_get_thread_num();
377 // interleave the order in which we process the rows so that we minimize cache misses
378 const size_t i = dwt_interleave_rows(row, height, mult);
379 // Convolve B-spline filter over columns: for each pixel in the current row, compute vertical blur
380 _bspline_vertical_pass(in, temp, i, width, height, mult, clip_negatives);
381 // Convolve B-spline filter horizontally over current row
382 for(size_t j = 0; j < width; j++)
383 {
384 _bspline_horizontal(temp, out + (i * width + j) * 4, j, width, mult, clip_negatives);
385 }
386 }
387}
388
389#ifdef _OPENMP
390#pragma omp declare simd aligned(in, HF, LF:64) aligned(tempbuf:16)
391#endif
392inline static void decompose_2D_Bspline(const float *const restrict in,
393 float *const restrict HF,
394 float *const restrict LF,
395 const size_t width, const size_t height, const int mult,
396 float *const tempbuf, size_t padded_size)
397{
398 // Blur and compute the wavelet at once
399#ifdef _OPENMP
400#pragma omp parallel for default(none) \
401 dt_omp_firstprivate(width, height, mult, padded_size) \
402 dt_omp_sharedconst(in, HF, LF, tempbuf) \
403 schedule(static)
404#endif
405 for(size_t row = 0; row < height; row++)
406 {
407 // get a thread-private one-row temporary buffer
408 float *restrict DT_ALIGNED_ARRAY const temp = dt_get_perthread(tempbuf, padded_size);
409 // interleave the order in which we process the rows so that we minimize cache misses
410 const size_t i = dwt_interleave_rows(row, height, mult);
411 // Convolve B-spline filter over columns: for each pixel in the current row, compute vertical blur
412 _bspline_vertical_pass(in, temp, i, width, height, mult, TRUE); // always clip negatives
413 // Convolve B-spline filter horizontally over current row
414 for(size_t j = 0; j < width; j++)
415 {
416 const size_t index = 4U * (i * width + j);
417 _bspline_horizontal(temp, LF + index, j, width, mult, TRUE); // always clip negatives
418 // compute the HF component by subtracting the LF from the original input
420 HF[index + c] = in[index + c] - LF[index + c];
421 }
422 }
423}
424
425// clang-format off
426// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
427// vim: shiftwidth=2 expandtab tabstop=2 cindent
428// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
429// clang-format on
#define TRUE
Definition ashift_lsd.c:162
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
static unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
Definition bspline.h:61
static void _bspline_horizontal_decimated(const float *const restrict temp, float *const restrict out, const size_t col, const size_t width, const gboolean clip_negatives)
Definition bspline.h:162
static void reduce_2D_Bspline(const float *const restrict in, float *const restrict out, const size_t width, const size_t height, float *const restrict tempbuf, const size_t padded_size, const gboolean clip_negatives)
Definition bspline.h:182
static void _bspline_horizontal(const float *const restrict temp, float *const restrict out, size_t col, size_t width, int mult, const gboolean clip_negatives)
Definition bspline.h:143
static void sparse_scalar_product(const dt_aligned_pixel_t buf, const size_t indices[5], dt_aligned_pixel_t result, const gboolean clip_negatives)
Definition bspline.h:84
static float equivalent_sigma_at_step(const float sigma, const unsigned int s)
Definition bspline.h:48
static void blur_2D_Bspline(const float *const restrict in, float *const restrict out, float *const restrict tempbuf, const size_t width, const size_t height, const int mult, const gboolean clip_negatives)
Definition bspline.h:362
static void expand_2D_Bspline(const float *const restrict in, float *const restrict out, const size_t width, const size_t height, const gboolean clip_negatives)
Definition bspline.h:247
#define BSPLINE_FSIZE
Definition bspline.h:30
static float normalize_laplacian(const float sigma)
Definition bspline.h:36
static void decompose_2D_Bspline(const float *const restrict in, float *const restrict HF, float *const restrict LF, const size_t width, const size_t height, const int mult, float *const tempbuf, size_t padded_size)
Definition bspline.h:392
static size_t decimated_bspline_size(const size_t size)
Definition bspline.h:75
static void _bspline_vertical_pass(const float *const restrict in, float *const restrict temp, size_t row, size_t width, size_t height, int mult, const gboolean clip_negatives)
Definition bspline.h:122
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
static const int row
Definition colorspaces_inline_conversions.h:175
typedef void((*dt_cache_allocate_t)(void *userdata, dt_cache_entry_t *entry))
#define DT_ALIGNED_ARRAY
Definition darktable.h:312
#define for_each_channel(_var,...)
Definition darktable.h:582
static void copy_pixel_nontemporal(float *const __restrict__ out, const float *const __restrict__ in)
Definition darktable.h:597
static int dt_get_thread_num()
Definition darktable.h:269
#define dt_get_perthread(buf, padsize)
Definition darktable.h:967
#define for_four_channels(_var,...)
Definition darktable.h:584
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:93
static void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
Definition eaw.c:33
static float sqf(const float x)
Definition math.h:223
size_t size
Definition mipmap_cache.c:3
#define MIN(a, b)
Definition thinplate.c:32
#define MAX(a, b)
Definition thinplate.c:29