Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
bspline.h
Go to the documentation of this file.
1/*
2 This file is part of the Ansel project.
3 Copyright (C) 2021-2023, 2025 Aurélien PIERRE.
4 Copyright (C) 2021 Pascal Obry.
5 Copyright (C) 2021 Ralf Brown.
6 Copyright (C) 2022 Martin Bařinka.
7 Copyright (C) 2023 Luca Zulberti.
8
9 Ansel is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13
14 Ansel is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with Ansel. If not, see <http://www.gnu.org/licenses/>.
21*/
22#pragma once
23
24#include "common/darktable.h"
25#include "common/dwt.h"
27#include "math.h"
28
29// B spline filter
30#define BSPLINE_FSIZE 5
31
32// The B spline best approximate a Gaussian of standard deviation :
33// see https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/
34#define B_SPLINE_SIGMA 1.0553651328015339f
35
36static inline float normalize_laplacian(const float sigma)
37{
38 // Normalize the wavelet scale to approximate a laplacian
39 // see https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Scaling-coefficient
40 return 2.f / sqf(sigma);
41}
42
43// Normalization scaling of the wavelet to approximate a laplacian
44// from the function above for sigma = B_SPLINE_SIGMA as a constant
45#define B_SPLINE_TO_LAPLACIAN 3.182727439285017f
46#define B_SPLINE_TO_LAPLACIAN_2 10.129753952777762f // square
47
48static inline float equivalent_sigma_at_step(const float sigma, const unsigned int s)
49{
50 // If we stack several gaussian blurs of standard deviation sigma on top of each other,
51 // this is the equivalent standard deviation we get at the end (after s steps)
52 // First step is s = 0
53 // see
54 // https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Multi-scale-iterative-scheme
55 if(s == 0)
56 return sigma;
57 else
58 return sqrtf(sqf(equivalent_sigma_at_step(sigma, s - 1)) + sqf(exp2f((float)s) * sigma));
59}
60
61static inline unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
62{
63 // The inverse of the above : compute the number of scales needed to reach the desired equivalent sigma_final
64 // after sequential blurs of constant sigma_filter
65 unsigned int s = 0;
66 float radius = sigma_filter;
67 while(radius < sigma_final)
68 {
69 ++s;
70 radius = sqrtf(sqf(radius) + sqf((float)(1 << s) * sigma_filter));
71 }
72 return s + 1;
73}
74
75static inline size_t decimated_bspline_size(const size_t size)
76{
77 return (size - 1u) / 2u + 1u;
78}
79static inline void sparse_scalar_product(const dt_aligned_pixel_t buf, const size_t indices[BSPLINE_FSIZE],
80 dt_aligned_pixel_t result, const gboolean clip_negatives)
81{
82 // scalar product of 2 3x5 vectors stored as RGB planes and B-spline filter,
83 // e.g. RRRRR - GGGGG - BBBBB
84 static const float filter[BSPLINE_FSIZE] = { 1.0f / 16.0f,
85 4.0f / 16.0f,
86 6.0f / 16.0f,
87 4.0f / 16.0f,
88 1.0f / 16.0f };
89
90 if(clip_negatives)
91 {
92 for_each_channel(c, aligned(buf,indices,result))
93 {
94 result[c] = MAX(0.0f, filter[0] * buf[indices[0] + c] +
95 filter[1] * buf[indices[1] + c] +
96 filter[2] * buf[indices[2] + c] +
97 filter[3] * buf[indices[3] + c] +
98 filter[4] * buf[indices[4] + c]);
99 }
100 }
101 else
102 {
103 for_each_channel(c, aligned(buf,indices,result))
104 {
105 result[c] = filter[0] * buf[indices[0] + c] +
106 filter[1] * buf[indices[1] + c] +
107 filter[2] * buf[indices[2] + c] +
108 filter[3] * buf[indices[3] + c] +
109 filter[4] * buf[indices[4] + c];
110 }
111 }
112}
113static inline void _bspline_vertical_pass(const float *const restrict in, float *const restrict temp,
114 size_t row, size_t width, size_t height, int mult, const gboolean clip_negatives)
115{
116 size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];
117 // compute the index offsets of the pixels of interest; since the offsets are the same for the entire row,
118 // we only need to do this once and can then process the entire row
119 indices[0] = 4 * width * MAX((int)row - 2 * mult, 0);
120 indices[1] = 4 * width * MAX((int)row - mult, 0);
121 indices[2] = 4 * width * row;
122 indices[3] = 4 * width * MIN(row + mult, height-1);
123 indices[4] = 4 * width * MIN(row + 2 * mult, height-1);
124 for(size_t j = 0; j < width; j++)
125 {
126 // Compute the vertical blur of the current pixel and store it in the temp buffer for the row
127 sparse_scalar_product(in + j * 4, indices, temp + j * 4, clip_negatives);
128 }
129}
130
131__OMP_DECLARE_SIMD__(aligned(temp, out))
132static inline void _bspline_horizontal(const float *const restrict temp, float *const restrict out,
133 size_t col, size_t width, int mult, const gboolean clip_negatives)
134{
135 // Compute the array indices of the pixels of interest; since the offsets will change near the ends of
136 // the row, we need to recompute for each pixel
137 size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];
138 indices[0] = 4 * MAX((int)col - 2 * mult, 0);
139 indices[1] = 4 * MAX((int)col - mult, 0);
140 indices[2] = 4 * col;
141 indices[3] = 4 * MIN(col + mult, width-1);
142 indices[4] = 4 * MIN(col + 2 * mult, width-1);
143 // Compute the horizontal blur of the already vertically-blurred pixel and store the result at the proper
144 // row/column location in the output buffer
145 sparse_scalar_product(temp, indices, out, clip_negatives);
146}
147
148__OMP_DECLARE_SIMD__(aligned(temp, out))
149static inline void _bspline_horizontal_decimated(const float *const restrict temp, float *const restrict out,
150 const size_t col, const size_t width,
151 const gboolean clip_negatives)
152{
153 // The vertical pass has already been evaluated on the fine grid. We now only
154 // sample the horizontal convolution on the even columns kept by the decimated
155 // spline pyramid.
156 const size_t center = col * 2u;
157 size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];
158 indices[0] = 4 * MAX((int)center - 2, 0);
159 indices[1] = 4 * MAX((int)center - 1, 0);
160 indices[2] = 4 * center;
161 indices[3] = 4 * MIN(center + 1, width - 1);
162 indices[4] = 4 * MIN(center + 2, width - 1);
163 sparse_scalar_product(temp, indices, out, clip_negatives);
164}
165inline static void reduce_2D_Bspline(const float *const restrict in, float *const restrict out,
166 const size_t width, const size_t height,
167 float *const restrict tempbuf, const size_t padded_size,
168 const gboolean clip_negatives)
169{
170 const size_t coarse_width = decimated_bspline_size(width);
171 const size_t coarse_height = decimated_bspline_size(height);
172 const gboolean use_replicated_boundary = (coarse_width > 2u && coarse_height > 2u);
173 static const float filter[BSPLINE_FSIZE] = { 1.0f / 16.0f,
174 4.0f / 16.0f,
175 6.0f / 16.0f,
176 4.0f / 16.0f,
177 1.0f / 16.0f };
178 (void)tempbuf;
179 (void)padded_size;
181 for(size_t row = 0; row < coarse_height; ++row)
182 {
183 for(size_t col = 0; col < coarse_width; ++col)
184 {
185 dt_aligned_pixel_t accum = { 0.f };
186 const size_t sample_row = use_replicated_boundary ? CLAMP((int)row, 1, (int)coarse_height - 2) : row;
187 const size_t sample_col = use_replicated_boundary ? CLAMP((int)col, 1, (int)coarse_width - 2) : col;
188 const size_t center_row = sample_row * 2u;
189 const size_t center_col = sample_col * 2u;
190
191 // Evaluate the decimated 5x5 cardinal B-spline on the current grid. This
192 // is the Gaussian-pyramid reduce stage used to build the hybrid decimated
193 // wavelet stack.
194 for(int jj = -2; jj <= 2; ++jj)
195 {
196 const size_t yy = CLAMP((int)center_row + jj, 0, (int)height - 1);
197 for(int ii = -2; ii <= 2; ++ii)
198 {
199 const size_t xx = CLAMP((int)center_col + ii, 0, (int)width - 1);
200 const float weight = filter[ii + 2] * filter[jj + 2];
201 const size_t index = 4 * (yy * width + xx);
203 accum[c] += weight * in[index + c];
204 }
205 }
206
207 const size_t out_index = 4 * (row * coarse_width + col);
208 if(clip_negatives)
209 {
211 out[out_index + c] = MAX(accum[c], 0.f);
212 }
213 else
214 {
215 copy_pixel_nontemporal(out + out_index, accum);
216 }
217 }
218 }
219}
220inline static void expand_2D_Bspline(const float *const restrict in, float *const restrict out,
221 const size_t width, const size_t height,
222 const gboolean clip_negatives)
223{
224 const size_t coarse_width = decimated_bspline_size(width);
225 const size_t coarse_height = decimated_bspline_size(height);
226 const gboolean use_replicated_boundary = (width > 2u && height > 2u && coarse_width > 1u && coarse_height > 1u);
227 static const float filter[BSPLINE_FSIZE] = { 1.0f / 16.0f,
228 4.0f / 16.0f,
229 6.0f / 16.0f,
230 4.0f / 16.0f,
231 1.0f / 16.0f };
232 __OMP_PARALLEL_FOR__(collapse(2))
233 for(size_t row = 0; row < height; ++row)
234 for(size_t col = 0; col < width; ++col)
235 {
236 size_t sample_row = row;
237 size_t sample_col = col;
238 if(use_replicated_boundary)
239 {
240 const size_t max_row = (height & 1u) ? height - 2u : height - 3u;
241 const size_t max_col = (width & 1u) ? width - 2u : width - 3u;
242 sample_row = CLAMP((int)row, 1, (int)max_row);
243 sample_col = CLAMP((int)col, 1, (int)max_col);
244 }
245
246 const size_t center_row = sample_row >> 1;
247 const size_t center_col = sample_col >> 1;
248 dt_aligned_pixel_t accum = { 0.f };
249
250 // Rebuild the fine sample with the same parity-dependent Gaussian expand
251 // used by the local-laplacian pyramid, but on 4-channel spline data.
252 switch((sample_col & 1u) + 2u * (sample_row & 1u))
253 {
254 case 0:
255 {
256 for(int jj = -1; jj <= 1; ++jj)
257 for(int ii = -1; ii <= 1; ++ii)
258 {
259 const size_t yy = center_row + jj;
260 const size_t xx = center_col + ii;
261 const float weight = 4.f * filter[2 * (jj + 1)] * filter[2 * (ii + 1)];
262 const size_t index = 4 * (yy * coarse_width + xx);
264 accum[c] += weight * in[index + c];
265 }
266 break;
267 }
268 case 1:
269 {
270 for(int jj = -1; jj <= 1; ++jj)
271 for(int ii = 0; ii <= 1; ++ii)
272 {
273 const size_t yy = center_row + jj;
274 const size_t xx = center_col + ii;
275 const float weight = 4.f * filter[2 * (jj + 1)] * filter[2 * ii + 1];
276 const size_t index = 4 * (yy * coarse_width + xx);
278 accum[c] += weight * in[index + c];
279 }
280 break;
281 }
282 case 2:
283 {
284 for(int jj = 0; jj <= 1; ++jj)
285 for(int ii = -1; ii <= 1; ++ii)
286 {
287 const size_t yy = center_row + jj;
288 const size_t xx = center_col + ii;
289 const float weight = 4.f * filter[2 * jj + 1] * filter[2 * (ii + 1)];
290 const size_t index = 4 * (yy * coarse_width + xx);
292 accum[c] += weight * in[index + c];
293 }
294 break;
295 }
296 default:
297 {
298 for(int jj = 0; jj <= 1; ++jj)
299 for(int ii = 0; ii <= 1; ++ii)
300 {
301 const size_t yy = center_row + jj;
302 const size_t xx = center_col + ii;
303 const float weight = 4.f * filter[2 * jj + 1] * filter[2 * ii + 1];
304 const size_t index = 4 * (yy * coarse_width + xx);
306 accum[c] += weight * in[index + c];
307 }
308 break;
309 }
310 }
311
312 const size_t out_index = 4 * (row * width + col);
313 if(clip_negatives)
314 {
316 out[out_index + c] = MAX(accum[c], 0.f);
317 }
318 else
319 {
320 copy_pixel_nontemporal(out + out_index, accum);
321 }
322 }
323
324}
325inline static void blur_2D_Bspline(const float *const restrict in, float *const restrict out,
326 float *const restrict tempbuf,
327 const size_t width, const size_t height, const int mult, const gboolean clip_negatives)
328{
329 // À-trous B-spline interpolation/blur shifted by mult
331 for(size_t row = 0; row < height; row++)
332 {
333 // get a thread-private one-row temporary buffer
334 float *const temp = tempbuf + 4 * width * dt_get_thread_num();
335 // interleave the order in which we process the rows so that we minimize cache misses
336 const size_t i = dwt_interleave_rows(row, height, mult);
337 // Convolve B-spline filter over columns: for each pixel in the current row, compute vertical blur
338 _bspline_vertical_pass(in, temp, i, width, height, mult, clip_negatives);
339 // Convolve B-spline filter horizontally over current row
340 for(size_t j = 0; j < width; j++)
341 {
342 _bspline_horizontal(temp, out + (i * width + j) * 4, j, width, mult, clip_negatives);
343 }
344 }
345
346}
347inline static void decompose_2D_Bspline(const float *const restrict in,
348 float *const restrict HF,
349 float *const restrict LF,
350 const size_t width, const size_t height, const int mult,
351 float *const tempbuf, size_t padded_size)
352{
353 // Blur and compute the wavelet at once
355 for(size_t row = 0; row < height; row++)
356 {
357 // get a thread-private one-row temporary buffer
358 float *restrict DT_ALIGNED_ARRAY const temp = dt_get_perthread(tempbuf, padded_size);
359 // interleave the order in which we process the rows so that we minimize cache misses
360 const size_t i = dwt_interleave_rows(row, height, mult);
361 // Convolve B-spline filter over columns: for each pixel in the current row, compute vertical blur
362 _bspline_vertical_pass(in, temp, i, width, height, mult, TRUE); // always clip negatives
363 // Convolve B-spline filter horizontally over current row
364 for(size_t j = 0; j < width; j++)
365 {
366 const size_t index = 4U * (i * width + j);
367 _bspline_horizontal(temp, LF + index, j, width, mult, TRUE); // always clip negatives
368 // compute the HF component by subtracting the LF from the original input
370 HF[index + c] = in[index + c] - LF[index + c];
371 }
372 }
373
374}
375
376// clang-format off
377// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
378// vim: shiftwidth=2 expandtab tabstop=2 cindent
379// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
380// clang-format on
#define TRUE
Definition ashift_lsd.c:162
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
static unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
Definition bspline.h:61
static void _bspline_horizontal_decimated(const float *const restrict temp, float *const restrict out, const size_t col, const size_t width, const gboolean clip_negatives)
Definition bspline.h:149
static void reduce_2D_Bspline(const float *const restrict in, float *const restrict out, const size_t width, const size_t height, float *const restrict tempbuf, const size_t padded_size, const gboolean clip_negatives)
Definition bspline.h:165
static void _bspline_horizontal(const float *const restrict temp, float *const restrict out, size_t col, size_t width, int mult, const gboolean clip_negatives)
Definition bspline.h:132
static void sparse_scalar_product(const dt_aligned_pixel_t buf, const size_t indices[5], dt_aligned_pixel_t result, const gboolean clip_negatives)
Definition bspline.h:79
static float equivalent_sigma_at_step(const float sigma, const unsigned int s)
Definition bspline.h:48
static void blur_2D_Bspline(const float *const restrict in, float *const restrict out, float *const restrict tempbuf, const size_t width, const size_t height, const int mult, const gboolean clip_negatives)
Definition bspline.h:325
static void expand_2D_Bspline(const float *const restrict in, float *const restrict out, const size_t width, const size_t height, const gboolean clip_negatives)
Definition bspline.h:220
#define BSPLINE_FSIZE
Definition bspline.h:30
static float normalize_laplacian(const float sigma)
Definition bspline.h:36
static void decompose_2D_Bspline(const float *const restrict in, float *const restrict HF, float *const restrict LF, const size_t width, const size_t height, const int mult, float *const tempbuf, size_t padded_size)
Definition bspline.h:347
static size_t decimated_bspline_size(const size_t size)
Definition bspline.h:75
static void _bspline_vertical_pass(const float *const restrict in, float *const restrict temp, size_t row, size_t width, size_t height, int mult, const gboolean clip_negatives)
Definition bspline.h:113
const float i
Definition colorspaces_inline_conversions.h:440
const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:42
static const int row
Definition colorspaces_inline_conversions.h:35
typedef void((*dt_cache_allocate_t)(void *userdata, dt_cache_entry_t *entry))
#define DT_ALIGNED_ARRAY
Definition darktable.h:388
#define for_each_channel(_var,...)
Definition darktable.h:662
static void copy_pixel_nontemporal(float *const __restrict__ out, const float *const __restrict__ in)
Definition darktable.h:677
static int dt_get_thread_num()
Definition darktable.h:291
#define __OMP_DECLARE_SIMD__(...)
Definition darktable.h:263
#define dt_get_perthread(buf, padsize)
Definition darktable.h:1034
#define for_four_channels(_var,...)
Definition darktable.h:664
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:93
static void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
Definition eaw.c:30
size_t size
Definition mipmap_cache.c:3
float dt_aligned_pixel_t[4]
Definition noiseprofile.c:28
const float sigma
Definition src/develop/noise_generator.h:71
#define MIN(a, b)
Definition thinplate.c:32
#define MAX(a, b)
Definition thinplate.c:29