Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
bspline.h
Go to the documentation of this file.
1#pragma once
2
3#include "common/darktable.h"
4#include "common/dwt.h"
6#include "math.h"
7
8// B spline filter
9#define BSPLINE_FSIZE 5
10
11// The B spline best approximate a Gaussian of standard deviation :
12// see https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/
13#define B_SPLINE_SIGMA 1.0553651328015339f
14
15static inline float normalize_laplacian(const float sigma)
16{
17 // Normalize the wavelet scale to approximate a laplacian
18 // see https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Scaling-coefficient
19 return 2.f / sqf(sigma);
20}
21
22// Normalization scaling of the wavelet to approximate a laplacian
23// from the function above for sigma = B_SPLINE_SIGMA as a constant
24#define B_SPLINE_TO_LAPLACIAN 3.182727439285017f
25#define B_SPLINE_TO_LAPLACIAN_2 10.129753952777762f // square
26
27static inline float equivalent_sigma_at_step(const float sigma, const unsigned int s)
28{
29 // If we stack several gaussian blurs of standard deviation sigma on top of each other,
30 // this is the equivalent standard deviation we get at the end (after s steps)
31 // First step is s = 0
32 // see
33 // https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Multi-scale-iterative-scheme
34 if(s == 0)
35 return sigma;
36 else
37 return sqrtf(sqf(equivalent_sigma_at_step(sigma, s - 1)) + sqf(exp2f((float)s) * sigma));
38}
39
40static inline unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
41{
42 // The inverse of the above : compute the number of scales needed to reach the desired equivalent sigma_final
43 // after sequential blurs of constant sigma_filter
44 unsigned int s = 0;
45 float radius = sigma_filter;
46 while(radius < sigma_final)
47 {
48 ++s;
49 radius = sqrtf(sqf(radius) + sqf((float)(1 << s) * sigma_filter));
50 }
51 return s + 1;
52}
53
54
55#ifdef _OPENMP
56#pragma omp declare simd aligned(buf, indices, result:64)
57#endif
58static inline void sparse_scalar_product(const dt_aligned_pixel_t buf, const size_t indices[BSPLINE_FSIZE],
59 dt_aligned_pixel_t result, const gboolean clip_negatives)
60{
61 // scalar product of 2 3x5 vectors stored as RGB planes and B-spline filter,
62 // e.g. RRRRR - GGGGG - BBBBB
63 static const float filter[BSPLINE_FSIZE] = { 1.0f / 16.0f,
64 4.0f / 16.0f,
65 6.0f / 16.0f,
66 4.0f / 16.0f,
67 1.0f / 16.0f };
68
69 if(clip_negatives)
70 {
71 for_each_channel(c, aligned(buf,indices,result))
72 {
73 result[c] = MAX(0.0f, filter[0] * buf[indices[0] + c] +
74 filter[1] * buf[indices[1] + c] +
75 filter[2] * buf[indices[2] + c] +
76 filter[3] * buf[indices[3] + c] +
77 filter[4] * buf[indices[4] + c]);
78 }
79 }
80 else
81 {
82 for_each_channel(c, aligned(buf,indices,result))
83 {
84 result[c] = filter[0] * buf[indices[0] + c] +
85 filter[1] * buf[indices[1] + c] +
86 filter[2] * buf[indices[2] + c] +
87 filter[3] * buf[indices[3] + c] +
88 filter[4] * buf[indices[4] + c];
89 }
90 }
91}
92
93#ifdef _OPENMP
94#pragma omp declare simd aligned(in, temp)
95#endif
96static inline void _bspline_vertical_pass(const float *const restrict in, float *const restrict temp,
97 size_t row, size_t width, size_t height, int mult, const gboolean clip_negatives)
98{
99 size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];
100 // compute the index offsets of the pixels of interest; since the offsets are the same for the entire row,
101 // we only need to do this once and can then process the entire row
102 indices[0] = 4 * width * MAX((int)row - 2 * mult, 0);
103 indices[1] = 4 * width * MAX((int)row - mult, 0);
104 indices[2] = 4 * width * row;
105 indices[3] = 4 * width * MIN(row + mult, height-1);
106 indices[4] = 4 * width * MIN(row + 2 * mult, height-1);
107 for(size_t j = 0; j < width; j++)
108 {
109 // Compute the vertical blur of the current pixel and store it in the temp buffer for the row
110 sparse_scalar_product(in + j * 4, indices, temp + j * 4, clip_negatives);
111 }
112}
113
114#ifdef _OPENMP
115#pragma omp declare simd aligned(temp, out)
116#endif
117static inline void _bspline_horizontal(const float *const restrict temp, float *const restrict out,
118 size_t col, size_t width, int mult, const gboolean clip_negatives)
119{
120 // Compute the array indices of the pixels of interest; since the offsets will change near the ends of
121 // the row, we need to recompute for each pixel
122 size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];
123 indices[0] = 4 * MAX((int)col - 2 * mult, 0);
124 indices[1] = 4 * MAX((int)col - mult, 0);
125 indices[2] = 4 * col;
126 indices[3] = 4 * MIN(col + mult, width-1);
127 indices[4] = 4 * MIN(col + 2 * mult, width-1);
128 // Compute the horizontal blur of the already vertically-blurred pixel and store the result at the proper
129 // row/column location in the output buffer
130 sparse_scalar_product(temp, indices, out, clip_negatives);
131}
132
133#ifdef _OPENMP
134#pragma omp declare simd aligned(in, out:64) aligned(tempbuf:16)
135#endif
136inline static void blur_2D_Bspline(const float *const restrict in, float *const restrict out,
137 float *const restrict tempbuf,
138 const size_t width, const size_t height, const int mult, const gboolean clip_negatives)
139{
140 // À-trous B-spline interpolation/blur shifted by mult
141 #ifdef _OPENMP
142 #pragma omp parallel for default(none) \
143 dt_omp_firstprivate(width, height, mult) \
144 dt_omp_sharedconst(out, in, tempbuf, clip_negatives) \
145 schedule(static)
146 #endif
147 for(size_t row = 0; row < height; row++)
148 {
149 // get a thread-private one-row temporary buffer
150 float *const temp = tempbuf + 4 * width * dt_get_thread_num();
151 // interleave the order in which we process the rows so that we minimize cache misses
152 const size_t i = dwt_interleave_rows(row, height, mult);
153 // Convolve B-spline filter over columns: for each pixel in the current row, compute vertical blur
154 _bspline_vertical_pass(in, temp, i, width, height, mult, clip_negatives);
155 // Convolve B-spline filter horizontally over current row
156 for(size_t j = 0; j < width; j++)
157 {
158 _bspline_horizontal(temp, out + (i * width + j) * 4, j, width, mult, clip_negatives);
159 }
160 }
161}
162
163#ifdef _OPENMP
164#pragma omp declare simd aligned(in, HF, LF:64) aligned(tempbuf:16)
165#endif
166inline static void decompose_2D_Bspline(const float *const restrict in,
167 float *const restrict HF,
168 float *const restrict LF,
169 const size_t width, const size_t height, const int mult,
170 float *const tempbuf, size_t padded_size)
171{
172 // Blur and compute the wavelet at once
173#ifdef _OPENMP
174#pragma omp parallel for default(none) \
175 dt_omp_firstprivate(width, height, mult, padded_size) \
176 dt_omp_sharedconst(in, HF, LF, tempbuf) \
177 schedule(static)
178#endif
179 for(size_t row = 0; row < height; row++)
180 {
181 // get a thread-private one-row temporary buffer
182 float *restrict DT_ALIGNED_ARRAY const temp = dt_get_perthread(tempbuf, padded_size);
183 // interleave the order in which we process the rows so that we minimize cache misses
184 const size_t i = dwt_interleave_rows(row, height, mult);
185 // Convolve B-spline filter over columns: for each pixel in the current row, compute vertical blur
186 _bspline_vertical_pass(in, temp, i, width, height, mult, TRUE); // always clip negatives
187 // Convolve B-spline filter horizontally over current row
188 for(size_t j = 0; j < width; j++)
189 {
190 const size_t index = 4U * (i * width + j);
191 _bspline_horizontal(temp, LF + index, j, width, mult, TRUE); // always clip negatives
192 // compute the HF component by subtracting the LF from the original input
194 HF[index + c] = in[index + c] - LF[index + c];
195 }
196 }
197}
198
199// clang-format off
200// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
201// vim: shiftwidth=2 expandtab tabstop=2 cindent
202// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
203// clang-format on
#define TRUE
Definition ashift_lsd.c:151
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
static unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
Definition bspline.h:40
static void _bspline_horizontal(const float *const restrict temp, float *const restrict out, size_t col, size_t width, int mult, const gboolean clip_negatives)
Definition bspline.h:117
static void sparse_scalar_product(const dt_aligned_pixel_t buf, const size_t indices[5], dt_aligned_pixel_t result, const gboolean clip_negatives)
Definition bspline.h:58
static float equivalent_sigma_at_step(const float sigma, const unsigned int s)
Definition bspline.h:27
static void blur_2D_Bspline(const float *const restrict in, float *const restrict out, float *const restrict tempbuf, const size_t width, const size_t height, const int mult, const gboolean clip_negatives)
Definition bspline.h:136
#define BSPLINE_FSIZE
Definition bspline.h:9
static float normalize_laplacian(const float sigma)
Definition bspline.h:15
static void decompose_2D_Bspline(const float *const restrict in, float *const restrict HF, float *const restrict LF, const size_t width, const size_t height, const int mult, float *const tempbuf, size_t padded_size)
Definition bspline.h:166
static void _bspline_vertical_pass(const float *const restrict in, float *const restrict temp, size_t row, size_t width, size_t height, int mult, const gboolean clip_negatives)
Definition bspline.h:96
#define DT_ALIGNED_ARRAY
Definition darktable.h:270
#define for_each_channel(_var,...)
Definition darktable.h:411
static int dt_get_thread_num()
Definition darktable.h:227
#define dt_get_perthread(buf, padsize)
Definition darktable.h:797
#define for_four_channels(_var,...)
Definition darktable.h:413
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:89
static float sqf(const float x)
Definition math.h:215
#define MIN(a, b)
Definition thinplate.c:23
#define MAX(a, b)
Definition thinplate.c:20