Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
eaw.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2020-2021 Hubert Kowalski.
4 Copyright (C) 2020-2021 Ralf Brown.
5 Copyright (C) 2021 parafin.
6 Copyright (C) 2021 Pascal Obry.
7 Copyright (C) 2022 Martin Baƙinka.
8
9 darktable is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13
14 darktable is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with darktable. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "common/eaw.h"
24#include "common/darktable.h"
25#include "common/math.h"
26#include "control/control.h" // needed by dwt.h
27#include "common/dwt.h" // for dwt_interleave_rows
28#include <math.h>
29
30static inline void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
31{
32 dt_aligned_pixel_t square;
33 for_each_channel(c) square[c] = c1[c] - c2[c];
34 for_each_channel(c) square[c] = square[c] * square[c];
35
36 const float wl = dt_fast_expf(-sharpen * square[0]);
37 const float wc = dt_fast_expf(-sharpen * (square[1] + square[2]));
38
39 weight[0] = wl;
40 weight[1] = wc;
41 weight[2] = wc;
42 weight[3] = 1.0f;
43}
44
45
46#define SUM_PIXEL_CONTRIBUTION(ii, jj) \
47 do \
48 { \
49 const float f = filter[(ii)] * filter[(jj)]; \
50 dt_aligned_pixel_t wp; \
51 weight(px, px2, sharpen, wp); \
52 dt_aligned_pixel_t w; \
53 dt_aligned_pixel_t pd; \
54 for_four_channels(c,aligned(px2)) \
55 { \
56 w[c] = f * wp[c]; \
57 wgt[c] += w[c]; \
58 pd[c] = w[c] * px2[c]; \
59 sum[c] += pd[c]; \
60 } \
61 } while(0)
62
63#define SUM_PIXEL_PROLOGUE \
64 dt_aligned_pixel_t sum = { 0.0f, 0.0f, 0.0f, 0.0f }; \
65 dt_aligned_pixel_t wgt = { 0.0f, 0.0f, 0.0f, 0.0f };
66
67#define SUM_PIXEL_EPILOGUE \
68 for_each_channel(c) \
69 { \
70 sum[c] /= wgt[c]; \
71 pcoarse[c] = sum[c]; \
72 const float det = (px[c] - sum[c]); \
73 pdetail[c] = det; \
74 } \
75 px += 4; \
76 pdetail += 4; \
77 pcoarse += 4;
78
79
80void eaw_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail,
81 const int scale, const float sharpen, const int32_t width, const int32_t height)
82{
83 const int mult = 1 << scale;
84 static const float filter[5] = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f };
85 const int boundary = 2 * mult;
87 for(int rowid = 0; rowid < height; rowid++)
88 {
89 const size_t j = dwt_interleave_rows(rowid, height, mult);
90 const float *px = ((float *)in) + (size_t)4 * j * width;
91 const float *px2;
92 float *pdetail = detail + (size_t)4 * j * width;
93 float *pcoarse = out + (size_t)4 * j * width;
94
95 // for the first and last 'boundary' rows, we have to perform boundary tests for the entire row;
96 // for the central bulk, we only need to use those slower versions on the leftmost and rightmost pixels
97 const int lbound = (j < boundary || j >= height - boundary) ? width-boundary : boundary;
98
99 /* The first "2*mult" pixels need a boundary check because we might try to access past the left edge,
100 * which requires nearest pixel interpolation */
101 int i;
102 for(i = 0; i < lbound; i++)
103 {
105 for(int jj = 0; jj < 5; jj++)
106 {
107 const int y = j + mult * (jj-2);
108 const int clamp_y = CLAMP(y,0,height-1);
109 for(int ii = 0; ii < 5; ii++)
110 {
111 int x = i + mult * ((ii)-2);
112 if(x < 0) x = 0; // we might be looking past the left edge
113 px2 = ((float *)in) + 4 * x + (size_t)4 * clamp_y * width;
115 }
116 }
118 }
119
120 /* For pixels [2*mult, width-2*mult], we don't need to do any boundary checks */
121 for( ; i < width - boundary; i++)
122 {
124 px2 = ((float *)in) + (size_t)4 * (i - 2 * mult + (size_t)(j - 2 * mult) * width);
125 for(int jj = 0; jj < 5; jj++)
126 {
127 for(int ii = 0; ii < 5; ii++)
128 {
130 px2 += (size_t)4 * mult;
131 }
132 px2 += (size_t)4 * (width - 5) * mult;
133 }
135 }
136
137 /* Last 2*mult pixels in the row require the boundary check again */
138 for( ; i < width; i++)
139 {
141 for(int jj = 0; jj < 5; jj++)
142 {
143 const int y = j + mult * (jj-2);
144 const int clamp_y = CLAMP(y,0,height-1);
145 for(int ii = 0; ii < 5; ii++)
146 {
147 int x = i + mult * ((ii)-2);
148 if(x >= width) x = width - 1; // we might be looking beyond the right edge
149 px2 = ((float *)in) + 4 * x + (size_t)4 * clamp_y * width;
151 }
152 }
154 }
155 }
156}
157
158void eaw_synthesize(float *const out, const float *const in, const float *const restrict detail,
159 const float *const restrict threshold, const float *const restrict boost,
160 const int32_t width, const int32_t height)
161{
163 for(size_t k = 0; k < (size_t)width * height; k++)
164 {
165 __OMP_SIMD__(simdlen(4) aligned(detail, in, out, threshold, boost))
166 for(size_t c = 0; c < 4; c++)
167 {
168 // decrease the absolute magnitude of the detail by the threshold; copysignf does not vectorize, but it
169 // turns out that just adding up two clamped alternatives gives exactly the same result and DOES vectorize
170 //const float absamt = fmaxf(0.0f, (fabsf(detail[k + c]) - threshold[c]));
171 //const float amount = copysignf(absamt, detail[k + c]);
172 const float amount = MAX(detail[4*k+c] - threshold[c], 0.0f) + MIN(detail[4*k+c] + threshold[c], 0.0f);
173 out[4*k + c] = in[4*k + c] + (boost[c] * amount);
174 }
175 }
176}
177
178// =====================================================================================
179// begin wavelet code from denoiseprofile.c
180// =====================================================================================
181
182static inline float dn_weight(const float *c1, const float *c2, const float inv_sigma2)
183{
184 // 3d distance based on color
187 {
188 const float diff = c1[c] - c2[c];
189 sqr[c] = diff * diff;
190 }
191 const float dot = (sqr[0] + sqr[1] + sqr[2]) * inv_sigma2;
192 const float var
193 = 0.02f; // FIXME: this should ideally depend on the image before noise stabilizing transforms!
194 const float off2 = 9.0f; // (3 sigma)^2
195 return fast_mexp2f(MAX(0, dot * var - off2));
196}
197
198typedef struct _aligned_pixel {
199 union {
201 };
203#ifdef _OPENMP
204static inline _aligned_pixel add_float4(_aligned_pixel acc, _aligned_pixel newval)
205{
206 for_four_channels(c) acc.v[c] += newval.v[c];
207 return acc;
208}
209#pragma omp declare reduction(vsum:_aligned_pixel:omp_out=add_float4(omp_out,omp_in)) \
210 initializer(omp_priv = { .v = { 0.0f, 0.0f, 0.0f, 0.0f } })
211#endif
212
213#undef SUM_PIXEL_CONTRIBUTION
214#define SUM_PIXEL_CONTRIBUTION(ii, jj) \
215 do \
216 { \
217 const float f = filter[(ii)] * filter[(jj)]; \
218 const float wp = dn_weight(px, px2, inv_sigma2); \
219 const float w = f * wp; \
220 dt_aligned_pixel_t pd; \
221 for_each_channel(c,aligned(px2)) \
222 { \
223 pd[c] = w * px2[c]; \
224 wgt[c] += w; \
225 sum[c] += pd[c]; \
226 } \
227 } while(0)
228
229#undef SUM_PIXEL_EPILOGUE
230#define SUM_PIXEL_EPILOGUE \
231 for_each_channel(c) \
232 { \
233 sum[c] /= wgt[c]; \
234 pcoarse[c] = sum[c]; \
235 const float det = (px[c] - sum[c]); \
236 pdetail[c] = det; \
237 sum_sq.v[c] += (det*det); \
238 } \
239 px += 4; \
240 pdetail += 4; \
241 pcoarse += 4;
242
243void eaw_dn_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail,
244 dt_aligned_pixel_t sum_squared, const int scale, const float inv_sigma2,
245 const int32_t width, const int32_t height)
246{
247 const int mult = 1u << scale;
248 static const float filter[5] = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f };
249 const int boundary = 2 * mult;
250
251 _aligned_pixel sum_sq = { .v = { 0.0f } };
252
253#if !(defined(__apple_build_version__) && __apple_build_version__ < 11030000) //makes Xcode 11.3.1 compiler crash
254__OMP_PARALLEL_FOR__(reduction(vsum: sum_sq) )
255#endif
256 for(int rowid = 0; rowid < height; rowid++)
257 {
258 const size_t j = dwt_interleave_rows(rowid, height, mult);
259 const float *px = ((float *)in) + (size_t)4 * j * width;
260 const float *px2;
261 float *pdetail = detail + (size_t)4 * j * width;
262 float *pcoarse = out + (size_t)4 * j * width;
263
264 // for the first and last 'boundary' rows, we have to perform boundary tests for the entire row;
265 // for the central bulk, we only need to use those slower versions on the leftmost and rightmost pixels
266 const int lbound = (j < boundary || j >= height - boundary) ? width-boundary : boundary;
267
268 /* The first "2*mult" pixels need a boundary check because we might try to access past the left edge,
269 * which requires nearest pixel interpolation */
270 int i;
271 for(i = 0; i < lbound; i++)
272 {
274 for(int jj = 0; jj < 5; jj++)
275 {
276 const int y = j + mult * (jj-2);
277 const int clamp_y = CLAMP(y,0,height-1);
278 for(int ii = 0; ii < 5; ii++)
279 {
280 int x = i + mult * ((ii)-2);
281 if(x < 0) x = 0; // we might be looking past the left edge
282 px2 = ((float *)in) + 4 * x + (size_t)4 * clamp_y * width;
284 }
285 }
287 }
288
289 /* For pixels [2*mult, width-2*mult], we don't need to do any boundary checks */
290 for( ; i < width - boundary; i++)
291 {
293 px2 = ((float *)in) + (size_t)4 * (i - 2 * mult + (size_t)(j - 2 * mult) * width);
294 for(int jj = 0; jj < 5; jj++)
295 {
296 for(int ii = 0; ii < 5; ii++)
297 {
299 px2 += (size_t)4 * mult;
300 }
301 px2 += (size_t)4 * (width - 5) * mult;
302 }
304 }
305
306 /* Last 2*mult pixels in the row require the boundary check again */
307 for( ; i < width; i++)
308 {
310 for(int jj = 0; jj < 5; jj++)
311 {
312 const int y = j + mult * (jj-2);
313 const int clamp_y = CLAMP(y,0,height-1);
314 for(int ii = 0; ii < 5; ii++)
315 {
316 int x = i + mult * ((ii)-2);
317 if(x >= width) x = width - 1; // we might be looking past the right edge
318 px2 = ((float *)in) + 4 * x + (size_t)4 * clamp_y * width;
320 }
321 }
323 }
324 }
326 sum_squared[c] = sum_sq.v[c];
327}
328
329#undef SUM_PIXEL_CONTRIBUTION
330#undef SUM_PIXEL_PROLOGUE
331#undef SUM_PIXEL_EPILOGUE
332
333// clang-format off
334// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
335// vim: shiftwidth=2 expandtab tabstop=2 cindent
336// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
337// clang-format on
338
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
const float threshold
const dt_colormatrix_t dt_aligned_pixel_t out
#define __OMP_SIMD__(...)
Definition darktable.h:262
#define for_each_channel(_var,...)
Definition darktable.h:662
#define for_four_channels(_var,...)
Definition darktable.h:664
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:93
#define SUM_PIXEL_CONTRIBUTION(ii, jj)
Definition eaw.c:46
#define SUM_PIXEL_PROLOGUE
Definition eaw.c:63
void eaw_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail, const int scale, const float sharpen, const int32_t width, const int32_t height)
Definition eaw.c:80
#define SUM_PIXEL_EPILOGUE
Definition eaw.c:67
static void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
Definition eaw.c:30
struct _aligned_pixel _aligned_pixel
static float dn_weight(const float *c1, const float *c2, const float inv_sigma2)
Definition eaw.c:182
void eaw_dn_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail, dt_aligned_pixel_t sum_squared, const int scale, const float inv_sigma2, const int32_t width, const int32_t height)
Definition eaw.c:243
void eaw_synthesize(float *const out, const float *const in, const float *const restrict detail, const float *const restrict threshold, const float *const restrict boost, const int32_t width, const int32_t height)
Definition eaw.c:158
static const float x
float *const restrict const size_t k
static float fast_mexp2f(const float x)
Definition math.h:304
float dt_aligned_pixel_t[4]
dt_aligned_pixel_t v
Definition eaw.c:200
#define MIN(a, b)
Definition thinplate.c:32
#define MAX(a, b)
Definition thinplate.c:29