/* Fragment of the weight computation: two exponential terms, each obtained by
 * passing a negated, sharpen-scaled value to dt_fast_expf(). wl depends on
 * square[0] only; wc depends on the sum of square[1] and square[2].
 * NOTE(review): how square[] is filled and how wl/wc are stored is outside
 * this excerpt. */
36 const float wl = dt_fast_expf(-sharpen * square[0]);
37 const float wc = dt_fast_expf(-sharpen * (square[1] + square[2]));
/* Helper macros for the decomposition loops (only part of each body is
 * visible in this excerpt).
 *  - SUM_PIXEL_CONTRIBUTION(ii, jj): forms the 2-D filter coefficient
 *    f = filter[ii] * filter[jj], calls weight() with the pixels px and px2
 *    to obtain wp, and computes pd[c] = w[c] * px2[c] per channel.
 *    NOTE(review): the lines that fill w[] and consume pd[] are not shown;
 *    presumably they accumulate into sum/wgt -- confirm against the full file.
 *  - SUM_PIXEL_PROLOGUE: declares the zero-initialised accumulators sum and wgt.
 *  - SUM_PIXEL_EPILOGUE: stores sum[c] into pcoarse[c] and forms the residual
 *    det = px[c] - sum[c]; where det is written is not visible here. */
46#define SUM_PIXEL_CONTRIBUTION(ii, jj) \
49 const float f = filter[(ii)] * filter[(jj)]; \
50 dt_aligned_pixel_t wp; \
51 weight(px, px2, sharpen, wp); \
52 dt_aligned_pixel_t w; \
53 dt_aligned_pixel_t pd; \
54 for_four_channels(c,aligned(px2)) \
58 pd[c] = w[c] * px2[c]; \
63#define SUM_PIXEL_PROLOGUE \
64 dt_aligned_pixel_t sum = { 0.0f, 0.0f, 0.0f, 0.0f }; \
65 dt_aligned_pixel_t wgt = { 0.0f, 0.0f, 0.0f, 0.0f };
67#define SUM_PIXEL_EPILOGUE \
71 pcoarse[c] = sum[c]; \
72 const float det = (px[c] - sum[c]); \
/* Excerpt of eaw_decompose(); several original lines are missing between the
 * ones shown, including all braces.
 * Parameters: out, in and detail are restrict-qualified float buffers that are
 * addressed with 4 floats per pixel; scale sets the tap spacing; sharpen is
 * the value the SUM_PIXEL_CONTRIBUTION macro hands to weight(); width and
 * height are used for row addressing and for clamping. */
80void eaw_decompose(
float *
const restrict
out,
const float *
const restrict in,
float *
const restrict detail,
81 const int scale,
const float sharpen,
const int32_t
width,
const int32_t
height)
/* Distance in pixels between neighbouring filter taps (see x and y below). */
83 const int mult = 1 << scale;
/* 5-tap kernel (1, 4, 6, 4, 1) / 16. */
84 static const float filter[5] = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f };
/* The outermost taps sit 2 * mult pixels away from the centre pixel. */
85 const int boundary = 2 * mult;
/* One iteration per image row. NOTE(review): the row index j used below is
 * defined on a line outside this excerpt -- presumably derived from rowid. */
87 for(
int rowid = 0; rowid <
height; rowid++)
/* Start of row j in the input. NOTE(review): the cast discards the const
 * qualifier of in although px itself is a pointer to const. */
90 const float *px = ((
float *)in) + (size_t)4 * j *
width;
/* Start of row j in the detail and coarse output buffers. */
92 float *pdetail = detail + (size_t)4 * j *
width;
93 float *pcoarse =
out + (size_t)4 * j *
width;
/* For rows within boundary of the top or bottom edge the first column loop
 * runs up to width - boundary, so the pointer-walking loop further down
 * (which has the same upper limit) has nothing left to do. For all other
 * rows the first loop only covers the first boundary columns. */
97 const int lbound = (j < boundary || j >=
height - boundary) ?
width-boundary : boundary;
/* First column range: tap coordinates are computed per tap, with the row
 * coordinate clamped into the image. */
102 for(
i = 0;
i < lbound;
i++)
105 for(
int jj = 0; jj < 5; jj++)
/* Row of tap jj, then clamped into [0, height - 1]. */
107 const int y = j + mult * (jj-2);
108 const int clamp_y = CLAMP(y,0,
height-1);
109 for(
int ii = 0; ii < 5; ii++)
/* Column of tap ii. NOTE(review): any clamping of x happens on a line that is
 * not part of this excerpt -- confirm before relying on it. */
111 int x =
i + mult * ((ii)-2);
/* Address of the tap pixel inside the input buffer. */
113 px2 = ((
float *)in) + 4 *
x + (size_t)4 * clamp_y *
width;
/* Second column range, continuing from the current i up to width - boundary:
 * px2 is advanced by pointer increments instead of being recomputed. */
121 for( ;
i <
width - boundary;
i++)
/* First tap: 2 * mult pixels to the left of and above pixel (i, j). */
124 px2 = ((
float *)in) + (size_t)4 * (
i - 2 * mult + (
size_t)(j - 2 * mult) *
width);
125 for(
int jj = 0; jj < 5; jj++)
127 for(
int ii = 0; ii < 5; ii++)
/* Advance by one tap spacing (mult pixels) along the row. */
130 px2 += (size_t)4 * mult;
/* Assuming this runs once per jj after the five steps above, the total advance
 * is 5 * mult + (width - 5) * mult = width * mult pixels, i.e. mult rows down
 * at the starting column. */
132 px2 += (size_t)4 * (
width - 5) * mult;
/* Per-tap addressing again, same pattern as the first column range.
 * NOTE(review): the enclosing column loop header is not part of this excerpt
 * -- presumably it covers the remaining columns of the row. */
141 for(
int jj = 0; jj < 5; jj++)
143 const int y = j + mult * (jj-2);
144 const int clamp_y = CLAMP(y,0,
height-1);
145 for(
int ii = 0; ii < 5; ii++)
147 int x =
i + mult * ((ii)-2);
149 px2 = ((
float *)in) + 4 *
x + (size_t)4 * clamp_y *
width;
/* Excerpt of the synthesis routine: only the threshold and boost parameters
 * and one statement of the per-channel loop are visible. */
159 const float *
const restrict
threshold,
const float *
const restrict boost,
/* Iterate over the 4 channels of one pixel. */
166 for(
size_t c = 0; c < 4; c++)
/* Output for pixel k, channel c: the input value plus boost[c] * amount.
 * NOTE(review): k and amount are computed on lines not shown here, and the
 * use of threshold is not visible -- confirm against the full file. */
173 out[4*
k + c] = in[4*
k + c] + (boost[c] * amount);
/* Excerpt of dn_weight(): derives a float from two pixels c1 and c2 and the
 * factor inv_sigma2. Visible steps: per-channel squared difference, then the
 * sum over channels 0..2 multiplied by inv_sigma2. The return expression is
 * not part of this excerpt. */
182static inline float dn_weight(
const float *
c1,
const float *
c2,
const float inv_sigma2)
/* Squared difference for channel c (the loop over c is not shown). */
188 const float diff =
c1[c] -
c2[c];
189 sqr[c] = diff * diff;
/* Only sqr[0], sqr[1] and sqr[2] enter the scaled distance. */
191 const float dot = (sqr[0] + sqr[1] + sqr[2]) * inv_sigma2;
/* NOTE(review): off2 is consumed on lines outside this excerpt; its role
 * cannot be determined from here. */
194 const float off2 = 9.0f;
/* OpenMP user-defined reduction "vsum" for the type _aligned_pixel: partial
 * results are combined with add_float4() and every private copy starts out
 * with .v = { 0, 0, 0, 0 }. */
209#pragma omp declare reduction(vsum:_aligned_pixel:omp_out=add_float4(omp_out,omp_in)) \
210 initializer(omp_priv = { .v = { 0.0f, 0.0f, 0.0f, 0.0f } })
/* Redefinition of the accumulation macros (excerpt; lines are missing inside
 * each body).
 *  - SUM_PIXEL_CONTRIBUTION(ii, jj): the weight is a single scalar here,
 *    w = filter[ii] * filter[jj] * dn_weight(px, px2, inv_sigma2), and it is
 *    applied to each channel as pd[c] = w * px2[c].
 *  - SUM_PIXEL_EPILOGUE: per channel, stores sum[c] in pcoarse[c], forms the
 *    residual det = px[c] - sum[c] and adds det * det to sum_sq.v[c].
 * NOTE(review): what happens to pd[] and det beyond these lines is not visible. */
213#undef SUM_PIXEL_CONTRIBUTION
214#define SUM_PIXEL_CONTRIBUTION(ii, jj) \
217 const float f = filter[(ii)] * filter[(jj)]; \
218 const float wp = dn_weight(px, px2, inv_sigma2); \
219 const float w = f * wp; \
220 dt_aligned_pixel_t pd; \
221 for_each_channel(c,aligned(px2)) \
223 pd[c] = w * px2[c]; \
229#undef SUM_PIXEL_EPILOGUE
230#define SUM_PIXEL_EPILOGUE \
231 for_each_channel(c) \
234 pcoarse[c] = sum[c]; \
235 const float det = (px[c] - sum[c]); \
237 sum_sq.v[c] += (det*det); \
/* Excerpt of eaw_dn_decompose(); the remaining parameters, all braces and
 * several body lines are missing from this view. out, in and detail are
 * restrict-qualified float buffers addressed with 4 floats per pixel. */
243void eaw_dn_decompose(
float *
const restrict
out,
const float *
const restrict in,
float *
const restrict detail,
/* Distance in pixels between neighbouring filter taps. NOTE(review): the
 * shift is done on an unsigned literal and the result is converted to int. */
247 const int mult = 1u << scale;
/* 5-tap kernel (1, 4, 6, 4, 1) / 16. */
248 static const float filter[5] = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f };
/* The outermost taps sit 2 * mult pixels away from the centre pixel. */
249 const int boundary = 2 * mult;
/* Guard that is false only when __apple_build_version__ is defined and below
 * 11030000. NOTE(review): the guarded lines are not part of this excerpt. */
253#if !(defined(__apple_build_version__) && __apple_build_version__ < 11030000)
/* One iteration per image row. NOTE(review): the row index j used below is
 * defined on a line outside this excerpt -- presumably derived from rowid. */
256 for(
int rowid = 0; rowid <
height; rowid++)
/* Start of row j in the input. NOTE(review): the cast discards the const
 * qualifier of in although px itself is a pointer to const. */
259 const float *px = ((
float *)in) + (size_t)4 * j *
width;
/* Start of row j in the detail and coarse output buffers. */
261 float *pdetail = detail + (size_t)4 * j *
width;
262 float *pcoarse =
out + (size_t)4 * j *
width;
/* For rows within boundary of the top or bottom edge the first column loop
 * runs up to width - boundary, so the pointer-walking loop further down
 * (which has the same upper limit) has nothing left to do. For all other
 * rows the first loop only covers the first boundary columns. */
266 const int lbound = (j < boundary || j >=
height - boundary) ?
width-boundary : boundary;
/* First column range: tap coordinates are computed per tap, with the row
 * coordinate clamped into the image. */
271 for(
i = 0;
i < lbound;
i++)
274 for(
int jj = 0; jj < 5; jj++)
/* Row of tap jj, then clamped into [0, height - 1]. */
276 const int y = j + mult * (jj-2);
277 const int clamp_y = CLAMP(y,0,
height-1);
278 for(
int ii = 0; ii < 5; ii++)
/* Column of tap ii. NOTE(review): any clamping of x happens on a line that is
 * not part of this excerpt -- confirm before relying on it. */
280 int x =
i + mult * ((ii)-2);
/* Address of the tap pixel inside the input buffer. */
282 px2 = ((
float *)in) + 4 *
x + (size_t)4 * clamp_y *
width;
/* Second column range, continuing from the current i up to width - boundary:
 * px2 is advanced by pointer increments instead of being recomputed. */
290 for( ;
i <
width - boundary;
i++)
/* First tap: 2 * mult pixels to the left of and above pixel (i, j). */
293 px2 = ((
float *)in) + (size_t)4 * (
i - 2 * mult + (
size_t)(j - 2 * mult) *
width);
294 for(
int jj = 0; jj < 5; jj++)
296 for(
int ii = 0; ii < 5; ii++)
/* Advance by one tap spacing (mult pixels) along the row. */
299 px2 += (size_t)4 * mult;
/* Assuming this runs once per jj after the five steps above, the total advance
 * is 5 * mult + (width - 5) * mult = width * mult pixels, i.e. mult rows down
 * at the starting column. */
301 px2 += (size_t)4 * (
width - 5) * mult;
/* Per-tap addressing again, same pattern as the first column range.
 * NOTE(review): the enclosing column loop header is not part of this excerpt
 * -- presumably it covers the remaining columns of the row. */
310 for(
int jj = 0; jj < 5; jj++)
312 const int y = j + mult * (jj-2);
313 const int clamp_y = CLAMP(y,0,
height-1);
314 for(
int ii = 0; ii < 5; ii++)
316 int x =
i + mult * ((ii)-2);
318 px2 = ((
float *)in) + 4 *
x + (size_t)4 * clamp_y *
width;
/* Copy channel c of the sum_sq accumulator into sum_squared (the loop over c
 * is not shown in this excerpt). */
326 sum_squared[c] = sum_sq.
v[c];
/* Remove the three helper macros so their definitions do not remain visible
 * to code after this point. */
329#undef SUM_PIXEL_CONTRIBUTION
330#undef SUM_PIXEL_PROLOGUE
331#undef SUM_PIXEL_EPILOGUE
const dt_colormatrix_t dt_aligned_pixel_t out
#define __OMP_SIMD__(...)
#define for_each_channel(_var,...)
#define for_four_channels(_var,...)
#define __OMP_PARALLEL_FOR__(...)
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
#define SUM_PIXEL_CONTRIBUTION(ii, jj)
#define SUM_PIXEL_PROLOGUE
void eaw_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail, const int scale, const float sharpen, const int32_t width, const int32_t height)
#define SUM_PIXEL_EPILOGUE
static void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
struct _aligned_pixel _aligned_pixel
static float dn_weight(const float *c1, const float *c2, const float inv_sigma2)
void eaw_dn_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail, dt_aligned_pixel_t sum_squared, const int scale, const float inv_sigma2, const int32_t width, const int32_t height)
void eaw_synthesize(float *const out, const float *const in, const float *const restrict detail, const float *const restrict threshold, const float *const restrict boost, const int32_t width, const int32_t height)
float *const restrict const size_t k
static float fast_mexp2f(const float x)
float dt_aligned_pixel_t[4]