/* Lower bound applied to blended pixel values: 2^-16.
 * Used as the floor in fmaxf(..., MIN_FLOAT) so outputs stay strictly positive. */
#define MIN_FLOAT exp2f(-16.0f)
93#pragma omp declare simd
98 return fmaxf(fminf(
value,
top), bottom);
103static inline void interpolate_bilinear(
const float *
const restrict in,
const size_t width_in,
const size_t height_in,
104 float *
const restrict
out,
const size_t width_out,
const size_t height_out,
109#pragma omp parallel for collapse(2) default(none) \
110 dt_omp_firstprivate(in, out, width_out, height_out, width_in, height_in, ch) \
111 schedule(simd:static)
113 for(
size_t i = 0;
i < height_out;
i++)
115 for(
size_t j = 0; j < width_out; j++)
118 const float x_out = (float)j /(
float)width_out;
119 const float y_out = (float)
i /(
float)height_out;
122 const float x_in = x_out * (float)width_in;
123 const float y_in = y_out * (float)height_in;
126 size_t x_prev = (size_t)floorf(x_in);
127 size_t x_next = x_prev + 1;
128 size_t y_prev = (size_t)floorf(y_in);
129 size_t y_next = y_prev + 1;
131 x_prev = (x_prev < width_in) ? x_prev : width_in - 1;
132 x_next = (x_next < width_in) ? x_next : width_in - 1;
133 y_prev = (y_prev < height_in) ? y_prev : height_in - 1;
134 y_next = (y_next < height_in) ? y_next : height_in - 1;
137 const size_t Y_prev = y_prev * width_in;
138 const size_t Y_next = y_next * width_in;
139 const float *
const Q_NW = (
float *)in + (Y_prev + x_prev) * ch;
140 const float *
const Q_NE = (
float *)in + (Y_prev + x_next) * ch;
141 const float *
const Q_SE = (
float *)in + (Y_next + x_next) * ch;
142 const float *
const Q_SW = (
float *)in + (Y_next + x_prev) * ch;
145 const float Dy_next = (float)y_next - y_in;
146 const float Dy_prev = 1.f - Dy_next;
147 const float Dx_next = (float)x_next - x_in;
148 const float Dx_prev = 1.f - Dx_next;
151 float *
const pixel_out = (
float *)
out + (
i * width_out + j) * ch;
154 for(
size_t c = 0;
c < ch;
c++)
156 pixel_out[
c] = Dy_prev * (Q_SW[
c] * Dx_next + Q_SE[
c] * Dx_prev) +
157 Dy_next * (Q_NW[
c] * Dx_next + Q_NE[
c] * Dx_prev);
/* variance_analyse() -- visible fragments only.
 *
 * NOTE(review): this listing is missing the start of the signature, the
 * braces, the allocation of `input`, the definitions of `Ndim` and `idx`,
 * the header of the second loop, the stores into `ab` and any cleanup.
 * The prototype given in the cross-reference at the end of this file is
 *   int variance_analyse(guide, mask, ab, width, height, radius, feathering)
 *
 * What the visible lines establish:
 *  - the function returns 1 when `input` is NULL;
 *  - the first loop writes, for each k < Ndim, four interleaved floats into
 *    `input`: guide, mask, guide * guide, guide * mask;
 *  - the later fragment computes, per index idx,
 *      d = max(input[2] - input[0] * input[0] + feathering, 1e-15)
 *      a = (input[3] - input[0] * input[1]) / d
 *      b = input[1] - a * input[0]
 *    where input[n] stands for input[4 * idx + n].
 *  - presumably `input` is averaged between the two loops so these are a
 *    local variance / covariance -- TODO confirm against the full source.
 */
166 const float *
const restrict mask,
167 float *
const restrict ab,
169 const int radius,
const float feathering)
// Four floats per element; presumably Ndim is width * height -- TODO confirm.
177 const size_t Ndimch = Ndim * 4;
// Non-zero return signals failure to the caller.
183 if(input == NULL)
return 1;
// Pack guide, mask and their products into 4 interleaved channels of `input`.
187#pragma omp parallel for default(none) \
188 dt_omp_firstprivate(guide, mask, Ndim, radius, input) \
189 schedule(simd:static)
191 for(
size_t k = 0; k < Ndim; k++)
193 const size_t index = k * 4;
194 input[index] = guide[k];
195 input[index + 1] = mask[k];
196 input[index + 2] = guide[k] * guide[k];
197 input[index + 3] = guide[k] * mask[k];
// Derive the per-element coefficients a and b; d is floored at 1e-15 so the
// division below cannot be by zero. The loop header is not part of this listing.
209#pragma omp parallel for default(none) \
210 dt_omp_firstprivate(ab, input, width, height, feathering) \
215 const float d = fmaxf((input[4*idx+2] - input[4*idx+0] * input[4*idx+0]) + feathering, 1e-15f);
216 const float a = (input[4*idx+3] - input[4*idx+0] * input[4*idx+1]) /
d;
217 const float b = input[4*idx+1] -
a * input[4*idx+0];
229 const float *
const restrict ab,
230 const size_t num_elem)
233#pragma omp parallel for simd default(none) \
234dt_omp_firstprivate(image, ab, num_elem) \
235schedule(simd:static) aligned(image, ab:64)
237 for(
size_t k = 0; k < num_elem; k++)
240 image[k] = fmaxf(image[k] * ab[k * 2] + ab[k * 2 + 1],
MIN_FLOAT);
247 const float *
const restrict ab,
248 const size_t num_elem)
251#pragma omp parallel for simd default(none) \
252dt_omp_firstprivate(image, ab, num_elem) \
253schedule(simd:static) aligned(image, ab:64)
255 for(
size_t k = 0; k < num_elem; k++)
258 image[k] = sqrtf(image[k] * fmaxf(image[k] * ab[k * 2] + ab[k * 2 + 1],
MIN_FLOAT));
/* quantize() -- snap each value of `image` to a power-of-two level and clamp
 * it to [clip_min, clip_max], writing the result to `out`.
 *
 * NOTE(review): this listing is missing the braces, the branch that precedes
 * `else if(sampling == 1.0f)` and the final `else`. Only the two loops below
 * are visible.
 *
 *  - sampling == 1:  out[k] = clamp(2^floor(log2(image[k])))
 *  - remaining case: out[k] = clamp(2^(floor(log2(image[k]) / sampling) * sampling))
 *
 * Both loops declare `image` and `out` as 64-byte aligned to OpenMP.
 */
264static inline void quantize(
const float *
const restrict image,
265 float *
const restrict
out,
266 const size_t num_elem,
267 const float sampling,
const float clip_min,
const float clip_max)
// Whole-level steps: the division and multiplication by `sampling` are skipped.
276 else if(sampling == 1.0f)
280#pragma omp parallel for simd default(none) \
281dt_omp_firstprivate(image, out, num_elem, sampling, clip_min, clip_max) \
282schedule(simd:static) aligned(image, out:64)
284 for(
size_t k = 0; k < num_elem; k++)
285 out[k] =
fast_clamp(exp2f(floorf(log2f(image[k]))), clip_min, clip_max);
// General case: levels are spaced `sampling` apart in log2.
292#pragma omp parallel for simd default(none) \
293dt_omp_firstprivate(image, out, num_elem, sampling, clip_min, clip_max) \
294schedule(simd:static) aligned(image, out:64)
296 for(
size_t k = 0; k < num_elem; k++)
297 out[k] =
fast_clamp(exp2f(floorf(log2f(image[k]) / sampling) * sampling), clip_min, clip_max);
/* fast_surface_blur() -- visible fragments only.
 *
 * NOTE(review): this listing is missing the start of the signature, the
 * braces, the buffer allocations, the bodies of every `if`, and everything
 * after the loop. The prototype given in the cross-reference at the end of
 * this file is
 *   int fast_surface_blur(image, width, height, radius, feathering, iterations,
 *                         filter, scale, quantization, quantize_min, quantize_max)
 *
 * What the visible lines establish, per loop iteration:
 *  1. quantize() writes a quantized copy of ds_image into ds_mask;
 *  2. variance_analyse() is called with ds_mask as `guide` and ds_image as
 *     `mask`, writing into ds_ab; a non-zero return is tested;
 *  3. dt_box_mean() is run on ds_ab with 2 channels, radius ds_radius and
 *     1 iteration; a non-zero return is tested;
 *  4. something is done on every iteration except the last (body not visible).
 */
305 const int radius,
float feathering,
const int iterations,
307 const float quantization,
const float quantize_min,
const float quantize_max)
// Radius used on the ds_* buffers: 1 for radius < 4, otherwise radius / scaling.
314 const int ds_radius = (radius < 4) ? 1 : radius /
scaling;
319 const size_t num_elem_ds = ds_width * ds_height;
// Any failed allocation is reported to the user through dt_control_log().
327 if(!ds_image || !ds_mask || !ds_ab || !ab)
329 dt_control_log(_(
"fast guided filter failed to allocate memory, check your RAM settings"));
341 for(
int i = 0;
i < iterations; ++
i)
// Rebuild ds_mask from the current ds_image.
344 quantize(ds_image, ds_mask, ds_width * ds_height, quantization, quantize_min, quantize_max);
// Non-zero means variance_analyse() failed.
348 if(
variance_analyse(ds_mask, ds_image, ds_ab, ds_width, ds_height, ds_radius, feathering) != 0)
// Average the interleaved 2-channel ds_ab buffer; non-zero means failure.
358 if(
dt_box_mean(ds_ab, ds_height, ds_width, 2, ds_radius, 1) != 0)
// Taken on every iteration but the last.
367 if(
i != iterations - 1)
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
int dt_box_mean(float *const buf, const size_t height, const size_t width, const int ch, const int radius, const unsigned iterations)
Definition box_filters.c:1235
static const float scaling
Definition chromatic_adaptation.h:299
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
const float d
Definition colorspaces_inline_conversions.h:931
const float b
Definition colorspaces_inline_conversions.h:1326
const float a
Definition colorspaces_inline_conversions.h:1292
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
const float top
Definition colorspaces_inline_conversions.h:672
void dt_control_log(const char *msg,...)
Definition control.c:530
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:371
static size_t dt_round_size_sse(const size_t size)
Definition darktable.h:327
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:377
#define __DT_CLONE_TARGETS__
Definition darktable.h:291
static const dt_aligned_pixel_simd_t value
Definition darktable.h:501
static __DT_CLONE_TARGETS__ int variance_analyse(const float *const restrict guide, const float *const restrict mask, float *const restrict ab, const size_t width, const size_t height, const int radius, const float feathering)
Definition fast_guided_filter.h:165
static __DT_CLONE_TARGETS__ int fast_surface_blur(float *const restrict image, const size_t width, const size_t height, const int radius, float feathering, const int iterations, const dt_iop_guided_filter_blending_t filter, const float scale, const float quantization, const float quantize_min, const float quantize_max)
Definition fast_guided_filter.h:303
dt_iop_guided_filter_blending_t
Definition fast_guided_filter.h:51
@ DT_GF_BLENDING_LINEAR
Definition fast_guided_filter.h:52
@ DT_GF_BLENDING_GEOMEAN
Definition fast_guided_filter.h:53
static __DT_CLONE_TARGETS__ void quantize(const float *const restrict image, float *const restrict out, const size_t num_elem, const float sampling, const float clip_min, const float clip_max)
Definition fast_guided_filter.h:264
#define MIN_FLOAT
Definition fast_guided_filter.h:47
static __DT_CLONE_TARGETS__ void apply_linear_blending_w_geomean(float *const restrict image, const float *const restrict ab, const size_t num_elem)
Definition fast_guided_filter.h:246
static __DT_CLONE_TARGETS__ void apply_linear_blending(float *const restrict image, const float *const restrict ab, const size_t num_elem)
Definition fast_guided_filter.h:228
static float fast_clamp(const float value, const float bottom, const float top)
Definition fast_guided_filter.h:95
static __DT_CLONE_TARGETS__ void interpolate_bilinear(const float *const restrict in, const size_t width_in, const size_t height_in, float *const restrict out, const size_t width_out, const size_t height_out, const size_t ch)
Definition fast_guided_filter.h:103
__DT_CLONE_TARGETS__ void dt_iop_image_copy(float *const __restrict__ out, const float *const __restrict__ in, const size_t nfloats)
Definition imagebuf.c:138