42#define DT_COMMON_BILATERAL_MAX_RES_S 3000
43#define DT_COMMON_BILATERAL_MAX_RES_R 50
63 b->sigma_r = L_range / _z;
65 b->size_x = (int)ceilf(
width / b->sigma_s) + 1;
66 b->size_y = (int)ceilf(
height / b->sigma_s) + 1;
67 b->size_z = (int)ceilf(L_range / b->sigma_r) + 1;
69 if (b->sigma_s !=
sigma_s) fprintf(stderr,
"[bilateral] clamped sigma_s (%g -> %g)!\n",
sigma_s,b->sigma_s);
70 if (b->sigma_r !=
sigma_r) fprintf(stderr,
"[bilateral] clamped sigma_r (%g -> %g)!\n",
sigma_r,b->sigma_r);
81 size_t grid_size = b.size_x * b.size_y * b.size_z;
84 return 2 * grid_size *
sizeof(float);
109 size_t grid_size = b.size_x * b.size_y * b.size_z;
126 float *xf,
float *yf,
float *zf)
128 float x =
CLAMPS(
i / b->sigma_s, 0, b->size_x - 1);
129 float y =
CLAMPS(j / b->sigma_s, 0, b->size_y - 1);
130 float z =
CLAMPS(
L / b->sigma_r, 0, b->size_z - 1);
131 const int xi =
MIN((
int)
x, b->size_x - 2);
132 const int yi =
MIN((
int)y, b->size_y - 2);
133 const int zi =
MIN((
int)z, b->size_z - 2);
137 return ((xi + yi * b->size_x) * b->size_z) + zi;
140static inline __attribute__((always_inline))
size_t image_to_relgrid(
const dt_bilateral_t *
const b,
const int i,
const float L,
float *xf,
float *zf)
142 float x =
CLAMPS(
i /
b->sigma_s, 0,
b->size_x - 1);
143 float z =
CLAMPS(
L /
b->sigma_r, 0,
b->size_z - 1);
144 const int xi =
MIN((
int)
x,
b->size_x - 2);
145 const int zi =
MIN((
int)z,
b->size_z - 2);
148 return (xi *
b->size_z) + zi;
162 b->sliceheight = (
height + b->numslices - 1) / b->numslices;
163 b->slicerows = (b->size_y + b->numslices - 1) / b->numslices + 2;
165 if(b->buf) memset(b->buf, 0,
sizeof(
float) * b->size_x * b->size_z * b->numslices * b->slicerows);
168 fprintf(stderr,
"[bilateral] unable to allocate buffer for %" G_GSIZE_FORMAT
"x%" G_GSIZE_FORMAT
"x%" G_GSIZE_FORMAT
" grid\n",b->size_x,b->size_y,b->size_z);
172 dt_print(
DT_DEBUG_DEV,
"[bilateral] created grid [%" G_GSIZE_FORMAT
" %" G_GSIZE_FORMAT
" %" G_GSIZE_FORMAT
"] with sigma (%f %f) (%f %f)\n",
173 b->size_x, b->size_y, b->size_z, b->sigma_s,
sigma_s, b->sigma_r,
sigma_r);
179 const int ox = b->size_z;
180 const int oy = b->size_x * b->size_z;
182 const float sigma_s = b->sigma_s * b->sigma_s;
183 float *
const buf = b->buf;
188 const size_t offsets[8] =
200 for(
int slice = 0; slice < b->numslices; slice++)
202 const int firstrow = slice * b->sliceheight;
203 const int lastrow =
MIN((slice+1)*b->sliceheight,b->height);
206 const int slice_offset = slice * b->slicerows - (int)(firstrow / b->sigma_s);
208 for(
int j = firstrow; j < lastrow; j++)
210 float y =
CLAMPS(j / b->sigma_s, 0, b->size_y - 1);
211 const int yi =
MIN((
int)y, b->size_y - 2);
212 const float yf = y - yi;
213 const size_t base = (size_t)(yi + slice_offset) * oy;
214 for(
int i = 0;
i < b->width;
i++)
216 size_t index = 4 * (j * b->width +
i);
218 const float L = in[index];
220 const size_t grid_index = base + image_to_relgrid(b,
i,
L, &xf, &zf);
224 (1.0f - xf) * (1.0f - yf) * 100.0f /
sigma_s,
225 xf * (1.0f - yf) * 100.0f /
sigma_s,
226 (1.0f - xf) * yf * 100.0f /
sigma_s,
230 for(
int k = 0;
k < 4;
k++)
232 buf[grid_index + offsets[
k]] += (contrib[
k] * (1.0f - zf));
233 buf[grid_index + offsets[
k+4]] += (contrib[
k] * zf);
240 for (
int slice = 1 ; slice < nthreads; slice++)
243 const int destrow = (int)(slice * b->sliceheight / b->sigma_s);
244 float *dest = buf + destrow * oy;
246 for(
int j = slice * b->slicerows; j < (slice+1)*b->slicerows; j++)
248 float *src = buf + j * oy;
249 for(
int i = 0;
i < oy;
i++)
257 memset(buf + j*oy,
'\0',
sizeof(
float) * oy);
262static void blur_line_z(
float *buf,
const int offset1,
const int offset2,
const int offset3,
const int size1,
263 const int size2,
const int size3)
265 const float w1 = 4.f / 16.f;
266 const float w2 = 2.f / 16.f;
268 for(
int k = 0;
k < size1;
k++)
270 size_t index = (size_t)
k * offset1;
271 for(
int j = 0; j < size2; j++)
273 float tmp1 = buf[index];
274 buf[index] =
w1 * buf[index + offset3] +
w2 * buf[index + 2 * offset3];
276 float tmp2 = buf[index];
277 buf[index] =
w1 * (buf[index + offset3] - tmp1) +
w2 * buf[index + 2 * offset3];
279 for(
int i = 2;
i < size3 - 2;
i++)
281 const float tmp3 = buf[index];
282 buf[index] = +
w1 * (buf[index + offset3] - tmp2) +
w2 * (buf[index + 2 * offset3] - tmp1);
287 const float tmp3 = buf[index];
288 buf[index] =
w1 * (buf[index + offset3] - tmp2) -
w2 * tmp1;
290 buf[index] = -
w1 * tmp3 -
w2 * tmp2;
292 index += offset2 - offset3 * size3;
297static void blur_line(
float *buf,
const int offset1,
const int offset2,
const int offset3,
const int size1,
298 const int size2,
const int size3)
300 const float w0 = 6.f / 16.f;
301 const float w1 = 4.f / 16.f;
302 const float w2 = 1.f / 16.f;
304 for(
int k = 0;
k < size1;
k++)
306 size_t index = (size_t)
k * offset1;
307 for(
int j = 0; j < size2; j++)
309 float tmp1 = buf[index];
310 buf[index] = buf[index] * w0 +
w1 * buf[index + offset3] +
w2 * buf[index + 2 * offset3];
312 float tmp2 = buf[index];
313 buf[index] = buf[index] * w0 +
w1 * (buf[index + offset3] + tmp1) +
w2 * buf[index + 2 * offset3];
315 for(
int i = 2;
i < size3 - 2;
i++)
317 const float tmp3 = buf[index];
319 = buf[index] * w0 +
w1 * (buf[index + offset3] + tmp2) +
w2 * (buf[index + 2 * offset3] + tmp1);
324 const float tmp3 = buf[index];
325 buf[index] = buf[index] * w0 +
w1 * (buf[index + offset3] + tmp2) +
w2 * tmp1;
327 buf[index] = buf[index] * w0 +
w1 * tmp3 +
w2 * tmp2;
329 index += offset2 - offset3 * size3;
339 const int ox = b->size_z;
340 const int oy = b->size_x * b->size_z;
343 blur_line(b->buf, oz, oy, ox, b->size_z, b->size_y, b->size_x);
345 blur_line(b->buf, oz, ox, oy, b->size_z, b->size_x, b->size_y);
347 blur_line_z(b->buf, ox, oy, oz, b->size_x, b->size_y, b->size_z);
353 const float norm = -detail * b->sigma_r * 0.04f;
354 const int ox = b->size_z;
355 const int oy = b->size_x * b->size_z;
357 float *
const buf = b->buf;
358 const int width = b->width;
359 const int height = b->height;
363 for(
int j = 0; j <
height; j++)
367 size_t index = 4 * (j *
width +
i);
369 const float L = in[index];
372 const float Lout = fmaxf( 0.0f,
L
373 + norm * (buf[gi] * (1.0f - xf) * (1.0f - yf) * (1.0f - zf)
374 + buf[gi + ox] * (xf) * (1.0f - yf) * (1.0f - zf)
375 + buf[gi + oy] * (1.0f - xf) * (yf) * (1.0f - zf)
376 + buf[gi + ox + oy] * (xf) * (yf) * (1.0f - zf)
377 + buf[gi + oz] * (1.0f - xf) * (1.0f - yf) * (zf)
378 + buf[gi + ox + oz] * (xf) * (1.0f - yf) * (zf)
379 + buf[gi + oy + oz] * (1.0f - xf) * (yf) * (zf)
380 + buf[gi + ox + oy + oz] * (xf) * (yf) * (zf)));
383 out[index + 1] = in[index + 1];
384 out[index + 2] = in[index + 2];
385 out[index + 3] = in[index + 3];
394 const float norm = -detail * b->sigma_r * 0.04f;
395 const int ox = b->size_z;
396 const int oy = b->size_x * b->size_z;
398 float *
const buf = b->buf;
399 const int width = b->width;
400 const int height = b->height;
404 for(
int j = 0; j <
height; j++)
408 size_t index = 4 * (j *
width +
i);
410 const float L = in[index];
413 const float Lout = norm * (buf[gi] * (1.0f - xf) * (1.0f - yf) * (1.0f - zf)
414 + buf[gi + ox] * (xf) * (1.0f - yf) * (1.0f - zf)
415 + buf[gi + oy] * (1.0f - xf) * (yf) * (1.0f - zf)
416 + buf[gi + ox + oy] * (xf) * (yf) * (1.0f - zf)
417 + buf[gi + oz] * (1.0f - xf) * (1.0f - yf) * (zf)
418 + buf[gi + ox + oz] * (xf) * (1.0f - yf) * (zf)
419 + buf[gi + oy + oz] * (1.0f - xf) * (yf) * (zf)
420 + buf[gi + ox + oy + oz] * (xf) * (yf) * (zf));
421 out[index] =
MAX(0.0f,
out[index] + Lout);
433#undef DT_COMMON_BILATERAL_MAX_RES_S
434#undef DT_COMMON_BILATERAL_MAX_RES_R
void dt_bilateral_free(dt_bilateral_t *b)
static __DT_CLONE_TARGETS__ void blur_line(float *buf, const int offset1, const int offset2, const int offset3, const int size1, const int size2, const int size3)
static __DT_CLONE_TARGETS__ void blur_line_z(float *buf, const int offset1, const int offset2, const int offset3, const int size1, const int size2, const int size3)
__DT_CLONE_TARGETS__ void dt_bilateral_splat(const dt_bilateral_t *b, const float *const in)
void dt_bilateral_grid_size(dt_bilateral_t *b, const int width, const int height, const float L_range, float sigma_s, const float sigma_r)
size_t dt_bilateral_memory_use(const int width, const int height, const float sigma_s, const float sigma_r)
#define DT_COMMON_BILATERAL_MAX_RES_S
dt_bilateral_t * dt_bilateral_init(const int width, const int height, const float sigma_s, const float sigma_r)
#define DT_COMMON_BILATERAL_MAX_RES_R
__DT_CLONE_TARGETS__ void dt_bilateral_slice_to_output(const dt_bilateral_t *const b, const float *const in, float *out, const float detail)
size_t dt_bilateral_singlebuffer_size(const int width, const int height, const float sigma_s, const float sigma_r)
__DT_CLONE_TARGETS__ void dt_bilateral_slice(const dt_bilateral_t *const b, const float *const in, float *out, const float detail)
void dt_bilateral_blur(const dt_bilateral_t *b)
size_t dt_bilateral_memory_use2(const int width, const int height, const float sigma_s, const float sigma_r)
size_t dt_bilateral_singlebuffer_size2(const int width, const int height, const float sigma_s, const float sigma_r)
static void image_to_grid(const dt_iop_colorreconstruct_bilateral_t *const b, const float i, const float j, const float L, float *x, float *y, float *z)
const dt_colormatrix_t dt_aligned_pixel_t out
void dt_print(dt_debug_thread_t thread, const char *msg,...)
#define __OMP_SIMD__(...)
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
#define dt_pixelpipe_cache_free_align(mem)
#define __DT_CLONE_TARGETS__
#define __OMP_PARALLEL_FOR__(...)
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
float *const restrict const size_t k
float dt_aligned_pixel_t[4]
int32_t num_openmp_threads