41 const int return_layer,
const int merge_from_scale,
void *user_data,
42 const float preview_scale,
const int use_sse)
52 p->return_layer = return_layer;
53 p->merge_from_scale = merge_from_scale;
54 p->user_data = user_data;
55 p->preview_scale = preview_scale;
75 float size_tmp = ((
size >>= 1) * preview_scale);
78 size_tmp = ((
size >>= 1) * preview_scale);
84 while((maxscale > 0) && ((1 << maxscale) * preview_scale >=
size)) maxscale--;
91 return _get_max_scale(
p->width /
p->preview_scale,
p->height /
p->preview_scale,
p->preview_scale);
99 for(
unsigned int lev = 0; lev < num_scales; lev++)
105 first_scale = lev + 1;
120 if(
p->image != layer) memcpy(
p->image, layer,
sizeof(
float) *
p->width *
p->height *
p->ch);
126 const size_t height,
const size_t width,
const size_t lev)
128 const size_t vscale =
MIN(1 << lev,
height-1);
130 for(
int rowid = 0; rowid <
height ; rowid++)
138 const size_t rowstart = (size_t)4 *
row *
width;
139 const size_t above_row = (
row > vscale) ?
row - vscale : vscale -
row;
141 const float*
const restrict center = in + rowstart;
142 const float*
const restrict above = in + 4 * above_row *
width;
143 const float*
const restrict below = in + 4 * below_row *
width;
144 float*
const restrict temprow =
out + rowstart;
145 for (
size_t col = 0; col < 4*
width; col += 4)
149 temprow[col + c] = 2.f * center[col+c] + above[col+c] + below[col+c];
159 const size_t height,
const size_t width,
const size_t lev)
161 const int hscale =
MIN(1 << lev,
width);
170 const size_t rowindex = (size_t)4 * (
row *
width);
172 float*
const restrict details = in + rowindex;
173 float*
const restrict coarse =
out + rowindex;
175 for (
int col = 0; col <
width - hscale; col++)
177 const size_t leftpos = (size_t)4*abs(col-hscale);
178 const size_t rightpos = (size_t)4*(col+hscale);
181 const float left = coarse[leftpos+c];
182 const float right = coarse[rightpos+c];
184 const float hat = (2.f * coarse[4*col+c] + left + right) / 16.f;
186 temprow[4*col+c] = hat;
187 details[4*col+c] -= hat;
191 for (
int col =
width - hscale; col <
width; col++)
193 const size_t leftpos = (size_t)4 * abs(col-hscale);
194 const size_t rightpos = (size_t)4 * (2*
width - 2 - (col+hscale));
197 const float left = coarse[leftpos+c];
198 const float right = coarse[rightpos+c];
200 const float hat = (2.f * coarse[4*col+c] + left + right) / 16.f;
202 temprow[4*col+c] = hat;
203 details[4*col+c] -= hat;
208 memcpy(coarse, temprow,
sizeof(
float) * 4 *
width);
213static inline __attribute__((always_inline))
void dwt_decompose_layer(
float *
const restrict
out,
float *
const restrict in,
float *
const temp,
const int lev,
226 float *layers = NULL;
227 float *merged_layers = NULL;
228 float *buffer[2] = { 0, 0 };
231 const size_t size = (size_t)
p->width *
p->height *
p->ch;
235 if(layer_func && layer_func(img,
p, 0) != 0)
return 1;
250 printf(
"not enough memory for wavelet decomposition");
256 if(
p->merge_from_scale > 0)
261 printf(
"not enough memory for wavelet decomposition");
269 unsigned int hpass = 0;
270 for(
unsigned int lev = 0; lev <
p->scales && bcontinue; lev++)
272 unsigned int lpass = (1 - (lev & 1));
274 dwt_decompose_layer(buffer[lpass], buffer[hpass], temp, lev,
p);
277 if(
p->merge_from_scale == 0 ||
p->merge_from_scale > lev + 1)
280 if(layer_func && layer_func(buffer[hpass],
p, lev + 1) != 0)
287 if(
p->return_layer == lev + 1)
290 dwt_get_image_layer(buffer[hpass],
p);
295 else if(
p->return_layer == 0)
308 if(layer_func && layer_func(merged_layers,
p, lev + 1) != 0)
315 if(
p->return_layer == lev + 1)
318 dwt_get_image_layer(merged_layers,
p);
331 if(layer_func && layer_func(buffer[hpass],
p,
p->scales + 1) != 0)
338 if(
p->return_layer ==
p->scales + 1)
341 dwt_get_image_layer(buffer[hpass],
p);
344 else if(
p->return_layer == 0)
347 if(
p->merge_from_scale > 0)
357 if(layer_func && layer_func(layers,
p,
p->scales + 2) != 0)
364 dwt_get_image_layer(layers,
p);
380 if(
p->preview_scale <= 0.f)
p->preview_scale = 1.f;
383 if(
p->return_layer >
p->scales + 1)
385 p->return_layer =
p->scales + 1;
391 if(
p->scales > max_scale)
394 if(
p->return_layer >
p->scales)
p->return_layer = max_scale + 1;
396 else if(
p->return_layer > max_scale)
397 p->return_layer = max_scale;
399 p->scales = max_scale;
409 const size_t height,
const size_t width,
const size_t lev)
411 const int vscale =
MIN(1 << lev,
height);
413 for(
int rowid = 0; rowid <
height ; rowid++)
421 const size_t rowstart = (size_t)
row *
width;
423 const float *
const restrict center = in + rowstart;
424 const float *
const restrict above = in + abs(
row - vscale) *
width;
425 const float *
const restrict below = in + below_row *
width;
426 float*
const restrict outrow =
out + rowstart;
428 for (
int col= 0; col <
width; col++)
430 outrow[col] = 2.f * center[col] + above[col] + below[col];
439 float *
const restrict accum,
const size_t height,
const size_t width,
440 const size_t lev,
const float thold,
const int last)
442 const int hscale =
MIN(1 << lev,
width);
451 const size_t rowindex = (size_t)
row *
width;
452 float *
const restrict details = in + rowindex;
453 float *
const restrict coarse =
out + rowindex;
454 float *
const restrict accum_row = accum + rowindex;
457 for (
int col = 0; col < hscale; col++)
460 const float hat = (2.f * coarse[col] + coarse[hscale-col] + coarse[col+hscale]) / 16.f;
463 const float diff = details[col] - hat;
468 accum_row[col] +=
MAX(diff - thold,0.0f) +
MIN(diff + thold, 0.0f);
471 for (
int col = hscale; col <
width - hscale; col++)
474 const float hat = (2.f * coarse[col] + coarse[col-hscale] + coarse[col+hscale]) / 16.f;
477 const float diff = details[col] - hat;
482 accum_row[col] +=
MAX(diff - thold,0.0f) +
MIN(diff + thold, 0.0f);
486 for (
int col =
width - hscale; col <
width; col++)
488 const float right = coarse[2*
width - 2 - (col+hscale)];
490 const float hat = (2.f * coarse[col] + coarse[col-hscale] + right) / 16.f;
493 const float diff = details[col] - hat;
495 accum_row[col] +=
MAX(diff - thold,0.0f) +
MIN(diff + thold, 0.0f);
500 for (
int col = 0; col <
width; col++)
502 details[col] += accum_row[col];
522 for(
int lev = 0; lev < bands; lev++)
524 const int last = (lev+1) == bands;
542 const int program = 20;
566 const int return_layer,
const int merge_from_scale,
void *user_data,
567 const float preview_scale)
579 p->return_layer = return_layer;
580 p->merge_from_scale = merge_from_scale;
581 p->user_data = user_data;
582 p->preview_scale = preview_scale;
595 return _get_max_scale(
p->width /
p->preview_scale,
p->height /
p->preview_scale,
p->preview_scale);
605 cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
607 const int devid =
p->devid;
608 const int kernel =
p->global->kernel_dwt_subtract_layer;
612 const float lpass_mult = (1.f / 16.f);
613 const int width =
p->width;
628 cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
630 const int devid =
p->devid;
631 const int kernel =
p->global->kernel_dwt_add_img_to_layer;
635 const int width =
p->width;
649 cl_int err = CL_SUCCESS;
651 if(
p->image != layer)
653 (
size_t)
p->width *
p->height *
p->ch *
sizeof(
float));
660 cl_int err = CL_SUCCESS;
662 const int devid =
p->devid;
665 cl_mem layers = NULL;
666 cl_mem merged_layers = NULL;
667 unsigned int lpass, hpass;
668 cl_mem buffer[2] = { 0, 0 };
673 err = layer_func(img,
p, 0);
674 if(err != CL_SUCCESS)
goto cleanup;
683 if(buffer[1] == NULL)
685 printf(
"not enough memory for wavelet decomposition");
686 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
694 printf(
"not enough memory for wavelet decomposition");
695 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
700 const int kernel =
p->global->kernel_dwt_init_buffer;
703 const int width =
p->width;
710 if(err != CL_SUCCESS)
goto cleanup;
713 if(
p->merge_from_scale > 0)
718 printf(
"not enough memory for wavelet decomposition");
719 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
724 const int kernel =
p->global->kernel_dwt_init_buffer;
727 const int width =
p->width;
734 if(err != CL_SUCCESS)
goto cleanup;
741 for(
unsigned int lev = 0; lev <
p->scales && bcontinue; lev++)
743 lpass = (1 - (lev & 1));
750 printf(
"not enough memory for wavelet decomposition");
751 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
757 const int kernel =
p->global->kernel_dwt_hat_transform_row;
760 sc = (int)(sc *
p->preview_scale);
761 if(sc >
p->width) sc =
p->width;
771 if(err != CL_SUCCESS)
goto cleanup;
776 const int kernel =
p->global->kernel_dwt_hat_transform_col;
779 sc = (int)(sc *
p->preview_scale);
780 if(sc >
p->height) sc =
p->height;
781 const float lpass_mult = (1.f / 16.f);
792 if(err != CL_SUCCESS)
goto cleanup;
802 if(err != CL_SUCCESS)
goto cleanup;
805 if(
p->merge_from_scale == 0 ||
p->merge_from_scale > lev + 1)
810 err = layer_func(buffer[hpass],
p, lev + 1);
811 if(err != CL_SUCCESS)
goto cleanup;
815 if(
p->return_layer == lev + 1)
819 if(err != CL_SUCCESS)
goto cleanup;
824 else if(
p->return_layer == 0)
828 if(err != CL_SUCCESS)
goto cleanup;
836 if(err != CL_SUCCESS)
goto cleanup;
841 err = layer_func(merged_layers,
p, lev + 1);
842 if(err != CL_SUCCESS)
goto cleanup;
846 if(
p->return_layer == lev + 1)
850 if(err != CL_SUCCESS)
goto cleanup;
865 err = layer_func(buffer[hpass],
p,
p->scales + 1);
866 if(err != CL_SUCCESS)
goto cleanup;
870 if(
p->return_layer ==
p->scales + 1)
874 if(err != CL_SUCCESS)
goto cleanup;
877 else if(
p->return_layer == 0)
880 if(
p->merge_from_scale > 0)
884 if(err != CL_SUCCESS)
goto cleanup;
889 if(err != CL_SUCCESS)
goto cleanup;
894 err = layer_func(layers,
p,
p->scales + 2);
895 if(err != CL_SUCCESS)
goto cleanup;
900 if(err != CL_SUCCESS)
goto cleanup;
915 cl_int err = CL_SUCCESS;
918 if(
p->preview_scale <= 0.f)
p->preview_scale = 1.f;
921 if(
p->return_layer >
p->scales + 1)
923 p->return_layer =
p->scales + 1;
929 if(
p->scales > max_scale)
932 if(
p->return_layer >
p->scales)
p->return_layer = max_scale + 1;
934 else if(
p->return_layer > max_scale)
935 p->return_layer = max_scale;
937 p->scales = max_scale;
void cleanup(dt_imageio_module_format_t *self)
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
const dt_colormatrix_t dt_aligned_pixel_t out
#define __OMP_SIMD__(...)
#define for_each_channel(_var,...)
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
static int dt_get_thread_num()
#define dt_pixelpipe_cache_free_align(mem)
#define __DT_CLONE_TARGETS__
#define __OMP_PARALLEL_FOR__(...)
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
static __DT_CLONE_TARGETS__ void dwt_decompose_horiz(float *const restrict out, float *const restrict in, float *const temp, const size_t height, const size_t width, const size_t lev)
static __DT_CLONE_TARGETS__ int dwt_wavelet_decompose(float *img, dwt_params_t *const p, _dwt_layer_func layer_func)
__DT_CLONE_TARGETS__ int dwt_denoise(float *const img, const int width, const int height, const int bands, const float *const noise)
dt_dwt_cl_global_t * dt_dwt_init_cl_global()
int dt_dwt_first_scale_visible_cl(dwt_params_cl_t *p)
static cl_int dwt_wavelet_decompose_cl(cl_mem img, dwt_params_cl_t *const p, _dwt_layer_func_cl layer_func)
int dwt_decompose(dwt_params_t *p, _dwt_layer_func layer_func)
int dwt_get_max_scale(dwt_params_t *p)
void dt_dwt_free(dwt_params_t *p)
dwt_params_cl_t * dt_dwt_init_cl(const int devid, cl_mem image, const int width, const int height, const int scales, const int return_layer, const int merge_from_scale, void *user_data, const float preview_scale)
static __DT_CLONE_TARGETS__ void dwt_denoise_horiz_1ch(float *const restrict out, float *const restrict in, float *const restrict accum, const size_t height, const size_t width, const size_t lev, const float thold, const int last)
static __DT_CLONE_TARGETS__ void dwt_decompose_vert(float *const restrict out, const float *const restrict in, const size_t height, const size_t width, const size_t lev)
static __DT_CLONE_TARGETS__ void dwt_denoise_vert_1ch(float *const restrict out, const float *const restrict in, const size_t height, const size_t width, const size_t lev)
void dt_dwt_free_cl_global(dt_dwt_cl_global_t *g)
void dt_dwt_free_cl(dwt_params_cl_t *p)
dwt_params_t * dt_dwt_init(float *image, const int width, const int height, const int ch, const int scales, const int return_layer, const int merge_from_scale, void *user_data, const float preview_scale, const int use_sse)
static __DT_CLONE_TARGETS__ int _get_max_scale(const int width, const int height, const float preview_scale)
cl_int dwt_decompose_cl(dwt_params_cl_t *p, _dwt_layer_func_cl layer_func)
int dwt_get_max_scale_cl(dwt_params_cl_t *p)
static __DT_CLONE_TARGETS__ int _first_scale_visible(const int num_scales, const float preview_scale)
static cl_int dwt_get_image_layer_cl(cl_mem layer, dwt_params_cl_t *const p)
int dt_dwt_first_scale_visible(dwt_params_t *p)
static cl_int dwt_add_layer_cl(cl_mem img, cl_mem layers, dwt_params_cl_t *const p, const int n_scale)
static cl_int dwt_subtract_layer_cl(cl_mem bl, cl_mem bh, dwt_params_cl_t *const p)
int() _dwt_layer_func(float *layer, dwt_params_t *const p, const int scale)
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
cl_int() _dwt_layer_func_cl(cl_mem layer, dwt_params_cl_t *const p, const int scale)
__DT_CLONE_TARGETS__ void dt_iop_image_add_image(float *const buf, const float *const other_image, const size_t width, const size_t height, const size_t ch)
__DT_CLONE_TARGETS__ void dt_iop_image_fill(float *const buf, const float fill_value, const size_t width, const size_t height, const size_t ch)
static float kernel(const float *x, const float *y)
float *const restrict const size_t const size_t ch
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
void * dt_opencl_alloc_device_buffer(const int devid, const size_t size)
int dt_opencl_create_kernel(const int prog, const char *name)
void dt_opencl_free_kernel(const int kernel)
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid, cl_mem src_buffer, cl_mem dst_buffer, size_t srcoffset, size_t dstoffset, size_t size)
void dt_opencl_release_mem_object(cl_mem mem)
int32_t num_openmp_threads
struct dt_opencl_t * opencl
int kernel_dwt_add_img_to_layer
struct dt_dwt_cl_global_t * dwt
dt_dwt_cl_global_t * global