60static void _heal_sub(
const float *
const top_buffer,
const float *
const bottom_buffer,
61 float *
const restrict red_buffer,
float *
const restrict black_buffer,
65 const size_t res_stride = 4 * ((
width + 1) / 2);
69 const int parity =
row & 1;
70 const size_t row_start = (
row+1) * res_stride;
71 float *
const buf1 = parity ? red_buffer + row_start : black_buffer + row_start;
72 float *
const buf2 = parity ? black_buffer + row_start : red_buffer + row_start;
74 for(
size_t col = 0; col <
width/2; col++)
76 const size_t idx = 4 * (
row *
width + 2*col);
79 buf1[4*col + c] = top_buffer[idx + c] - bottom_buffer[idx + c];
80 buf2[4*col + c] = top_buffer[idx+4 + c] - bottom_buffer[idx+4 + c];
87 const size_t res_idx = (
width-1)/2;
91 buf1[4*res_idx + c] = top_buffer[idx + c] - bottom_buffer[idx + c];
92 buf2[4*res_idx + c] = 0.0f;
97 memset(red_buffer, 0, res_stride *
sizeof(
float));
98 memset(red_buffer + (
height+1)*res_stride, 0, res_stride *
sizeof(
float));
99 memset(black_buffer, 0, res_stride *
sizeof(
float));
100 memset(black_buffer + (
height+1)*res_stride, 0, res_stride *
sizeof(
float));
104static void _heal_add(
const float *
const restrict red_buffer,
const float *
const black_buffer,
105 const float *
const restrict second_buffer,
float *
const restrict result_buffer,
110 const size_t res_stride = 4 * ((
width + 1) / 2);
114 const int parity =
row & 1;
115 const size_t row_start = (
row+1) * res_stride;
116 const float *
const restrict buf1 = parity ? red_buffer + row_start : black_buffer + row_start;
117 const float *
const restrict buf2 = parity ? black_buffer + row_start : red_buffer + row_start;
119 for(
size_t col = 0; col <
width/2; col++)
121 const size_t idx = 4 * (
row *
width + 2*col);
124 result_buffer[idx + c] = buf1[4*col + c] + second_buffer[idx + c];
125 result_buffer[idx + 4 + c] = buf2[4*col + c] + second_buffer[idx + 4 + c];
131 const size_t res_idx = (
width-1)/2;
134 result_buffer[idx + c] = buf1[4*res_idx + c] + second_buffer[idx + c];
148#pragma omp declare reduction(vsum:_aligned_pixel:omp_out=_add_float4(omp_out,omp_in)) \
149 initializer(omp_priv = { { 0.0f, 0.0f, 0.0f, 0.0f } })
154 const float *
const restrict neighbor_pixels,
155 const size_t height,
const size_t width,
const unsigned *
const restrict runs,
156 const size_t num_runs,
const size_t start_parity,
const float w)
177#if !(defined(__apple_build_version__) && __apple_build_version__ < 11030000)
180 for(
size_t i = 0;
i < num_runs;
i++)
182 const size_t idx = runs[2*
i];
183 const unsigned count = runs[2*
i+1];
184 const size_t index = (size_t)4 * idx;
187 if(
row == 1) a -= 1.0f;
189 const size_t vert_offset = 4 *
width;
190 const size_t lroffset = 4 * (start_parity ^ (
row & 1));
193 const size_t col = idx %
width;
197 if(col > 0 || lroffset)
201 if(col + 1 <
width || lroffset == 0)
209 diff[c] = w * ((aa * active_pixels[index+c])
210 - (neighbor_pixels[index - vert_offset + c] + neighbor_pixels[index + vert_offset + c]
211 + left[c] + right[c]));
212 active_pixels[index + c] -= diff[c];
213 err.
v[c] += (diff[c] * diff[c]);
218 copy_pixel(left, neighbor_pixels + index - 4 + lroffset);
219 for(
size_t j = 0; j < count; j++)
221 const size_t pixidx = index + 4*j;
226 right[c] = neighbor_pixels[pixidx + lroffset + c];
227 diff[c] = w * (a * active_pixels[pixidx+c]
228 - (neighbor_pixels[pixidx - vert_offset + c] + neighbor_pixels[pixidx + vert_offset + c]
229 + left[c] + right[c]));
230 active_pixels[pixidx + c] -= diff[c];
231 err.
v[c] += (diff[c] * diff[c]);
236 return err.
v[0] + err.
v[1] + err.
v[2];
242 size_t start,
const size_t width,
243 unsigned *
const restrict runs,
size_t count,
size_t *nmask)
249 if(start == 0 && mask[start])
251 runs[2*count] = start_index;
257 gboolean in_run =
FALSE;
258 unsigned run_start = 0;
260 for(col = start; col <
width; col += 2)
273 runs[2*count] = start_index + run_start / 2;
274 runs[2*count + 1] = (col - run_start) / 2;
281 runs[2*count] = start_index + run_start / 2;
282 const unsigned runlen = (col - run_start) / 2;
283 runs[2*count + 1] = runlen;
284 if(runlen > 1 && col >
width)
288 runs[2*count + 2] = runs[2*count] + runs[2*count+1];
289 runs[2*count + 3] = 1;
300 const size_t subwidth,
unsigned *
const restrict runs,
size_t *count,
size_t *nmask)
304 const int parity = start ^ (
row & 1);
305 const size_t index = (
row + 1) * subwidth;
306 const size_t mask_index =
row *
width;
314 const float *
const restrict mask,
const int max_iter)
323 const size_t subwidth = (
width+1)/2;
325 sizeof(
unsigned) * subwidth * (
height + 2),
328 sizeof(
unsigned) * subwidth * (
height + 2),
332 fprintf(stderr,
"_heal_laplace_loop: error allocating memory for healing\n");
337 size_t num_black = 0;
338 size_t nmask_red = 0;
339 size_t nmask_black = 0;
342#pragma omp parallel sections
351 const size_t nmask = nmask_red + nmask_black;
357 const float w = ((2.0f - 1.0f / (0.1575f * sqrtf(nmask) + 0.8f)) * .25f);
359 const float epsilon = (0.1 / 255);
360 const float err_exit = epsilon * epsilon * w * w;
363 for(
int iter = 0; iter < max_iter; iter++)
369 if(err < err_exit)
break;
383void dt_heal(
const float *
const src_buffer,
float *dest_buffer,
const float *
const mask_buffer,
const int width,
384 const int height,
const int ch,
const int max_iter)
388 fprintf(stderr,
"dt_heal: full-color image required\n");
391 const size_t subwidth = 4 * ((
width+1)/2);
396 fprintf(stderr,
"dt_heal: error allocating memory for healing\n");
448 const int width,
const int height,
const int max_iter)
450 cl_int err = CL_SUCCESS;
454 float *src_buffer = NULL;
455 float *dest_buffer = NULL;
460 fprintf(stderr,
"dt_heal_cl: error allocating memory for healing\n");
468 fprintf(stderr,
"dt_heal_cl: error allocating memory for healing\n");
475 if(err != CL_SUCCESS)
482 if(err != CL_SUCCESS)
491 if(err != CL_SUCCESS)
void cleanup(dt_imageio_module_format_t *self)
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
static void copy_pixel(float *const __restrict__ out, const float *const __restrict__ in)
#define for_each_channel(_var,...)
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
#define dt_pixelpipe_cache_alloc_align_cache(size, id)
#define dt_pixelpipe_cache_free_align(mem)
#define __OMP_PARALLEL_FOR__(...)
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
void dt_heal_free_cl_global(dt_heal_cl_global_t *g)
static size_t _collect_color_runs(const float *const restrict mask, const size_t start_index, size_t start, const size_t width, unsigned *const restrict runs, size_t count, size_t *nmask)
void dt_heal_free_cl(heal_params_cl_t *p)
void dt_heal(const float *const src_buffer, float *dest_buffer, const float *const mask_buffer, const int width, const int height, const int ch, const int max_iter)
static void collect_runs(const int start, const float *const restrict mask, const size_t width, const size_t height, const size_t subwidth, unsigned *const restrict runs, size_t *count, size_t *nmask)
dt_heal_cl_global_t * dt_heal_init_cl_global()
cl_int dt_heal_cl(heal_params_cl_t *p, cl_mem dev_src, cl_mem dev_dest, const float *const mask_buffer, const int width, const int height, const int max_iter)
heal_params_cl_t * dt_heal_init_cl(const int devid)
static void _heal_add(const float *const restrict red_buffer, const float *const black_buffer, const float *const restrict second_buffer, float *const restrict result_buffer, const size_t width, const size_t height)
static void _heal_laplace_loop(float *const restrict red_pixels, float *const restrict black_pixels, const size_t width, const size_t height, const float *const restrict mask, const int max_iter)
static void _heal_sub(const float *const top_buffer, const float *const bottom_buffer, float *const restrict red_buffer, float *const restrict black_buffer, const size_t width, const size_t height)
static float _heal_laplace_iteration(float *const restrict active_pixels, const float *const restrict neighbor_pixels, const size_t height, const size_t width, const unsigned *const restrict runs, const size_t num_runs, const size_t start_parity, const float w)
float *const restrict const size_t const size_t ch
float dt_aligned_pixel_t[4]
int dt_opencl_write_buffer_to_device(const int devid, void *host, void *device, const size_t offset, const size_t size, const int blocking)
int dt_opencl_read_buffer_from_device(const int devid, void *host, void *device, const size_t offset, const size_t size, const int blocking)
#define DT_OPENCL_SYSMEM_ALLOCATION
struct dt_opencl_t * opencl
struct dt_heal_cl_global_t * heal
dt_heal_cl_global_t * global