Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
guided_filter.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2017-2020 Heiko Bauke.
4 Copyright (C) 2019, 2021 luzpaz.
5 Copyright (C) 2020 Hubert Kowalski.
6 Copyright (C) 2020-2021 Pascal Obry.
7 Copyright (C) 2020-2021 Ralf Brown.
8 Copyright (C) 2022 Hanno Schwalm.
9 Copyright (C) 2022 Martin Bařinka.
10 Copyright (C) 2024 Alban Gruin.
11 Copyright (C) 2024 Alynx Zhou.
12 Copyright (C) 2025-2026 Aurélien PIERRE.
13
14 darktable is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18
19 darktable is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with darktable. If not, see <http://www.gnu.org/licenses/>.
26
27
28 Implementation of the guided image filter as described in
29
30 "Guided Image Filtering" by Kaiming He, Jian Sun, and Xiaoou Tang in
31 K. Daniilidis, P. Maragos, N. Paragios (Eds.): ECCV 2010, Part I,
32 LNCS 6311, pp. 1-14, 2010. Springer-Verlag Berlin Heidelberg 2010
33
34 "Guided Image Filtering" by Kaiming He, Jian Sun, and Xiaoou Tang in
35 IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 35,
36 no. 6, June 2013, 1397-1409
37
38*/
39
40#include "common/darktable.h"
41#include "common/box_filters.h"
43#include "common/math.h"
44#include "common/opencl.h"
45#include <assert.h>
46#include <float.h>
47#include <stdlib.h>
48#include <string.h>
49
50// processing is split into tiles of this size (or three times the filter
51// width, if greater) to keep memory use under control.
52#define GF_TILE_SIZE 512
53
54// some shorthand to make code more legible
55// if we have OpenMP simd enabled, declare a vectorizable for loop;
56// otherwise, just leave it a plain for()
57#if defined(_OPENMP) && defined(OPENMP_SIMD_)
58#define SIMD_FOR \
59 _Pragma("omp simd") \
60 for
61#else
62#define SIMD_FOR for
63#endif
64
65// avoid cluttering the scalar codepath with #ifdefs by hiding the dependency on SSE2
66#if !(defined(__x86_64__) || defined(__i386__))
67# define _mm_prefetch(where,hint)
68#endif
69
70// the filter does internal tiling to keep memory requirements reasonable, so this structure
71// defines the position of the tile being processed
72typedef struct tile
73{
76
77typedef struct color_image
78{
79 float *data;
82
83// allocate space for n-component image of size width x height
84static inline __attribute__((always_inline)) int new_color_image(color_image *img, int width, int height, int ch)
85{
87 if(IS_NULL_PTR(img->data)) return 1;
88 img->width = width;
89 img->height = height;
90 img->stride = ch;
91 return 0;
92}
93
94// free space for n-component image
95static inline __attribute__((always_inline)) void free_color_image(color_image *img_p)
96{
98 img_p->data = NULL;
99}
100
101// get a pointer to pixel number 'i' within the image
102static inline float *get_color_pixel(color_image img, size_t i)
103{
104 return img.data + i * img.stride;
105}
106
107
// apply guided filter to the single-component image img using the 3-component image imgg as a guide
// the filtering applies a monochrome box filter to a total of 13 image channels:
//    1 monochrome input image
//    3 color guide image
//    3 covariance (R, G, B)
//    6 variance (R-R, R-G, R-B, G-G, G-B, B-B)
// for computational efficiency, we pack them into a four-channel image (input + guide R/G/B) and a
// nine-channel image (covariances R/G/B + variances R-R/R-G/R-B/G-G/G-B/B-B) instead of running
// 13 separate box filters.
117static int guided_filter_tiling(color_image imgg, gray_image img, gray_image img_out, tile target, const int w,
118 const float eps, const float guide_weight, const float min, const float max)
119{
120 const tile source = { max_i(target.left - 2 * w, 0), min_i(target.right + 2 * w, imgg.width),
121 max_i(target.lower - 2 * w, 0), min_i(target.upper + 2 * w, imgg.height) };
122 const int width = source.right - source.left;
123 const int height = source.upper - source.lower;
124 size_t size = (size_t)width * (size_t)height;
125// since we're packing multiple monochrome planes into a color image, define symbolic constants so that
126// we can keep track of which values we're actually using
127#define INP_MEAN 0
128#define GUIDE_MEAN_R 1
129#define GUIDE_MEAN_G 2
130#define GUIDE_MEAN_B 3
131#define COV_R 0
132#define COV_G 1
133#define COV_B 2
134#define VAR_RR 3
135#define VAR_RG 4
136#define VAR_RB 5
137#define VAR_GG 6
138#define VAR_BB 8
139#define VAR_GB 7
140 color_image mean = { 0 };
141 color_image variance = { 0 };
142 if(new_color_image(&mean, width, height, 4) != 0)
143 return 1;
144
145 if(new_color_image(&variance, width, height, 9) != 0)
146 {
147 free_color_image(&mean);
148 return 1;
149 }
150 const size_t img_dimen = mean.width;
151 size_t img_bak_sz;
152 float *img_bak = dt_pixelpipe_cache_alloc_perthread_float(9*img_dimen, &img_bak_sz);
153 if(IS_NULL_PTR(img_bak))
154 {
155 free_color_image(&variance);
156 free_color_image(&mean);
157 return 1;
158 }
159 int err = 0;
161 for(int j_imgg = source.lower; j_imgg < source.upper; j_imgg++)
162 {
163 int j = j_imgg - source.lower;
164 float *const restrict meanpx = mean.data + 4 * j * mean.width;
165 float *const restrict varpx = variance.data + 9 * j * variance.width;
166 for(int i_imgg = source.left; i_imgg < source.right; i_imgg++)
167 {
168 size_t i = i_imgg - source.left;
169 const float *pixel_ = get_color_pixel(imgg, i_imgg + (size_t)j_imgg * imgg.width);
170 dt_aligned_pixel_t pixel =
171 { pixel_[0] * guide_weight, pixel_[1] * guide_weight, pixel_[2] * guide_weight, pixel_[3] * guide_weight };
172 const float input = img.data[i_imgg + (size_t)j_imgg * img.width];
173 meanpx[4*i+INP_MEAN] = input;
174 meanpx[4*i+GUIDE_MEAN_R] = pixel[0];
175 meanpx[4*i+GUIDE_MEAN_G] = pixel[1];
176 meanpx[4*i+GUIDE_MEAN_B] = pixel[2];
177 varpx[9*i+COV_R] = pixel[0] * input;
178 varpx[9*i+COV_G] = pixel[1] * input;
179 varpx[9*i+COV_B] = pixel[2] * input;
180 varpx[9*i+VAR_RR] = pixel[0] * pixel[0];
181 varpx[9*i+VAR_RG] = pixel[0] * pixel[1];
182 varpx[9*i+VAR_RB] = pixel[0] * pixel[2];
183 varpx[9*i+VAR_GG] = pixel[1] * pixel[1];
184 varpx[9*i+VAR_GB] = pixel[1] * pixel[2];
185 varpx[9*i+VAR_BB] = pixel[2] * pixel[2];
186 }
187 // apply horizontal pass of box mean filter while the cache is still hot
188 float *const restrict scratch = dt_get_perthread(img_bak, img_bak_sz);
189 if(IS_NULL_PTR(scratch)
190 || dt_box_mean_horizontal(meanpx, mean.width, 4|BOXFILTER_KAHAN_SUM, w, scratch) != 0
191 || dt_box_mean_horizontal(varpx, variance.width, 9|BOXFILTER_KAHAN_SUM, w, scratch) != 0)
192 {
193#ifdef _OPENMP
194#pragma omp atomic write
195#endif
196 err = 1;
197 }
198 }
200 if(!err && dt_box_mean_vertical(mean.data, mean.height, mean.width, 4|BOXFILTER_KAHAN_SUM, w) != 0)
201 err = 1;
202 if(!err && dt_box_mean_vertical(variance.data, variance.height, variance.width, 9|BOXFILTER_KAHAN_SUM, w) != 0)
203 err = 1;
204
205 if(err)
206 {
207 free_color_image(&variance);
208 free_color_image(&mean);
209 return 1;
210 }
211 // we will recycle memory of 'mean' for the new coefficient arrays a_? and b to reduce memory foot print
212 color_image a_b = mean;
213 #define A_RED 0
214 #define A_GREEN 1
215 #define A_BLUE 2
216 #define B 3
218 for(size_t i = 0; i < size; i++)
219 {
220 const float *meanpx = get_color_pixel(mean, i);
221 const float inp_mean = meanpx[INP_MEAN];
222 const float guide_r = meanpx[GUIDE_MEAN_R];
223 const float guide_g = meanpx[GUIDE_MEAN_G];
224 const float guide_b = meanpx[GUIDE_MEAN_B];
225 float *const varpx = get_color_pixel(variance, i);
226 // solve linear system of equations of size 3x3 via Cramer's rule
227 // symmetric coefficient matrix
228 const float Sigma_0_0 = varpx[VAR_RR] - (guide_r * guide_r) + eps;
229 const float Sigma_0_1 = varpx[VAR_RG] - (guide_r * guide_g);
230 const float Sigma_0_2 = varpx[VAR_RB] - (guide_r * guide_b);
231 const float Sigma_1_1 = varpx[VAR_GG] - (guide_g * guide_g) + eps;;
232 const float Sigma_1_2 = varpx[VAR_GB] - (guide_g * guide_b);
233 const float Sigma_2_2 = varpx[VAR_BB] - (guide_b * guide_b) + eps;
234 const float det0 = Sigma_0_0 * (Sigma_1_1 * Sigma_2_2 - Sigma_1_2 * Sigma_1_2)
235 - Sigma_0_1 * (Sigma_0_1 * Sigma_2_2 - Sigma_0_2 * Sigma_1_2)
236 + Sigma_0_2 * (Sigma_0_1 * Sigma_1_2 - Sigma_0_2 * Sigma_1_1);
237 float a_r_, a_g_, a_b_, b_;
238 if(fabsf(det0) > 4.f * FLT_EPSILON)
239 {
240 const float cov_r = varpx[COV_R] - guide_r * inp_mean;
241 const float cov_g = varpx[COV_G] - guide_g * inp_mean;
242 const float cov_b = varpx[COV_B] - guide_b * inp_mean;
243 const float det1 = cov_r * (Sigma_1_1 * Sigma_2_2 - Sigma_1_2 * Sigma_1_2)
244 - Sigma_0_1 * (cov_g * Sigma_2_2 - cov_b * Sigma_1_2)
245 + Sigma_0_2 * (cov_g * Sigma_1_2 - cov_b * Sigma_1_1);
246 const float det2 = Sigma_0_0 * (cov_g * Sigma_2_2 - cov_b * Sigma_1_2)
247 - cov_r * (Sigma_0_1 * Sigma_2_2 - Sigma_0_2 * Sigma_1_2)
248 + Sigma_0_2 * (Sigma_0_1 * cov_b - Sigma_0_2 * cov_g);
249 const float det3 = Sigma_0_0 * (Sigma_1_1 * cov_b - Sigma_1_2 * cov_g)
250 - Sigma_0_1 * (Sigma_0_1 * cov_b - Sigma_0_2 * cov_g)
251 + cov_r * (Sigma_0_1 * Sigma_1_2 - Sigma_0_2 * Sigma_1_1);
252 a_r_ = det1 / det0;
253 a_g_ = det2 / det0;
254 a_b_ = det3 / det0;
255 b_ = inp_mean - a_r_ * guide_r - a_g_ * guide_g - a_b_ * guide_b;
256 }
257 else
258 {
259 // linear system is singular
260 a_r_ = 0.f;
261 a_g_ = 0.f;
262 a_b_ = 0.f;
263 b_ = get_color_pixel(mean, i)[INP_MEAN];
264 }
265 // now data of imgg_mean_? is no longer needed, we can safely overwrite aliasing arrays
266 a_b.data[4*i+A_RED] = a_r_;
267 a_b.data[4*i+A_GREEN] = a_g_;
268 a_b.data[4*i+A_BLUE] = a_b_;
269 a_b.data[4*i+B] = b_;
270 }
271 free_color_image(&variance);
272
273 if(dt_box_mean(a_b.data, a_b.height, a_b.width, a_b.stride|BOXFILTER_KAHAN_SUM, w, 1))
274 {
275 free_color_image(&mean);
276 return 1;
277 }
279 for(int j_imgg = target.lower; j_imgg < target.upper; j_imgg++)
280 {
281 // index of the left most target pixel in the current row
282 size_t l = target.left + (size_t)j_imgg * imgg.width;
283 // index of the left most source pixel in the current row of the
284 // smaller auxiliary gray-scale images a_r, a_g, a_b, and b
285 // excluding boundary data from neighboring tiles
286 size_t k = (target.left - source.left) + (size_t)(j_imgg - source.lower) * width;
287 for(int i_imgg = target.left; i_imgg < target.right; i_imgg++, k++, l++)
288 {
289 const float *pixel = get_color_pixel(imgg, l);
290 const float *px_ab = get_color_pixel(a_b, k);
291 float res = guide_weight * (px_ab[A_RED] * pixel[0] + px_ab[A_GREEN] * pixel[1] + px_ab[A_BLUE] * pixel[2]);
292 res += px_ab[B];
293 img_out.data[i_imgg + (size_t)j_imgg * imgg.width] = CLAMP(res, min, max);
294 }
295 }
296 free_color_image(&mean);
297 return 0;
298}
299
300static inline __attribute__((always_inline)) int compute_tile_height(const int height, const int w)
301{
302 int tile_h = max_i(3 * w, GF_TILE_SIZE);
303#if 0 // enabling the below doesn't make any measureable speed difference, but does cause a handful of pixels
304 // to round off differently (as does changing GF_TILE_SIZE)
305 if ((height % tile_h) > 0 && (height % tile_h) < GF_TILE_SIZE/3)
306 {
307 // if there's just a sliver left over for the last row of tiles, see whether slicing off a few pixels
308 // gives us a mostly-full tile
309 if (height % (tile_h - 8) >= GF_TILE_SIZE/3)
310 tile_h -= 8;
311 else if (height % (tile_h - w/4) >= GF_TILE_SIZE/3)
312 tile_h -= (w/4);
313 else if (height % (tile_h - w/2) >= GF_TILE_SIZE/3)
314 tile_h -= (w/2);
315 // try adding a few pixels
316 else if (height % (tile_h + 8) >= GF_TILE_SIZE/3)
317 tile_h += 8;
318 else if (height % (tile_h + 16) >= GF_TILE_SIZE/3)
319 tile_h += 16;
320 }
321#endif
322 return tile_h;
323}
324
325static inline __attribute__((always_inline)) int compute_tile_width(const int width, const int w)
326{
327 int tile_w = max_i(3 * w, GF_TILE_SIZE);
328#if 0 // enabling the below doesn't make any measureable speed difference, but does cause a handful of pixels
329 // to round off differently (as does changing GF_TILE_SIZE)
330 if ((width % tile_w) > 0 && (width % tile_w) < GF_TILE_SIZE/2)
331 {
332 // if there's just a sliver left over for the last column of tiles, see whether slicing off a few pixels
333 // gives us a mostly-full tile
334 if (width % (tile_w - 8) >= GF_TILE_SIZE/3)
335 tile_w -= 8;
336 else if (width % (tile_w - w/4) >= GF_TILE_SIZE/3)
337 tile_w -= (w/4);
338 else if (width % (tile_w - w/2) >= GF_TILE_SIZE/3)
339 tile_w -= (w/2);
340 // try adding a few pixels
341 else if (width % (tile_w + 8) >= GF_TILE_SIZE/3)
342 tile_w += 8;
343 else if (width % (tile_w + 16) >= GF_TILE_SIZE/3)
344 tile_w += 16;
345 }
346#endif
347 return tile_w;
348}
349
351int guided_filter(const float *const guide, const float *const in, float *const out, const int width,
352 const int height, const int ch,
353 const int w, // window size
354 const float sqrt_eps, // regularization parameter
355 const float guide_weight, // to balance the amplitudes in the guiding image and the input image
356 const float min, const float max)
357{
358 assert(ch >= 3);
359 assert(w >= 1);
360
361 color_image img_guide = (color_image){ (float *)guide, width, height, ch };
362 gray_image img_in = (gray_image){ (float *)in, width, height };
363 gray_image img_out = (gray_image){ out, width, height };
364 const int tile_width = compute_tile_width(width,w);
365 const int tile_height = compute_tile_height(height,w);
366 const float eps = sqrt_eps * sqrt_eps; // this is the regularization parameter of the original papers
367
368 for(int j = 0; j < height; j += tile_height)
369 {
370 for(int i = 0; i < width; i += tile_width)
371 {
372 tile target = { i, min_i(i + tile_width, width), j, min_i(j + tile_height, height) };
373 if(guided_filter_tiling(img_guide, img_in, img_out, target, w, eps, guide_weight, min, max) != 0)
374 return 1;
375 }
376 }
377 return 0;
378}
379
380#ifdef HAVE_OPENCL
381
383{
384 dt_guided_filter_cl_global_t *g = malloc(sizeof(*g));
385 const int program = 26; // guided_filter.cl, from programs.conf
386 g->kernel_guided_filter_split_rgb = dt_opencl_create_kernel(program, "guided_filter_split_rgb_image");
387 g->kernel_guided_filter_box_mean_x = dt_opencl_create_kernel(program, "guided_filter_box_mean_x");
388 g->kernel_guided_filter_box_mean_y = dt_opencl_create_kernel(program, "guided_filter_box_mean_y");
389 g->kernel_guided_filter_guided_filter_covariances
390 = dt_opencl_create_kernel(program, "guided_filter_covariances");
391 g->kernel_guided_filter_guided_filter_variances = dt_opencl_create_kernel(program, "guided_filter_variances");
392 g->kernel_guided_filter_update_covariance = dt_opencl_create_kernel(program, "guided_filter_update_covariance");
393 g->kernel_guided_filter_solve = dt_opencl_create_kernel(program, "guided_filter_solve");
394 g->kernel_guided_filter_generate_result = dt_opencl_create_kernel(program, "guided_filter_generate_result");
395 return g;
396}
397
398
400{
401 if(IS_NULL_PTR(g)) return;
402 // destroy kernels
403 dt_opencl_free_kernel(g->kernel_guided_filter_split_rgb);
404 dt_opencl_free_kernel(g->kernel_guided_filter_box_mean_x);
405 dt_opencl_free_kernel(g->kernel_guided_filter_box_mean_y);
406 dt_opencl_free_kernel(g->kernel_guided_filter_guided_filter_covariances);
407 dt_opencl_free_kernel(g->kernel_guided_filter_guided_filter_variances);
408 dt_opencl_free_kernel(g->kernel_guided_filter_update_covariance);
409 dt_opencl_free_kernel(g->kernel_guided_filter_solve);
410 dt_opencl_free_kernel(g->kernel_guided_filter_generate_result);
411 dt_free(g);
412}
413
414
415static int cl_split_rgb(const int devid, const int width, const int height, cl_mem guide, cl_mem imgg_r,
416 cl_mem imgg_g, cl_mem imgg_b, const float guide_weight)
417{
419 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(int), &width);
420 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), &height);
421 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(cl_mem), &guide);
422 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(cl_mem), &imgg_r);
423 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), &imgg_g);
424 dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(cl_mem), &imgg_b);
425 dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(float), &guide_weight);
426 const size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
427 return dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
428}
429
430
431static int cl_box_mean(const int devid, const int width, const int height, const int w, cl_mem in, cl_mem out,
432 cl_mem temp)
433{
435 dt_opencl_set_kernel_arg(devid, kernel_x, 0, sizeof(int), &width);
436 dt_opencl_set_kernel_arg(devid, kernel_x, 1, sizeof(int), &height);
437 dt_opencl_set_kernel_arg(devid, kernel_x, 2, sizeof(cl_mem), &in);
438 dt_opencl_set_kernel_arg(devid, kernel_x, 3, sizeof(cl_mem), &temp);
439 dt_opencl_set_kernel_arg(devid, kernel_x, 4, sizeof(int), &w);
440 const size_t sizes_x[] = { 1, ROUNDUPDHT(height, devid), 1 };
441 const int err = dt_opencl_enqueue_kernel_2d(devid, kernel_x, sizes_x);
442 if(err != CL_SUCCESS) return err;
443
445 dt_opencl_set_kernel_arg(devid, kernel_y, 0, sizeof(int), &width);
446 dt_opencl_set_kernel_arg(devid, kernel_y, 1, sizeof(int), &height);
447 dt_opencl_set_kernel_arg(devid, kernel_y, 2, sizeof(cl_mem), &temp);
448 dt_opencl_set_kernel_arg(devid, kernel_y, 3, sizeof(cl_mem), &out);
449 dt_opencl_set_kernel_arg(devid, kernel_y, 4, sizeof(int), &w);
450 const size_t sizes_y[] = { ROUNDUPDWD(width, devid), 1, 1 };
451 return dt_opencl_enqueue_kernel_2d(devid, kernel_y, sizes_y);
452}
453
454
455static int cl_covariances(const int devid, const int width, const int height, cl_mem guide, cl_mem in,
456 cl_mem cov_imgg_img_r, cl_mem cov_imgg_img_g, cl_mem cov_imgg_img_b,
457 const float guide_weight)
458{
460 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(int), &width);
461 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), &height);
462 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(cl_mem), &guide);
463 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(cl_mem), &in);
464 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), &cov_imgg_img_r);
465 dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(cl_mem), &cov_imgg_img_g);
466 dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(cl_mem), &cov_imgg_img_b);
467 dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(float), &guide_weight);
468 const size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
469 return dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
470}
471
472
473static int cl_variances(const int devid, const int width, const int height, cl_mem guide, cl_mem var_imgg_rr,
474 cl_mem var_imgg_rg, cl_mem var_imgg_rb, cl_mem var_imgg_gg, cl_mem var_imgg_gb,
475 cl_mem var_imgg_bb, const float guide_weight)
476{
478 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(int), &width);
479 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), &height);
480 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(cl_mem), &guide);
481 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(cl_mem), &var_imgg_rr);
482 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), &var_imgg_rg);
483 dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(cl_mem), &var_imgg_rb);
484 dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(cl_mem), &var_imgg_gg);
485 dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(cl_mem), &var_imgg_gb);
486 dt_opencl_set_kernel_arg(devid, kernel, 8, sizeof(cl_mem), &var_imgg_bb);
487 dt_opencl_set_kernel_arg(devid, kernel, 9, sizeof(float), &guide_weight);
488 size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
489 return dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
490}
491
492
493static int cl_update_covariance(const int devid, const int width, const int height, cl_mem in, cl_mem out,
494 cl_mem a, cl_mem b, float eps)
495{
497 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(int), &width);
498 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), &height);
499 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(cl_mem), &in);
500 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(cl_mem), &out);
501 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), &a);
502 dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(cl_mem), &b);
503 dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(float), &eps);
504 const size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
505 return dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
506}
507
508
509static int cl_solve(const int devid, const int width, const int height, cl_mem img_mean, cl_mem imgg_mean_r,
510 cl_mem imgg_mean_g, cl_mem imgg_mean_b, cl_mem cov_imgg_img_r, cl_mem cov_imgg_img_g,
511 cl_mem cov_imgg_img_b, cl_mem var_imgg_rr, cl_mem var_imgg_rg, cl_mem var_imgg_rb,
512 cl_mem var_imgg_gg, cl_mem var_imgg_gb, cl_mem var_imgg_bb, cl_mem a_r, cl_mem a_g, cl_mem a_b,
513 cl_mem b)
514{
516 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(int), &width);
517 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), &height);
518 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(cl_mem), &img_mean);
519 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(cl_mem), &imgg_mean_r);
520 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), &imgg_mean_g);
521 dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(cl_mem), &imgg_mean_b);
522 dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(cl_mem), &cov_imgg_img_r);
523 dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(cl_mem), &cov_imgg_img_g);
524 dt_opencl_set_kernel_arg(devid, kernel, 8, sizeof(cl_mem), &cov_imgg_img_b);
525 dt_opencl_set_kernel_arg(devid, kernel, 9, sizeof(cl_mem), &var_imgg_rr);
526 dt_opencl_set_kernel_arg(devid, kernel, 10, sizeof(cl_mem), &var_imgg_rg);
527 dt_opencl_set_kernel_arg(devid, kernel, 11, sizeof(cl_mem), &var_imgg_rb);
528 dt_opencl_set_kernel_arg(devid, kernel, 12, sizeof(cl_mem), &var_imgg_gg);
529 dt_opencl_set_kernel_arg(devid, kernel, 13, sizeof(cl_mem), &var_imgg_gb);
530 dt_opencl_set_kernel_arg(devid, kernel, 14, sizeof(cl_mem), &var_imgg_bb);
531 dt_opencl_set_kernel_arg(devid, kernel, 15, sizeof(cl_mem), &a_r);
532 dt_opencl_set_kernel_arg(devid, kernel, 16, sizeof(cl_mem), &a_g);
533 dt_opencl_set_kernel_arg(devid, kernel, 17, sizeof(cl_mem), &a_b);
534 dt_opencl_set_kernel_arg(devid, kernel, 18, sizeof(cl_mem), &b);
535 const size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
536 return dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
537}
538
539
540static int cl_generate_result(const int devid, const int width, const int height, cl_mem guide, cl_mem a_r,
541 cl_mem a_g, cl_mem a_b, cl_mem b, cl_mem out, const float guide_weight,
542 const float min, const float max)
543{
545 dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(int), &width);
546 dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(int), &height);
547 dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(cl_mem), &guide);
548 dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(cl_mem), &a_r);
549 dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), &a_g);
550 dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(cl_mem), &a_b);
551 dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(cl_mem), &b);
552 dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(cl_mem), &out);
553 dt_opencl_set_kernel_arg(devid, kernel, 8, sizeof(float), &guide_weight);
554 dt_opencl_set_kernel_arg(devid, kernel, 9, sizeof(float), &min);
555 dt_opencl_set_kernel_arg(devid, kernel, 10, sizeof(float), &max);
556 const size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
557 return dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
558}
559
560
561static int guided_filter_cl_impl(int devid, cl_mem guide, cl_mem in, cl_mem out, const int width, const int height,
562 const int ch,
563 const int w, // window size
564 const float sqrt_eps, // regularization parameter
565 const float guide_weight, // to balance the amplitudes in the guiding image and
566 // the input// image
567 const float min, const float max)
568{
569 const float eps = sqrt_eps * sqrt_eps; // this is the regularization parameter of the original papers
570
571 void *temp1 = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
572 void *temp2 = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
573 void *imgg_mean_r = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
574 void *imgg_mean_g = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
575 void *imgg_mean_b = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
576 void *img_mean = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
577 void *cov_imgg_img_r = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
578 void *cov_imgg_img_g = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
579 void *cov_imgg_img_b = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
580 void *var_imgg_rr = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
581 void *var_imgg_gg = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
582 void *var_imgg_bb = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
583 void *var_imgg_rg = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
584 void *var_imgg_rb = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
585 void *var_imgg_gb = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
586 void *a_r = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
587 void *a_g = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
588 void *a_b = dt_opencl_alloc_device(devid, width, height, (int)sizeof(float));
589 void *b = temp2;
590
591 int err = CL_SUCCESS;
592 if(IS_NULL_PTR(temp1) || IS_NULL_PTR(temp2) || //
593 IS_NULL_PTR(imgg_mean_r) || IS_NULL_PTR(imgg_mean_g) || IS_NULL_PTR(imgg_mean_b) || IS_NULL_PTR(img_mean) || //
594 IS_NULL_PTR(cov_imgg_img_r) || IS_NULL_PTR(cov_imgg_img_g) || IS_NULL_PTR(cov_imgg_img_b) || //
595 IS_NULL_PTR(var_imgg_rr) || IS_NULL_PTR(var_imgg_gg) || IS_NULL_PTR(var_imgg_bb) || //
596 IS_NULL_PTR(var_imgg_rg) || IS_NULL_PTR(var_imgg_rb) || IS_NULL_PTR(var_imgg_gb) || //
597 IS_NULL_PTR(a_r) || IS_NULL_PTR(a_g) || IS_NULL_PTR(a_b))
598 {
599 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
600 goto error;
601 }
602
603 err = cl_split_rgb(devid, width, height, guide, imgg_mean_r, imgg_mean_g, imgg_mean_b, guide_weight);
604 if(err != CL_SUCCESS) goto error;
605
606 err = cl_box_mean(devid, width, height, w, in, img_mean, temp1);
607 if(err != CL_SUCCESS) goto error;
608 err = cl_box_mean(devid, width, height, w, imgg_mean_r, imgg_mean_r, temp1);
609 if(err != CL_SUCCESS) goto error;
610 err = cl_box_mean(devid, width, height, w, imgg_mean_g, imgg_mean_g, temp1);
611 if(err != CL_SUCCESS) goto error;
612 err = cl_box_mean(devid, width, height, w, imgg_mean_b, imgg_mean_b, temp1);
613 if(err != CL_SUCCESS) goto error;
614
615 err = cl_covariances(devid, width, height, guide, in, cov_imgg_img_r, cov_imgg_img_g, cov_imgg_img_b,
616 guide_weight);
617 if(err != CL_SUCCESS) goto error;
618
619 err = cl_variances(devid, width, height, guide, var_imgg_rr, var_imgg_rg, var_imgg_rb, var_imgg_gg, var_imgg_gb,
620 var_imgg_bb, guide_weight);
621 if(err != CL_SUCCESS) goto error;
622
623 err = cl_box_mean(devid, width, height, w, cov_imgg_img_r, temp2, temp1);
624 if(err != CL_SUCCESS) goto error;
625 err = cl_update_covariance(devid, width, height, temp2, cov_imgg_img_r, imgg_mean_r, img_mean, 0.f);
626 if(err != CL_SUCCESS) goto error;
627 err = cl_box_mean(devid, width, height, w, cov_imgg_img_g, temp2, temp1);
628 if(err != CL_SUCCESS) goto error;
629 err = cl_update_covariance(devid, width, height, temp2, cov_imgg_img_g, imgg_mean_g, img_mean, 0.f);
630 if(err != CL_SUCCESS) goto error;
631 err = cl_box_mean(devid, width, height, w, cov_imgg_img_b, temp2, temp1);
632 if(err != CL_SUCCESS) goto error;
633 err = cl_update_covariance(devid, width, height, temp2, cov_imgg_img_b, imgg_mean_b, img_mean, 0.f);
634 if(err != CL_SUCCESS) goto error;
635 err = cl_box_mean(devid, width, height, w, var_imgg_rr, temp2, temp1);
636 if(err != CL_SUCCESS) goto error;
637 err = cl_update_covariance(devid, width, height, temp2, var_imgg_rr, imgg_mean_r, imgg_mean_r, eps);
638 if(err != CL_SUCCESS) goto error;
639 err = cl_box_mean(devid, width, height, w, var_imgg_rg, temp2, temp1);
640 if(err != CL_SUCCESS) goto error;
641 err = cl_update_covariance(devid, width, height, temp2, var_imgg_rg, imgg_mean_r, imgg_mean_g, 0.f);
642 if(err != CL_SUCCESS) goto error;
643 err = cl_box_mean(devid, width, height, w, var_imgg_rb, temp2, temp1);
644 if(err != CL_SUCCESS) goto error;
645 err = cl_update_covariance(devid, width, height, temp2, var_imgg_rb, imgg_mean_r, imgg_mean_b, 0.f);
646 if(err != CL_SUCCESS) goto error;
647 err = cl_box_mean(devid, width, height, w, var_imgg_gg, temp2, temp1);
648 if(err != CL_SUCCESS) goto error;
649 err = cl_update_covariance(devid, width, height, temp2, var_imgg_gg, imgg_mean_g, imgg_mean_g, eps);
650 if(err != CL_SUCCESS) goto error;
651 err = cl_box_mean(devid, width, height, w, var_imgg_gb, temp2, temp1);
652 if(err != CL_SUCCESS) goto error;
653 err = cl_update_covariance(devid, width, height, temp2, var_imgg_gb, imgg_mean_g, imgg_mean_b, 0.f);
654 if(err != CL_SUCCESS) goto error;
655 err = cl_box_mean(devid, width, height, w, var_imgg_bb, temp2, temp1);
656 if(err != CL_SUCCESS) goto error;
657 err = cl_update_covariance(devid, width, height, temp2, var_imgg_bb, imgg_mean_b, imgg_mean_b, eps);
658 if(err != CL_SUCCESS) goto error;
659
660 err = cl_solve(devid, width, height, img_mean, imgg_mean_r, imgg_mean_g, imgg_mean_b, cov_imgg_img_r,
661 cov_imgg_img_g, cov_imgg_img_b, var_imgg_rr, var_imgg_rg, var_imgg_rb, var_imgg_gg, var_imgg_gb,
662 var_imgg_bb, a_r, a_g, a_b, b);
663 if(err != CL_SUCCESS) goto error;
664
665 err = cl_box_mean(devid, width, height, w, a_r, a_r, temp1);
666 if(err != CL_SUCCESS) goto error;
667 err = cl_box_mean(devid, width, height, w, a_g, a_g, temp1);
668 if(err != CL_SUCCESS) goto error;
669 err = cl_box_mean(devid, width, height, w, a_b, a_b, temp1);
670 if(err != CL_SUCCESS) goto error;
671 err = cl_box_mean(devid, width, height, w, b, b, temp1);
672 if(err != CL_SUCCESS) goto error;
673
674 err = cl_generate_result(devid, width, height, guide, a_r, a_g, a_b, b, out, guide_weight, min, max);
675
676error:
680 dt_opencl_release_mem_object(var_imgg_rr);
681 dt_opencl_release_mem_object(var_imgg_rg);
682 dt_opencl_release_mem_object(var_imgg_rb);
683 dt_opencl_release_mem_object(var_imgg_gg);
684 dt_opencl_release_mem_object(var_imgg_gb);
685 dt_opencl_release_mem_object(var_imgg_bb);
686 dt_opencl_release_mem_object(cov_imgg_img_r);
687 dt_opencl_release_mem_object(cov_imgg_img_g);
688 dt_opencl_release_mem_object(cov_imgg_img_b);
690 dt_opencl_release_mem_object(imgg_mean_r);
691 dt_opencl_release_mem_object(imgg_mean_g);
692 dt_opencl_release_mem_object(imgg_mean_b);
695
696 return err;
697}
698
699
700static int guided_filter_cl_fallback(int devid, cl_mem guide, cl_mem in, cl_mem out, const int width,
701 const int height, const int ch,
702 const int w, // window size
703 const float sqrt_eps, // regularization parameter
704 const float guide_weight, // to balance the amplitudes in the guiding image
705 // and the input// image
706 const float min, const float max)
707{
708 // fall-back implementation: copy data from device memory to host memory and perform filter
709 // by CPU until there is a proper OpenCL implementation
711 width * height * ch,
712 0);
714 width * height,
715 0);
717 width * height,
718 0);
719 if(!guide_host || IS_NULL_PTR(in_host) || IS_NULL_PTR(out_host))
720 {
724 return 1;
725 }
726 int err;
727 err = dt_opencl_read_host_from_device(devid, guide_host, guide, width, height, ch * sizeof(float));
728 if(err != CL_SUCCESS) goto error;
729 err = dt_opencl_read_host_from_device(devid, in_host, in, width, height, sizeof(float));
730 if(err != CL_SUCCESS) goto error;
731 if(guided_filter(guide_host, in_host, out_host, width, height, ch, w, sqrt_eps, guide_weight, min, max) != 0)
732 {
733 err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
734 goto error;
735 }
736 err = dt_opencl_write_host_to_device(devid, out_host, out, width, height, sizeof(float));
737 if(err != CL_SUCCESS) goto error;
738error:
742 return err == CL_SUCCESS ? 0 : 1;
743}
744
745
746int guided_filter_cl(int devid, cl_mem guide, cl_mem in, cl_mem out, const int width, const int height,
747 const int ch,
748 const int w, // window size
749 const float sqrt_eps, // regularization parameter
750 const float guide_weight, // to balance the amplitudes in the guiding image and the input
751 // image
752 const float min, const float max)
753{
754 assert(ch >= 3);
755 assert(w >= 1);
756
757 const gboolean fits = dt_opencl_image_fits_device(devid, width, height, sizeof(float), 18.0f, 0);
758 if(!fits)
759 dt_print(DT_DEBUG_OPENCL, "[guided filter] fall back to cpu implementation due to insufficient gpu memory\n");
760
761
762 int err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
763 if(fits)
764 err = guided_filter_cl_impl(devid, guide, in, out, width, height, ch, w, sqrt_eps, guide_weight, min, max);
765 if(err != CL_SUCCESS)
766 {
767 if(guided_filter_cl_fallback(devid, guide, in, out, width, height, ch, w, sqrt_eps, guide_weight, min, max) != 0)
768 return 1;
769 }
770 return 0;
771}
772
773#endif
774// clang-format off
775// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
776// vim: shiftwidth=2 expandtab tabstop=2 cindent
777// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
778// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
int dt_box_mean_horizontal(float *const restrict buf, const size_t width, const int ch, const int radius, float *const restrict user_scratch)
int dt_box_mean(float *const buf, const size_t height, const size_t width, const int ch, const int radius, const unsigned iterations)
int dt_box_mean_vertical(float *const buf, const size_t height, const size_t width, const int ch, const int radius)
#define BOXFILTER_KAHAN_SUM
Definition box_filters.h:36
static const float const float const float min
const float max
const dt_colormatrix_t dt_aligned_pixel_t out
darktable_t darktable
Definition darktable.c:181
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1542
@ DT_DEBUG_OPENCL
Definition darktable.h:722
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:447
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
Definition darktable.h:524
#define dt_free(ptr)
Definition darktable.h:456
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __DT_CLONE_TARGETS__
Definition darktable.h:367
#define dt_get_perthread(buf, padsize)
Definition darktable.h:1035
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define dt_pixelpipe_cache_alloc_perthread_float(n, padded_size)
Definition darktable.h:1030
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
#define B
static int cl_generate_result(const int devid, const int width, const int height, cl_mem guide, cl_mem a_r, cl_mem a_g, cl_mem a_b, cl_mem b, cl_mem out, const float guide_weight, const float min, const float max)
#define VAR_GG
#define COV_R
#define GUIDE_MEAN_R
#define A_GREEN
static int cl_variances(const int devid, const int width, const int height, cl_mem guide, cl_mem var_imgg_rr, cl_mem var_imgg_rg, cl_mem var_imgg_rb, cl_mem var_imgg_gg, cl_mem var_imgg_gb, cl_mem var_imgg_bb, const float guide_weight)
static int cl_box_mean(const int devid, const int width, const int height, const int w, cl_mem in, cl_mem out, cl_mem temp)
#define GUIDE_MEAN_B
static int guided_filter_cl_fallback(int devid, cl_mem guide, cl_mem in, cl_mem out, const int width, const int height, const int ch, const int w, const float sqrt_eps, const float guide_weight, const float min, const float max)
#define VAR_RR
#define COV_B
dt_guided_filter_cl_global_t * dt_guided_filter_init_cl_global()
#define A_BLUE
#define GF_TILE_SIZE
static int cl_split_rgb(const int devid, const int width, const int height, cl_mem guide, cl_mem imgg_r, cl_mem imgg_g, cl_mem imgg_b, const float guide_weight)
#define VAR_GB
static float * get_color_pixel(color_image img, size_t i)
#define GUIDE_MEAN_G
void dt_guided_filter_free_cl_global(dt_guided_filter_cl_global_t *g)
static int cl_solve(const int devid, const int width, const int height, cl_mem img_mean, cl_mem imgg_mean_r, cl_mem imgg_mean_g, cl_mem imgg_mean_b, cl_mem cov_imgg_img_r, cl_mem cov_imgg_img_g, cl_mem cov_imgg_img_b, cl_mem var_imgg_rr, cl_mem var_imgg_rg, cl_mem var_imgg_rb, cl_mem var_imgg_gg, cl_mem var_imgg_gb, cl_mem var_imgg_bb, cl_mem a_r, cl_mem a_g, cl_mem a_b, cl_mem b)
static int cl_update_covariance(const int devid, const int width, const int height, cl_mem in, cl_mem out, cl_mem a, cl_mem b, float eps)
#define COV_G
struct tile tile
static int cl_covariances(const int devid, const int width, const int height, cl_mem guide, cl_mem in, cl_mem cov_imgg_img_r, cl_mem cov_imgg_img_g, cl_mem cov_imgg_img_b, const float guide_weight)
static __DT_CLONE_TARGETS__ int guided_filter_tiling(color_image imgg, gray_image img, gray_image img_out, tile target, const int w, const float eps, const float guide_weight, const float min, const float max)
#define VAR_RB
#define A_RED
int guided_filter_cl(int devid, cl_mem guide, cl_mem in, cl_mem out, const int width, const int height, const int ch, const int w, const float sqrt_eps, const float guide_weight, const float min, const float max)
#define INP_MEAN
static int guided_filter_cl_impl(int devid, cl_mem guide, cl_mem in, cl_mem out, const int width, const int height, const int ch, const int w, const float sqrt_eps, const float guide_weight, const float min, const float max)
#define VAR_RG
__DT_CLONE_TARGETS__ int guided_filter(const float *const guide, const float *const in, float *const out, const int width, const int height, const int ch, const int w, const float sqrt_eps, const float guide_weight, const float min, const float max)
#define VAR_BB
static int max_i(int a, int b)
static int min_i(int a, int b)
static float kernel(const float *x, const float *y)
float *const restrict const size_t k
float *const restrict const size_t const size_t ch
size_t size
Definition mipmap_cache.c:3
float dt_aligned_pixel_t[4]
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:2136
void * dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
Definition opencl.c:2471
int dt_opencl_create_kernel(const int prog, const char *name)
Definition opencl.c:2030
gboolean dt_opencl_image_fits_device(const int devid, const size_t width, const size_t height, const unsigned bpp, const float factor, const size_t overhead)
Definition opencl.c:2683
void dt_opencl_free_kernel(const int kernel)
Definition opencl.c:2073
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:2127
int dt_opencl_read_host_from_device(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2169
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2383
int dt_opencl_write_host_to_device(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2216
#define ROUNDUPDHT(a, b)
Definition opencl.h:82
#define ROUNDUPDWD(a, b)
Definition opencl.h:81
#define eps
Definition rcd.c:81
struct dt_opencl_t * opencl
Definition darktable.h:785
struct dt_guided_filter_cl_global_t * guided_filter
Definition opencl.h:275
float * data
int lower
int left
int right
int upper