Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
bilateralcl.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2012 johannes hanika.
4 Copyright (C) 2012-2014, 2016 Tobias Ellinghaus.
5 Copyright (C) 2012, 2014, 2016-2017 Ulrich Pegelow.
6 Copyright (C) 2016 Roman Lebedev.
7 Copyright (C) 2019 Marcus Rückert.
8 Copyright (C) 2020 Hubert Kowalski.
9 Copyright (C) 2020 Pascal Obry.
10 Copyright (C) 2020 Ralf Brown.
11 Copyright (C) 2022 Hanno Schwalm.
12 Copyright (C) 2022 Martin Bařinka.
13
14 darktable is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18
19 darktable is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with darktable. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28#ifdef HAVE_OPENCL
29
30#include "common/bilateral.h"
31#include "common/bilateralcl.h"
32#include "common/darktable.h" // for CLAMPS, dt_print, darktable, darktable_t
33#include "common/opencl.h" // for dt_opencl_set_kernel_arg, dt_opencl_cr...
34#include <glib.h> // for MAX
35#include <math.h> // for roundf
36#include <stdlib.h> // for free, malloc
37
39{
41
42 const int program = 10; // bilateral.cl, from programs.conf
43 b->kernel_zero = dt_opencl_create_kernel(program, "zero");
44 b->kernel_splat = dt_opencl_create_kernel(program, "splat");
45 b->kernel_blur_line = dt_opencl_create_kernel(program, "blur_line");
46 b->kernel_blur_line_z = dt_opencl_create_kernel(program, "blur_line_z");
47 b->kernel_slice = dt_opencl_create_kernel(program, "slice");
48 b->kernel_slice2 = dt_opencl_create_kernel(program, "slice_to_output");
49 return b;
50}
51
53{
54 if(IS_NULL_PTR(b)) return;
55 // free device mem
57 dt_opencl_release_mem_object(b->dev_grid_tmp);
58 dt_free(b);
59}
60
61
62// modules that want to use dt_bilateral_slice_to_output_cl() ought to take this one;
63// takes account of an additional temp buffer needed in the OpenCL code path
65 const int height,
66 const float sigma_s,
67 const float sigma_r)
68{
69 return dt_bilateral_memory_use(width, height, sigma_s, sigma_r) + sizeof(float) * 4 * width * height;
70}
71
72// modules that want to use dt_bilateral_slice_to_output_cl() ought to take this one;
73// returns the larger of the grid's single-buffer size and the temp image buffer
73// (4 floats per pixel) needed in the OpenCL code path
75 const int height,
76 const float sigma_s,
77 const float sigma_r)
78{
79 return MAX(dt_bilateral_singlebuffer_size(width, height, sigma_s, sigma_r), sizeof(float) * 4 * width * height);
80}
81
82
84 const int width, // width of input image
85 const int height, // height of input image
86 const float sigma_s, // spatial sigma (blur pixel coords)
87 const float sigma_r) // range sigma (blur luma values)
88{
90 = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1,
91 .cellsize = 8 * sizeof(float) + sizeof(int), .overhead = 0,
92 .sizex = 1 << 6, .sizey = 1 << 6 };
93
95 {
97 "[opencl_bilateral] can not identify resource limits for device %d in bilateral grid\n", devid);
98 return NULL;
99 }
100
101 if(locopt.sizex * locopt.sizey < 16 * 16)
102 {
104 "[opencl_bilateral] device %d does not offer sufficient resources to run bilateral grid\n",
105 devid);
106 return NULL;
107 }
108
110 if(IS_NULL_PTR(b)) return NULL;
111
112 b->global = darktable.opencl->bilateral;
113 b->width = width;
114 b->height = height;
115 b->blocksizex = locopt.sizex;
116 b->blocksizey = locopt.sizey;
117 b->devid = devid;
118 b->dev_grid = NULL;
119 b->dev_grid_tmp = NULL;
122 b->size_x = b2.size_x;
123 b->size_y = b2.size_y;
124 b->size_z = b2.size_z;
125 b->sigma_s = b2.sigma_s;
126 b->sigma_r = b2.sigma_r;
127
128 // alloc grid buffer:
129 b->dev_grid
130 = dt_opencl_alloc_device_buffer(b->devid, sizeof(float) * b->size_x * b->size_y * b->size_z);
131 if(!b->dev_grid)
132 {
134 return NULL;
135 }
136
137 // alloc temporary grid buffer
138 b->dev_grid_tmp
139 = dt_opencl_alloc_device_buffer(b->devid, sizeof(float) * b->size_x * b->size_y * b->size_z);
140 if(!b->dev_grid_tmp)
141 {
143 return NULL;
144 }
145
146 // zero out grid
147 int wd = b->size_x, ht = b->size_y * b->size_z;
148 size_t sizes[] = { ROUNDUPDWD(wd, b->devid), ROUNDUPDHT(ht, b->devid), 1 };
149 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 0, sizeof(cl_mem), (void *)&b->dev_grid);
150 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 1, sizeof(int), (void *)&wd);
151 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 2, sizeof(int), (void *)&ht);
152 cl_int err = -666;
153 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_zero, sizes);
154 if(err != CL_SUCCESS)
155 {
157 return NULL;
158 }
159
160#if 0
161 fprintf(stderr, "[bilateral] created grid [%d %d %d]"
162 " with sigma (%f %f) (%f %f)\n", b->size_x, b->size_y, b->size_z,
163 b->sigma_s, sigma_s, b->sigma_r, sigma_r);
164#endif
165 return b;
166}
167
169{
170 cl_int err = -666;
171 size_t sizes[] = { ROUNDUP(b->width, b->blocksizex), ROUNDUP(b->height, b->blocksizey), 1 };
172 size_t local[] = { b->blocksizex, b->blocksizey, 1 };
173 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 0, sizeof(cl_mem), (void *)&in);
174 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 1, sizeof(cl_mem), (void *)&b->dev_grid);
175 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 2, sizeof(int), (void *)&b->width);
176 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 3, sizeof(int), (void *)&b->height);
177 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 4, sizeof(int), (void *)&b->size_x);
178 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 5, sizeof(int), (void *)&b->size_y);
179 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 6, sizeof(int), (void *)&b->size_z);
180 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 7, sizeof(float), (void *)&b->sigma_s);
181 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 8, sizeof(float), (void *)&b->sigma_r);
182 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 9, b->blocksizex * b->blocksizey * sizeof(int),
183 NULL);
184 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 10,
185 b->blocksizex * b->blocksizey * 8 * sizeof(float), NULL);
186 err = dt_opencl_enqueue_kernel_2d_with_local(b->devid, b->global->kernel_splat, sizes, local);
187 return err;
188}
189
191{
192 cl_int err = -666;
193 size_t sizes[3] = { 0, 0, 1 };
194
195 err = dt_opencl_enqueue_copy_buffer_to_buffer(b->devid, b->dev_grid, b->dev_grid_tmp, 0, 0,
196 b->size_x * b->size_y * b->size_z * sizeof(float));
197 if(err != CL_SUCCESS) return err;
198
199 sizes[0] = ROUNDUPDWD(b->size_z, b->devid);
200 sizes[1] = ROUNDUPDHT(b->size_y, b->devid);
201 int stride1, stride2, stride3;
202 stride1 = b->size_x * b->size_y;
203 stride2 = b->size_x;
204 stride3 = 1;
205 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 0, sizeof(cl_mem), (void *)&b->dev_grid_tmp);
206 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 1, sizeof(cl_mem), (void *)&b->dev_grid);
207 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 2, sizeof(int), (void *)&stride1);
208 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 3, sizeof(int), (void *)&stride2);
209 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 4, sizeof(int), (void *)&stride3);
210 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 5, sizeof(int), (void *)&b->size_z);
211 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 6, sizeof(int), (void *)&b->size_y);
212 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 7, sizeof(int), (void *)&b->size_x);
213 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line, sizes);
214 if(err != CL_SUCCESS) return err;
215
216 stride1 = b->size_x * b->size_y;
217 stride2 = 1;
218 stride3 = b->size_x;
219 sizes[0] = ROUNDUPDWD(b->size_z, b->devid);
220 sizes[1] = ROUNDUPDHT(b->size_x, b->devid);
221 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 0, sizeof(cl_mem), (void *)&b->dev_grid);
222 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 1, sizeof(cl_mem), (void *)&b->dev_grid_tmp);
223 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 2, sizeof(int), (void *)&stride1);
224 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 3, sizeof(int), (void *)&stride2);
225 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 4, sizeof(int), (void *)&stride3);
226 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 5, sizeof(int), (void *)&b->size_z);
227 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 6, sizeof(int), (void *)&b->size_x);
228 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 7, sizeof(int), (void *)&b->size_y);
229 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line, sizes);
230 if(err != CL_SUCCESS) return err;
231
232 stride1 = 1;
233 stride2 = b->size_x;
234 stride3 = b->size_x * b->size_y;
235 sizes[0] = ROUNDUPDWD(b->size_x, b->devid);
236 sizes[1] = ROUNDUPDHT(b->size_y, b->devid);
237 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 0, sizeof(cl_mem),
238 (void *)&b->dev_grid_tmp);
239 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 1, sizeof(cl_mem), (void *)&b->dev_grid);
240 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 2, sizeof(int), (void *)&stride1);
241 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 3, sizeof(int), (void *)&stride2);
242 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 4, sizeof(int), (void *)&stride3);
243 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 5, sizeof(int), (void *)&b->size_x);
244 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 6, sizeof(int), (void *)&b->size_y);
245 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 7, sizeof(int), (void *)&b->size_z);
246 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line_z, sizes);
247 return err;
248}
249
250cl_int dt_bilateral_slice_to_output_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
251{
252 cl_int err = -666;
253 cl_mem tmp = NULL;
254
255 tmp = dt_opencl_alloc_device(b->devid, b->width, b->height, sizeof(float) * 4);
256 if(IS_NULL_PTR(tmp)) goto error;
257
258 size_t origin[] = { 0, 0, 0 };
259 size_t region[] = { b->width, b->height, 1 };
260 err = dt_opencl_enqueue_copy_image(b->devid, out, tmp, origin, origin, region);
261 if(err != CL_SUCCESS) goto error;
262
263 size_t sizes[] = { ROUNDUPDWD(b->width, b->devid), ROUNDUPDHT(b->height, b->devid), 1 };
264 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 0, sizeof(cl_mem), (void *)&in);
265 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 1, sizeof(cl_mem), (void *)&tmp);
266 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 2, sizeof(cl_mem), (void *)&out);
267 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 3, sizeof(cl_mem), (void *)&b->dev_grid);
268 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 4, sizeof(int), (void *)&b->width);
269 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 5, sizeof(int), (void *)&b->height);
270 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 6, sizeof(int), (void *)&b->size_x);
271 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 7, sizeof(int), (void *)&b->size_y);
272 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 8, sizeof(int), (void *)&b->size_z);
273 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 9, sizeof(float), (void *)&b->sigma_s);
274 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 10, sizeof(float), (void *)&b->sigma_r);
275 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 11, sizeof(float), (void *)&detail);
276 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_slice2, sizes);
277
279 return err;
280
281error:
283 return err;
284}
285
286cl_int dt_bilateral_slice_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
287{
288 cl_int err = -666;
289 size_t sizes[] = { ROUNDUPDWD(b->width, b->devid), ROUNDUPDHT(b->height, b->devid), 1 };
290 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 0, sizeof(cl_mem), (void *)&in);
291 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 1, sizeof(cl_mem), (void *)&out);
292 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 2, sizeof(cl_mem), (void *)&b->dev_grid);
293 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 3, sizeof(int), (void *)&b->width);
294 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 4, sizeof(int), (void *)&b->height);
295 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 5, sizeof(int), (void *)&b->size_x);
296 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 6, sizeof(int), (void *)&b->size_y);
297 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 7, sizeof(int), (void *)&b->size_z);
298 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 8, sizeof(float), (void *)&b->sigma_s);
299 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 9, sizeof(float), (void *)&b->sigma_r);
300 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 10, sizeof(float), (void *)&detail);
301 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_slice, sizes);
302 return err;
303}
304
306{
307 if(IS_NULL_PTR(b)) return;
308 // destroy kernels
309 dt_opencl_free_kernel(b->kernel_zero);
310 dt_opencl_free_kernel(b->kernel_splat);
311 dt_opencl_free_kernel(b->kernel_blur_line);
312 dt_opencl_free_kernel(b->kernel_blur_line_z);
313 dt_opencl_free_kernel(b->kernel_slice);
314 dt_opencl_free_kernel(b->kernel_slice2);
315 dt_free(b);
316}
317
318#endif
319
320// clang-format off
321// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
322// vim: shiftwidth=2 expandtab tabstop=2 cindent
323// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
324// clang-format on
325
static void error(char *msg)
Definition ashift_lsd.c:202
void dt_bilateral_grid_size(dt_bilateral_t *b, const int width, const int height, const float L_range, float sigma_s, const float sigma_r)
Definition bilateral.c:45
size_t dt_bilateral_memory_use(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateral.c:74
size_t dt_bilateral_singlebuffer_size(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateral.c:102
int width
Definition bilateral.h:1
float sigma_s
Definition bilateral.h:3
int height
Definition bilateral.h:1
float sigma_r
Definition bilateral.h:3
size_t dt_bilateral_memory_use2(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateralcl.c:64
dt_bilateral_cl_global_t * dt_bilateral_init_cl_global()
Definition bilateralcl.c:38
void dt_bilateral_free_cl(dt_bilateral_cl_t *b)
Definition bilateralcl.c:52
size_t dt_bilateral_singlebuffer_size2(const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateralcl.c:74
void dt_bilateral_free_cl_global(dt_bilateral_cl_global_t *b)
cl_int dt_bilateral_slice_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
cl_int dt_bilateral_slice_to_output_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
dt_bilateral_cl_t * dt_bilateral_init_cl(const int devid, const int width, const int height, const float sigma_s, const float sigma_r)
Definition bilateralcl.c:83
cl_int dt_bilateral_blur_cl(dt_bilateral_cl_t *b)
cl_int dt_bilateral_splat_cl(dt_bilateral_cl_t *b, cl_mem in)
const dt_colormatrix_t dt_aligned_pixel_t out
darktable_t darktable
Definition darktable.c:181
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1542
@ DT_DEBUG_OPENCL
Definition darktable.h:722
#define dt_free(ptr)
Definition darktable.h:456
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
int dt_opencl_local_buffer_opt(const int devid, const int kernel, dt_opencl_local_buffer_t *factors)
Definition opencl.c:3156
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:2136
void * dt_opencl_alloc_device_buffer(const int devid, const size_t size)
Definition opencl.c:2544
void * dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
Definition opencl.c:2471
int dt_opencl_create_kernel(const int prog, const char *name)
Definition opencl.c:2030
int dt_opencl_enqueue_copy_image(const int devid, cl_mem src, cl_mem dst, size_t *orig_src, size_t *orig_dst, size_t *region)
Definition opencl.c:2261
void dt_opencl_free_kernel(const int kernel)
Definition opencl.c:2073
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:2127
int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.c:2142
int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid, cl_mem src_buffer, cl_mem dst_buffer, size_t srcoffset, size_t dstoffset, size_t size)
Definition opencl.c:2296
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2383
#define ROUNDUP(a, n)
Definition opencl.h:78
#define ROUNDUPDHT(a, b)
Definition opencl.h:82
#define ROUNDUPDWD(a, b)
Definition opencl.h:81
struct dt_opencl_t * opencl
Definition darktable.h:785
size_t size_y
Definition bilateral.h:33
size_t size_z
Definition bilateral.h:33
size_t size_x
Definition bilateral.h:33
struct dt_bilateral_cl_global_t * bilateral
Definition opencl.h:254
#define MAX(a, b)
Definition thinplate.c:29