Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
gaussian.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2012, 2014, 2016-2017 Ulrich Pegelow.
4 Copyright (C) 2013-2016 Tobias Ellinghaus.
5 Copyright (C) 2014, 2016 Roman Lebedev.
6 Copyright (C) 2016 johannes hanika.
7 Copyright (C) 2019 Andreas Schneider.
8 Copyright (C) 2019, 2025-2026 Aurélien PIERRE.
9 Copyright (C) 2020-2021 Hubert Kowalski.
10 Copyright (C) 2020 Pascal Obry.
11 Copyright (C) 2020-2021 Ralf Brown.
12 Copyright (C) 2022 Hanno Schwalm.
13 Copyright (C) 2022 Martin Bařinka.
14
15 darktable is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 darktable is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with darktable. If not, see <http://www.gnu.org/licenses/>.
27*/
28
29
30#include "common/darktable.h"
31#include <assert.h>
32#include <math.h>
33#include "common/gaussian.h"
34#include "common/math.h"
35#include "common/opencl.h"
36
37#define BLOCKSIZE (1 << 6)
38
39static inline __attribute__((always_inline)) void compute_gauss_params(const float sigma, dt_gaussian_order_t order, float *a0, float *a1,
40 float *a2, float *a3, float *b1, float *b2, float *coefp, float *coefn)
41{
42 const float alpha = 1.695f / sigma;
43 const float ema = expf(-alpha);
44 const float ema2 = expf(-2.0f * alpha);
45 *b1 = -2.0f * ema;
46 *b2 = ema2;
47 *a0 = 0.0f;
48 *a1 = 0.0f;
49 *a2 = 0.0f;
50 *a3 = 0.0f;
51 *coefp = 0.0f;
52 *coefn = 0.0f;
53
54 switch(order)
55 {
56 default:
58 {
59 const float k = (1.0f - ema) * (1.0f - ema) / (1.0f + (2.0f * alpha * ema) - ema2);
60 *a0 = k;
61 *a1 = k * (alpha - 1.0f) * ema;
62 *a2 = k * (alpha + 1.0f) * ema;
63 *a3 = -k * ema2;
64 }
65 break;
66
68 {
69 *a0 = (1.0f - ema) * (1.0f - ema);
70 *a1 = 0.0f;
71 *a2 = -*a0;
72 *a3 = 0.0f;
73 }
74 break;
75
77 {
78 const float k = -(ema2 - 1.0f) / (2.0f * alpha * ema);
79 float kn = -2.0f * (-1.0f + (3.0f * ema) - (3.0f * ema * ema) + (ema * ema * ema));
80 kn /= ((3.0f * ema) + 1.0f + (3.0f * ema * ema) + (ema * ema * ema));
81 *a0 = kn;
82 *a1 = -kn * (1.0f + (k * alpha)) * ema;
83 *a2 = kn * (1.0f - (k * alpha)) * ema;
84 *a3 = -kn * ema2;
85 }
86 }
87
88 *coefp = (*a0 + *a1) / (1.0f + *b1 + *b2);
89 *coefn = (*a2 + *a3) / (1.0f + *b1 + *b2);
90}
91
/*
 * Size in bytes of one float buffer holding width x height pixels with
 * `channels` floats per pixel. The product is evaluated in size_t.
 */
size_t dt_gaussian_memory_use(const int width,    // width of input image
                              const int height,   // height of input image
                              const int channels) // channels per pixel
{
  return sizeof(float) * (size_t)channels * (size_t)width * (size_t)height;
}
98
99#ifdef HAVE_OPENCL
100size_t dt_gaussian_memory_use_cl(const int width, // width of input image
101 const int height, // height of input image
102 const int channels) // channels per pixel
103{
104 return sizeof(float) * channels * (width + BLOCKSIZE) * (height + BLOCKSIZE) * 2;
105}
106#endif /* HAVE_OPENCL */
107
/*
 * Size in bytes of a single float working buffer for a width x height image
 * with `channels` floats per pixel. When built with HAVE_OPENCL both
 * dimensions are padded by BLOCKSIZE first; otherwise the exact image size is used.
 */
size_t dt_gaussian_singlebuffer_size(const int width,    // width of input image
                                     const int height,   // height of input image
                                     const int channels) // channels per pixel
{
#ifdef HAVE_OPENCL
  // pad in size_t so the additions cannot overflow as signed ints
  return sizeof(float) * (size_t)channels * ((size_t)width + BLOCKSIZE) * ((size_t)height + BLOCKSIZE);
#else
  return sizeof(float) * (size_t)channels * (size_t)width * (size_t)height;
#endif
}
120
121
122dt_gaussian_t *dt_gaussian_init(const int width, // width of input image
123 const int height, // height of input image
124 const int channels, // channels per pixel
125 const float *max, // maximum allowed values per channel for clamping
126 const float *min, // minimum allowed values per channel for clamping
127 const float sigma, // gaussian sigma
128 const int order) // order of gaussian blur
129{
130 dt_gaussian_t *g = (dt_gaussian_t *)malloc(sizeof(dt_gaussian_t));
131 if(IS_NULL_PTR(g)) return NULL;
132
133 g->width = width;
134 g->height = height;
135 g->channels = channels;
136 g->sigma = sigma;
137 g->order = order;
138 g->buf = NULL;
139 g->max = (float *)calloc(channels, sizeof(float));
140 g->min = (float *)calloc(channels, sizeof(float));
141
142 if(IS_NULL_PTR(g->min) || IS_NULL_PTR(g->max)) goto error;
143
144 for(int k = 0; k < channels; k++)
145 {
146 g->max[k] = max[k];
147 g->min[k] = min[k];
148 }
149
150 g->buf = dt_pixelpipe_cache_alloc_align_float_cache((size_t)channels * width * height, 0);
151 if(IS_NULL_PTR(g->buf)) goto error;
152
153 return g;
154
155error:
157 if(g->max)
158 {
159 dt_free(g->max);
160 }
161 if(g->min)
162 {
163 dt_free(g->min);
164 }
165 dt_free(g);
166 return NULL;
167}
168
169
171void dt_gaussian_blur(dt_gaussian_t *g, const float *const in, float *const out)
172{
173
174 const int width = g->width;
175 const int height = g->height;
176 const int ch = MIN(4, g->channels); // just to appease zealous compiler warnings about stack usage
177
178 float a0, a1, a2, a3, b1, b2, coefp, coefn;
179
180 compute_gauss_params(g->sigma, g->order, &a0, &a1, &a2, &a3, &b1, &b2, &coefp, &coefn);
181
182 float *temp = g->buf;
183
184 float *Labmax = g->max;
185 float *Labmin = g->min;
186
187// vertical blur column by column
189 for(int i = 0; i < width; i++)
190 {
191 dt_aligned_pixel_t xp = {0.0f};
192 dt_aligned_pixel_t yb = {0.0f};
193 dt_aligned_pixel_t yp = {0.0f};
194
195 // forward filter
196 for(int k = 0; k < ch; k++)
197 {
198 xp[k] = CLAMPF(in[(size_t)i * ch + k], Labmin[k], Labmax[k]);
199 yb[k] = xp[k] * coefp;
200 yp[k] = yb[k];
201 }
202
203 dt_aligned_pixel_t xc = {0.0f};
204 dt_aligned_pixel_t yc = {0.0f};
205 dt_aligned_pixel_t xn = {0.0f};
206 dt_aligned_pixel_t xa = {0.0f};
207 dt_aligned_pixel_t yn = {0.0f};
208 dt_aligned_pixel_t ya = {0.0f};
209 for(int j = 0; j < height; j++)
210 {
211 size_t offset = ((size_t)j * width + i) * ch;
212
213 for(int k = 0; k < ch; k++)
214 {
215 xc[k] = CLAMPF(in[offset + k], Labmin[k], Labmax[k]);
216 yc[k] = (a0 * xc[k]) + (a1 * xp[k]) - (b1 * yp[k]) - (b2 * yb[k]);
217
218 temp[offset + k] = yc[k];
219
220 xp[k] = xc[k];
221 yb[k] = yp[k];
222 yp[k] = yc[k];
223 }
224 }
225
226 // backward filter
227 for(int k = 0; k < ch; k++)
228 {
229 xn[k] = CLAMPF(in[((size_t)(height - 1) * width + i) * ch + k], Labmin[k], Labmax[k]);
230 xa[k] = xn[k];
231 yn[k] = xn[k] * coefn;
232 ya[k] = yn[k];
233 }
234
235 for(int j = height - 1; j > -1; j--)
236 {
237 size_t offset = ((size_t)j * width + i) * ch;
238
239 for(int k = 0; k < ch; k++)
240 {
241 xc[k] = CLAMPF(in[offset + k], Labmin[k], Labmax[k]);
242
243 yc[k] = (a2 * xn[k]) + (a3 * xa[k]) - (b1 * yn[k]) - (b2 * ya[k]);
244
245 xa[k] = xn[k];
246 xn[k] = xc[k];
247 ya[k] = yn[k];
248 yn[k] = yc[k];
249
250 temp[offset + k] += yc[k];
251 }
252 }
253 }
254
255// horizontal blur line by line
257 for(int j = 0; j < height; j++)
258 {
259 dt_aligned_pixel_t xp = {0.0f};
260 dt_aligned_pixel_t yb = {0.0f};
261 dt_aligned_pixel_t yp = {0.0f};
262
263 // forward filter
264 for(int k = 0; k < ch; k++)
265 {
266 xp[k] = CLAMPF(temp[(size_t)j * width * ch + k], Labmin[k], Labmax[k]);
267 yb[k] = xp[k] * coefp;
268 yp[k] = yb[k];
269 }
270
271 dt_aligned_pixel_t xc = {0.0f};
272 dt_aligned_pixel_t yc = {0.0f};
273 dt_aligned_pixel_t xn = {0.0f};
274 dt_aligned_pixel_t xa = {0.0f};
275 dt_aligned_pixel_t yn = {0.0f};
276 dt_aligned_pixel_t ya = {0.0f};
277
278 for(int i = 0; i < width; i++)
279 {
280 size_t offset = ((size_t)j * width + i) * ch;
281
282 for(int k = 0; k < ch; k++)
283 {
284 xc[k] = CLAMPF(temp[offset + k], Labmin[k], Labmax[k]);
285 yc[k] = (a0 * xc[k]) + (a1 * xp[k]) - (b1 * yp[k]) - (b2 * yb[k]);
286
287 out[offset + k] = yc[k];
288
289 xp[k] = xc[k];
290 yb[k] = yp[k];
291 yp[k] = yc[k];
292 }
293 }
294
295 // backward filter
296 for(int k = 0; k < ch; k++)
297 {
298 xn[k] = CLAMPF(temp[((size_t)(j + 1) * width - 1) * ch + k], Labmin[k], Labmax[k]);
299 xa[k] = xn[k];
300 yn[k] = xn[k] * coefn;
301 ya[k] = yn[k];
302 }
303
304 for(int i = width - 1; i > -1; i--)
305 {
306 size_t offset = ((size_t)j * width + i) * ch;
307
308 for(int k = 0; k < ch; k++)
309 {
310 xc[k] = CLAMPF(temp[offset + k], Labmin[k], Labmax[k]);
311
312 yc[k] = (a2 * xn[k]) + (a3 * xa[k]) - (b1 * yn[k]) - (b2 * ya[k]);
313
314 xa[k] = xn[k];
315 xn[k] = xc[k];
316 ya[k] = yn[k];
317 yn[k] = yc[k];
318
319 out[offset + k] += yc[k];
320 }
321 }
322 }
323}
324
325void dt_gaussian_blur_4c(dt_gaussian_t *g, const float *const in, float *const out)
326{
327 return dt_gaussian_blur(g, in, out);
328}
329
331{
332 if(IS_NULL_PTR(g)) return;
334 dt_free(g->min);
335 dt_free(g->max);
336 dt_free(g);
337}
338
339
340#ifdef HAVE_OPENCL
342{
344
345 const int program = 6; // gaussian.cl, from programs.conf
346 g->kernel_gaussian_column_1c = dt_opencl_create_kernel(program, "gaussian_column_1c");
347 g->kernel_gaussian_transpose_1c = dt_opencl_create_kernel(program, "gaussian_transpose_1c");
348 g->kernel_gaussian_column_4c = dt_opencl_create_kernel(program, "gaussian_column_4c");
349 g->kernel_gaussian_transpose_4c = dt_opencl_create_kernel(program, "gaussian_transpose_4c");
350 return g;
351}
352
354{
355 if(IS_NULL_PTR(g)) return;
356 dt_free(g->min);
357 dt_free(g->max);
358 // free device mem
361 dt_free(g);
362}
363
365 const int width, // width of input image
366 const int height, // height of input image
367 const int channels, // channels per pixel
368 const float *max, // maximum allowed values per channel for clamping
369 const float *min, // minimum allowed values per channel for clamping
370 const float sigma, // gaussian sigma
371 const int order) // order of gaussian blur
372{
373 assert(channels == 1 || channels == 4);
374
375 if(!(channels == 1 || channels == 4)) return NULL;
376
378 if(IS_NULL_PTR(g)) return NULL;
379
381 g->devid = devid;
382 g->width = width;
383 g->height = height;
384 g->channels = channels;
385 g->sigma = sigma;
386 g->order = order;
387 g->dev_temp1 = NULL;
388 g->dev_temp2 = NULL;
389 g->max = (float *)calloc(channels, sizeof(float));
390 g->min = (float *)calloc(channels, sizeof(float));
391
392 if(IS_NULL_PTR(g->min) || IS_NULL_PTR(g->max)) goto error;
393
394 for(int k = 0; k < channels; k++)
395 {
396 g->max[k] = max[k];
397 g->min[k] = min[k];
398 }
399
400 int kernel_gaussian_transpose = (channels == 1) ? g->global->kernel_gaussian_transpose_1c
401 : g->global->kernel_gaussian_transpose_4c;
402 int blocksize;
403
405 = (dt_opencl_local_buffer_t){ .xoffset = 1, .xfactor = 1, .yoffset = 0, .yfactor = 1,
406 .cellsize = channels * sizeof(float), .overhead = 0,
407 .sizex = BLOCKSIZE, .sizey = BLOCKSIZE };
408
409 if(dt_opencl_local_buffer_opt(devid, kernel_gaussian_transpose, &locopt))
410 blocksize = MIN(locopt.sizex, locopt.sizey);
411 else
412 blocksize = 1;
413
414 // width and height of intermediate buffers. Need to be multiples of blocksize
415 const size_t bwidth = ROUNDUP(width, blocksize);
416 const size_t bheight = ROUNDUP(height, blocksize);
417
418 g->blocksize = blocksize;
419 g->bwidth = bwidth;
420 g->bheight = bheight;
421
422 // get intermediate vector buffers with read-write access
423 g->dev_temp1 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * channels * bwidth * bheight);
424 if(IS_NULL_PTR(g->dev_temp1)) goto error;
425 g->dev_temp2 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * channels * bwidth * bheight);
426 if(IS_NULL_PTR(g->dev_temp2)) goto error;
427
428 return g;
429
430error:
431 dt_free(g->min);
432 dt_free(g->max);
435 g->dev_temp1 = g->dev_temp2 = NULL;
436 dt_free(g);
437 return NULL;
438}
439
440
441cl_int dt_gaussian_blur_cl(dt_gaussian_cl_t *g, cl_mem dev_in, cl_mem dev_out)
442{
443 cl_int err = -999;
444 const int devid = g->devid;
445
446 const int width = g->width;
447 const int height = g->height;
448 const int channels = g->channels;
449 const size_t bpp = sizeof(float) * channels;
450 cl_mem dev_temp1 = g->dev_temp1;
451 cl_mem dev_temp2 = g->dev_temp2;
452
453 const int blocksize = g->blocksize;
454 const int bwidth = g->bwidth;
455 const int bheight = g->bheight;
456
457 dt_aligned_pixel_t Labmax = { 0.0f };
458 dt_aligned_pixel_t Labmin = { 0.0f };
459
460 for(int k = 0; k < MIN(channels, 4); k++)
461 {
462 Labmax[k] = g->max[k];
463 Labmin[k] = g->min[k];
464 }
465
466 int kernel_gaussian_column = -1;
467 int kernel_gaussian_transpose = -1;
468
469 if(channels == 1)
470 {
471 kernel_gaussian_column = g->global->kernel_gaussian_column_1c;
472 kernel_gaussian_transpose = g->global->kernel_gaussian_transpose_1c;
473 }
474 else if(channels == 4)
475 {
476 kernel_gaussian_column = g->global->kernel_gaussian_column_4c;
477 kernel_gaussian_transpose = g->global->kernel_gaussian_transpose_4c;
478 }
479 else
480 return err;
481
482 size_t origin[] = { 0, 0, 0 };
483 size_t region[] = { width, height, 1 };
484 size_t local[] = { blocksize, blocksize, 1 };
485 size_t sizes[3];
486
487 // compute gaussian parameters
488 float a0, a1, a2, a3, b1, b2, coefp, coefn;
489 compute_gauss_params(g->sigma, g->order, &a0, &a1, &a2, &a3, &b1, &b2, &coefp, &coefn);
490
491 // copy dev_in to intermediate buffer dev_temp1
492 err = dt_opencl_enqueue_copy_image_to_buffer(devid, dev_in, dev_temp1, origin, region, 0);
493 if(err != CL_SUCCESS) return err;
494
495 // first blur step: column by column with dev_temp1 -> dev_temp2
496 sizes[0] = ROUNDUPDWD(width, devid);
497 sizes[1] = 1;
498 sizes[2] = 1;
499 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 0, sizeof(cl_mem), (void *)&dev_temp1);
500 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 1, sizeof(cl_mem), (void *)&dev_temp2);
501 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 2, sizeof(int), (void *)&width);
502 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 3, sizeof(int), (void *)&height);
503 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 4, sizeof(float), (void *)&a0);
504 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 5, sizeof(float), (void *)&a1);
505 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 6, sizeof(float), (void *)&a2);
506 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 7, sizeof(float), (void *)&a3);
507 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 8, sizeof(float), (void *)&b1);
508 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 9, sizeof(float), (void *)&b2);
509 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 10, sizeof(float), (void *)&coefp);
510 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 11, sizeof(float), (void *)&coefn);
511 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 12, sizeof(float) * channels, (void *)&Labmax);
512 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 13, sizeof(float) * channels, (void *)&Labmin);
513 err = dt_opencl_enqueue_kernel_2d(devid, kernel_gaussian_column, sizes);
514 if(err != CL_SUCCESS) return err;
515
516 // intermediate step: transpose dev_temp2 -> dev_temp1
517 sizes[0] = bwidth;
518 sizes[1] = bheight;
519 sizes[2] = 1;
520 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 0, sizeof(cl_mem), (void *)&dev_temp2);
521 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 1, sizeof(cl_mem), (void *)&dev_temp1);
522 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 2, sizeof(int), (void *)&width);
523 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 3, sizeof(int), (void *)&height);
524 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 4, sizeof(int), (void *)&blocksize);
525 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 5, bpp * blocksize * (blocksize + 1), NULL);
526 err = dt_opencl_enqueue_kernel_2d_with_local(devid, kernel_gaussian_transpose, sizes, local);
527 if(err != CL_SUCCESS) return err;
528
529
530 // second blur step: column by column of transposed image with dev_temp1 -> dev_temp2 (!! height <-> width
531 // !!)
532 sizes[0] = ROUNDUPDHT(height, devid);
533 sizes[1] = 1;
534 sizes[2] = 1;
535 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 0, sizeof(cl_mem), (void *)&dev_temp1);
536 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 1, sizeof(cl_mem), (void *)&dev_temp2);
537 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 2, sizeof(int), (void *)&height);
538 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 3, sizeof(int), (void *)&width);
539 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 4, sizeof(float), (void *)&a0);
540 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 5, sizeof(float), (void *)&a1);
541 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 6, sizeof(float), (void *)&a2);
542 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 7, sizeof(float), (void *)&a3);
543 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 8, sizeof(float), (void *)&b1);
544 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 9, sizeof(float), (void *)&b2);
545 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 10, sizeof(float), (void *)&coefp);
546 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 11, sizeof(float), (void *)&coefn);
547 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 12, sizeof(float) * channels, (void *)&Labmax);
548 dt_opencl_set_kernel_arg(devid, kernel_gaussian_column, 13, sizeof(float) * channels, (void *)&Labmin);
549 err = dt_opencl_enqueue_kernel_2d(devid, kernel_gaussian_column, sizes);
550 if(err != CL_SUCCESS) return err;
551
552
553 // transpose back dev_temp2 -> dev_temp1
554 sizes[0] = bheight;
555 sizes[1] = bwidth;
556 sizes[2] = 1;
557 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 0, sizeof(cl_mem), (void *)&dev_temp2);
558 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 1, sizeof(cl_mem), (void *)&dev_temp1);
559 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 2, sizeof(int), (void *)&height);
560 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 3, sizeof(int), (void *)&width);
561 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 4, sizeof(int), (void *)&blocksize);
562 dt_opencl_set_kernel_arg(devid, kernel_gaussian_transpose, 5, bpp * blocksize * (blocksize + 1), NULL);
563 err = dt_opencl_enqueue_kernel_2d_with_local(devid, kernel_gaussian_transpose, sizes, local);
564 if(err != CL_SUCCESS) return err;
565
566 // finally produce output in dev_out
567 err = dt_opencl_enqueue_copy_buffer_to_image(devid, dev_temp1, dev_out, 0, origin, region);
568 if(err != CL_SUCCESS) return err;
569
570 return CL_SUCCESS;
571}
572
573
575{
576 if(IS_NULL_PTR(g)) return;
577 // destroy kernels
578 dt_opencl_free_kernel(g->kernel_gaussian_column_1c);
579 dt_opencl_free_kernel(g->kernel_gaussian_transpose_1c);
580 dt_opencl_free_kernel(g->kernel_gaussian_column_4c);
581 dt_opencl_free_kernel(g->kernel_gaussian_transpose_4c);
582 dt_free(g);
583}
584
585#endif
586// clang-format off
587// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
588// vim: shiftwidth=2 expandtab tabstop=2 cindent
589// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
590// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
static const float const float const float min
const float max
const dt_colormatrix_t dt_aligned_pixel_t out
darktable_t darktable
Definition darktable.c:181
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:447
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
Definition darktable.h:524
#define dt_free(ptr)
Definition darktable.h:456
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __DT_CLONE_TARGETS__
Definition darktable.h:367
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
void dt_gaussian_free(dt_gaussian_t *g)
Definition gaussian.c:330
void dt_gaussian_free_cl(dt_gaussian_cl_t *g)
Definition gaussian.c:353
size_t dt_gaussian_memory_use_cl(const int width, const int height, const int channels)
Definition gaussian.c:100
void dt_gaussian_free_cl_global(dt_gaussian_cl_global_t *g)
Definition gaussian.c:574
size_t dt_gaussian_singlebuffer_size(const int width, const int height, const int channels)
Definition gaussian.c:108
dt_gaussian_cl_global_t * dt_gaussian_init_cl_global()
Definition gaussian.c:341
cl_int dt_gaussian_blur_cl(dt_gaussian_cl_t *g, cl_mem dev_in, cl_mem dev_out)
Definition gaussian.c:441
__DT_CLONE_TARGETS__ void dt_gaussian_blur(dt_gaussian_t *g, const float *const in, float *const out)
Definition gaussian.c:171
void dt_gaussian_blur_4c(dt_gaussian_t *g, const float *const in, float *const out)
Definition gaussian.c:325
dt_gaussian_cl_t * dt_gaussian_init_cl(const int devid, const int width, const int height, const int channels, const float *max, const float *min, const float sigma, const int order)
Definition gaussian.c:364
dt_gaussian_t * dt_gaussian_init(const int width, const int height, const int channels, const float *max, const float *min, const float sigma, const int order)
Definition gaussian.c:122
size_t dt_gaussian_memory_use(const int width, const int height, const int channels)
Definition gaussian.c:92
#define BLOCKSIZE
Definition gaussian.c:37
dt_gaussian_order_t
Definition gaussian.h:32
@ DT_IOP_GAUSSIAN_TWO
Definition gaussian.h:35
@ DT_IOP_GAUSSIAN_ONE
Definition gaussian.h:34
@ DT_IOP_GAUSSIAN_ZERO
Definition gaussian.h:33
int bpp
float *const restrict const size_t k
float *const restrict const size_t const size_t ch
#define CLAMPF(a, mn, mx)
Definition math.h:89
float dt_aligned_pixel_t[4]
int dt_opencl_local_buffer_opt(const int devid, const int kernel, dt_opencl_local_buffer_t *factors)
Definition opencl.c:3156
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:2136
void * dt_opencl_alloc_device_buffer(const int devid, const size_t size)
Definition opencl.c:2544
int dt_opencl_enqueue_copy_buffer_to_image(const int devid, cl_mem src_buffer, cl_mem dst_image, size_t offset, size_t *origin, size_t *region)
Definition opencl.c:2284
int dt_opencl_create_kernel(const int prog, const char *name)
Definition opencl.c:2030
void dt_opencl_free_kernel(const int kernel)
Definition opencl.c:2073
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:2127
int dt_opencl_enqueue_copy_image_to_buffer(const int devid, cl_mem src_image, cl_mem dst_buffer, size_t *origin, size_t *region, size_t offset)
Definition opencl.c:2272
int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.c:2142
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2383
#define ROUNDUP(a, n)
Definition opencl.h:78
#define ROUNDUPDHT(a, b)
Definition opencl.h:82
#define ROUNDUPDWD(a, b)
Definition opencl.h:81
const float sigma
struct dt_opencl_t * opencl
Definition darktable.h:785
dt_gaussian_cl_global_t * global
Definition gaussian.h:76
struct dt_gaussian_cl_global_t * gaussian
Definition opencl.h:257
#define MIN(a, b)
Definition thinplate.c:32