Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
imagebuf.c
Go to the documentation of this file.
/*
    This file is part of darktable,
    Copyright (C) 2020-2021 Ralf Brown.
    Copyright (C) 2021-2022 Pascal Obry.
    Copyright (C) 2022 Martin Bařinka.
    Copyright (C) 2023, 2025-2026 Aurélien PIERRE.

    darktable is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    darktable is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
*/
21
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include "common/imagebuf.h"
24
#ifdef _OPENMP
// Size threshold, in floats, used by the image helpers in this file: an operation is only run
// through an OpenMP-parallel loop when it touches more than this many floats (linear blend
// uses half of this value); smaller operations stay on the serial path.
static size_t parallel_imgop_minimum = 500000;
#endif
28
29// Allocate one or more buffers as detailed in the given parameters. If any allocation fails, free all of them,
30// set the module's trouble flag, and return 1 (0 on success).
32 const struct dt_iop_roi_t *const roi_in,
33 const struct dt_iop_roi_t *const roi_out, ...)
34{
35 int err = 0;
36 va_list args;
37 // first pass: zero out all of the given buffer pointers
38 va_start(args,roi_out);
39 while (TRUE)
40 {
41 const int size = va_arg(args,int);
42 float **bufptr = va_arg(args,float**);
44 (void)va_arg(args,size_t*); // skip the extra pointer for per-thread allocations
45 if (size == 0 || IS_NULL_PTR(bufptr)) // end of arg list?
46 break;
47 *bufptr = NULL;
48 }
49 va_end(args);
50
51 // second pass: attempt to allocate the requested buffers
52 va_start(args,roi_out);
53 while (!err)
54 {
55 const int size = va_arg(args,int);
56 float **bufptr = va_arg(args,float**);
57 size_t *paddedsize = (size & DT_IMGSZ_PERTHREAD) ? va_arg(args,size_t*) : NULL;
58 if (size == 0 || IS_NULL_PTR(bufptr))
59 break;
60 const size_t channels = size & DT_IMGSZ_CH_MASK;
61 size_t nfloats;
63 {
65 nfloats = channels * roi_out->width * roi_out->height;
66 break;
68 nfloats = channels * roi_out->height;
69 break;
71 nfloats = channels * roi_out->width;
72 break;
74 nfloats = channels * MAX(roi_out->width, roi_out->height);
75 break;
77 nfloats = channels * roi_in->width * roi_in->height;
78 break;
80 nfloats = channels * roi_in->height;
81 break;
83 nfloats = channels * roi_in->width;
84 break;
86 nfloats = channels * MAX(roi_in->width, roi_in->height);
87 break;
88 default:
89 nfloats = 0;
90 break;
91 }
93 {
94 *bufptr = dt_pixelpipe_cache_alloc_perthread_float(nfloats,paddedsize);
95 if ((size & DT_IMGSZ_CLEARBUF) && *bufptr)
96 memset(*bufptr, 0, *paddedsize * darktable.num_openmp_threads * sizeof(float));
97 }
98 else
99 {
100 *bufptr = dt_pixelpipe_cache_alloc_align_float_cache(nfloats, 0);
101 if ((size & DT_IMGSZ_CLEARBUF) && *bufptr)
102 memset(*bufptr, 0, nfloats * sizeof(float));
103 }
104 if (!*bufptr)
105 {
106 err = 1;
107 break;
108 }
109 }
110 va_end(args);
111
112 // finally, check whether successful and clean up if something went wrong
113 if (err)
114 {
115 va_start(args,roi_out);
116 while (TRUE)
117 {
118 const int size = va_arg(args,int);
119 float **bufptr = va_arg(args,float**);
121 (void)va_arg(args,size_t*); // skip the extra pointer for per-thread allocations
122 if (size == 0 || IS_NULL_PTR(bufptr) || !*bufptr)
123 break; // end of arg list or this attempted allocation failed
125 *bufptr = NULL;
126 }
127 va_end(args);
128 // set the module's trouble flag
129 }
130 return err;
131}
132
133
// Copy an image buffer, specifying the number of floats it contains.  Use of this function is to be
// preferred over a bare memcpy both because it helps document the purpose of the code and because it
// gives us a single point where we can optimize performance on different architectures.
//
//   out      destination buffer, at least nfloats floats; must not overlap `in`
//   in       source buffer, at least nfloats floats
//   nfloats  number of floats to copy
//
// In OpenMP builds the parallel path declares both pointers as 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_copy(float *const __restrict__ out, const float *const __restrict__ in, const size_t nfloats)
{
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the copy big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(in, out : 16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      out[k] = in[k];
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
  memcpy(out, in, nfloats * sizeof(float));
}
155
156// Copy an image buffer, specifying the regions of interest. The output RoI may be larger than the input RoI,
157// in which case the result is optionally padded with zeros. If the output RoI is smaller than the input RoI,
158// only a portion of the input buffer will be copied.
159void dt_iop_copy_image_roi(float *const __restrict__ out, const float *const __restrict__ in, const size_t ch,
160 const dt_iop_roi_t *const __restrict__ roi_in,
161 const dt_iop_roi_t *const __restrict__ roi_out, const int zero_pad)
162{
163 if (roi_in->width == roi_out->width && roi_in->height == roi_out->height)
164 {
165 // fast path, just copy the entire contents of the buffer
166 dt_iop_image_copy_by_size(out, in, roi_out->width, roi_out->height, ch);
167 }
168 else if (roi_in->width <= roi_out->width && roi_in->height <= roi_out->height)
169 {
170 // output needs padding
171 fprintf(stderr,"copy_image_roi with larger output not yet implemented\n");
172 //TODO
173 }
174 else if (roi_in->width >= roi_out->width && roi_in->height >= roi_out->height)
175 {
176 // copy only a portion of the input
177 fprintf(stderr,"copy_image_roi with smaller output not yet implemented\n");
178 //TODO
179 }
180 else
181 {
182 // inconsistent RoIs!!
183 fprintf(stderr,"copy_image_roi called with inconsistent RoI!\n");
184 //TODO
185 }
186}
187
// Copy an image buffer while multiplying every value by a constant: buf[k] = scale * src[k].
//
//   buf                destination buffer of width*height*ch floats; must not overlap `src`
//   src                source buffer of width*height*ch floats
//   scale              factor applied to every float
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// In OpenMP builds both pointers are declared 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_scaled_copy(float *const restrict buf, const float *const restrict src, const float scale,
                              const size_t width, const size_t height, const size_t ch)
{
  const size_t nfloats = width * height * ch;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the copy big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(buf, src : 16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      buf[k] = scale * src[k];
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
#ifdef _OPENMP
#pragma omp simd aligned(buf, src : 16)
#endif
  for(size_t k = 0; k < nfloats; k++)
    buf[k] = scale * src[k];
}
212
// Set every float of an image buffer to the same value.
//
//   buf                buffer of width*height*ch floats
//   fill_value         value stored into every float
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// Large buffers (OpenMP builds only) are split into one contiguous chunk per thread, using at most
// 16 threads; each chunk is a whole number of 4-float vectors and the last one is clipped to the
// buffer size.  Small buffers use memset when the fill value compares equal to zero, otherwise a
// plain loop.  In OpenMP builds `buf` is declared 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_fill(float *const buf, const float fill_value, const size_t width, const size_t height,
                       const size_t ch)
{
  const size_t nfloats = width * height * ch;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the fill big enough to outweigh threading overhead?
  {
    const size_t nthreads = MIN(16,darktable.num_openmp_threads);
    // determine the number of 4-float vectors to be processed by each thread
    const size_t chunksize = (((nfloats + nthreads - 1) / nthreads) + 3) / 4;
#pragma omp parallel for default(firstprivate) num_threads(nthreads)
    for(size_t chunk = 0; chunk < nthreads; chunk++)
    {
#pragma omp simd aligned(buf:16)
      for(size_t k = 4 * chunk * chunksize; k < MIN(4*(chunk+1)*chunksize, nfloats); k++)
        buf[k] = fill_value;
    }
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
  if(fill_value == 0.0f)
  {
    // take advantage of compiler intrinsic which is hopefully highly optimized
    memset(buf, 0, sizeof(float) * nfloats);
  }
  else
  {
#ifdef _OPENMP
#pragma omp simd aligned(buf:16)
#endif
    for(size_t k = 0; k < nfloats; k++)
      buf[k] = fill_value;
  }
}
249
// Add a constant to every float of an image buffer, in place: buf[k] += add_value.
//
//   buf                buffer of width*height*ch floats, modified in place
//   add_value          value added to every float
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// In OpenMP builds `buf` is declared 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_add_const(float *const buf, const float add_value, const size_t width, const size_t height,
                            const size_t ch)
{
  const size_t nfloats = width * height * ch;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the operation big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      buf[k] += add_value;
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
#ifdef _OPENMP
#pragma omp simd aligned(buf:16)
#endif
  for(size_t k = 0; k < nfloats; k++)
    buf[k] += add_value;
}
274
// Add another image to an image buffer elementwise, in place: buf[k] += other_image[k].
//
//   buf                buffer of width*height*ch floats, modified in place
//   other_image        buffer of width*height*ch floats that is added to `buf`
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// In OpenMP builds both pointers are declared 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_add_image(float *const buf, const float* const other_image,
                            const size_t width, const size_t height, const size_t ch)
{
  const size_t nfloats = width * height * ch;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the operation big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(buf, other_image : 16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      buf[k] += other_image[k];
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
#ifdef _OPENMP
#pragma omp simd aligned(buf, other_image : 16)
#endif
  for(size_t k = 0; k < nfloats; k++)
    buf[k] += other_image[k];
}
299
// Subtract another image from an image buffer elementwise, in place: buf[k] -= other_image[k].
//
//   buf                buffer of width*height*ch floats, modified in place
//   other_image        buffer of width*height*ch floats that is subtracted from `buf`
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// In OpenMP builds both pointers are declared 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_sub_image(float *const buf, const float* const other_image,
                            const size_t width, const size_t height, const size_t ch)
{
  const size_t nfloats = width * height * ch;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the operation big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(buf, other_image : 16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      buf[k] -= other_image[k];
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
#ifdef _OPENMP
#pragma omp simd aligned(buf, other_image : 16)
#endif
  for(size_t k = 0; k < nfloats; k++)
    buf[k] -= other_image[k];
}
324
// Invert an image buffer in place relative to a maximum value: buf[k] = max_value - buf[k].
//
//   buf                buffer of width*height*ch floats, modified in place
//   max_value          value each float is subtracted from
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// In OpenMP builds `buf` is declared 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_invert(float *const buf, const float max_value, const size_t width, const size_t height,
                         const size_t ch)
{
  const size_t nfloats = width * height * ch;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the operation big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      buf[k] = max_value - buf[k];
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
#ifdef _OPENMP
#pragma omp simd aligned(buf:16)
#endif
  for(size_t k = 0; k < nfloats; k++)
    buf[k] = max_value - buf[k];
}
349
// Multiply every float of an image buffer by a constant, in place: buf[k] *= mul_value.
//
//   buf                buffer of width*height*ch floats, modified in place
//   mul_value          factor applied to every float
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// In OpenMP builds `buf` is declared 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_mul_const(float *const buf, const float mul_value, const size_t width, const size_t height,
                            const size_t ch)
{
  const size_t nfloats = width * height * ch;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the operation big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      buf[k] *= mul_value;
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
#ifdef _OPENMP
#pragma omp simd aligned(buf:16)
#endif
  for(size_t k = 0; k < nfloats; k++)
    buf[k] *= mul_value;
}
374
// Divide every float of an image buffer by a constant, in place: buf[k] /= div_value.
//
//   buf                buffer of width*height*ch floats, modified in place
//   div_value          divisor applied to every float; it is not checked against zero
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// In OpenMP builds `buf` is declared 16-byte aligned.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_div_const(float *const buf, const float div_value, const size_t width, const size_t height,
                            const size_t ch)
{
  const size_t nfloats = width * height * ch;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum) // is the operation big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      buf[k] /= div_value;
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
#ifdef _OPENMP
#pragma omp simd aligned(buf:16)
#endif
  for(size_t k = 0; k < nfloats; k++)
    buf[k] /= div_value;
}
399
// Blend another image into an image buffer, elementwise and in place:
//   buf[k] = lambda*buf[k] + (1-lambda)*other[k]
//
//   buf                buffer of width*height*ch floats, modified in place; must not overlap `other`
//   lambda             weight of the existing contents of `buf`; `other` is weighted by 1-lambda
//   other              buffer of width*height*ch floats blended into `buf`
//   width, height, ch  image dimensions and floats per pixel; only their product is used
//
// The parallel path starts at half the usual size threshold.  In OpenMP builds `buf` is declared
// 16-byte aligned; no alignment is declared for `other`.
// NOTE(review): the symbol index declares this function with __DT_CLONE_TARGETS__; that line is absent
// from this listing -- confirm and restore it ahead of the signature if the build relies on it.
void dt_iop_image_linear_blend(float *const restrict buf, const float lambda, const float *const restrict other,
                               const size_t width, const size_t height, const size_t ch)
{
  const size_t nfloats = width * height * ch;
  const float lambda_1 = 1.0f - lambda;
#ifdef _OPENMP
  if(nfloats > parallel_imgop_minimum/2) // is the task big enough to outweigh threading overhead?
  {
    // we can gain a little by using a small number of threads in parallel, but not much since the memory bus
    // quickly saturates (basically, each core can saturate a memory channel, so a system with quad-channel
    // memory won't be able to take advantage of more than four cores).
#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
    for(size_t k = 0; k < nfloats; k++)
      buf[k] = lambda*buf[k] + lambda_1*other[k];
    return;
  }
#endif // _OPENMP
  // no OpenMP, or image too small to bother parallelizing
#ifdef _OPENMP
#pragma omp simd aligned(buf:16)
#endif
  for(size_t k = 0; k < nfloats; k++)
    buf[k] = lambda*buf[k] + lambda_1*other[k];
}
426
427
// clang-format off
// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
// vim: shiftwidth=2 expandtab tabstop=2 cindent
// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
// clang-format on
#define TRUE
Definition ashift_lsd.c:162
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
const dt_colormatrix_t dt_aligned_pixel_t out
typedef void((*dt_cache_allocate_t)(void *userdata, dt_cache_entry_t *entry))
darktable_t darktable
Definition darktable.c:181
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:447
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __DT_CLONE_TARGETS__
Definition darktable.h:367
#define dt_pixelpipe_cache_alloc_perthread_float(n, padded_size)
Definition darktable.h:1030
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
__DT_CLONE_TARGETS__ void dt_iop_image_add_image(float *const buf, const float *const other_image, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:276
__DT_CLONE_TARGETS__ void dt_iop_image_mul_const(float *const buf, const float mul_value, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:351
__DT_CLONE_TARGETS__ void dt_iop_image_copy(float *const __restrict__ out, const float *const __restrict__ in, const size_t nfloats)
Definition imagebuf.c:138
__DT_CLONE_TARGETS__ void dt_iop_image_sub_image(float *const buf, const float *const other_image, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:301
int dt_iop_alloc_image_buffers(struct dt_iop_module_t *const module, const struct dt_iop_roi_t *const roi_in, const struct dt_iop_roi_t *const roi_out,...)
Definition imagebuf.c:31
__DT_CLONE_TARGETS__ void dt_iop_image_add_const(float *const buf, const float add_value, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:251
void dt_iop_copy_image_roi(float *const __restrict__ out, const float *const __restrict__ in, const size_t ch, const dt_iop_roi_t *const __restrict__ roi_in, const dt_iop_roi_t *const __restrict__ roi_out, const int zero_pad)
Definition imagebuf.c:159
__DT_CLONE_TARGETS__ void dt_iop_image_invert(float *const buf, const float max_value, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:326
__DT_CLONE_TARGETS__ void dt_iop_image_fill(float *const buf, const float fill_value, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:214
__DT_CLONE_TARGETS__ void dt_iop_image_linear_blend(float *const restrict buf, const float lambda, const float *const restrict other, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:402
__DT_CLONE_TARGETS__ void dt_iop_image_scaled_copy(float *const restrict buf, const float *const restrict src, const float scale, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:189
__DT_CLONE_TARGETS__ void dt_iop_image_div_const(float *const buf, const float div_value, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.c:376
#define DT_IMGSZ_ROI_MASK
Definition imagebuf.h:53
#define DT_IMGSZ_LONGEST
Definition imagebuf.h:64
#define DT_IMGSZ_INPUT
Definition imagebuf.h:55
#define DT_IMGSZ_CLEARBUF
Definition imagebuf.h:58
static void dt_iop_image_copy_by_size(float *const __restrict__ out, const float *const __restrict__ in, const size_t width, const size_t height, const size_t ch)
Definition imagebuf.h:87
#define DT_IMGSZ_OUTPUT
Definition imagebuf.h:54
#define DT_IMGSZ_WIDTH
Definition imagebuf.h:63
#define DT_IMGSZ_PERTHREAD
Definition imagebuf.h:57
#define DT_IMGSZ_HEIGHT
Definition imagebuf.h:62
#define DT_IMGSZ_CH_MASK
Definition imagebuf.h:51
#define DT_IMGSZ_DIM_MASK
Definition imagebuf.h:60
#define DT_IMGSZ_FULL
Definition imagebuf.h:61
float *const restrict const size_t k
float *const restrict const size_t const size_t ch
size_t size
Definition mipmap_cache.c:3
int32_t num_openmp_threads
Definition darktable.h:758
Region of interest passed through the pixelpipe.
Definition imageop.h:72
#define MIN(a, b)
Definition thinplate.c:32
#define MAX(a, b)
Definition thinplate.c:29