26static size_t parallel_imgop_minimum = 500000;
38 va_start(args,roi_out);
41 const int size = va_arg(args,
int);
42 float **bufptr = va_arg(args,
float**);
44 (
void)va_arg(args,
size_t*);
52 va_start(args,roi_out);
55 const int size = va_arg(args,
int);
56 float **bufptr = va_arg(args,
float**);
65 nfloats = channels * roi_out->
width * roi_out->
height;
68 nfloats = channels * roi_out->
height;
71 nfloats = channels * roi_out->
width;
77 nfloats = channels * roi_in->
width * roi_in->
height;
80 nfloats = channels * roi_in->
height;
83 nfloats = channels * roi_in->
width;
102 memset(*bufptr, 0, nfloats *
sizeof(
float));
115 va_start(args,roi_out);
118 const int size = va_arg(args,
int);
119 float **bufptr = va_arg(args,
float**);
121 (
void)va_arg(args,
size_t*);
141 if (nfloats > parallel_imgop_minimum)
146#pragma omp parallel for simd aligned(in, out : 16) default(firstprivate)
147 for(
size_t k = 0;
k < nfloats;
k++)
153 memcpy(
out, in, nfloats *
sizeof(
float));
161 const dt_iop_roi_t *
const __restrict__ roi_out,
const int zero_pad)
163 if (roi_in->width == roi_out->width && roi_in->height == roi_out->height)
168 else if (roi_in->width <= roi_out->width && roi_in->height <= roi_out->height)
171 fprintf(stderr,
"copy_image_roi with larger output not yet implemented\n");
174 else if (roi_in->width >= roi_out->width && roi_in->height >= roi_out->height)
177 fprintf(stderr,
"copy_image_roi with smaller output not yet implemented\n");
183 fprintf(stderr,
"copy_image_roi called with inconsistent RoI!\n");
194 if (nfloats > parallel_imgop_minimum)
199#pragma omp parallel for simd aligned(buf, src : 16) default(firstprivate)
200 for(
size_t k = 0;
k < nfloats;
k++)
201 buf[
k] = scale * src[
k];
207#pragma omp simd aligned(buf, src : 16)
209 for (
size_t k = 0;
k < nfloats;
k++)
210 buf[
k] = scale * src[
k];
219 if (nfloats > parallel_imgop_minimum)
223 const size_t chunksize = (((nfloats + nthreads - 1) / nthreads) + 3) / 4;
224#pragma omp parallel for default(firstprivate) num_threads(nthreads)
225 for(
size_t chunk = 0; chunk < nthreads; chunk++)
227#pragma omp simd aligned(buf:16)
228 for(
size_t k = 4 * chunk * chunksize;
k <
MIN(4*(chunk+1)*chunksize, nfloats);
k++)
235 if (fill_value == 0.0f)
238 memset(buf, 0,
sizeof(
float) * nfloats);
243#pragma omp simd aligned(buf:16)
245 for (
size_t k = 0;
k < nfloats;
k++)
256 if (nfloats > parallel_imgop_minimum)
261#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
262 for(
size_t k = 0;
k < nfloats;
k++)
269#pragma omp simd aligned(buf:16)
271 for (
size_t k = 0;
k < nfloats;
k++)
281 if (nfloats > parallel_imgop_minimum)
286#pragma omp parallel for simd aligned(buf, other_image : 16) default(firstprivate)
287 for(
size_t k = 0;
k < nfloats;
k++)
288 buf[
k] += other_image[
k];
294#pragma omp simd aligned(buf, other_image : 16)
296 for (
size_t k = 0;
k < nfloats;
k++)
297 buf[
k] += other_image[
k];
306 if (nfloats > parallel_imgop_minimum)
311#pragma omp parallel for simd aligned(buf, other_image : 16) default(firstprivate)
312 for(
size_t k = 0;
k < nfloats;
k++)
313 buf[
k] -= other_image[
k];
319#pragma omp simd aligned(buf, other_image : 16)
321 for (
size_t k = 0;
k < nfloats;
k++)
322 buf[
k] -= other_image[
k];
331 if (nfloats > parallel_imgop_minimum)
336#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
337 for(
size_t k = 0;
k < nfloats;
k++)
338 buf[
k] = max_value - buf[
k];
344#pragma omp simd aligned(buf:16)
346 for (
size_t k = 0;
k < nfloats;
k++)
347 buf[
k] = max_value - buf[
k];
356 if (nfloats > parallel_imgop_minimum)
361#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
362 for(
size_t k = 0;
k < nfloats;
k++)
369#pragma omp simd aligned(buf:16)
371 for (
size_t k = 0;
k < nfloats;
k++)
381 if (nfloats > parallel_imgop_minimum)
386#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
387 for(
size_t k = 0;
k < nfloats;
k++)
394#pragma omp simd aligned(buf:16)
396 for (
size_t k = 0;
k < nfloats;
k++)
406 const float lambda_1 = 1.0f - lambda;
408 if (nfloats > parallel_imgop_minimum/2)
413#pragma omp parallel for simd aligned(buf:16) default(firstprivate)
414 for(
size_t k = 0;
k < nfloats;
k++)
415 buf[
k] = lambda*buf[
k] + lambda_1*other[
k];
421#pragma omp simd aligned(buf:16)
423 for (
size_t k = 0;
k < nfloats;
k++)
424 buf[
k] = lambda*buf[
k] + lambda_1*other[
k];
const dt_colormatrix_t dt_aligned_pixel_t out
typedef void((*dt_cache_allocate_t)(void *userdata, dt_cache_entry_t *entry))
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
#define dt_pixelpipe_cache_free_align(mem)
#define __DT_CLONE_TARGETS__
#define dt_pixelpipe_cache_alloc_perthread_float(n, padded_size)
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
__DT_CLONE_TARGETS__ void dt_iop_image_add_image(float *const buf, const float *const other_image, const size_t width, const size_t height, const size_t ch)
__DT_CLONE_TARGETS__ void dt_iop_image_mul_const(float *const buf, const float mul_value, const size_t width, const size_t height, const size_t ch)
__DT_CLONE_TARGETS__ void dt_iop_image_copy(float *const __restrict__ out, const float *const __restrict__ in, const size_t nfloats)
__DT_CLONE_TARGETS__ void dt_iop_image_sub_image(float *const buf, const float *const other_image, const size_t width, const size_t height, const size_t ch)
int dt_iop_alloc_image_buffers(struct dt_iop_module_t *const module, const struct dt_iop_roi_t *const roi_in, const struct dt_iop_roi_t *const roi_out,...)
__DT_CLONE_TARGETS__ void dt_iop_image_add_const(float *const buf, const float add_value, const size_t width, const size_t height, const size_t ch)
void dt_iop_copy_image_roi(float *const __restrict__ out, const float *const __restrict__ in, const size_t ch, const dt_iop_roi_t *const __restrict__ roi_in, const dt_iop_roi_t *const __restrict__ roi_out, const int zero_pad)
__DT_CLONE_TARGETS__ void dt_iop_image_invert(float *const buf, const float max_value, const size_t width, const size_t height, const size_t ch)
__DT_CLONE_TARGETS__ void dt_iop_image_fill(float *const buf, const float fill_value, const size_t width, const size_t height, const size_t ch)
__DT_CLONE_TARGETS__ void dt_iop_image_linear_blend(float *const restrict buf, const float lambda, const float *const restrict other, const size_t width, const size_t height, const size_t ch)
__DT_CLONE_TARGETS__ void dt_iop_image_scaled_copy(float *const restrict buf, const float *const restrict src, const float scale, const size_t width, const size_t height, const size_t ch)
__DT_CLONE_TARGETS__ void dt_iop_image_div_const(float *const buf, const float div_value, const size_t width, const size_t height, const size_t ch)
#define DT_IMGSZ_ROI_MASK
#define DT_IMGSZ_CLEARBUF
static void dt_iop_image_copy_by_size(float *const __restrict__ out, const float *const __restrict__ in, const size_t width, const size_t height, const size_t ch)
#define DT_IMGSZ_PERTHREAD
#define DT_IMGSZ_DIM_MASK
float *const restrict const size_t k
float *const restrict const size_t const size_t ch
int32_t num_openmp_threads
Region of interest passed through the pixelpipe.