/* _cl_is_zero_copy_image(): probe whether mapping `mem` hands back `host_ptr` itself.
 *
 * NOTE(review): this listing is an excerpt. The `bpp` parameter, the braces and
 * the final return statement are not visible here; presumably the function
 * returns `is_zero_copy` -- confirm against the full file.
 *
 * Visible behaviour:
 *  - returns FALSE for a negative devid, NULL mem/host_ptr/roi, a non-positive
 *    roi width/height, or bpp == 0;
 *  - maps the image with CL_MAP_READ (third argument TRUE, presumably a
 *    blocking map -- confirm against dt_opencl_map_image());
 *  - returns FALSE when the map yields NULL;
 *  - records whether the mapped address equals host_ptr;
 *  - unmaps again and returns FALSE if the unmap reports anything other than
 *    CL_SUCCESS, regardless of the pointer comparison.
 */
123static gboolean _cl_is_zero_copy_image(
const int devid, cl_mem mem,
void *host_ptr,
const dt_iop_roi_t *roi,
126 if(devid < 0 || !mem || !host_ptr || !roi || roi->width <= 0 || roi->
height <= 0 ||
bpp == 0)
return FALSE;
128 void *mapped = dt_opencl_map_image(devid, mem,
TRUE, CL_MAP_READ, roi->
width, roi->
height, (
int)
bpp);
129 if(!mapped)
return FALSE;
/* Pointer identity is the only zero-copy criterion visible in this excerpt. */
131 const gboolean ptr_matches = (mapped == host_ptr);
132 const gboolean is_zero_copy = ptr_matches;
133 const cl_int unmap_err = dt_opencl_unmap_mem_object(devid, mem, mapped);
134 if(unmap_err != CL_SUCCESS)
return FALSE;
/* _gpu_try_reuse_pinned_from_cache(): try to obtain an existing buffer for
 * `host_ptr` on device `devid` from `cache_entry`.
 *
 * NOTE(review): excerpt only. Callers in this file also pass roi, bpp and
 * flags, so part of the parameter list is elided, as are the cache lookup
 * that defines `cached_cst` and the return of the found buffer.
 *
 * Visible behaviour:
 *  - *out_reused (when non-NULL) is reset to FALSE on entry;
 *  - returns NULL if cache_entry or host_ptr is NULL or devid is negative;
 *  - on the (elided) hit path *out_reused is set to TRUE and *out_cst receives
 *    cached_cst, but only when cached_cst is not IOP_CS_NONE.
 */
162static void *_gpu_try_reuse_pinned_from_cache(
dt_pixel_cache_entry_t *cache_entry,
void *host_ptr,
int devid,
164 int *out_cst, gboolean *out_reused)
166 if(out_reused) *out_reused =
FALSE;
167 if(!cache_entry || !host_ptr || devid < 0)
return NULL;
174 if(out_reused) *out_reused =
TRUE;
175 if(out_cst && cached_cst !=
IOP_CS_NONE) *out_cst = cached_cst;
/* _is_gamma_rgba8_output(): predicate, true only when all of these hold:
 *  - module and message are non-NULL,
 *  - bpp equals 4 * sizeof(uint8_t),
 *  - module->op compares equal to "gamma",
 *  - message compares equal to "output".
 *
 * NOTE(review): the `message` parameter is used below but its declaration is
 * elided from this excerpt (callers pass it as the third argument).
 */
181static inline gboolean _is_gamma_rgba8_output(
const dt_iop_module_t *module,
const size_t bpp,
184 return module && message && bpp == 4 * sizeof(uint8_t) && strcmp(module->op, "gamma") == 0
185 && strcmp(message, "output") == 0;
/* Forward declaration of _gpu_alloc_device_with_flush(), which is called before
 * its definition further down in this listing.
 *
 * NOTE(review): the parameter list is truncated in this excerpt. The trailing
 * `gboolean *out_reused);` line does not match how _gpu_alloc_device_with_flush()
 * is called elsewhere in this listing (devid, roi, bpp, module, message, keep),
 * so it presumably closes a different, elided prototype -- confirm against the
 * full file.
 */
188static void *_gpu_alloc_device_with_flush(
int devid,
const dt_iop_roi_t *roi,
const size_t bpp,
193 gboolean *out_reused);
/* _gpu_get_pinned_or_alloc(): obtain a device buffer bound to `host_ptr`,
 * either reused from the cache entry or newly allocated.
 *
 * NOTE(review): excerpt only. Several parameters (cache_entry, module, message
 * and a reuse switch, judging by the call in _gpu_init_buffer) and all branching
 * are elided. The reuse-then-allocate pair appears twice; presumably the second
 * pair is a retry after the first allocation failed -- confirm.
 *
 * Visible behaviour:
 *  - buffers are requested with CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
 *  - the bpp handed to the allocator is wrapped in DT_OPENCL_BPP_ENCODE_RGBA8()
 *    when _is_gamma_rgba8_output() is true, otherwise it is bpp cast to int;
 *    the cache lookup always receives the raw bpp;
 *  - *out_reused (when non-NULL) is reset to FALSE before any lookup.
 */
208static void *_gpu_get_pinned_or_alloc(
int devid,
void *host_ptr,
const dt_iop_roi_t *roi,
const size_t bpp,
210 int *out_cst, gboolean *out_reused,
213 const int flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
215 const gboolean gamma_rgba8 = _is_gamma_rgba8_output(module,
bpp, message);
216 const int cl_bpp = gamma_rgba8 ? DT_OPENCL_BPP_ENCODE_RGBA8((
int)
bpp) : (int)
bpp;
218 if(out_reused) *out_reused =
FALSE;
/* First attempt: cache lookup, then allocation on top of host_ptr. */
221 mem = _gpu_try_reuse_pinned_from_cache(cache_entry, host_ptr, devid, roi,
bpp,
flags, out_cst, out_reused);
224 mem = dt_opencl_alloc_device_use_host_pointer(devid, roi->
width, roi->
height, cl_bpp, host_ptr,
flags);
/* Second attempt with identical arguments; the condition guarding it is elided. */
230 mem = _gpu_try_reuse_pinned_from_cache(cache_entry, host_ptr, devid, roi,
bpp,
flags, out_cst, out_reused);
232 mem = dt_opencl_alloc_device_use_host_pointer(devid, roi->
width, roi->
height, cl_bpp, host_ptr,
flags);
/* _gpu_alloc_device_with_flush(): allocate a device-only image of
 * roi->width x roi->height.
 *
 * NOTE(review): excerpt only. The module/message/keep parameters used by the
 * caller and the code between the two allocation attempts are elided; going by
 * the function name, a cache flush presumably happens before the retry --
 * confirm against the full file.
 *
 * Visible behaviour:
 *  - the bpp passed to dt_opencl_alloc_device() is wrapped in
 *    DT_OPENCL_BPP_ENCODE_RGBA8() when _is_gamma_rgba8_output() is true,
 *    otherwise it is bpp cast to int;
 *  - dt_opencl_alloc_device() is called a second time with the same arguments.
 */
247static void *_gpu_alloc_device_with_flush(
int devid,
const dt_iop_roi_t *roi,
const size_t bpp,
251 const gboolean gamma_rgba8 = _is_gamma_rgba8_output(module,
bpp, message);
252 const int cl_bpp = gamma_rgba8 ? DT_OPENCL_BPP_ENCODE_RGBA8((
int)
bpp) : (int)
bpp;
253 void *mem = dt_opencl_alloc_device(devid, roi->
width, roi->
height, cl_bpp);
257 mem = dt_opencl_alloc_device(devid, roi->
width, roi->
height, cl_bpp);
/* NOTE(review): the head of this definition is elided from the excerpt. The
 * arguments it validates match the call to _gpu_try_reuse_device_from_cache()
 * in _gpu_init_buffer, so this is presumably that function -- confirm.
 *
 * Visible behaviour:
 *  - *out_reused (when non-NULL) is reset to FALSE on entry;
 *  - returns NULL for a NULL cache_entry or roi, a negative devid, a
 *    non-positive roi width/height, or bpp == 0;
 *  - an (elided) lookup is made with CL_MEM_READ_WRITE and a NULL last
 *    argument; *out_reused becomes TRUE only when that lookup produced a
 *    non-NULL mem.
 */
264 gboolean *out_reused)
266 if(out_reused) *out_reused =
FALSE;
267 if(!cache_entry || devid < 0 || !roi || roi->width <= 0 || roi->
height <= 0 ||
bpp == 0)
return NULL;
272 CL_MEM_READ_WRITE, NULL);
273 if(mem && out_reused) *out_reused =
TRUE;
/* _gpu_log_pinned_reuse(): diagnostic helper for pinned-input cache reuse.
 *
 * NOTE(review): excerpt only. Where `hits` and `misses` come from, the logging
 * call itself and any branch for the non-reused case are elided.
 *
 * Visible behaviour: when reused_from_cache is true, a message is formatted
 * with the module name (module->name(), or "unknown" for a NULL module) and
 * the hit/miss counters.
 */
284static void _gpu_log_pinned_reuse(
dt_iop_module_t *module,
const gboolean reused_from_cache)
289 if(reused_from_cache)
294 "[opencl_pixelpipe] %s reused pinned input from cache (hits=%d, misses=%d)\n",
295 module ? module->name() :
"unknown", hits, misses);
/* _gpu_init_buffer(): obtain the OpenCL buffer for a pipeline stage, choosing
 * between a host-pointer-backed buffer, a cached device buffer and a fresh
 * device allocation.
 *
 * NOTE(review): excerpt only. The module, message, cache_entry and reuse_pinned
 * parameters are used below but their declarations are elided, as are the
 * conditions that select between the three acquisition calls and the final
 * return.
 *
 * Visible behaviour:
 *  - *out_reused (when non-NULL) is reset to FALSE up front;
 *  - one path calls _gpu_get_pinned_or_alloc() with host_ptr;
 *  - when allow_reuse_device is set, _gpu_try_reuse_device_from_cache() is
 *    tried, and _gpu_alloc_device_with_flush() is the remaining allocation call;
 *  - a NULL result leads to a message naming module->op (or "unknown");
 *  - otherwise *out_reused is updated from reused_from_cache: on the pinned
 *    path (allow_reuse_pinned, cache_entry and host_ptr all set), where the
 *    reuse is also logged, or on the device path (allow_reuse_device,
 *    cache_entry set, host_ptr NULL, out_reused non-NULL).
 */
324static void *_gpu_init_buffer(
int devid,
void *
const host_ptr,
const dt_iop_roi_t *roi,
const size_t bpp,
327 const gboolean reuse_device,
328 int *out_cst, gboolean *out_reused,
void *keep)
331 void *cl_mem_input = NULL;
332 gboolean reused_from_cache =
FALSE;
333 const gboolean allow_reuse_pinned = reuse_pinned;
334 const gboolean allow_reuse_device = reuse_device;
336 if(out_reused) *out_reused =
FALSE;
340 cl_mem_input = _gpu_get_pinned_or_alloc(devid, host_ptr, roi,
bpp, cache_entry, allow_reuse_pinned,
341 out_cst, &reused_from_cache, module, message);
345 if(allow_reuse_device)
346 cl_mem_input = _gpu_try_reuse_device_from_cache(cache_entry, devid, roi,
bpp, &reused_from_cache);
349 cl_mem_input = _gpu_alloc_device_with_flush(devid, roi,
bpp, module, message, keep);
352 if(cl_mem_input == NULL)
355 module ? module->
op :
"unknown");
357 else if(allow_reuse_pinned && cache_entry && host_ptr)
359 if(out_reused) *out_reused = reused_from_cache;
360 _gpu_log_pinned_reuse(module, reused_from_cache);
362 else if(allow_reuse_device && cache_entry && !host_ptr && out_reused)
364 *out_reused = reused_from_cache;
/* NOTE(review): the head of this definition is elided from the excerpt. The
 * trailing `cache_device` parameter and the `cl_mem_buffer` handling match the
 * _gpu_clear_buffer() signature referenced elsewhere in this listing, so this is
 * presumably the OpenCL-enabled implementation of that function -- confirm.
 *
 * Visible behaviour:
 *  - work is done only when cl_mem_buffer is non-NULL and holds a buffer;
 *  - the buffer is a caching candidate either as "pinned" (cache_entry and
 *    host_ptr set, flags contain CL_MEM_USE_HOST_PTR) or as "device"
 *    (cache_entry set, no host_ptr, cache_device requested, flags without
 *    CL_MEM_USE_HOST_PTR);
 *  - device id, width, height and element size are queried from the buffer;
 *  - device-cached buffers are tracked with CL_MEM_READ_WRITE in place of the
 *    buffer's own flags;
 *  - the caller's handle is reset to NULL.
 * Where `flags` is read, and what happens on the cache / no-cache branches
 * (presumably a cache put versus a release), is elided.
 */
393 const gboolean cache_device)
395 if(cl_mem_buffer && *cl_mem_buffer != NULL)
397 cl_mem mem = *cl_mem_buffer;
399 const gboolean can_cache_pinned = (cache_entry && host_ptr && (
flags & CL_MEM_USE_HOST_PTR));
400 const gboolean can_cache_device = (cache_entry && !host_ptr && cache_device && !(
flags & CL_MEM_USE_HOST_PTR));
401 const gboolean can_cache = (can_cache_pinned || can_cache_device);
404 const int devid = dt_opencl_get_mem_context_id(mem);
405 const int width = dt_opencl_get_image_width(mem);
406 const int height = dt_opencl_get_image_height(mem);
407 const int bpp = dt_opencl_get_image_element_size(mem);
408 const int tracked_flags = can_cache_device ? CL_MEM_READ_WRITE : (int)
flags;
416 *cl_mem_buffer = NULL;
/* _cl_pinned_memory_copy(): synchronise an image between `host_ptr` and the
 * OpenCL buffer `cl_mem_buffer`; the direction is selected by `cl_mode`.
 *
 * NOTE(review): excerpt only. The cl_mode, bpp, module and message parameters
 * are used below but their declarations are elided, as are the lookup of
 * `flags`, the logging calls around the message strings and the success return.
 *
 * Visible behaviour:
 *  - returns 1 when host_ptr or cl_mem_buffer is NULL;
 *  - if the buffer's flags contain CL_MEM_USE_HOST_PTR, the image is mapped
 *    with cl_mode and unmapped again; a failing unmap returns 1, and the
 *    mapped address is compared with host_ptr (how ptr_matches is used is
 *    elided);
 *  - otherwise an explicit transfer is issued: CL_MAP_WRITE calls
 *    dt_opencl_write_host_to_device(), CL_MAP_READ calls
 *    dt_opencl_read_host_from_device(); any other mode leaves err at CL_SUCCESS;
 *  - log messages describe the direction as "host to device" for CL_MAP_WRITE
 *    and "device to host" otherwise, and name module->op or "base buffer".
 */
448static int _cl_pinned_memory_copy(
const int devid,
void *host_ptr,
void *cl_mem_buffer,
const dt_iop_roi_t *roi,
451 if(!host_ptr || !cl_mem_buffer)
return 1;
453 const cl_mem mem = (cl_mem)cl_mem_buffer;
457 if(
flags & CL_MEM_USE_HOST_PTR)
459 void *mapped = dt_opencl_map_image(devid, mem,
TRUE, cl_mode, roi->
width, roi->
height, (
int)
bpp);
462 const gboolean ptr_matches = (mapped == host_ptr);
463 const cl_int unmap_err = dt_opencl_unmap_mem_object(devid, mem, mapped);
464 if(unmap_err != CL_SUCCESS)
return 1;
473 "[opencl_pixelpipe] successfully synced image %s via map/unmap for module %s (%s)\n",
474 (cl_mode == CL_MAP_WRITE) ?
"host to device" :
"device to host",
475 (module) ? module->op :
"base buffer", message);
/* Explicit copy path for buffers that are not synced via map/unmap above. */
482 cl_int err = CL_SUCCESS;
483 if(cl_mode == CL_MAP_WRITE)
484 err = dt_opencl_write_host_to_device(devid, host_ptr, mem, roi->
width, roi->
height, (
int)
bpp);
485 else if(cl_mode == CL_MAP_READ)
486 err = dt_opencl_read_host_from_device(devid, host_ptr, mem, roi->
width, roi->
height, (
int)
bpp);
490 if(err != CL_SUCCESS)
493 (cl_mode == CL_MAP_WRITE) ?
"host to device" :
"device to host",
494 (module) ? module->op :
"base buffer", message);
499 (cl_mode == CL_MAP_WRITE) ?
"host to device" :
"device to host",
500 (module) ? module->op :
"base buffer", message);
/* _resync_input_gpu_to_cache(): copy the device-side input back into the host
 * buffer `input` (CL_MAP_READ direction) via _cl_pinned_memory_copy().
 *
 * NOTE(review): excerpt only. The roi_in, in_bpp, module, message, input_format
 * and input_cst_cl parameters are used below but their declarations are elided,
 * as are the failure handling and the final return.
 *
 * Visible behaviour:
 *  - with no cl_mem_input there is nothing to sync and `input` is returned
 *    unchanged;
 *  - when the copy succeeds (fail == 0), input_format->cst is set to
 *    input_cst_cl; the same assignment appears once more on a later, elided
 *    path.
 */
519static float *_resync_input_gpu_to_cache(
dt_dev_pixelpipe_t *pipe,
float *input,
void *cl_mem_input,
525 if(!cl_mem_input)
return input;
528 int fail = _cl_pinned_memory_copy(pipe->
devid, input, cl_mem_input, roi_in, CL_MAP_READ, in_bpp, module, message);
531 if(!fail) input_format->
cst = input_cst_cl;
538 input_format->
cst = input_cst_cl;
/* NOTE(review): the head of this definition (name and parameter list) is elided
 * from the excerpt; the comments below describe only the visible statements.
 *
 * Visible behaviour:
 *  - returns 1 when locked_input_entry is NULL, otherwise clears
 *    *locked_input_entry;
 *  - if *cl_mem_input already holds a buffer whose flags contain
 *    CL_MEM_USE_HOST_PTR and _cl_is_zero_copy_image() confirms it maps onto
 *    `input`, input_entry is recorded in *locked_input_entry;
 *  - otherwise a buffer is requested through _gpu_init_buffer() with the
 *    "input" label (pinned reuse TRUE, device reuse FALSE); a NULL result
 *    counts as failure;
 *  - for a new host-pointer buffer, keep_lock takes the result of
 *    _cl_is_zero_copy_image();
 *  - when the buffer is not zero-copy, the host data is uploaded with
 *    dt_opencl_write_host_to_device() and the outcome is reported with the
 *    "cache to input" label;
 *  - input_entry is recorded in *locked_input_entry again near the end; the
 *    guarding condition (presumably keep_lock) is elided -- confirm.
 */
574 if(!locked_input_entry)
return 1;
575 *locked_input_entry = NULL;
577 if(*cl_mem_input != NULL)
584 const cl_mem mem = (cl_mem)*cl_mem_input;
586 if(
flags & CL_MEM_USE_HOST_PTR)
587 if(_cl_is_zero_copy_image(pipe->
devid, mem, input, roi_in, in_bpp))
590 *locked_input_entry = input_entry;
604 gboolean input_reused_from_cache =
FALSE;
605 *cl_mem_input = _gpu_init_buffer(pipe->
devid, input, roi_in, in_bpp, module,
"input", input_entry,
606 TRUE,
FALSE, input_cst_cl, &input_reused_from_cache, keep);
607 int fail = (*cl_mem_input == NULL);
612 gboolean keep_lock =
FALSE;
614 if(!fail && *cl_mem_input)
616 mem = (cl_mem)*cl_mem_input;
618 if(
flags & CL_MEM_USE_HOST_PTR)
619 keep_lock = _cl_is_zero_copy_image(pipe->
devid, mem, input, roi_in, in_bpp);
/* Upload is attempted only when the buffer does not alias the host memory. */
622 if(!fail && mem && !keep_lock)
624 const cl_int err = dt_opencl_write_host_to_device(pipe->
devid, input, mem, roi_in->
width, roi_in->
height,
626 if(err != CL_SUCCESS)
629 (module) ? module->
op :
"base buffer",
"cache to input");
635 (module) ? module->
op :
"base buffer",
"cache to input");
643 *locked_input_entry = input_entry;
/* Tail of the no-OpenCL stub of _gpu_clear_buffer() (the head of the definition
 * is elided from this excerpt). The only visible action is resetting the
 * caller's handle to NULL when a handle pointer was supplied.
 */
662 const gboolean cache_device)
668 if(cl_mem_buffer) *cl_mem_buffer = NULL;
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
int dt_atomic_get_int(dt_atomic_int *var)
Definition atomic.h:63
int dt_atomic_add_int(dt_atomic_int *var, int incr)
Definition atomic.h:66
atomic_int dt_atomic_int
Definition atomic.h:60
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
dt_iop_colorspace_type_t
Definition color_conversion.h:30
@ IOP_CS_NONE
Definition color_conversion.h:31
typedef void((*dt_cache_allocate_t)(void *userdata, dt_cache_entry_t *entry))
darktable_t darktable
Definition darktable.c:178
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1528
@ DT_DEBUG_OPENCL
Definition darktable.h:642
dt_mipmap_buffer_dsc_flags flags
Definition mipmap_cache.c:4
static unsigned long dt_opencl_get_mem_flags(void *mem)
Definition opencl.h:615
static gboolean dt_opencl_finish(const int devid)
Definition opencl.h:526
static void dt_opencl_release_mem_object(void *mem)
Definition opencl.h:619
static void dt_opencl_events_wait_for(const int devid)
Definition opencl.h:629
void dt_dev_pixelpipe_cache_rdlock_entry(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, gboolean lock, dt_pixel_cache_entry_t *cache_entry)
Lock or release the read lock on the entry.
Definition pixelpipe_cache.c:1501
void dt_pixel_cache_clmem_put(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid, int width, int height, int bpp, int flags, int cst, void *mem)
Definition pixelpipe_cache.c:531
void dt_dev_pixelpipe_cache_wrlock_entry(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, gboolean lock, dt_pixel_cache_entry_t *cache_entry)
Lock or release the write lock on the entry.
Definition pixelpipe_cache.c:1482
void dt_dev_pixelpipe_cache_flush_clmem(dt_dev_pixelpipe_cache_t *cache, const int devid, void *keep)
Release cached OpenCL buffers for a device (-1 for all).
Definition pixelpipe_cache.c:379
void dt_pixel_cache_clmem_remove(dt_pixel_cache_entry_t *entry, void *mem)
Definition pixelpipe_cache.c:577
void * dt_pixel_cache_clmem_get(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid, int width, int height, int bpp, int flags, int *out_cst)
Definition pixelpipe_cache.c:488
Pixelpipe cache for storing intermediate results in the pixelpipe.
#define DT_PIXELPIPE_CACHE_HASH_INVALID
Definition pixelpipe_cache.h:41
static void _gpu_clear_buffer(void **cl_mem_buffer, dt_pixel_cache_entry_t *cache_entry, void *host_ptr, int cst, const gboolean cache_device)
No-OpenCL stub for _gpu_clear_buffer().
Definition pixelpipe_cache_cl.c:661
struct dt_dev_pixelpipe_cache_t * pixelpipe_cache
Definition darktable.h:717
Definition pixelpipe_hb.h:179
int devid
Definition pixelpipe_hb.h:259
Definition develop/format.h:48
int cst
Definition develop/format.h:74
GModule *dt_dev_operation_t op
Definition imageop.h:227
int width
Definition imageop.h:68
int height
Definition imageop.h:68
Definition pixelpipe_cache.h:78