Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
pixelpipe_cache.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2009-2012, 2015 johannes hanika.
4 Copyright (C) 2010-2011 Henrik Andersson.
5 Copyright (C) 2011 Robert Bieber.
6 Copyright (C) 2011 Rostyslav Pidgornyi.
7 Copyright (C) 2012 Richard Wonka.
8 Copyright (C) 2012-2014, 2016 Tobias Ellinghaus.
9 Copyright (C) 2013-2014, 2016 Roman Lebedev.
10 Copyright (C) 2014 Ulrich Pegelow.
11 Copyright (C) 2019, 2023-2026 Aurélien PIERRE.
12 Copyright (C) 2019-2021 Pascal Obry.
13 Copyright (C) 2020, 2022 Hanno Schwalm.
14 Copyright (C) 2020 Ralf Brown.
15 Copyright (C) 2021 Aldric Renaudin.
16 Copyright (C) 2021 Dan Torop.
17 Copyright (C) 2022 Martin Bařinka.
18 Copyright (C) 2023 lologor.
19 Copyright (C) 2024 Alynx Zhou.
20 Copyright (C) 2025-2026 Guillaume Stutin.
21 Copyright (C) 2025 Miguel Moquillon.
22
23 darktable is free software: you can redistribute it and/or modify
24 it under the terms of the GNU General Public License as published by
25 the Free Software Foundation, either version 3 of the License, or
26 (at your option) any later version.
27
28 darktable is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
32
33 You should have received a copy of the GNU General Public License
34 along with darktable. If not, see <http://www.gnu.org/licenses/>.
35*/
36
37#include <inttypes.h>
38#include <glib.h>
39#include <stdlib.h>
40#include <signal.h>
41#include <string.h>
42
43#include "control/control.h"
44#include "control/signal.h"
46#include "develop/pixelpipe.h"
47#include "common/darktable.h"
48#include "common/debug.h"
49#include "common/opencl.h"
50#include "develop/format.h"
51
52static __thread const char *dt_pixelpipe_cache_current_module = NULL;
53
55 const uint64_t key);
56
57static inline const char *_cache_debug_module_name(void)
58{
60}
61
62static void _trace_exact_hit(const char *phase, const uint64_t hash, dt_pixel_cache_entry_t *cache_entry,
63 void *data, void *cl_mem_output, const int preferred_devid, const gboolean verbose)
64{
65 if(!(darktable.unmuted & DT_DEBUG_PIPECACHE)) return;
66 if(verbose && !(darktable.unmuted & DT_DEBUG_VERBOSE)) return;
67
69 "[pixelpipe_cache] exact-hit %s req=%" PRIu64 " entry=%" PRIu64 "/%" PRIu64
70 " data=%p cl=%p refs=%i auto=%i dev=%i module=%s name=%s\n",
71 phase, hash, cache_entry ? cache_entry->hash : DT_PIXELPIPE_CACHE_HASH_INVALID,
72 cache_entry ? cache_entry->serial : 0, data, cl_mem_output,
73 cache_entry ? dt_atomic_get_int(&cache_entry->refcount) : -1,
74 cache_entry ? cache_entry->auto_destroy : -1, preferred_devid, _cache_debug_module_name(),
75 (cache_entry && cache_entry->name) ? cache_entry->name : "-");
76}
77
78const char *dt_pixelpipe_cache_set_current_module(const char *module)
79{
80 const char *previous = dt_pixelpipe_cache_current_module;
82 return previous;
83}
84
85typedef struct dt_cache_clmem_t
86{
87 void *host_ptr;
88 void *mem;
89 int refs;
91
100
101
103 dt_pixel_cache_entry_t *cache_entry);
104static void _free_cache_entry(dt_pixel_cache_entry_t *cache_entry);
105static void _pixelpipe_cache_finalize_entry(dt_pixel_cache_entry_t *cache_entry, void **data,
106 const char *message);
107int _non_thread_safe_cache_remove(dt_dev_pixelpipe_cache_t *cache, const gboolean force,
108 dt_pixel_cache_entry_t *cache_entry, GHashTable *table);
109
111 const uint64_t hash, const size_t size,
112 const char *name, const int id);
113static dt_pixel_cache_entry_t *dt_pixel_cache_new_entry(const uint64_t hash, const size_t size,
114 const char *name, const int id,
115 dt_dev_pixelpipe_cache_t *cache, gboolean alloc,
116 GHashTable *table);
117static gboolean _cache_entry_clmem_flush_device(dt_pixel_cache_entry_t *entry, const int devid);
118static gboolean _cache_entry_materialize_host_data_locked(dt_pixel_cache_entry_t *entry, int preferred_devid,
119 gboolean prefer_device_payload);
121
122#ifdef HAVE_OPENCL
123static gboolean _cache_entry_clmem_flush_host_pinned_locked(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid);
124#endif
125
127{
128 if(IS_NULL_PTR(cache) || IS_NULL_PTR(host_ptr)) return NULL;
129
130 const uint64_t hash = (uint64_t)(uintptr_t)host_ptr;
132 if(entry && entry->external_alloc && entry->data == host_ptr) return entry;
133 return NULL;
134}
135
137 const uint64_t key)
138{
139 dt_pixel_cache_entry_t *entry = (dt_pixel_cache_entry_t *)g_hash_table_lookup(table, &key);
140 return entry;
141}
142
143
152
153
155{
156 if(!cache || !data) return NULL;
157
159
160 GHashTableIter iter;
161 gpointer key, value;
162
163 /* Search regular entries table */
164 g_hash_table_iter_init(&iter, cache->entries);
165 while(g_hash_table_iter_next(&iter, &key, &value))
166 {
168 if(entry && entry->data == data)
169 {
171 return entry;
172 }
173 }
174
175 /* Search external entries table */
176 g_hash_table_iter_init(&iter, cache->external_entries);
177 while(g_hash_table_iter_next(&iter, &key, &value))
178 {
180 if(entry && entry->data == data)
181 {
183 return entry;
184 }
185 }
186
188 return NULL;
189}
190
191
193{
194 return cache_entry->size / (1024 * 1024);
195}
196
197
198static void _pixel_cache_message(dt_pixel_cache_entry_t *cache_entry, const char *message, gboolean verbose)
199{
200 if(!(darktable.unmuted & DT_DEBUG_PIPECACHE)) return;
201 if(verbose && !(darktable.unmuted & DT_DEBUG_VERBOSE)) return;
203 "[pixelpipe] cache entry %" PRIu64 "/%" PRIu64 ": %s (data=%p - %" G_GSIZE_FORMAT " MiB - age %" PRId64
204 " - hits %i - refs %i - auto %i - ext %i - id %i - module %s) %s\n",
205 cache_entry->hash, cache_entry->serial,
206 cache_entry->name ? cache_entry->name : "-", cache_entry->data,
207 _pixel_cache_get_size(cache_entry), cache_entry->age, cache_entry->hits,
208 dt_atomic_get_int(&cache_entry->refcount), cache_entry->auto_destroy,
209 cache_entry->external_alloc, cache_entry->id, _cache_debug_module_name(), message);
210}
211
212static void _pixelpipe_cache_finalize_entry(dt_pixel_cache_entry_t *cache_entry, void **data,
213 const char *message)
214{
215 cache_entry->age = g_get_monotonic_time(); // Update MRU timestamp
216 if(data)
217 *data = cache_entry->data ? __builtin_assume_aligned(cache_entry->data, DT_CACHELINE_BYTES) : NULL;
218 _pixel_cache_message(cache_entry, message, FALSE);
219}
220
222 const uint64_t hash,
223 void **data,
225{
226 if(!IS_NULL_PTR(data)) *data = NULL;
227 if(!IS_NULL_PTR(entry)) *entry = NULL;
228 if(IS_NULL_PTR(cache) || hash == DT_PIXELPIPE_CACHE_HASH_INVALID) return FALSE;
229
231 cache->queries++;
232
233 dt_pixel_cache_entry_t *cache_entry = _non_threadsafe_cache_get_entry(cache, cache->entries, hash);
234 if(!IS_NULL_PTR(cache_entry) && !cache_entry->auto_destroy)
235 {
236 cache->hits++;
237 cache_entry->hits++;
238 _non_thread_safe_cache_ref_count_entry(cache, TRUE, cache_entry);
239 _pixelpipe_cache_finalize_entry(cache_entry, data, "ref-by-hash");
240 if(!IS_NULL_PTR(entry)) *entry = cache_entry;
241 }
242
243 const gboolean found = !IS_NULL_PTR(cache_entry) && !cache_entry->auto_destroy;
245 return found;
246}
247
248
249// remove the cache entry with the given hash and update the cache memory usage
250// WARNING: not internally thread-safe, protect its calls with mutex lock
251// return 0 on success, 1 on error
253 dt_pixel_cache_entry_t *cache_entry, GHashTable *table)
254{
255 if(!IS_NULL_PTR(cache_entry))
256 {
257 // Returns 1 if the lock is captured by another thread
258 // 0 if WE capture the lock, and then need to release it
259 gboolean locked = dt_pthread_rwlock_trywrlock(&cache_entry->lock);
260 if(!locked) dt_pthread_rwlock_unlock(&cache_entry->lock);
261 gboolean used = dt_atomic_get_int(&cache_entry->refcount) > 0;
262
263 /* Force-removal may bypass caller lifecycle checks but must never destroy
264 * an entry that still has active readers/writers. Active users can still
265 * access cl_mem_list after this call (for example borrowed GPU payloads),
266 * so removing a referenced entry here would create dangling pointers. */
267 if(!used && (!locked || force))
268 {
269 // Note: the free callback takes care of flushing OpenCL buffers too
270 g_hash_table_remove(table, &cache_entry->hash);
271 return 0;
272 }
273 else if(used)
274 _pixel_cache_message(cache_entry, "cannot remove: used", TRUE);
275 else if(locked)
276 _pixel_cache_message(cache_entry, "cannot remove: locked", TRUE);
277 }
278 else
279 {
280 dt_print(DT_DEBUG_PIPECACHE, "[pixelpipe] cache entry not found, will not be removed\n");
281 }
282 return 1;
283}
284
285
287 dt_pixel_cache_entry_t *cache_entry)
288{
290 int error = _non_thread_safe_cache_remove(cache, force, cache_entry, cache->entries);
292 return error;
293}
294
295#ifdef HAVE_OPENCL
296static gboolean _cache_entry_materialize_host_data_locked(dt_pixel_cache_entry_t *entry, int preferred_devid,
297 gboolean prefer_device_payload)
298{
299 dt_cache_clmem_t *source = NULL;
300 gboolean ok = FALSE;
302
303 /* We materialize RAM from the most authoritative cached payload in one pass instead of
304 * walking the list multiple times with slightly different predicates:
305 * - when RAM existed before, prefer pinned host-backed payloads first because they should
306 * already alias the cacheline or be the cheapest path back to host,
307 * - when RAM has just been allocated for a GPU-only cacheline, prefer device payloads first,
308 * - if a preferred OpenCL device is known, rank payloads from that device ahead of the rest.
309 * This keeps the fallback order explicit without scattering it over six loops. */
311 for(GList *l = g_list_first(entry->cl_mem_list); l; l = g_list_next(l))
312 {
313 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
314 if(IS_NULL_PTR(c) || IS_NULL_PTR(c->mem)) continue;
315
316 /* We are looking for one authoritative cached payload to materialize back to RAM.
317 * Only consider records whose live OpenCL context still matches the recorded device.
318 * Other cached payloads may belong to a different pipeline/device and must stay untouched. */
319 const int mem_devid = dt_opencl_get_mem_context_id((cl_mem)c->mem);
320 if(mem_devid != preferred_devid) continue;
321
322 const gboolean host_backed = (c->host_ptr == entry->data);
323 const gboolean device_only = (IS_NULL_PTR(c->host_ptr));
324 if(!host_backed && !device_only) continue;
325
327 if(!prefer_device_payload)
328 {
329 if(host_backed && (preferred_devid < 0 || mem_devid == preferred_devid))
331 else if(device_only && preferred_devid >= 0 && mem_devid == preferred_devid)
333 else if(device_only)
335 else if(host_backed)
337 }
338 else
339 {
340 if(device_only && preferred_devid >= 0 && mem_devid == preferred_devid)
342 else if(device_only)
344 else if(host_backed && (preferred_devid < 0 || mem_devid == preferred_devid))
346 else if(host_backed)
348 }
349
350 if(rank > best_rank)
351 {
352 best_rank = rank;
353 source = c;
355 }
356 }
357
358 if(source)
359 {
360
361 const int devid = dt_opencl_get_mem_context_id(source->mem);
362 const int width = dt_opencl_get_image_width(source->mem);
363 const int height = dt_opencl_get_image_height(source->mem);
364 const int bpp = dt_opencl_get_image_element_size(source->mem);
365
366 if(dt_opencl_is_pinned_memory((cl_mem)source->mem) && source->host_ptr == entry->data)
367 {
368 void *mapped = dt_opencl_map_image(devid, (cl_mem)source->mem, TRUE, CL_MAP_READ,
369 width, height, bpp);
370 ok = (dt_opencl_unmap_mem_object(devid, (cl_mem)source->mem, mapped) == CL_SUCCESS);
371 }
372 if(!ok)
373 {
374 ok = (dt_opencl_read_host_from_device(devid, entry->data, source->mem,
375 width, height, bpp) == CL_SUCCESS);
376 }
377 }
378
380 return ok;
381}
382#else
383static gboolean _cache_entry_materialize_host_data_locked(dt_pixel_cache_entry_t *entry, int preferred_devid,
384 gboolean prefer_device_payload)
385{
386 (void)preferred_devid;
387 (void)prefer_device_payload;
388 return entry && !IS_NULL_PTR(entry->data);
389}
390#endif
391
392static gboolean _cache_entry_materialize_host_data(dt_dev_pixelpipe_cache_t *cache, int preferred_devid,
394{
395 if(IS_NULL_PTR(cache) || IS_NULL_PTR(entry)) return FALSE;
396 if(preferred_devid < 0 && dt_pixel_cache_entry_get_data(entry) == NULL) return FALSE;
397
399 gboolean use_host_ptr = TRUE;
401 {
402 dt_pixel_cache_alloc(cache, entry);
403 use_host_ptr = FALSE;
404 }
405 const gboolean ok = _cache_entry_materialize_host_data_locked(entry, preferred_devid, use_host_ptr);
407
408 return ok;
409}
410
411#ifdef HAVE_OPENCL
412static gboolean _cache_entry_clmem_has_host_pinned_locked(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid)
413{
414 if(IS_NULL_PTR(entry) || IS_NULL_PTR(host_ptr)) return FALSE;
415
416 gboolean found = FALSE;
418 for(GList *l = g_list_first(entry->cl_mem_list); l; l = g_list_next(l))
419 {
420 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
421 if(IS_NULL_PTR(c) || IS_NULL_PTR(c->mem)) continue;
422
423 if(c->refs == 0 && devid == dt_opencl_get_mem_context_id((cl_mem)c->mem))
424 {
425 found = TRUE;
426 break;
427 }
428 }
430
431 return found;
432}
433
434static gboolean _cache_entry_clmem_flush_host_pinned_locked(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid)
435{
436 // If host_ptr is NULL, we don't have RAM cache for this buffer,
437 // so we can't flush the vRAM cache or we would lose it forever.
438 if(IS_NULL_PTR(entry) || IS_NULL_PTR(host_ptr)) return FALSE;
439
440 gboolean flushed = FALSE;
441
443 for(GList *l = g_list_first(entry->cl_mem_list); l;)
444 {
445 GList *next = g_list_next(l);
446 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
447 if(IS_NULL_PTR(c->mem))
448 {
449 // Current cacheline holds an empty buffer, no point keeping it
450 entry->cl_mem_list = g_list_delete_link(entry->cl_mem_list, l);
451 dt_free(c);
452 l = next;
453 continue;
454 }
455 if(dt_opencl_get_mem_context_id(c->mem) != devid)
456 {
457 // Current cacheline doesn't belong to current OpenCL device: don't touch it
458 l = next;
459 continue;
460 }
461
462 entry->cl_mem_list = g_list_delete_link(entry->cl_mem_list, l);
464 dt_free(c);
465 flushed = TRUE;
466 l = next;
467 }
468
470
471 return flushed;
472}
473#endif
474
476{
477 // devid < 0 means the calling pipe never used OpenCL: it owns no device-side
478 // payload in the cache, so there is nothing of its own to release here.
479 //
480 // This used to also support devid == -1 as "drop cl_mem from every device,
481 // regardless of who else is using it", called from per-pipe cleanup. That ran
482 // without holding any dev[].lock, so it could race the eventlist/cl_mem
483 // bookkeeping of whichever OTHER pixelpipe was concurrently running on that
484 // device, corrupting it and crashing inside clGetEventInfo/clWaitForEvents
485 // (see #859 and #864). A global, all-devices teardown is never needed during
486 // normal operation: dt_cleanup() already finishes every device and
487 // dt_dev_pixelpipe_cache_cleanup() unconditionally releases all remaining
488 // cl_mem objects at application exit, when nothing else is running.
489 if(devid < 0) return;
490
491 // NOTE: the caller must hold darktable.opencl->dev[devid].lock -- either
492 // because it IS the pixelpipe currently running on that device (the lock
493 // dt_opencl_lock_device() handed it for the duration of its run), or because
494 // it explicitly took that lock to safely flush this device's cache entries
495 // after its own run finished (see dt_dev_pixelpipe_cache_flush_clmem_for_pipe()).
497
499 GHashTableIter iter;
500 gpointer key, value;
501 g_hash_table_iter_init(&iter, cache->entries);
502 while(g_hash_table_iter_next(&iter, &key, &value))
503 {
505
506 /* Only idle cachelines may have their vRAM reclaimed. An entry that is referenced or
507 * write-locked is somebody's live (or about-to-be-consumed) buffer: the recursion reserves
508 * an entry-level ref for the next consumer before that consumer borrows the cl_mem payload,
509 * so a payload can be unborrowed (per-payload refs == 0) yet still belong to an in-flight
510 * pipe. Honoring the same protection the LRU/removal paths use (refcount + non-blocking
511 * write-lock probe) keeps us from yanking the sole vRAM copy of another pipe's input out
512 * from under it -- which left a husk and produced skull thumbnails (issue #817). The
513 * trywrlock never waits, so this stays lightweight and cannot deadlock against renders that
514 * already hold entry locks. */
515 const gboolean used = dt_atomic_get_int(&entry->refcount) > 0;
516 gboolean locked = dt_pthread_rwlock_trywrlock(&entry->lock);
517 if(!locked) dt_pthread_rwlock_unlock(&entry->lock);
518 if(used || locked)
519 {
522 "[dt_dev_pixelpipe_cache_flush_clmem] entry %" PRIu64 " is in use (refcount=%i locked=%i), "
523 "keeping its vRAM\n", entry->hash, dt_atomic_get_int(&entry->refcount), locked);
524 continue;
525 }
526
529 "[dt_dev_pixelpipe_cache_flush_clmem] trying to flush vRAM for entry %" PRIu64 " on device %d...\n",
530 entry->hash, devid);
531
532 /* If reclaiming this device's vRAM leaves the entry with no buffer at all, delete it now
533 * instead of letting a payload-less husk persist as a cache hit. We hold cache->lock for the
534 * whole iteration, and lookups bump the consumer ref under that same lock, so no consumer can
535 * be mid-acquisition of this (refcount == 0) entry. iter_remove runs _free_cache_entry, which
536 * releases any remaining resources. */
537 if(_cache_entry_clmem_flush_device(entry, devid))
538 g_hash_table_iter_remove(&iter);
539 }
541}
542
543#ifdef HAVE_OPENCL
545{
546 // Like dt_dev_pixelpipe_cache_flush_clmem(), but for callers that do NOT
547 // currently hold darktable.opencl->dev[devid].lock -- typically a pipe's own
548 // cleanup, running after dt_dev_pixelpipe_process() already released that
549 // lock. Taking it here ensures we can't race the eventlist/cl_mem bookkeeping
550 // of whichever OTHER pixelpipe is now running on that device.
551 if(devid < 0 || IS_NULL_PTR(darktable.opencl) || !darktable.opencl->inited) return;
552
556}
557#else
559{
560 (void)cache;
561 (void)devid;
562}
563#endif
564
571
572
573// find the cache entry hash with the oldest use
574static void _cache_get_oldest(gpointer key, gpointer value, gpointer user_data)
575{
577 _cache_lru_t *lru = (_cache_lru_t *)user_data;
578
579 // Don't remove LRU entries that are still in use
580 // NOTE: with all the killswitches mechanisms and safety measures,
581 // we might have more things decreasing refcount than increasing it.
582 // It's no big deal though, as long as the (final output) backbuf
583 // is checked for NULL and not reused if pipeline is DIRTY.
584 if(cache_entry->age < lru->max_age)
585 {
586 // Returns 1 if the lock is captured by another thread
587 // 0 if WE capture the lock, and then need to release it
588 gboolean locked = dt_pthread_rwlock_trywrlock(&cache_entry->lock);
589 if(!locked) dt_pthread_rwlock_unlock(&cache_entry->lock);
590 gboolean used = dt_atomic_get_int(&cache_entry->refcount) > 0;
591
592 if(!locked && !used)
593 {
594 lru->max_age = cache_entry->age;
595 lru->hash = cache_entry->hash;
596 lru->cache_entry = cache_entry;
597 _pixel_cache_message(cache_entry, "candidate for deletion", TRUE);
598 }
599 else if(used)
600 _pixel_cache_message(cache_entry, "cannot be deleted: used", TRUE);
601 else if(locked)
602 _pixel_cache_message(cache_entry, "cannot be deleted: locked", TRUE);
603 }
604}
605
606static void _print_cache_lines(gpointer key, gpointer value, gpointer user_data)
607{
609 _pixel_cache_message(cache_entry, "", FALSE);
610}
611
612
613// remove the least used cache entry
614// return 0 on success, 1 on error
615// error is : we couldn't find a candidate for deletion because all entries are either locked or in use
616// or we found one but failed to remove it.
618{
619 _cache_lru_t *lru = (_cache_lru_t *)malloc(sizeof(_cache_lru_t));
620 lru->max_age = g_get_monotonic_time();
621 lru->hash = 0;
622 lru->cache_entry = NULL;
623 int error = 1;
624 g_hash_table_foreach(cache->entries, _cache_get_oldest, lru);
625
626 if(lru->hash > 0)
627 {
629 if(error)
630 dt_print(DT_DEBUG_PIPECACHE, "[pixelpipe] couldn't remove LRU %" PRIu64 "\n", lru->hash);
631 else
632 dt_print(DT_DEBUG_PIPECACHE, "[pixelpipe] LRU %" PRIu64 " removed. Total cache size: %" G_GSIZE_FORMAT " MiB\n",
633 lru->hash, cache->current_memory / (1024 * 1024));
634 }
635 else
636 {
637 dt_print(DT_DEBUG_PIPECACHE, "[pixelpipe] couldn't remove LRU, %i items and all are used\n", g_hash_table_size(cache->entries));
638 g_hash_table_foreach(cache->entries, _print_cache_lines, NULL);
639 }
640
641 dt_free(lru);
642 return error;
643}
644
645// return 0 on success 1 on error
653
654#ifdef HAVE_OPENCL
655static void *_pixel_cache_clmem_get(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid,
656 int width, int height, int bpp, int flags)
657{
659
662 "[_pixel_cache_clmem_get] %u output entries in %" PRIu64 "\n",
663 g_list_length(entry->cl_mem_list), entry->hash);
664
665 for(GList *l = g_list_first(entry->cl_mem_list); l;)
666 {
667 GList *next = g_list_next(l);
668 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
669 if(IS_NULL_PTR(c->mem))
670 {
671 // No point in keeping buffer-less cachelines
672 entry->cl_mem_list = g_list_delete_link(entry->cl_mem_list, l);
673 dt_free(c);
674 l = next;
675 continue;
676 }
677
678 // Buffer reuse must stay on the same OpenCL device and ensure proper size
679 if(dt_opencl_get_mem_context_id(c->mem) == devid
680 && dt_opencl_get_image_width(c->mem) == width
683 && c->refs == 0)
684 {
685 // Destroy the current OpenCL cacheline and return the buffer, the cacheline will be recreated
686 // when we are done consuming the buffer
687 entry->cl_mem_list = g_list_delete_link(entry->cl_mem_list, l);
688 void *mem = c->mem;
689 dt_free(c);
691 return mem;
692 }
693
694 l = next;
695 }
697
698 return NULL;
699}
700#endif
701
703 int width, int height, int bpp)
704{
705#ifdef HAVE_OPENCL
706
708
709 for(GList *l = g_list_first(entry->cl_mem_list); l; l = g_list_next(l))
710 {
711 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
712 if(dt_opencl_get_mem_context_id(c->mem) == devid
713 && dt_opencl_get_image_width(c->mem) == width
716 {
717 c->refs++;
718 void *mem = c->mem;
720 return mem;
721 }
722 }
724
725#endif
726
727 return NULL;
728}
729
731{
732#ifdef HAVE_OPENCL
733
734 if(IS_NULL_PTR(entry) || IS_NULL_PTR(mem)) return;
735
737 for(GList *l = entry->cl_mem_list; l; l = g_list_next(l))
738 {
739 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
740 if(c && c->mem == mem)
741 {
742 if(c->refs > 0) c->refs--;
743 break;
744 }
745 }
747
748#else
749 (void)entry;
750 (void)mem;
751#endif
752}
753
754#ifdef HAVE_OPENCL
764static int _pixel_cache_clmem_put(dt_pixel_cache_entry_t *entry, void *host_ptr, void *mem)
765{
766 cl_mem clmem = (cl_mem)mem;
767 const int devid = dt_opencl_get_mem_context_id(clmem);
768
770 for(GList *l = g_list_first(entry->cl_mem_list); l; l = g_list_next(l))
771 {
772 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
773 if(c->mem == mem)
774 {
776 return 3;
777 }
778 if(dt_opencl_get_mem_context_id(c->mem) == devid)
779 {
780 // We keep one GPU cacheline per GPU device per pipeline cache entry
781 // If refs > 0 here, we have a problem earlier.
782 if(c->refs > 0) continue;
783
784 void *old = c->mem;
785 c->mem = mem;
786 c->host_ptr = host_ptr;
789 return 2;
790 }
791 }
792
793 dt_cache_clmem_t *c = (dt_cache_clmem_t *)g_malloc0(sizeof(*c));
794 if(IS_NULL_PTR(c))
795 {
798 return 0;
799 }
800
801 c->host_ptr = host_ptr;
802 c->mem = mem;
803 entry->cl_mem_list = g_list_prepend(entry->cl_mem_list, c);
805 return 1;
806}
807
809{
810 if(IS_NULL_PTR(entry) || IS_NULL_PTR(mem)) return;
811
813 for(GList *l = entry->cl_mem_list; l;)
814 {
815 GList *next = g_list_next(l);
816 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
817 if(c && c->mem == mem)
818 {
819 entry->cl_mem_list = g_list_delete_link(entry->cl_mem_list, l);
820 dt_free(c);
821 }
822 l = next;
823 }
825}
826#endif
827
829{
831 for(GList *l = entry->cl_mem_list; l;)
832 {
833 GList *next = g_list_next(l);
834 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
835 if(c->refs > 0)
836 {
837 l = next;
838 continue;
839 }
840
841 entry->cl_mem_list = g_list_delete_link(entry->cl_mem_list, l);
843 dt_free(c);
844 l = next;
845 }
847}
848
849#ifdef HAVE_OPENCL
851 dt_pixel_cache_entry_t *entry_hint, int devid,
852 int width, int height, int bpp, int flags,
853 gboolean *out_reused)
854{
855 if(!IS_NULL_PTR(out_reused)) *out_reused = FALSE;
856 if(devid < 0 || width <= 0 || height <= 0 || bpp <= 0) return NULL;
857
858 // Pinning is enabled if the calling function requests it and if it is allowed by user for this device
859 gboolean use_pinned = dt_opencl_use_pinned_memory(devid) && (flags & CL_MEM_USE_HOST_PTR);
860
861 // If no pinning, remove the allocation flag now because pinning happens at vRAM alloc time
862 if(!use_pinned) flags &= ~CL_MEM_USE_HOST_PTR;
863
864 // Reuse the entry hint if available, else find the cache entry attached to the host_ptr
865 dt_pixel_cache_entry_t *entry = entry_hint;
866 if(IS_NULL_PTR(entry))
867 {
869 entry = _cache_entry_for_host_ptr_locked(cache, host_ptr);
871 }
872
873 // Reuse the vRAM buffer attached to the cache entry if any
874 void *mem = NULL;
875 if(entry)
876 {
877 mem = _pixel_cache_clmem_get(entry, host_ptr, devid, width, height, bpp, flags);
878 if(!IS_NULL_PTR(mem) && !IS_NULL_PTR(out_reused)) *out_reused = TRUE;
879 }
880
881 // If no vRAM buffer was found, allocate a new one, pinning the host_ptr memory if the option is enabled
882 if(IS_NULL_PTR(mem))
883 {
884 mem = dt_opencl_alloc_device_use_host_pointer(devid, width, height, bpp, use_pinned ? host_ptr : NULL, flags);
885 if(IS_NULL_PTR(mem)) return NULL;
886 }
887
888 gboolean synced = FALSE;
889
890 // Synchronize host_ptr with mem
892 {
893 // Zero-copy for pinned buffers : note that some drivers may still use non-zero-copy,
894 // in which case that degrades to basic memory copy.
895 void *mapped = dt_opencl_map_image(devid, mem, TRUE, CL_MAP_WRITE, width, height, bpp);
896 synced = (dt_opencl_unmap_mem_object(devid, mem, mapped) == CL_SUCCESS);
897 }
898
899 if(!synced)
900 {
901 // Zero-copy failed or pinned memory is disabled for this device : use plain memory transfer
902 if(dt_opencl_write_host_to_device(devid, host_ptr, mem, width, height, bpp) != CL_SUCCESS)
903 {
904 // Clean everything up on error and abort
905 if(entry) _pixel_cache_clmem_remove(entry, mem);
907 dt_print(DT_DEBUG_OPENCL, "[dt_dev_pixelpipe_cache_get_pinned_image] failed to synchronize\n");
908 return NULL;
909 }
910 else
911 {
912 dt_print(DT_DEBUG_OPENCL, "[dt_dev_pixelpipe_cache_get_pinned_image] synchronized with write_host_to_device\n");
913 }
914 }
915 else
916 {
917 dt_print(DT_DEBUG_OPENCL, "[dt_dev_pixelpipe_cache_get_pinned_image] synchronized with mapping/unmapping\n");
918 }
919
920 return mem;
921}
922
924 dt_pixel_cache_entry_t *entry_hint, void **mem)
925{
926 if(IS_NULL_PTR(mem) || IS_NULL_PTR(*mem) || IS_NULL_PTR(host_ptr)) return;
927 dt_pixel_cache_entry_t *entry = entry_hint;
928 if(IS_NULL_PTR(entry))
929 {
931 dt_print(DT_DEBUG_OPENCL, "[dt_dev_pixelpipe_cache_put_pinned_image] no cache entry to put the vRAM buffer\n");
932 return;
933 }
934
935 // FIXME: is it safe to cache non-pinned vRAM buffers (aka no CL_MEM_USE_HOST_PTR in flags) ?
936 const int state = _pixel_cache_clmem_put(entry, host_ptr, (cl_mem)*mem);
937 *mem = NULL;
939 dt_print(DT_DEBUG_OPENCL, "[dt_dev_pixelpipe_cache_put_pinned_image] cache entry put the vRAM buffer (state=%i) in %p\n", state, entry);
940}
941
943 dt_pixel_cache_entry_t *entry_hint, int devid)
944{
945 if(IS_NULL_PTR(cache) || IS_NULL_PTR(host_ptr)) return FALSE;
946
947 dt_pixel_cache_entry_t *entry = entry_hint;
948 if(IS_NULL_PTR(entry))
949 {
951 entry = _cache_entry_for_host_ptr_locked(cache, host_ptr);
953 }
954
955 if(IS_NULL_PTR(entry)) return FALSE;
956 if(!_cache_entry_clmem_has_host_pinned_locked(entry, host_ptr, devid)) return FALSE;
957
958 if(devid >= 0) dt_opencl_events_wait_for(devid);
960 const gboolean flushed = _cache_entry_clmem_flush_host_pinned_locked(entry, host_ptr, devid);
962 return flushed;
963}
964
965#else
966
968 dt_pixel_cache_entry_t *entry_hint, void **mem)
969{
970 (void)cache;
971 (void)host_ptr;
972 (void)entry_hint;
973 if(mem) *mem = NULL;
974}
975
977 dt_pixel_cache_entry_t *entry_hint, int devid)
978{
979 (void)cache;
980 (void)host_ptr;
981 (void)entry_hint;
982 (void)devid;
983 return FALSE;
984}
985
986void dt_dev_pixelpipe_cache_resync_host_pinned_image(dt_dev_pixelpipe_cache_t *cache, void *host_ptr,
987 dt_pixel_cache_entry_t *entry_hint, int devid)
988{
989 (void)cache;
990 (void)host_ptr;
991 (void)entry_hint;
992 (void)devid;
993}
994#endif
995
996#ifdef HAVE_OPENCL
997static inline gboolean _is_gamma_rgba8_output(const dt_iop_module_t *module, const size_t bpp,
998 const char *message)
999{
1000 return module && message && bpp == 4 * sizeof(uint8_t) && strcmp(module->op, "gamma") == 0
1001 && strcmp(message, "output") == 0;
1002}
1003
1004void *dt_dev_pixelpipe_cache_alloc_cl_device_buffer(int devid, const dt_iop_roi_t *roi, const size_t bpp,
1005 const dt_iop_module_t *module, const char *message,
1006 void *keep)
1007{
1008 const gboolean gamma_rgba8 = _is_gamma_rgba8_output(module, bpp, message);
1009 const int cl_bpp = gamma_rgba8 ? DT_OPENCL_BPP_ENCODE_RGBA8((int)bpp) : (int)bpp;
1010 return dt_opencl_alloc_device(devid, roi->width, roi->height, cl_bpp);
1011}
1012
1013void *dt_dev_pixelpipe_cache_get_cl_buffer(int devid, void *const host_ptr, const dt_iop_roi_t *roi,
1014 const size_t bpp, dt_iop_module_t *module,
1015 const char *message, dt_pixel_cache_entry_t *cache_entry,
1016 gboolean *out_reused, void *keep)
1017{
1018 // Need to use read-write mode because of in-place color space conversions.
1019 void *cl_mem_input = NULL;
1020 gboolean reused_from_cache = FALSE;
1021 const gboolean gamma_rgba8 = _is_gamma_rgba8_output(module, bpp, message);
1022 const int cl_bpp = gamma_rgba8 ? DT_OPENCL_BPP_ENCODE_RGBA8((int)bpp) : (int)bpp;
1023 static dt_atomic_int clmem_reuse_hits;
1024 static dt_atomic_int clmem_reuse_misses;
1025
1026 if(out_reused) *out_reused = FALSE;
1027
1028 if(host_ptr && dt_opencl_use_pinned_memory(devid))
1029 {
1030 const int flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
1031
1032 // Try to reuse existing buffer
1033 if(cache_entry)
1034 {
1035 cl_mem_input = _pixel_cache_clmem_get(cache_entry, host_ptr, devid, roi->width, roi->height,
1036 (int)bpp, flags);
1037 reused_from_cache = (!IS_NULL_PTR(cl_mem_input));
1038 }
1039
1040 // This will internally try to free up cache space if first alloc fails
1041 if(IS_NULL_PTR(cl_mem_input))
1042 {
1043 cl_mem_input = dt_opencl_alloc_device_use_host_pointer(devid, roi->width, roi->height, cl_bpp,
1044 host_ptr, flags);
1046 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] allocated a pinned GPU buffer for %s %s\n", module->name(), message);
1047 }
1048 }
1049 else
1050 {
1051 if(cache_entry)
1052 {
1053 /* Device-only allocations are tracked with a NULL host_ptr key and a normalized READ_WRITE
1054 * flag so scratch buffers can be reused deterministically across drivers. */
1055 cl_mem_input = _pixel_cache_clmem_get(cache_entry, NULL, devid, roi->width, roi->height,
1056 (int)bpp, CL_MEM_READ_WRITE);
1057 reused_from_cache = (!IS_NULL_PTR(cl_mem_input));
1058 }
1059
1060 // This will internally try to free up cache space if first alloc fails
1061 if(IS_NULL_PTR(cl_mem_input))
1062 {
1063 cl_mem_input = dt_dev_pixelpipe_cache_alloc_cl_device_buffer(devid, roi, bpp, module, message, keep);
1064
1066 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] allocated a device-only GPU buffer for %s %s\n", module->name(), message);
1067 }
1068 }
1069
1070 if(IS_NULL_PTR(cl_mem_input))
1071 {
1072 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] couldn't allocate GPU buffer for module %s %s\n", module->name(), message);
1073 }
1074 else if(reused_from_cache)
1075 {
1076 const int hits = dt_atomic_add_int(&clmem_reuse_hits, 1) + 1;
1077 const int misses = dt_atomic_get_int(&clmem_reuse_misses);
1080 "[dev_pixelpipe] reused GPU buffer from cache (hits=%d, misses=%d) for module %s %s\n",
1081 hits, misses, module->name(), message);
1082 }
1083 else
1084 {
1085 dt_atomic_add_int(&clmem_reuse_misses, 1);
1086 }
1087
1088 if(out_reused) *out_reused = reused_from_cache;
1089 return cl_mem_input;
1090}
1091
1115 void *host_ptr, const gboolean cache_device)
1116{
1117 if(!IS_NULL_PTR(cl_mem_buffer) && !IS_NULL_PTR(*cl_mem_buffer))
1118 {
1119 cl_mem mem = *cl_mem_buffer;
1120 if(cache_device && !IS_NULL_PTR(cache_entry))
1121 {
1122 _pixel_cache_clmem_put(cache_entry, host_ptr, mem);
1123 }
1124 else
1125 {
1126 if(!IS_NULL_PTR(cache_entry)) _pixel_cache_clmem_remove(cache_entry, mem);
1128 }
1129 *cl_mem_buffer = NULL;
1130 }
1131}
1132
1162int dt_dev_pixelpipe_cache_sync_cl_buffer(const int devid, void *host_ptr, void *cl_mem_buffer,
1163 const dt_iop_roi_t *roi, int cl_mode, size_t bpp,
1164 dt_iop_module_t *module, const char *message)
1165{
1166 if(IS_NULL_PTR(host_ptr) || IS_NULL_PTR(cl_mem_buffer)) return 1;
1167
1168 const cl_mem mem = (cl_mem)cl_mem_buffer;
1169
1170 // Fast path for true zero-copy pinned images: map/unmap is enough to synchronize host<->device.
1172 {
1173 void *mapped = dt_opencl_map_image(devid, mem, TRUE, cl_mode, roi->width, roi->height, (int)bpp);
1174 if(dt_opencl_unmap_mem_object(devid, mem, mapped) == CL_SUCCESS)
1175 {
1177 "[dev_pixelpipe] successfully synced image %s via map/unmap for module %s (%s)\n",
1178 (cl_mode == CL_MAP_WRITE) ? "host to device" : "device to host",
1179 (module) ? module->op : "base buffer", message);
1180 return 0;
1181 }
1182 }
1183
1184 // Fallback: explicit blocking transfers (safe on all drivers).
1185 cl_int err = CL_SUCCESS;
1186 if(cl_mode == CL_MAP_WRITE)
1187 err = dt_opencl_write_host_to_device(devid, host_ptr, mem, roi->width, roi->height, (int)bpp);
1188 else if(cl_mode == CL_MAP_READ)
1189 err = dt_opencl_read_host_from_device(devid, host_ptr, mem, roi->width, roi->height, (int)bpp);
1190 else
1191 return 1;
1192
1193 if(err != CL_SUCCESS)
1194 {
1195 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] couldn't copy image %s for module %s (%s)\n",
1196 (cl_mode == CL_MAP_WRITE) ? "host to device" : "device to host",
1197 (module) ? module->op : "base buffer", message);
1198 return 1;
1199 }
1200
1201 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] successfully copied image %s for module %s (%s)\n",
1202 (cl_mode == CL_MAP_WRITE) ? "host to device" : "device to host",
1203 (module) ? module->op : "base buffer", message);
1204 return 0;
1205}
1206
1221float *dt_dev_pixelpipe_cache_restore_cl_buffer(dt_dev_pixelpipe_t *pipe, float *input, void *cl_mem_input,
1222 const dt_iop_roi_t *roi_in, dt_iop_module_t *module,
1223 const size_t in_bpp, dt_pixel_cache_entry_t *input_entry,
1224 const char *message)
1225{
1226 if(IS_NULL_PTR(cl_mem_input)) return input;
1228
1229 const int fail = dt_dev_pixelpipe_cache_sync_cl_buffer(pipe->devid, input, cl_mem_input, roi_in,
1230 CL_MAP_READ, in_bpp, module, message);
1232 return fail ? NULL : input;
1233}
1234
1263 float *input, void **cl_mem_input,
1264 const dt_iop_roi_t *roi_in, const size_t in_bpp,
1265 dt_pixel_cache_entry_t *input_entry,
1266 dt_pixel_cache_entry_t **locked_input_entry, void *keep)
1267{
1268 if(IS_NULL_PTR(locked_input_entry)) return 1;
1269 *locked_input_entry = NULL;
1270
1271 if(!IS_NULL_PTR(*cl_mem_input))
1272 {
1273 // We passed the OpenCL memory buffer through directly on vRAM from previous module.
1274 // This is fast and efficient.
1275 // If it's a true zero-copy pinned image, keep the input cache entry read-locked until kernels complete,
1276 // otherwise another thread may overwrite host memory while the GPU is still reading it.
1277 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] %s will use its input directly from vRAM\n", module->name());
1278 const cl_mem mem = (cl_mem)*cl_mem_input;
1280 {
1282 *locked_input_entry = input_entry;
1283 }
1284 return 0;
1285 }
1286
1287 if(IS_NULL_PTR(input))
1288 {
1289 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] %s has no input (cache)\n", module->name());
1290 return 1;
1291 }
1292
1294
1295 // Try to reuse a cached pinned buffer; otherwise allocate a new pinned image backed by `input`.
1296 gboolean input_reused_from_cache = FALSE;
1297 *cl_mem_input = dt_dev_pixelpipe_cache_get_cl_buffer(pipe->devid, input, roi_in, in_bpp, module,
1298 "input", input_entry,
1299 &input_reused_from_cache, keep);
1300 int fail = (IS_NULL_PTR(*cl_mem_input));
1301
1302 // If the input is true zero-copy, the GPU will access host memory asynchronously: keep the cache
1303 // entry read-locked until all kernels have completed. If not, drivers may use a device-side copy
1304 // which must be synchronized from the host before running kernels.
1305 gboolean keep_lock = FALSE;
1306 cl_mem mem = NULL;
1307 if(!fail && *cl_mem_input)
1308 {
1309 mem = (cl_mem)*cl_mem_input;
1310 keep_lock = dt_opencl_is_pinned_memory(mem);
1311 }
1312
1313 /* A reused cached pinned image already carries the authoritative device payload from the
1314 * previous module output. Re-uploading host RAM here would overwrite that valid vRAM state
1315 * with whatever stale contents the host buffer still has when the previous stage stayed GPU-only.
1316 * Only freshly allocated pinned inputs need an explicit host->device copy. */
1317 if(!fail && mem && !keep_lock && !input_reused_from_cache)
1318 {
1319 const cl_int err = dt_opencl_write_host_to_device(pipe->devid, input, mem, roi_in->width, roi_in->height,
1320 (int)in_bpp);
1321 if(err != CL_SUCCESS)
1322 {
1323 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] couldn't copy image host to device for module %s (%s)\n",
1324 (module) ? module->op : "base buffer", "cache to input");
1325 fail = TRUE;
1326 }
1327 else
1328 {
1329 dt_print(DT_DEBUG_OPENCL, "[dev_pixelpipe] successfully copied image host to device for module %s (%s)\n",
1330 (module) ? module->op : "base buffer", "cache to input");
1331 }
1332 }
1333
1334 // Enforce sync with the CPU/RAM cache so lock validity is guaranteed.
1336
1337 if(keep_lock)
1338 *locked_input_entry = input_entry;
1339 else
1341
1342 return fail ? 1 : 0;
1343}
1344#else
1345void *dt_dev_pixelpipe_cache_get_cl_buffer(int devid, void *host_ptr, const dt_iop_roi_t *roi,
1346 size_t bpp, dt_iop_module_t *module, const char *message,
1348 gboolean *out_reused, void *keep)
1349{
1350 (void)devid;
1351 (void)host_ptr;
1352 (void)roi;
1353 (void)bpp;
1354 (void)module;
1355 (void)message;
1356 (void)entry;
1357 (void)keep;
1358 if(out_reused) *out_reused = FALSE;
1359 return NULL;
1360}
1361
1362void *dt_dev_pixelpipe_cache_alloc_cl_device_buffer(int devid, const dt_iop_roi_t *roi, size_t bpp,
1363 const dt_iop_module_t *module,
1364 const char *message, void *keep)
1365{
1366 (void)devid;
1367 (void)roi;
1368 (void)bpp;
1369 (void)module;
1370 (void)message;
1371 (void)keep;
1372 return NULL;
1373}
1374
1376 void *host_ptr, gboolean cache_device)
1377{
1378 (void)entry;
1379 (void)host_ptr;
1380 (void)cache_device;
1381 if(cl_mem_buffer) *cl_mem_buffer = NULL;
1382}
1383
1384int dt_dev_pixelpipe_cache_sync_cl_buffer(int devid, void *host_ptr, void *cl_mem_buffer,
1385 const dt_iop_roi_t *roi, int cl_mode, size_t bpp,
1386 dt_iop_module_t *module, const char *message)
1387{
1388 (void)devid;
1389 (void)host_ptr;
1390 (void)cl_mem_buffer;
1391 (void)roi;
1392 (void)cl_mode;
1393 (void)bpp;
1394 (void)module;
1395 (void)message;
1396 return 1;
1397}
1398
1400 void *cl_mem_input, const dt_iop_roi_t *roi_in,
1401 dt_iop_module_t *module, size_t in_bpp,
1402 dt_pixel_cache_entry_t *input_entry,
1403 const char *message)
1404{
1405 (void)pipe;
1406 (void)cl_mem_input;
1407 (void)roi_in;
1408 (void)module;
1409 (void)in_bpp;
1410 (void)input_entry;
1411 (void)message;
1412 return input;
1413}
1414
1416 float *input, void **cl_mem_input,
1417 const dt_iop_roi_t *roi_in, size_t in_bpp,
1418 dt_pixel_cache_entry_t *input_entry,
1419 dt_pixel_cache_entry_t **locked_input_entry,
1420 void *keep)
1421{
1422 (void)pipe;
1423 (void)module;
1424 (void)input;
1425 (void)cl_mem_input;
1426 (void)roi_in;
1427 (void)in_bpp;
1428 (void)input_entry;
1429 (void)locked_input_entry;
1430 (void)keep;
1431 return 1;
1432}
1433#endif
1434
1436 void *host_ptr)
1437{
1438 if(IS_NULL_PTR(cache) || IS_NULL_PTR(host_ptr)) return NULL;
1439
1440 dt_pthread_mutex_lock(&cache->lock);
1442 if(entry)
1445
1446 return entry;
1447}
1448
1449// Attempt to allocate from the arena; if fragmentation prevents it, evict LRU cache lines
1450// until a sufficiently large contiguous run is available (or nothing remains to evict).
1451static inline void *_arena_alloc_with_defrag(dt_dev_pixelpipe_cache_t *cache, size_t request_size,
1452 size_t *actual_size)
1453{
1454 void *buf = dt_cache_arena_alloc(&cache->arena, request_size, actual_size);
1455 if(!IS_NULL_PTR(buf)) return buf;
1456
1457 uint32_t pages_needed = 0;
1458 if(dt_cache_arena_calc(&cache->arena, request_size, &pages_needed, NULL))
1459 {
1460 dt_pthread_mutex_lock(&cache->lock);
1461 uint32_t total_free_pages = 0, largest_free_run_pages = 0;
1462 dt_cache_arena_stats(&cache->arena, &total_free_pages, &largest_free_run_pages);
1463
1464 while(largest_free_run_pages < pages_needed && g_hash_table_size(cache->entries) > 0)
1465 {
1467 dt_cache_arena_stats(&cache->arena, &total_free_pages, &largest_free_run_pages);
1468 }
1470 }
1471
1472 return dt_cache_arena_alloc(&cache->arena, request_size, actual_size);
1473}
1474
1475static inline void _arena_stats_bytes(dt_dev_pixelpipe_cache_t *cache, uint32_t *total_pages,
1476 uint32_t *largest_pages, size_t *total_bytes, size_t *largest_bytes)
1477{
1478 dt_cache_arena_stats(&cache->arena, total_pages, largest_pages);
1479 const size_t page_size = cache->arena.page_size ? cache->arena.page_size : 1;
1480 if(total_bytes) *total_bytes = (size_t)(*total_pages) * page_size;
1481 if(largest_bytes) *largest_bytes = (size_t)(*largest_pages) * page_size;
1482}
1483
1484static inline void _log_arena_allocation_failure(dt_dev_pixelpipe_cache_t *cache, size_t request_size,
1485 const char *entry_name, const char *module, uint64_t hash,
1486 gboolean name_is_file)
1487{
1488 uint32_t total_free_pages = 0, largest_free_run_pages = 0;
1489 size_t total_free_bytes = 0, largest_free_bytes = 0;
1490 _arena_stats_bytes(cache, &total_free_pages, &largest_free_run_pages, &total_free_bytes, &largest_free_bytes);
1491
1492 if(entry_name)
1493 fprintf(stdout,
1494 "[pixelpipe_cache] failed to allocate %" G_GSIZE_FORMAT " bytes for entry %" PRIu64 " (%s, module=%s) "
1495 "[arena largest=%" G_GSIZE_FORMAT " MiB, total=%" G_GSIZE_FORMAT " MiB, cache=%" G_GSIZE_FORMAT "/%" G_GSIZE_FORMAT " MiB]\n",
1496 request_size, hash, entry_name, module ? module : "unknown",
1497 largest_free_bytes / (1024 * 1024), total_free_bytes / (1024 * 1024),
1498 cache->current_memory / (1024 * 1024), cache->max_memory / (1024 * 1024));
1499 else
1500 fprintf(stdout,
1501 "[pixelpipe_cache] failed to allocate %" G_GSIZE_FORMAT " bytes for entry %" PRIu64 " (module=%s) "
1502 "[arena largest=%" G_GSIZE_FORMAT " MiB, total=%" G_GSIZE_FORMAT " MiB, cache=%" G_GSIZE_FORMAT "/%" G_GSIZE_FORMAT " MiB]\n",
1503 request_size, hash, module ? module : "unknown",
1504 largest_free_bytes / (1024 * 1024), total_free_bytes / (1024 * 1024),
1505 cache->current_memory / (1024 * 1024), cache->max_memory / (1024 * 1024));
1506
1507 if(!IS_NULL_PTR(entry_name) && !IS_NULL_PTR(module))
1508 dt_control_log(_("The pipeline cache is full while allocating `%s` (module `%s`). Either your RAM settings are too frugal or your RAM is too small."),
1509 entry_name, module);
1510 else if(!IS_NULL_PTR(entry_name))
1511 dt_control_log(_("The pipeline cache is full while allocating `%s`. Either your RAM settings are too frugal or your RAM is too small."),
1512 entry_name);
1513 else if(!IS_NULL_PTR(module))
1514 dt_control_log(_("The pipeline cache is full while processing module `%s`. Either your RAM settings are too frugal or your RAM is too small."),
1515 module);
1516 else
1517 dt_control_log(_("The pipeline cache is full. Either your RAM settings are too frugal or your RAM is too small."));
1518
1519 (void)name_is_file; // kept for signature symmetry if future callers need it.
1520}
1521
1522// keep: OpenCL buffer to NOT release
1523#ifdef HAVE_OPENCL
1524// Release this device's vRAM payloads for one entry. The caller has already established that
1525// the entry is idle (refcount == 0, not write-locked), so the device buffers are nobody's live
1526// input and reclaiming them honors the flush's purpose: free vRAM for later allocations.
1527// Returns TRUE if the entry holds no buffer at all afterwards (no host RAM, no vRAM on any
1528// device) and should therefore be evicted entirely instead of lingering as a husk.
1529static gboolean _cache_entry_clmem_flush_device(dt_pixel_cache_entry_t *entry, const int devid)
1530{
1531 // devid is always >= 0 here: dt_dev_pixelpipe_cache_flush_clmem() early-returns
1532 // otherwise. Only cachelines living on this specific device are candidates.
1534
1535 for(GList *l = g_list_first(entry->cl_mem_list); l;)
1536 {
1537 GList *next = g_list_next(l);
1538 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
1539 if(IS_NULL_PTR(c->mem))
1540 {
1541 // Don't keep cacheline with NULL buffer
1542 entry->cl_mem_list = g_list_delete_link(entry->cl_mem_list, l);
1543 dt_free(c);
1544 l = next;
1545 continue;
1546 }
1547
1548 gboolean referenced = c->refs > 0;
1549 gboolean not_ours = dt_opencl_get_mem_context_id(c->mem) != devid;
1550
1551 if(referenced || not_ours)
1552 {
1553 // Don't flush cachelines that don't belong to the current OpenCL device,
1554 // or are still borrowed by an in-flight GPU module (per-payload refs > 0).
1557 "[dt_dev_pixelpipe_cache_flush_clmem] for entry %" PRIu64 ": couldn't flush %p "
1558 "(referenced=%i not ours=%i)\n",
1559 entry->hash, c->mem, referenced, not_ours);
1560 l = next;
1561 continue;
1562 }
1563
1564 entry->cl_mem_list = g_list_delete_link(entry->cl_mem_list, l);
1566 dt_free(c);
1567 l = next;
1568 }
1569
1570 // A cacheline that now carries neither a host buffer nor any vRAM is a husk: the cache would
1571 // still hand it out as a hit, making a later consumer abort with "has no RAM nor vRAM input"
1572 // (issue #817 skull thumbnails). Signal the caller to delete it entirely.
1573 const gboolean empty = IS_NULL_PTR(entry->data) && IS_NULL_PTR(entry->cl_mem_list);
1575 return empty;
1576}
1577#else
1578static gboolean _cache_entry_clmem_flush_device(dt_pixel_cache_entry_t *entry, const int devid)
1579{
1580 return FALSE;
1581}
1582#endif
1583
1585{
1586 // allocate the data buffer
1587 if(IS_NULL_PTR(cache_entry->data))
1588 {
1589 cache_entry->data = _arena_alloc_with_defrag(cache, cache_entry->size, &cache_entry->size);
1590
1591 if(IS_NULL_PTR(cache_entry->data))
1592 {
1593 const char *module = dt_pixelpipe_cache_current_module;
1594 _log_arena_allocation_failure(cache, cache_entry->size, cache_entry->name, module,
1595 cache_entry->hash, FALSE);
1596 }
1597 }
1598
1599 return cache_entry->data;
1600}
1601
1603{
1604 return entry ? entry->data : NULL;
1605}
1606
1608{
1609 return entry ? entry->size : 0;
1610}
1611
1612// WARNING: non thread-safe
1613static int _free_space_to_alloc(dt_dev_pixelpipe_cache_t *cache, const size_t size, const uint64_t hash,
1614 const char *name)
1615{
1616 // Free up space if needed to match the max memory limit
1617 // If error, all entries are currently locked or in use, so we cannot free space to allocate a new entry.
1618 int error = 0;
1619 while(cache->current_memory + size > cache->max_memory && g_hash_table_size(cache->entries) > 0 && !error)
1621
1622 if(cache->current_memory + size > cache->max_memory)
1623 {
1624 const char *module = dt_pixelpipe_cache_current_module;
1625 const gboolean name_is_file = (!IS_NULL_PTR(name)) && (strchr(name, '/') != NULL) && (strchr(name, ':') != NULL);
1626 if(IS_NULL_PTR(name)) name = g_strdup("unknown");
1627
1628 if(hash)
1629 fprintf(stdout, "[pixelpipe] cache is full, cannot allocate new entry %" PRIu64 " (%s)\n", hash, name);
1630 else
1631 fprintf(stdout, "[pixelpipe] cache is full, cannot allocate new entry (%s)\n", name);
1632 if(!IS_NULL_PTR(name) && !IS_NULL_PTR(module) && name_is_file)
1633 dt_control_log(_("The pipeline cache is full while allocating `%s` (module `%s`). Either your RAM settings are too frugal or your RAM is too small."), name, module);
1634 else if(!IS_NULL_PTR(name))
1635 dt_control_log(_("The pipeline cache is full while allocating `%s`. Either your RAM settings are too frugal or your RAM is too small."), name);
1636 else if(!IS_NULL_PTR(module))
1637 dt_control_log(_("The pipeline cache is full while processing module `%s`. Either your RAM settings are too frugal or your RAM is too small."), module);
1638 else
1639 dt_control_log(_("The pipeline cache is full. Either your RAM settings are too frugal or your RAM is too small."));
1640 }
1641
1642 return error;
1643}
1644
1646 const char *name)
1647{
1648 // Free up space if needed to match the max memory limit
1649 // If error, all entries are currently locked or in use, so we cannot free space to allocate a new entry.
1650 dt_pthread_mutex_lock(&cache->lock);
1651 int error = _free_space_to_alloc(cache, size, 0, name);
1653
1654 if(error) return NULL;
1655
1656 // Page size is the desired size + AVX/SSE rounding
1657 size_t page_size = 0;
1658 void *buf = _arena_alloc_with_defrag(cache, size, &page_size);
1659
1660 if(IS_NULL_PTR(buf))
1661 {
1662 _log_arena_allocation_failure(cache, size, name, NULL, 0, FALSE);
1663 return NULL;
1664 }
1665
1666 void *aligned = __builtin_assume_aligned(buf, DT_CACHELINE_BYTES);
1667
1668 const uint64_t hash = (uint64_t)(uintptr_t)(aligned);
1669
1670 dt_pthread_mutex_lock(&cache->lock);
1671 dt_pixel_cache_entry_t *cache_entry
1672 = dt_pixel_cache_new_entry(hash, page_size, name, id, cache, FALSE, cache->external_entries);
1673
1674 if(IS_NULL_PTR(cache_entry))
1675 {
1677 dt_cache_arena_free(&cache->arena, buf, page_size);
1678 return NULL;
1679 }
1680
1681 // Keep this entry marked as "used" for diagnostics/bookkeeping.
1682 // Note that external_entries are not subject to LRU eviction, so we must not keep
1683 // a thread-owned rwlock held across the lifetime of the buffer (it may be freed
1684 // from a different thread during cleanup paths).
1685 _non_thread_safe_cache_ref_count_entry(cache, TRUE, cache_entry);
1686 cache_entry->data = aligned;
1687 cache_entry->age = g_get_monotonic_time();
1688 cache_entry->external_alloc = TRUE;
1690 return aligned;
1691}
1692
1693void dt_pixelpipe_cache_free_align_cache(dt_dev_pixelpipe_cache_t *cache, void **mem, const char *message)
1694{
1695 if(IS_NULL_PTR(mem) || !*mem) return;
1696
1697 dt_pthread_mutex_lock(&cache->lock);
1698 const uint64_t hash = (uint64_t)(uintptr_t)(*mem);
1700 if(IS_NULL_PTR(cache_entry) || !cache_entry->external_alloc)
1701 {
1703 fprintf(stdout, "error while freeing cache entry: no entry found but we have a buffer, %s.\n", message);
1704 raise(SIGSEGV); // triggers dt_set_signal_handlers() backtrace on Unix
1705 return;
1706 }
1707
1708 _non_thread_safe_cache_ref_count_entry(cache, FALSE, cache_entry);
1709 g_hash_table_remove(cache->external_entries, &cache_entry->hash);
1710 *mem = NULL;
1711
1713}
1714
1715
1716// WARNING: not thread-safe, protect its calls with mutex lock
1718 const char *name, const int id,
1719 dt_dev_pixelpipe_cache_t *cache, gboolean alloc,
1720 GHashTable *table)
1721{
1722 uint32_t pages_needed = 0;
1723 size_t rounded_size = 0;
1724 if(!dt_cache_arena_calc(&cache->arena, size, &pages_needed, &rounded_size))
1725 {
1726 fprintf(stderr, "[pixelpipe] invalid cache entry size %" G_GSIZE_FORMAT " for %s\n", size, name);
1727 return NULL;
1728 }
1729
1730 int error = _free_space_to_alloc(cache, rounded_size, hash, name);
1731 if(error) return NULL;
1732
1734 if(IS_NULL_PTR(cache_entry)) return NULL;
1735
1736 // Metadata, easy to free in batch if need be
1737 cache_entry->size = rounded_size;
1738 cache_entry->age = 0;
1739 cache_entry->hits = 0;
1740 cache_entry->hash = hash;
1741 cache_entry->serial = cache->next_serial++;
1742 cache_entry->id = id;
1743 cache_entry->refcount = 0;
1744 cache_entry->auto_destroy = FALSE;
1745 cache_entry->external_alloc = FALSE;
1746 cache_entry->data = NULL;
1747 cache_entry->cache = cache;
1748 cache_entry->cl_mem_list = NULL;
1749 dt_pthread_mutex_init(&cache_entry->cl_mem_lock, NULL);
1750
1751 // Optionally alloc the actual buffer, but still record its size in cache
1752 if(alloc) dt_pixel_cache_alloc(cache, cache_entry);
1753
1754 if(alloc && IS_NULL_PTR(cache_entry->data))
1755 {
1756 dt_free(cache_entry);
1757 return NULL;
1758 }
1759
1760 // Metadata that need alloc
1761 cache_entry->name = g_strdup(name);
1762 dt_pthread_rwlock_init(&cache_entry->lock, NULL);
1763
1764 uint64_t *key = g_malloc(sizeof(*key));
1765 if(IS_NULL_PTR(key))
1766 {
1767 dt_pthread_rwlock_destroy(&cache_entry->lock);
1768 dt_free(cache_entry->name);
1770 dt_free(cache_entry);
1771 return NULL;
1772 }
1773 *key = hash;
1774 g_hash_table_insert(table, key, cache_entry);
1775
1776 // Note : we grow the cache size even though the data buffer is not yet allocated
1777 // This is planning
1778 cache->current_memory += rounded_size;
1779
1780 return cache_entry;
1781}
1782
1783
1785{
1786 _pixel_cache_message(cache_entry, "freed", FALSE);
1787
1788 if(cache_entry->data)
1789 {
1790#ifdef HAVE_OPENCL
1791 dt_pthread_mutex_lock(&cache_entry->cl_mem_lock);
1792 for(GList *l = cache_entry->cl_mem_list; l; l = g_list_next(l))
1793 {
1794 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
1795 if(IS_NULL_PTR(c) || c->host_ptr != cache_entry->data) continue;
1796
1797 /* Host-backed OpenCL images may still dereference `cache_entry->data` asynchronously until their
1798 * queued work completes. We therefore wait for the owning device before releasing the host arena slot,
1799 * otherwise an auto-destroyed intermediate can be recycled into another module output while the GPU
1800 * is still reading the previous pixels. */
1802 }
1803 dt_pthread_mutex_unlock(&cache_entry->cl_mem_lock);
1804#endif
1805
1807 dt_cache_arena_free(&cache_entry->cache->arena, cache_entry->data, cache_entry->size);
1808 }
1809 else
1810 {
1812 }
1813
1814 cache_entry->data = NULL;
1815 cache_entry->cache->current_memory -= cache_entry->size;
1816 dt_pthread_rwlock_destroy(&cache_entry->lock);
1818 dt_free(cache_entry->name);
1819 dt_free(cache_entry);
1820}
1821
1822static int garbage_collection = 0;
1823
1825{
1827 dt_pthread_mutex_init(&cache->lock, NULL);
1828 cache->entries = g_hash_table_new_full(g_int64_hash, g_int64_equal, dt_free_gpointer, (GDestroyNotify)_free_cache_entry);
1829 cache->external_entries = g_hash_table_new_full(g_int64_hash, g_int64_equal, dt_free_gpointer, (GDestroyNotify)_free_cache_entry);
1830 cache->max_memory = max_memory;
1831 cache->current_memory = 0;
1832 cache->next_serial = 1;
1833 cache->queries = cache->hits = 0;
1834
1835 if(IS_NULL_PTR(cache->entries) || IS_NULL_PTR(cache->external_entries))
1836 {
1837 if(cache->entries) g_hash_table_destroy(cache->entries);
1838 if(cache->external_entries) g_hash_table_destroy(cache->external_entries);
1840 dt_free(cache);
1841 return NULL;
1842 }
1843
1844 if(dt_cache_arena_init(&cache->arena, cache->max_memory))
1845 {
1847 g_hash_table_destroy(cache->external_entries);
1848 g_hash_table_destroy(cache->entries);
1849 dt_free(cache);
1850 return NULL;
1851 }
1852
1853 // Run every 3 minutes
1854 garbage_collection = g_timeout_add(3 * 60 * 1000, (GSourceFunc)dt_dev_pixelpipe_cache_flush_old, cache);
1855 return cache;
1856}
1857
1858
1860{
1861 g_hash_table_destroy(cache->external_entries);
1862 g_hash_table_destroy(cache->entries);
1863 cache->external_entries = NULL;
1864 cache->entries = NULL;
1867
1868 if(garbage_collection != 0)
1869 {
1870 g_source_remove(garbage_collection);
1872 }
1873}
1874
1876 const uint64_t hash, const size_t size,
1877 const char *name, const int id)
1878{
1879 dt_pixel_cache_entry_t *cache_entry = dt_pixel_cache_new_entry(hash, size, name, id, cache, FALSE, cache->entries);
1880 if(IS_NULL_PTR(cache_entry)) return NULL;
1881
1882 // Increase ref_count, consumer will have to decrease it
1883 _non_thread_safe_cache_ref_count_entry(cache, TRUE, cache_entry);
1884
1885 // Acquire write lock so caller can populate data safely
1886 dt_dev_pixelpipe_cache_wrlock_entry(cache, TRUE, cache_entry);
1887
1888 return cache_entry;
1889}
1890
1892 const uint64_t new_hash, const size_t size,
1893 const dt_pixel_cache_entry_t *reuse_hint)
1894{
1895 if(IS_NULL_PTR(cache) || IS_NULL_PTR(reuse_hint)) return NULL;
1896
1897 const uint64_t old_hash = reuse_hint->hash;
1898 if(old_hash == DT_PIXELPIPE_CACHE_HASH_INVALID || old_hash == new_hash) return NULL;
1899 if(reuse_hint->size < size) return NULL;
1900
1901 dt_pixel_cache_entry_t *cache_entry = _non_threadsafe_cache_get_entry(cache, cache->entries, old_hash);
1902 if(IS_NULL_PTR(cache_entry)) return NULL;
1903 if(cache_entry->serial != reuse_hint->serial) return NULL;
1904 if(cache_entry->auto_destroy) return NULL;
1905 if(cache_entry->size < size) return NULL;
1906 if(_non_threadsafe_cache_get_entry(cache, cache->entries, new_hash)) return NULL;
1907
1908 _non_thread_safe_cache_ref_count_entry(cache, TRUE, cache_entry);
1909 dt_dev_pixelpipe_cache_wrlock_entry(cache, TRUE, cache_entry);
1910
1911 /* Rekey reuse transfers the RAM arena slot to a completely different hash. Any cached OpenCL payload
1912 * still attached to the previous owner would otherwise remain reachable through the new hash and could
1913 * later be materialized as if it belonged to the new module output. Bail out if some GPU path is still
1914 * borrowing one of those payloads, otherwise flush the stale bookkeeping before publishing the new hash. */
1915 dt_pthread_mutex_lock(&cache_entry->cl_mem_lock);
1916 for(GList *l = cache_entry->cl_mem_list; l; l = g_list_next(l))
1917 {
1918 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
1919 if(c && c->refs > 0)
1920 {
1921 dt_pthread_mutex_unlock(&cache_entry->cl_mem_lock);
1922 dt_dev_pixelpipe_cache_wrlock_entry(cache, FALSE, cache_entry);
1923 dt_dev_pixelpipe_cache_ref_count_entry(cache, FALSE, cache_entry);
1924 return NULL;
1925 }
1926 }
1927 dt_pthread_mutex_unlock(&cache_entry->cl_mem_lock);
1928
1929 gpointer stolen_key = NULL;
1930 gpointer stolen_value = NULL;
1931 if(!g_hash_table_steal_extended(cache->entries, &old_hash, &stolen_key, &stolen_value)
1932 || stolen_value != cache_entry)
1933 {
1934 if(stolen_key && stolen_value) g_hash_table_insert(cache->entries, stolen_key, stolen_value);
1935 dt_dev_pixelpipe_cache_wrlock_entry(cache, FALSE, cache_entry);
1936 dt_dev_pixelpipe_cache_ref_count_entry(cache, FALSE, cache_entry);
1937 return NULL;
1938 }
1939
1940 *(uint64_t *)stolen_key = new_hash;
1941 cache_entry->hash = new_hash;
1942 g_hash_table_insert(cache->entries, stolen_key, cache_entry);
1943
1945 "[pixelpipe_cache] writable rekey old=%" PRIu64 " new=%" PRIu64 " entry=%" PRIu64 "/%" PRIu64
1946 " refs=%i auto=%i data=%p module=%s\n",
1947 old_hash, new_hash, cache_entry->hash, cache_entry->serial,
1948 dt_atomic_get_int(&cache_entry->refcount), cache_entry->auto_destroy, cache_entry->data,
1950 return cache_entry;
1951}
1952
1953
1955 const size_t size, const char *name, const int id,
1956 const gboolean alloc, void **data,
1957 dt_pixel_cache_entry_t **entry)
1958{
1960 {
1961 dt_print(DT_DEBUG_PIPECACHE, "[pixelpipe_cache] refusing invalid hash allocation for %s\n",
1962 name ? name : "unknown");
1963 if(data) *data = NULL;
1964 if(entry) *entry = NULL;
1965 return 1;
1966 }
1967
1968 // Search or create cache entry (under cache lock)
1969 dt_pthread_mutex_lock(&cache->lock);
1970 cache->queries++;
1971
1972 dt_pixel_cache_entry_t *cache_entry = _non_threadsafe_cache_get_entry(cache, cache->entries, hash);
1973 if(!IS_NULL_PTR(cache_entry) && cache_entry->auto_destroy)
1974 {
1975 _pixel_cache_message(cache_entry, "dropping auto-destroy entry before cache_get reuse", FALSE);
1976 if(_non_thread_safe_cache_remove(cache, FALSE, cache_entry, cache->entries) == 0)
1977 cache_entry = NULL;
1978 }
1979
1980 if(!IS_NULL_PTR(cache_entry))
1981 {
1982 cache->hits++;
1983 cache_entry->hits++;
1984 _non_thread_safe_cache_ref_count_entry(cache, TRUE, cache_entry);
1986
1987 // Allocate on demand if requested (e.g. when falling back from vRAM-only buffers).
1988 if(alloc && IS_NULL_PTR(cache_entry->data))
1989 {
1990 dt_dev_pixelpipe_cache_wrlock_entry(cache, TRUE, cache_entry);
1991 dt_pixel_cache_alloc(cache, cache_entry);
1992 dt_dev_pixelpipe_cache_wrlock_entry(cache, FALSE, cache_entry);
1993 }
1994
1995 _pixelpipe_cache_finalize_entry(cache_entry, data, "found");
1996 if(entry) *entry = cache_entry;
1997 return 0;
1998 }
1999
2000 cache_entry = _pixelpipe_cache_create_entry_locked(cache, hash, size, name, id);
2001 if(IS_NULL_PTR(cache_entry))
2002 {
2003 dt_print(DT_DEBUG_PIPECACHE, "couldn't allocate new cache entry %" PRIu64 "\n", hash);
2005 if(entry) *entry = NULL;
2006 return 1;
2007 }
2008
2009 // Release cache lock AFTER acquiring entry locks to prevent other threads to capture it in-between
2011
2012 // Alloc after releasing the lock for better runtimes
2013 if(alloc) dt_pixel_cache_alloc(cache, cache_entry);
2014
2015 dt_print(DT_DEBUG_PIPECACHE, "[pixelpipe_cache] Write-lock on entry (new cache entry %" PRIu64 " for %s pipeline)\n",
2016 hash, name);
2017 _pixelpipe_cache_finalize_entry(cache_entry, data, "created");
2018
2019 if(entry) *entry = cache_entry;
2020 return 1;
2021}
2022
2025 const size_t size, const char *name, const int id,
2026 const gboolean alloc, const gboolean allow_rekey_reuse,
2027 const dt_pixel_cache_entry_t *reuse_hint,
2028 void **data,
2029 dt_pixel_cache_entry_t **entry)
2030{
2032 {
2033 if(data) *data = NULL;
2034 if(entry) *entry = NULL;
2036 }
2037
2038 dt_pthread_mutex_lock(&cache->lock);
2039 cache->queries++;
2040
2041 dt_pixel_cache_entry_t *cache_entry = _non_threadsafe_cache_get_entry(cache, cache->entries, hash);
2042 if(!IS_NULL_PTR(cache_entry) && cache_entry->auto_destroy)
2043 {
2044 _pixel_cache_message(cache_entry, "dropping auto-destroy entry before writable reuse", FALSE);
2045 if(_non_thread_safe_cache_remove(cache, FALSE, cache_entry, cache->entries) == 0)
2046 cache_entry = NULL;
2047 }
2048
2049 if(!IS_NULL_PTR(cache_entry))
2050 {
2052 if(data) *data = NULL;
2053 if(entry) *entry = NULL;
2055 }
2056
2057 if(allow_rekey_reuse)
2058 {
2059 cache_entry = _cache_try_rekey_reuse_locked(cache, hash, size, reuse_hint);
2060 if(!IS_NULL_PTR(cache_entry))
2061 {
2063 if(alloc && IS_NULL_PTR(cache_entry->data)) dt_pixel_cache_alloc(cache, cache_entry);
2064 _pixelpipe_cache_finalize_entry(cache_entry, data, "writable-rekeyed");
2065 if(entry) *entry = cache_entry;
2067 }
2068 }
2069
2070 cache_entry = _pixelpipe_cache_create_entry_locked(cache, hash, size, name, id);
2071 if(IS_NULL_PTR(cache_entry))
2072 {
2074 if(data) *data = NULL;
2075 if(entry) *entry = NULL;
2077 }
2078
2080
2081 if(alloc) dt_pixel_cache_alloc(cache, cache_entry);
2082 _pixelpipe_cache_finalize_entry(cache_entry, data, "writable-created");
2083 if(entry) *entry = cache_entry;
2085}
2086
2088 void **data)
2089{
2090 dt_pthread_mutex_lock(&cache->lock);
2091 cache->queries++;
2092 dt_pixel_cache_entry_t *cache_entry = _non_threadsafe_cache_get_entry(cache, cache->entries, hash);
2093
2094 if(!IS_NULL_PTR(cache_entry))
2095 {
2096 cache->hits++;
2097 cache_entry->hits++;
2098 _pixelpipe_cache_finalize_entry(cache_entry, data, "found");
2099 }
2100
2102 return cache_entry;
2103}
2104
2105#ifdef HAVE_OPENCL
2107 const int preferred_devid, void **cl_mem_output)
2108{
2109 if(IS_NULL_PTR(cache_entry) || IS_NULL_PTR(cl_mem_output) || !IS_NULL_PTR(*cl_mem_output) || preferred_devid < 0)
2110 return FALSE;
2111
2112 dt_pthread_mutex_lock(&cache_entry->cl_mem_lock);
2113 for(GList *l = cache_entry->cl_mem_list; l;)
2114 {
2115 GList *next = g_list_next(l);
2116 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
2117 if(!IS_NULL_PTR(c->mem) && c->refs == 0
2118 && dt_opencl_get_mem_context_id((cl_mem)c->mem) == preferred_devid)
2119 {
2120 cache_entry->cl_mem_list = g_list_delete_link(cache_entry->cl_mem_list, l);
2121 *cl_mem_output = c->mem;
2122 dt_free(c);
2123 break;
2124 }
2125 l = next;
2126 }
2127 dt_pthread_mutex_unlock(&cache_entry->cl_mem_lock);
2128
2129 return !IS_NULL_PTR(*cl_mem_output);
2130}
2131#else
2133 const int preferred_devid, void **cl_mem_output)
2134{
2135 return FALSE;
2136}
2137#endif
2138
2140 dt_pixel_cache_entry_t *cache_entry,
2141 const int preferred_devid, void **data)
2142{
2143 if(data) *data = NULL;
2144 if(IS_NULL_PTR(cache) || IS_NULL_PTR(cache_entry)) return FALSE;
2145
2146 if(dt_pixel_cache_entry_get_data(cache_entry) != NULL)
2147 {
2148 if(!IS_NULL_PTR(data)) *data = dt_pixel_cache_entry_get_data(cache_entry);
2149 return TRUE;
2150 }
2151
2152 if(!_cache_entry_materialize_host_data(cache, preferred_devid, cache_entry))
2153 return FALSE;
2154
2155 if(!IS_NULL_PTR(data)) *data = dt_pixel_cache_entry_get_data(cache_entry);
2156 return dt_pixel_cache_entry_get_data(cache_entry) != NULL;
2157}
2158
2159gboolean dt_dev_pixelpipe_cache_peek(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, void **data,
2160 dt_pixel_cache_entry_t **entry, const int preferred_devid,
2161 void **cl_mem_output)
2162{
2163 if(data) *data = NULL;
2164 if(entry) *entry = NULL;
2165 if(cl_mem_output) *cl_mem_output = NULL;
2166
2168 return FALSE;
2169
2170 dt_pixel_cache_entry_t *cache_entry = _cache_lookup_existing(cache, hash, data);
2171 if(IS_NULL_PTR(cache_entry)) return FALSE;
2172
2173 if(data) *data = dt_pixel_cache_entry_get_data(cache_entry);
2174
2175 /* Exact-hit callers treat the returned payload as already published. Reject
2176 * cachelines that are still write-locked: reusable output cachelines are
2177 * rekeyed to their new hash before recompute starts, so exposing them here
2178 * would let concurrent pipes consume stale or half-written buffers. */
2179 if(dt_pthread_rwlock_tryrdlock(&cache_entry->lock) != 0)
2180 {
2181 _trace_exact_hit("locked", hash, cache_entry, data ? *data : NULL,
2182 cl_mem_output ? *cl_mem_output : NULL, preferred_devid, FALSE);
2183 if(data) *data = NULL;
2184 return FALSE;
2185 }
2186 dt_pthread_rwlock_unlock(&cache_entry->lock);
2187
2188 if(IS_NULL_PTR(data) && IS_NULL_PTR(cl_mem_output))
2189 {
2190 if(entry) *entry = cache_entry;
2191 return TRUE;
2192 }
2193
2194 /* Picker-triggered aborts can leave a cacheline temporarily present under its
2195 * hash while it is already marked auto-destroy. Those entries must never exact-hit:
2196 * they belong to the aborted lifecycle and must force a rebuild on the next run. */
2197 if(cache_entry->auto_destroy)
2198 {
2199 _trace_exact_hit("auto-destroy", hash, cache_entry, data ? *data : NULL,
2200 cl_mem_output ? *cl_mem_output : NULL, preferred_devid, FALSE);
2201 if(data) *data = NULL;
2202 return FALSE;
2203 }
2204
2205 if(dt_pixel_cache_entry_get_data(cache_entry) != NULL)
2206 {
2207 if(data) *data = dt_pixel_cache_entry_get_data(cache_entry);
2208 _cache_try_restore_device_payload(cache_entry, preferred_devid, cl_mem_output);
2209
2210 _trace_exact_hit("host", hash, cache_entry, data ? *data : NULL,
2211 cl_mem_output ? *cl_mem_output : NULL, preferred_devid, FALSE);
2212 if(entry) *entry = cache_entry;
2213 return TRUE;
2214 }
2215
2216 /* `preferred_devid < 0` means the caller is on a CPU path and does not own any
2217 * OpenCL device. In that case, hostless cachelines are not consumable here:
2218 * reopening device-only payloads would enqueue hidden GPU work without a locked
2219 * device, while reporting a device-only exact-hit would let CPU callers sample
2220 * an uninitialized host buffer. */
2221 if(preferred_devid < 0)
2222 {
2223 _trace_exact_hit("cpu-no-device", hash, cache_entry, NULL, NULL, preferred_devid, FALSE);
2224 return FALSE;
2225 }
2226
2227 if(_cache_try_restore_device_payload(cache_entry, preferred_devid, cl_mem_output))
2228 {
2229 _trace_exact_hit("device", hash, cache_entry, data ? *data : NULL,
2230 cl_mem_output ? *cl_mem_output : NULL, preferred_devid, FALSE);
2231 if(entry) *entry = cache_entry;
2232 return TRUE;
2233 }
2234
2235 if(!IS_NULL_PTR(data) && dt_dev_pixelpipe_cache_restore_host_payload(cache, cache_entry, preferred_devid, data))
2236 {
2237 _trace_exact_hit("restore-host", hash, cache_entry, data ? *data : NULL,
2238 cl_mem_output ? *cl_mem_output : NULL, preferred_devid, FALSE);
2239 if(entry) *entry = cache_entry;
2240 return TRUE;
2241 }
2242
2243 _trace_exact_hit("drop-invalid", hash, cache_entry, data ? *data : NULL,
2244 cl_mem_output ? *cl_mem_output : NULL, preferred_devid, FALSE);
2246 "[pixelpipe] cache entry %" PRIu64 " has no authoritative RAM nor vRAM payload and will be removed\n",
2247 hash);
2248 // If the entry removal fails, flag it for auto-destroy.
2249 if(dt_dev_pixelpipe_cache_remove(cache, TRUE, cache_entry))
2250 dt_dev_pixelpipe_cache_flag_auto_destroy(cache, cache_entry);
2251 if(data) *data = NULL;
2252 return FALSE;
2253}
2254
2255
2256static gboolean _for_each_remove(gpointer key, gpointer value, gpointer user_data)
2257{
2259 const int id = GPOINTER_TO_INT(user_data);
2260
2261 // Returns 1 if the lock is captured by another thread
2262 // 0 if WE capture the lock, and then need to release it
2263 gboolean locked = dt_pthread_rwlock_trywrlock(&cache_entry->lock);
2264 if(!locked) dt_pthread_rwlock_unlock(&cache_entry->lock);
2265
2266 return (cache_entry->id == id || id == -1) && !locked;
2267}
2268
2269
2271{
2272 dt_pthread_mutex_lock(&cache->lock);
2273 g_hash_table_foreach_remove(cache->entries, _for_each_remove, GINT_TO_POINTER(id));
2275}
2276
2278 const uint64_t *hashes,
2279 const size_t count)
2280{
2281 int retained = 0;
2282 dt_pthread_mutex_lock(&cache->lock);
2283
2284 // We are invalidating the cumulative outputs from one pipeline stage onward.
2285 // Look them up under the same cache lock used for removal so a shared preview
2286 // pipe cannot replace an entry between lookup and invalidation.
2287 for(size_t k = 0; k < count; k++)
2288 {
2289 if(hashes[k] == DT_PIXELPIPE_CACHE_HASH_INVALID) continue;
2290
2292 = _non_threadsafe_cache_get_entry(cache, cache->entries, hashes[k]);
2293 if(IS_NULL_PTR(entry)) continue;
2294
2295 // A displayed backbuffer or an in-flight consumer may still own this
2296 // shared state. Leave it valid; cache bypass on the retry still walks
2297 // through downstream stages after the provider has been regenerated.
2298 if(_non_thread_safe_cache_remove(cache, FALSE, entry, cache->entries))
2299 retained++;
2300 }
2301
2303 return retained;
2304}
2305
2306
2307static gboolean _for_each_remove_old(gpointer key, gpointer value, gpointer user_data)
2308{
2310
2311 // Returns 1 if the lock is captured by another thread
2312 // 0 if WE capture the lock, and then need to release it
2313 gboolean locked = dt_pthread_rwlock_trywrlock(&cache_entry->lock);
2314 if(!locked) dt_pthread_rwlock_unlock(&cache_entry->lock);
2315 gboolean used = dt_atomic_get_int(&cache_entry->refcount) > 0;
2316
2317 // in microseconds
2318 int64_t delta = g_get_monotonic_time() - cache_entry->age;
2319
2320 // 5 min in microseconds
2321 const int64_t three_min = 5 * 60 * 1000 * 1000;
2322
2323 gboolean too_old = (delta > three_min) && (cache_entry->hits < 4);
2324
2325 return too_old && !used && !locked;
2326}
2327
2329{
2330 // Don't hang the GUI thread if the cache is locked by a pipeline.
2331 // Better luck next time.
2332 if(dt_pthread_mutex_trylock(&cache->lock)) return G_SOURCE_CONTINUE;
2333 g_hash_table_foreach_remove(cache->entries, _for_each_remove_old, NULL);
2335 return G_SOURCE_CONTINUE;
2336}
2337
2343
2344
2346 dt_pixel_cache_entry_t *cache_entry)
2347{
2348 if(IS_NULL_PTR(cache_entry)) return;
2349
2350 if(lock)
2351 {
2352 dt_atomic_add_int(&cache_entry->refcount, 1);
2353 _pixel_cache_message(cache_entry, "ref count ++", TRUE);
2354 }
2355 else
2356 {
2357 dt_atomic_sub_int(&cache_entry->refcount, 1);
2358 _pixel_cache_message(cache_entry, "ref count --", TRUE);
2359 }
2360}
2361
2362
2364 dt_pixel_cache_entry_t *cache_entry)
2365{
2366 dt_pthread_mutex_lock(&cache->lock);
2367 _non_thread_safe_cache_ref_count_entry(cache, lock, cache_entry);
2369}
2370
2371
2373 dt_pixel_cache_entry_t *cache_entry)
2374{
2375 if(lock)
2376 {
2377 dt_pthread_rwlock_wrlock(&cache_entry->lock);
2378 _pixel_cache_message(cache_entry, "write lock", TRUE);
2379 }
2380 else
2381 {
2382 dt_pthread_rwlock_unlock(&cache_entry->lock);
2383 _pixel_cache_message(cache_entry, "write unlock", TRUE);
2384 if(cache_entry && cache_entry->hash != DT_PIXELPIPE_CACHE_HASH_INVALID)
2386 }
2387}
2388
2389
2391 dt_pixel_cache_entry_t *cache_entry)
2392{
2393 if(lock)
2394 {
2395 dt_pthread_rwlock_rdlock(&cache_entry->lock);
2396 _pixel_cache_message(cache_entry, "read lock", TRUE);
2397 }
2398 else
2399 {
2400 dt_pthread_rwlock_unlock(&cache_entry->lock);
2401 _pixel_cache_message(cache_entry, "read unlock", TRUE);
2402 }
2403}
2404
2405
2407 dt_pixel_cache_entry_t *cache_entry)
2408{
2409 dt_pthread_mutex_lock(&cache->lock);
2410 if(IS_NULL_PTR(cache_entry))
2411 {
2413 return;
2414 }
2415
2416 cache_entry->auto_destroy = TRUE;
2417 _pixel_cache_message(cache_entry, "auto destroy flagged", TRUE);
2419}
2420
2421
2423 dt_pixel_cache_entry_t *cache_entry)
2424{
2425 dt_pthread_mutex_lock(&cache->lock);
2426 if(IS_NULL_PTR(cache_entry))
2427 {
2429 return;
2430 }
2431
2432 if(cache_entry->auto_destroy)
2433 {
2434 /* `auto_destroy` is still a normal cache lifecycle: the creator flags a transient entry, then the final
2435 * consumer decrements its refcount and asks the cache to reap it. Only remove it once no consumer owns
2436 * it anymore and nobody still holds the entry lock, otherwise teardown paths can free cachelines that
2437 * still report `refs>0` and hide ownership bugs instead of exposing them. */
2438 const gboolean locked = dt_pthread_rwlock_trywrlock(&cache_entry->lock);
2439 if(!locked) dt_pthread_rwlock_unlock(&cache_entry->lock);
2440 const gboolean used = dt_atomic_get_int(&cache_entry->refcount) > 0;
2441
2442 if(!used && !locked)
2443 {
2444 _pixel_cache_message(cache_entry, "auto destroy removing", FALSE);
2445 g_hash_table_remove(cache->entries, &cache_entry->hash);
2446 }
2447 else if(used)
2448 {
2449 _pixel_cache_message(cache_entry, "auto destroy postponed: used", TRUE);
2450 }
2451 else
2452 {
2453 _pixel_cache_message(cache_entry, "auto destroy postponed: locked", TRUE);
2454 }
2455 }
2456 else
2457 {
2458 _pixel_cache_message(cache_entry, "auto destroy skipped", TRUE);
2459 }
2460
2462}
2463
2465{
2466 if(hash == DT_PIXELPIPE_CACHE_HASH_INVALID) return;
2467
2468 dt_pthread_mutex_lock(&cache->lock);
2469 cache->queries++;
2470 dt_pixel_cache_entry_t *cache_entry = _non_threadsafe_cache_get_entry(cache, cache->entries, hash);
2472
2473 if(cache_entry)
2474 dt_dev_pixelpipe_cache_ref_count_entry(cache, FALSE, cache_entry);
2475}
2476
2478 const uint64_t new_hash, dt_pixel_cache_entry_t *entry)
2479{
2480 if(IS_NULL_PTR(cache)) return 1;
2481 if(old_hash == new_hash) return 0;
2482
2483 dt_pthread_mutex_lock(&cache->lock);
2484
2485 if(IS_NULL_PTR(entry)) entry = _non_threadsafe_cache_get_entry(cache, cache->entries, old_hash);
2486 if(IS_NULL_PTR(entry))
2487 {
2489 "[pixelpipe_cache] rekey miss old=%" PRIu64 " new=%" PRIu64 " module=%s\n",
2490 old_hash, new_hash, _cache_debug_module_name());
2492 return 1;
2493 }
2494
2495 dt_pixel_cache_entry_t *conflict = _non_threadsafe_cache_get_entry(cache, cache->entries, new_hash);
2496 if(conflict && conflict != entry)
2497 {
2499 "[pixelpipe_cache] rekey conflict old=%" PRIu64 " new=%" PRIu64
2500 " entry=%" PRIu64 "/%" PRIu64 " conflict=%" PRIu64 "/%" PRIu64 " module=%s\n",
2501 old_hash, new_hash, entry->hash, entry->serial, conflict->hash, conflict->serial,
2504 return 1;
2505 }
2506
2507 gpointer stolen_key = NULL;
2508 gpointer stolen_value = NULL;
2509 if(!g_hash_table_steal_extended(cache->entries, &old_hash, &stolen_key, &stolen_value))
2510 {
2512 "[pixelpipe_cache] rekey steal-miss old=%" PRIu64 " new=%" PRIu64
2513 " entry=%" PRIu64 "/%" PRIu64 " module=%s\n",
2514 old_hash, new_hash, entry->hash, entry->serial, _cache_debug_module_name());
2516 return 1;
2517 }
2518
2519 if(stolen_value != entry)
2520 {
2522 "[pixelpipe_cache] rekey stolen-entry mismatch old=%" PRIu64 " new=%" PRIu64
2523 " expected=%" PRIu64 "/%" PRIu64 " got=%" PRIu64 "/%" PRIu64 " module=%s\n",
2524 old_hash, new_hash, entry->hash, entry->serial,
2525 ((dt_pixel_cache_entry_t *)stolen_value)->hash, ((dt_pixel_cache_entry_t *)stolen_value)->serial,
2527 g_hash_table_insert(cache->entries, stolen_key, stolen_value);
2529 return 1;
2530 }
2531
2532 /* Explicit rekeying also changes cacheline ownership. The OpenCL payload cache is only valid for the
2533 * previous hash, so do not let the new hash inherit stale device-side state. If some GPU code is still
2534 * borrowing one of these payloads, refuse the rekey instead of publishing an ambiguous cache entry. */
2536 for(GList *l = entry->cl_mem_list; l; l = g_list_next(l))
2537 {
2538 dt_cache_clmem_t *c = (dt_cache_clmem_t *)l->data;
2539 if(c && c->refs > 0)
2540 {
2542 g_hash_table_insert(cache->entries, stolen_key, stolen_value);
2544 return 1;
2545 }
2546 }
2549
2550 *(uint64_t *)stolen_key = new_hash;
2551 entry->hash = new_hash;
2552 g_hash_table_insert(cache->entries, stolen_key, stolen_value);
2554 "[pixelpipe_cache] rekey old=%" PRIu64 " new=%" PRIu64 " entry=%" PRIu64 "/%" PRIu64
2555 " refs=%i auto=%i data=%p module=%s\n",
2556 old_hash, new_hash, entry->hash, entry->serial, dt_atomic_get_int(&entry->refcount),
2557 entry->auto_destroy, entry->data, _cache_debug_module_name());
2558
2560 return 0;
2561}
2562
2563
2565{
2566 if(!(darktable.unmuted & DT_DEBUG_PIPECACHE)) return;
2567
2568 dt_print(DT_DEBUG_PIPECACHE, "[pixelpipe] cache hit rate so far: %.3f%% - size: %" G_GSIZE_FORMAT " MiB over %" G_GSIZE_FORMAT " MiB - %i items\n",
2569 100. * (cache->hits) / (float)cache->queries, cache->current_memory / (1024 * 1024),
2570 cache->max_memory / (1024 * 1024),
2571 g_hash_table_size(cache->entries));
2572}
2573
2574// clang-format off
2575// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
2576// vim: shiftwidth=2 expandtab tabstop=2 cindent
2577// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
2578// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
int dt_atomic_get_int(dt_atomic_int *var)
int dt_atomic_sub_int(dt_atomic_int *var, int decr)
int dt_atomic_add_int(dt_atomic_int *var, int incr)
atomic_int dt_atomic_int
Definition atomic.h:66
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
const float delta
typedef void((*dt_cache_allocate_t)(void *userdata, dt_cache_entry_t *entry))
char * key
char * name
void dt_control_log(const char *msg,...)
Definition control.c:761
darktable_t darktable
Definition darktable.c:181
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1542
@ DT_DEBUG_OPENCL
Definition darktable.h:722
@ DT_DEBUG_PIPECACHE
Definition darktable.h:720
@ DT_DEBUG_VERBOSE
Definition darktable.h:743
static void dt_free_gpointer(gpointer ptr)
Definition darktable.h:463
#define dt_free(ptr)
Definition darktable.h:456
static const dt_aligned_pixel_simd_t value
Definition darktable.h:577
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
#define DT_CACHELINE_BYTES
Definition darktable.h:380
#define dt_pthread_rwlock_tryrdlock
Definition dtpthread.h:395
#define dt_pthread_rwlock_destroy
Definition dtpthread.h:391
static int dt_pthread_mutex_unlock(dt_pthread_mutex_t *mutex) RELEASE(mutex) NO_THREAD_SAFETY_ANALYSIS
Definition dtpthread.h:374
static int dt_pthread_mutex_init(dt_pthread_mutex_t *mutex, const pthread_mutexattr_t *mutexattr)
Definition dtpthread.h:359
#define dt_pthread_rwlock_wrlock
Definition dtpthread.h:394
#define dt_pthread_rwlock_trywrlock
Definition dtpthread.h:396
static int dt_pthread_mutex_trylock(dt_pthread_mutex_t *mutex) TRY_ACQUIRE(0
static int dt_pthread_mutex_destroy(dt_pthread_mutex_t *mutex)
Definition dtpthread.h:379
#define dt_pthread_rwlock_init
Definition dtpthread.h:390
#define dt_pthread_rwlock_unlock
Definition dtpthread.h:392
static int dt_pthread_mutex_lock(dt_pthread_mutex_t *mutex) ACQUIRE(mutex) NO_THREAD_SAFETY_ANALYSIS
Definition dtpthread.h:364
#define dt_pthread_rwlock_rdlock
Definition dtpthread.h:393
int bpp
float *const restrict const size_t k
void dt_cache_arena_stats(dt_cache_arena_t *a, uint32_t *out_total_free_pages, uint32_t *out_largest_free_run_pages)
void dt_cache_arena_cleanup(dt_cache_arena_t *a)
gboolean dt_cache_arena_calc(const dt_cache_arena_t *a, size_t size, uint32_t *out_pages, size_t *out_size)
int dt_cache_arena_init(dt_cache_arena_t *a, size_t total_size)
void dt_cache_arena_free(dt_cache_arena_t *a, void *ptr, size_t size)
void * dt_cache_arena_alloc(dt_cache_arena_t *a, size_t size, size_t *out_size)
size_t size
Definition mipmap_cache.c:3
dt_mipmap_buffer_dsc_flags flags
Definition mipmap_cache.c:4
void * dt_opencl_alloc_device_use_host_pointer(const int devid, const int width, const int height, const int bpp, void *host, const int flags)
Definition opencl.c:2493
gboolean dt_opencl_is_pinned_memory(cl_mem mem)
Definition opencl.c:190
void * dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
Definition opencl.c:2471
int dt_opencl_get_mem_context_id(cl_mem mem)
Definition opencl.c:2560
int dt_opencl_get_image_height(cl_mem mem)
Definition opencl.c:2598
int dt_opencl_unmap_mem_object(const int devid, cl_mem mem_object, void *mapped_ptr)
Definition opencl.c:2430
gboolean dt_opencl_use_pinned_memory(const int devid)
Definition opencl.c:183
int dt_opencl_get_image_width(cl_mem mem)
Definition opencl.c:2587
void * dt_opencl_map_image(const int devid, cl_mem buffer, const int blocking, const int flags, size_t width, size_t height, int bpp)
Definition opencl.c:2410
gboolean dt_opencl_finish(const int devid)
Definition opencl.c:1347
void dt_opencl_events_wait_for(const int devid)
Definition opencl.c:2924
int dt_opencl_read_host_from_device(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2169
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2383
int dt_opencl_get_image_element_size(cl_mem mem)
Definition opencl.c:2609
int dt_opencl_write_host_to_device(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2216
#define DT_OPENCL_BPP_ENCODE_RGBA8(bpp)
Definition opencl.h:85
static __thread const char * dt_pixelpipe_cache_current_module
static const char * _cache_debug_module_name(void)
static void _trace_exact_hit(const char *phase, const uint64_t hash, dt_pixel_cache_entry_t *cache_entry, void *data, void *cl_mem_output, const int preferred_devid, const gboolean verbose)
void * dt_dev_pixelpipe_cache_borrow_cl_payload(dt_pixel_cache_entry_t *entry, int devid, int width, int height, int bpp)
Borrow a cached OpenCL payload attached to a cache entry.
static int _free_space_to_alloc(dt_dev_pixelpipe_cache_t *cache, const size_t size, const uint64_t hash, const char *name)
static gboolean _cache_entry_clmem_flush_device(dt_pixel_cache_entry_t *entry, const int devid)
static void _arena_stats_bytes(dt_dev_pixelpipe_cache_t *cache, uint32_t *total_pages, uint32_t *largest_pages, size_t *total_bytes, size_t *largest_bytes)
void dt_pixelpipe_cache_free_align_cache(dt_dev_pixelpipe_cache_t *cache, void **mem, const char *message)
Free aligned memory allocated with dt_pixelpipe_cache_alloc_align_cache.
void dt_dev_pixelpipe_cache_cleanup(dt_dev_pixelpipe_cache_t *cache)
int dt_dev_pixelpipe_cache_invalidate_hashes(dt_dev_pixelpipe_cache_t *cache, const uint64_t *hashes, const size_t count)
Invalidate cache lines matching an explicit list of hashes.
static int garbage_collection
void * dt_pixel_cache_entry_get_data(dt_pixel_cache_entry_t *entry)
void * dt_dev_pixelpipe_cache_get_pinned_image(dt_dev_pixelpipe_cache_t *cache, void *host_ptr, dt_pixel_cache_entry_t *entry_hint, int devid, int width, int height, int bpp, int flags, gboolean *out_reused)
Acquire a pinned OpenCL image for a host buffer tracked by the pixelpipe cache.
static int dt_dev_pixelpipe_cache_flush_old(dt_dev_pixelpipe_cache_t *cache)
dt_dev_pixelpipe_cache_t * dt_dev_pixelpipe_cache_init(size_t max_memory)
static void _pixel_cache_clmem_remove(dt_pixel_cache_entry_t *entry, void *mem)
void dt_dev_pixelpipe_cache_ref_count_entry(dt_dev_pixelpipe_cache_t *cache, gboolean lock, dt_pixel_cache_entry_t *cache_entry)
Increase/Decrease the reference count on the cache line as to prevent LRU item removal....
static dt_pixel_cache_entry_t * _cache_entry_for_host_ptr_locked(dt_dev_pixelpipe_cache_t *cache, void *host_ptr)
void dt_dev_pixelpipe_cache_print(dt_dev_pixelpipe_cache_t *cache)
void * dt_pixel_cache_alloc(dt_dev_pixelpipe_cache_t *cache, dt_pixel_cache_entry_t *cache_entry)
Actually allocate the memory buffer attached to the cache entry once you create it with dt_dev_pixelp...
void dt_dev_pixelpipe_cache_flush(dt_dev_pixelpipe_cache_t *cache, const int id)
Remove cache lines matching id. Entries locked in read/write or having reference count greater than 0...
static void _cache_get_oldest(gpointer key, gpointer value, gpointer user_data)
void dt_dev_pixelpipe_cache_unref_hash(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash)
Find the entry matching hash, and decrease its ref_count if found.
static dt_pixel_cache_entry_t * _pixelpipe_cache_create_entry_locked(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, const size_t size, const char *name, const int id)
static void _print_cache_lines(gpointer key, gpointer value, gpointer user_data)
void dt_dev_pixelpipe_cache_flush_clmem_for_pipe(dt_dev_pixelpipe_cache_t *cache, const int devid)
Like dt_dev_pixelpipe_cache_flush_clmem(), for callers that do not hold darktable....
static int _non_thread_safe_pixel_pipe_cache_remove_lru(dt_dev_pixelpipe_cache_t *cache)
void dt_dev_pixelpipe_cache_flag_auto_destroy(dt_dev_pixelpipe_cache_t *cache, dt_pixel_cache_entry_t *cache_entry)
Flag the cache entry as "auto_destroy". This is useful for short-lived/disposable cache entries,...
void dt_dev_pixelpipe_cache_auto_destroy_apply(dt_dev_pixelpipe_cache_t *cache, dt_pixel_cache_entry_t *cache_entry)
Free the entry if it has the flag "auto_destroy". See dt_dev_pixelpipe_cache_flag_auto_destroy()....
static gboolean _cache_entry_clmem_has_host_pinned_locked(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid)
static gboolean _for_each_remove(gpointer key, gpointer value, gpointer user_data)
void dt_dev_pixelpipe_cache_put_pinned_image(dt_dev_pixelpipe_cache_t *cache, void *host_ptr, dt_pixel_cache_entry_t *entry_hint, void **mem)
Release or cache a pinned OpenCL image acquired with dt_dev_pixelpipe_cache_get_pinned_image().
void dt_dev_pixelpipe_cache_release_cl_buffer(void **cl_mem_buffer, dt_pixel_cache_entry_t *cache_entry, void *host_ptr, const gboolean cache_device)
Release or cache an OpenCL image associated with a host cache line.
gboolean dt_dev_pixelpipe_cache_peek(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, void **data, dt_pixel_cache_entry_t **entry, const int preferred_devid, void **cl_mem_output)
Non-owning lookup of an existing cache line.
int dt_dev_pixel_pipe_cache_remove_lru(dt_dev_pixelpipe_cache_t *cache)
void dt_dev_pixelpipe_cache_flush_entry_clmem(dt_pixel_cache_entry_t *entry)
Flush all reusable OpenCL payloads cached on one cache entry.
static dt_pixel_cache_entry_t * _cache_try_rekey_reuse_locked(dt_dev_pixelpipe_cache_t *cache, const uint64_t new_hash, const size_t size, const dt_pixel_cache_entry_t *reuse_hint)
static dt_pixel_cache_entry_t * _cache_lookup_existing(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, void **data)
void * dt_dev_pixelpipe_cache_get_cl_buffer(int devid, void *const host_ptr, const dt_iop_roi_t *roi, const size_t bpp, dt_iop_module_t *module, const char *message, dt_pixel_cache_entry_t *cache_entry, gboolean *out_reused, void *keep)
static int _pixel_cache_clmem_put(dt_pixel_cache_entry_t *entry, void *host_ptr, void *mem)
dt_pixel_cache_materialize_source_rank_t
@ DT_PIXEL_CACHE_MATERIALIZE_SOURCE_SECONDARY_PREFERRED
@ DT_PIXEL_CACHE_MATERIALIZE_SOURCE_PRIMARY_ANY
@ DT_PIXEL_CACHE_MATERIALIZE_SOURCE_SECONDARY_ANY
@ DT_PIXEL_CACHE_MATERIALIZE_SOURCE_NONE
@ DT_PIXEL_CACHE_MATERIALIZE_SOURCE_PRIMARY_PREFERRED
static size_t _pixel_cache_get_size(dt_pixel_cache_entry_t *cache_entry)
static gboolean _cache_entry_clmem_flush_host_pinned_locked(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid)
static void _free_cache_entry(dt_pixel_cache_entry_t *cache_entry)
dt_pixel_cache_entry_t * dt_dev_pixelpipe_cache_get_entry(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash)
Get an internal reference to the cache entry matching hash. If you are going to access this entry mor...
void * dt_dev_pixelpipe_cache_alloc_cl_device_buffer(int devid, const dt_iop_roi_t *roi, const size_t bpp, const dt_iop_module_t *module, const char *message, void *keep)
int dt_dev_pixelpipe_cache_get(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, const size_t size, const char *name, const int id, const gboolean alloc, void **data, dt_pixel_cache_entry_t **entry)
Get a cache line from the cache.
int dt_dev_pixelpipe_cache_remove(dt_dev_pixelpipe_cache_t *cache, const gboolean force, dt_pixel_cache_entry_t *cache_entry)
Arbitrarily remove the cache entry matching hash. Entries having a reference count > 0 (inter-thread ...
gboolean dt_dev_pixelpipe_cache_ref_entry_by_hash(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, void **data, dt_pixel_cache_entry_t **entry)
Resolve and retain an existing cache entry by hash.
int dt_dev_pixelpipe_cache_prepare_cl_input(dt_dev_pixelpipe_t *pipe, dt_iop_module_t *module, float *input, void **cl_mem_input, const dt_iop_roi_t *roi_in, const size_t in_bpp, dt_pixel_cache_entry_t *input_entry, dt_pixel_cache_entry_t **locked_input_entry, void *keep)
Prepare/obtain the OpenCL input image for a module.
static gboolean _for_each_remove_old(gpointer key, gpointer value, gpointer user_data)
dt_pixel_cache_entry_t * dt_dev_pixelpipe_cache_get_entry_by_data(dt_dev_pixelpipe_cache_t *cache, void *data)
static gboolean _is_gamma_rgba8_output(const dt_iop_module_t *module, const size_t bpp, const char *message)
dt_dev_pixelpipe_cache_writable_status_t dt_dev_pixelpipe_cache_get_writable(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash, const size_t size, const char *name, const int id, const gboolean alloc, const gboolean allow_rekey_reuse, const dt_pixel_cache_entry_t *reuse_hint, void **data, dt_pixel_cache_entry_t **entry)
gboolean dt_dev_pixelpipe_cache_restore_host_payload(dt_dev_pixelpipe_cache_t *cache, dt_pixel_cache_entry_t *cache_entry, const int preferred_devid, void **data)
Materialize a host payload for a live cache entry from its cached device payload.
void dt_dev_pixelpipe_cache_wrlock_entry(dt_dev_pixelpipe_cache_t *cache, gboolean lock, dt_pixel_cache_entry_t *cache_entry)
Lock or release the write lock on the entry.
static void _pixelpipe_cache_finalize_entry(dt_pixel_cache_entry_t *cache_entry, void **data, const char *message)
int dt_dev_pixelpipe_cache_sync_cl_buffer(const int devid, void *host_ptr, void *cl_mem_buffer, const dt_iop_roi_t *roi, int cl_mode, size_t bpp, dt_iop_module_t *module, const char *message)
Synchronize between host memory and a pinned OpenCL image.
void dt_dev_pixelpipe_cache_return_cl_payload(dt_pixel_cache_entry_t *entry, void *mem)
Return a borrowed cached OpenCL payload to its cache entry.
gboolean dt_dev_pixelpipe_cache_flush_host_pinned_image(dt_dev_pixelpipe_cache_t *cache, void *host_ptr, dt_pixel_cache_entry_t *entry_hint, int devid)
Drop cached pinned OpenCL images associated with a given host buffer.
size_t dt_pixel_cache_entry_get_size(dt_pixel_cache_entry_t *entry)
Peek the size (in bytes) reserved for the host buffer of a cache entry.
int dt_dev_pixelpipe_cache_rekey(dt_dev_pixelpipe_cache_t *cache, const uint64_t old_hash, const uint64_t new_hash, dt_pixel_cache_entry_t *entry)
Change the hash/key of an existing cache line in place, without freeing, reallocating or invalidating...
void dt_dev_pixelpipe_cache_rdlock_entry(dt_dev_pixelpipe_cache_t *cache, gboolean lock, dt_pixel_cache_entry_t *cache_entry)
Lock or release the read lock on the entry.
static void _log_arena_allocation_failure(dt_dev_pixelpipe_cache_t *cache, size_t request_size, const char *entry_name, const char *module, uint64_t hash, gboolean name_is_file)
int _non_thread_safe_cache_remove(dt_dev_pixelpipe_cache_t *cache, const gboolean force, dt_pixel_cache_entry_t *cache_entry, GHashTable *table)
static gboolean _cache_try_restore_device_payload(dt_pixel_cache_entry_t *cache_entry, const int preferred_devid, void **cl_mem_output)
static void _pixel_cache_message(dt_pixel_cache_entry_t *cache_entry, const char *message, gboolean verbose)
static void * _arena_alloc_with_defrag(dt_dev_pixelpipe_cache_t *cache, size_t request_size, size_t *actual_size)
static void * _pixel_cache_clmem_get(dt_pixel_cache_entry_t *entry, void *host_ptr, int devid, int width, int height, int bpp, int flags)
static dt_pixel_cache_entry_t * _non_threadsafe_cache_get_entry(dt_dev_pixelpipe_cache_t *cache, GHashTable *table, const uint64_t key)
void * dt_pixelpipe_cache_alloc_align_cache_impl(dt_dev_pixelpipe_cache_t *cache, size_t size, int id, const char *name)
Allocate aligned memory tracked by the pixelpipe cache. This allows LRU cache entries to be evicted i...
static gboolean _cache_entry_materialize_host_data(dt_dev_pixelpipe_cache_t *cache, int preferred_devid, dt_pixel_cache_entry_t *entry)
static gboolean _cache_entry_materialize_host_data_locked(dt_pixel_cache_entry_t *entry, int preferred_devid, gboolean prefer_device_payload)
dt_pixel_cache_entry_t * dt_dev_pixelpipe_cache_ref_entry_for_host_ptr(dt_dev_pixelpipe_cache_t *cache, void *host_ptr)
Resolve and retain the cache entry owning a host pointer.
static dt_pixel_cache_entry_t * dt_pixel_cache_new_entry(const uint64_t hash, const size_t size, const char *name, const int id, dt_dev_pixelpipe_cache_t *cache, gboolean alloc, GHashTable *table)
void _non_thread_safe_cache_ref_count_entry(dt_dev_pixelpipe_cache_t *cache, gboolean lock, dt_pixel_cache_entry_t *cache_entry)
void dt_dev_pixelpipe_cache_flush_clmem(dt_dev_pixelpipe_cache_t *cache, const int devid)
Release cached OpenCL buffers for a single device.
float * dt_dev_pixelpipe_cache_restore_cl_buffer(dt_dev_pixelpipe_t *pipe, float *input, void *cl_mem_input, const dt_iop_roi_t *roi_in, dt_iop_module_t *module, const size_t in_bpp, dt_pixel_cache_entry_t *input_entry, const char *message)
Force device → host resynchronization of the pixelpipe input cache line.
const char * dt_pixelpipe_cache_set_current_module(const char *module)
Set the current module name for cache diagnostics (thread-local).
Pixelpipe cache for storing intermediate results in the pixelpipe.
#define DT_PIXELPIPE_CACHE_HASH_INVALID
dt_dev_pixelpipe_cache_writable_status_t
@ DT_DEV_PIXELPIPE_CACHE_WRITABLE_REKEYED
@ DT_DEV_PIXELPIPE_CACHE_WRITABLE_ERROR
@ DT_DEV_PIXELPIPE_CACHE_WRITABLE_CREATED
@ DT_DEV_PIXELPIPE_CACHE_WRITABLE_EXACT_HIT
#define DT_DEBUG_CONTROL_SIGNAL_RAISE(ctlsig, signal,...)
Definition signal.h:347
@ DT_SIGNAL_CACHELINE_READY
This signal is raised when one cacheline write lock is released. 1 : uint64_t cacheline hash no retur...
Definition signal.h:185
const float uint32_t state[4]
unsigned __int64 uint64_t
Definition strptime.c:75
dt_pixel_cache_entry_t * cache_entry
struct dt_dev_pixelpipe_cache_t * pixelpipe_cache
Definition darktable.h:790
struct dt_control_signal_t * signals
Definition darktable.h:774
struct dt_opencl_t * opencl
Definition darktable.h:785
int32_t unmuted
Definition darktable.h:760
dt_pthread_mutex_t lock
GModule *dt_dev_operation_t op
Definition imageop.h:256
Region of interest passed through the pixelpipe.
Definition imageop.h:72
dt_pthread_mutex_t lock
Definition opencl.h:124
dt_opencl_device_t * dev
Definition opencl.h:246
int inited
Definition opencl.h:232
uint64_t hash
gboolean auto_destroy
dt_atomic_int refcount
gboolean external_alloc
void * data
size_t size
int64_t age
uint64_t serial
dt_dev_pixelpipe_cache_t * cache
dt_pthread_rwlock_t lock
dt_pthread_mutex_t cl_mem_lock
GList * cl_mem_list
char * name
int hits
int id