Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
opencl.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2010-2012, 2016 johannes hanika.
4 Copyright (C) 2011 Bruce Guenter.
5 Copyright (C) 2011 Henrik Andersson.
6 Copyright (C) 2011 Moritz Lipp.
7 Copyright (C) 2011-2019 Ulrich Pegelow.
8 Copyright (C) 2012 Christian Tellefsen.
9 Copyright (C) 2012 Jérémy Rosen.
10 Copyright (C) 2012 Michal Babej.
11 Copyright (C) 2012 Richard Wonka.
12 Copyright (C) 2012-2014, 2016-2018 Tobias Ellinghaus.
13 Copyright (C) 2013-2019 Roman Lebedev.
14 Copyright (C) 2013 Simon Spannagel.
15 Copyright (C) 2015, 2019 Dan Torop.
16 Copyright (C) 2015, 2017 parafin.
17 Copyright (C) 2015 Pascal de Bruijn.
18 Copyright (C) 2016-2017, 2019 Peter Budai.
19 Copyright (C) 2017-2019 Edgardo Hoszowski.
20 Copyright (C) 2017, 2019 luzpaz.
21 Copyright (C) 2019 Andreas Schneider.
22 Copyright (C) 2019, 2021-2022, 2025-2026 Aurélien PIERRE.
23 Copyright (C) 2019 Damian D. Martinez Dreyer.
24 Copyright (C) 2019-2020 Heiko Bauke.
25 Copyright (C) 2019 jakubfi.
26 Copyright (C) 2019-2021 Pascal Obry.
27 Copyright (C) 2020 David-Tillmann Schaefer.
28 Copyright (C) 2020-2022 Hubert Kowalski.
29 Copyright (C) 2020-2021 Ralf Brown.
30 Copyright (C) 2021 Chris Elston.
31 Copyright (C) 2022 Hanno Schwalm.
32 Copyright (C) 2022 Martin Bařinka.
33 Copyright (C) 2022 Victor Forsiuk.
34 Copyright (C) 2024 Alynx Zhou.
35 Copyright (C) 2025 Guillaume Stutin.
36
37 darktable is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
41
42 darktable is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with darktable. If not, see <http://www.gnu.org/licenses/>.
49*/
50
51#ifdef HAVE_OPENCL
52
53#include "common/opencl.h"
54#include "common/bilateralcl.h"
55#include "common/darktable.h"
56#include "common/dlopencl.h"
57#include "common/dwt.h"
59#include "common/gaussian.h"
61#include "common/heal.h"
64#include "common/nvidia_gpus.h"
66#include "common/tea.h"
67#include "control/conf.h"
68#include "control/control.h"
69#include "gui/splash.h"
70#include "develop/blend.h"
71#include "develop/pixelpipe.h"
73
74#include <assert.h>
75#include <locale.h>
76#include <stdio.h>
77#include <string.h>
78#include <strings.h>
79
80#include <ctype.h>
81#include <errno.h>
82#include <libgen.h>
83#include <sys/stat.h>
84#include <zlib.h>
85
86static gboolean _opencl_splash_active = FALSE;
87
88static inline void _opencl_splash_update_compile(const char *programname)
89{
90 if(IS_NULL_PTR(programname)) return;
91 if(IS_NULL_PTR(darktable.gui)) return;
92
94 {
97 }
98
99 dt_gui_splash_updatef(_("Building OpenCL kernels: %s"), programname);
100}
101
102static const char *dt_opencl_get_vendor_by_id(unsigned int id);
103static char *_ascii_str_canonical(const char *in, char *out, int maxlen);
105static void dt_opencl_priority_parse(dt_opencl_t *cl, char *configstr, int *priority_list, int *mandatory);
107static void dt_opencl_update_priorities();
112
113
114int dt_opencl_get_device_info(dt_opencl_t *cl, cl_device_id device, cl_device_info param_name, void **param_value,
115 size_t *param_value_size)
116{
117 *param_value_size = SIZE_MAX;
118
119 // 1. figure out how much memory is needed
120 cl_int err = (cl->dlocl->symbols->dt_clGetDeviceInfo)(device, param_name, 0, NULL, param_value_size);
121 if(err != CL_SUCCESS)
122 {
124 "[dt_opencl_get_device_info] could not query the actual size in bytes of info %d: %i\n", param_name, err);
125 goto error;
126 }
127
128 // 2. did we /actually/ get the size?
129 if(*param_value_size == SIZE_MAX || *param_value_size == 0)
130 {
131 // both of these sizes make no sense. either i failed to parse spec, or opencl implementation bug?
133 "[dt_opencl_get_device_info] ERROR: no size returned, or zero size returned for data %d: %" G_GSIZE_FORMAT "\n",
134 param_name, *param_value_size);
135 err = CL_INVALID_VALUE; // FIXME: anything better?
136 goto error;
137 }
138
139 // 3. make sure that *param_value points to big-enough memory block
140 {
141 void *ptr = realloc(*param_value, *param_value_size);
142 if(IS_NULL_PTR(ptr))
143 {
145 "[dt_opencl_get_device_info] memory allocation failed! tried to allocate %" G_GSIZE_FORMAT " bytes for data %d: %i",
146 *param_value_size, param_name, err);
147 err = CL_OUT_OF_HOST_MEMORY;
148 goto error;
149 }
150
151 // allocation succeeded, update pointer.
152 *param_value = ptr;
153 }
154
155 // 4. actually get the value
156 err = (cl->dlocl->symbols->dt_clGetDeviceInfo)(device, param_name, *param_value_size, *param_value, NULL);
157 if(err != CL_SUCCESS)
158 {
159 dt_print(DT_DEBUG_OPENCL, "[dt_opencl_get_device_info] could not query info %d: %i\n", param_name, err);
160 goto error;
161 }
162
163 return CL_SUCCESS;
164
165error:
166 dt_free(*param_value);
167 *param_value_size = 0;
168 return err;
169}
170
171int dt_opencl_avoid_atomics(const int devid)
172{
174 return (!cl->inited || devid < 0) ? 0 : cl->dev[devid].avoid_atomics;
175}
176
177int dt_opencl_micro_nap(const int devid)
178{
180 return (!cl->inited || devid < 0) ? 0 : cl->dev[devid].micro_nap;
181}
182
183gboolean dt_opencl_use_pinned_memory(const int devid)
184{
186 if(!cl->inited || devid < 0) return FALSE;
187 return cl->dev[devid].pinned_memory;
188}
189
190gboolean dt_opencl_is_pinned_memory(cl_mem mem)
191{
192 const cl_mem_flags flags = dt_opencl_get_mem_flags(mem);
193 return (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_ALLOC_HOST_PTR);
194}
195
196void dt_opencl_write_device_config(const int devid)
197{
198 if(devid < 0) return;
200 gchar buf[256] = { 0 };
201 gchar key_device[256] = { 0 };
202 g_snprintf(key_device, 254, "%s/%i/%s", DT_CLDEVICE_HEAD, devid, cl->dev[devid].cname);
203
204 g_snprintf(buf, sizeof(buf), "%s/avoid_atomics", key_device);
205 dt_conf_set_int(buf, cl->dev[devid].avoid_atomics);
206
207 g_snprintf(buf, sizeof(buf), "%s/micro_nap", key_device);
208 dt_conf_set_int(buf, cl->dev[devid].micro_nap);
209
210 g_snprintf(buf, sizeof(buf), "%s/pinned_memory", key_device);
212
213 g_snprintf(buf, sizeof(buf), "%s/wd", key_device);
214 dt_conf_set_int(buf, cl->dev[devid].clroundup_wd);
215
216 g_snprintf(buf, sizeof(buf), "%s/ht", key_device);
217 dt_conf_set_int(buf, cl->dev[devid].clroundup_ht);
218
219 g_snprintf(buf, sizeof(buf), "%s/event_handles", key_device);
220 dt_conf_set_int(buf, cl->dev[devid].event_handles);
221
222 g_snprintf(buf, sizeof(buf), "%s/disabled", key_device);
223 dt_conf_set_int(buf, cl->dev[devid].disabled & 1);
224
225 g_snprintf(buf, sizeof(buf), "%s/id%i/forced_headroom", key_device, devid);
226 dt_conf_set_int(buf, cl->dev[devid].forced_headroom);
227}
228
229static int _dt_opencl_get_conf_int(const gchar *key_device, const gchar *conf_name, gboolean *safety_ok)
230{
231 int res = 0;
232 gchar *key = g_strconcat(key_device, "/", conf_name, NULL);
233 const gboolean existing_device = dt_conf_key_not_empty(key);
234 if(existing_device)
235 res = dt_conf_get_int(key);
236 else
237 {
238 dt_print(DT_DEBUG_OPENCL, "Warning: conf '%s' not found in anselrc.\n", key);
239 *safety_ok = FALSE;
240 }
241
242 dt_free(key);
243 return res;
244}
245
246gboolean dt_opencl_read_device_config(const int devid)
247{
248 if(devid < 0) return FALSE;
250 gchar key_device[256] = { 0 };
251 g_snprintf(key_device, 254, "%s/%i/%s", DT_CLDEVICE_HEAD, devid, cl->dev[devid].cname);
252 gboolean safety_ok = TRUE;
253
254 int avoid_atomics = _dt_opencl_get_conf_int(key_device, "avoid_atomics", &safety_ok);
255 int micro_nap = _dt_opencl_get_conf_int(key_device, "micro_nap", &safety_ok);
256 int pinned_memory = _dt_opencl_get_conf_int(key_device, "pinned_memory", &safety_ok);
257 int wd = _dt_opencl_get_conf_int(key_device, "wd", &safety_ok);
258 int ht = _dt_opencl_get_conf_int(key_device, "ht", &safety_ok);
259 int event_handles = _dt_opencl_get_conf_int(key_device, "event_handles", &safety_ok);
260 int disabled = _dt_opencl_get_conf_int(key_device, "disabled", &safety_ok);
261
262 // some rudimentary safety checking if string seems to be ok
263 safety_ok |= (wd > 1) && (wd < 513) && (ht > 1) && (ht < 513);
264
265 if(safety_ok)
266 {
267 cl->dev[devid].avoid_atomics = avoid_atomics;
268 cl->dev[devid].micro_nap = micro_nap;
269 cl->dev[devid].pinned_memory = pinned_memory;
270 cl->dev[devid].clroundup_wd = wd;
271 cl->dev[devid].clroundup_ht = ht;
272 cl->dev[devid].event_handles = event_handles;
273 cl->dev[devid].disabled = disabled;
274 }
275 else // if there is something wrong with the found conf key reset to defaults
276 {
277 dt_print(DT_DEBUG_OPENCL, "[dt_opencl_read_device_config] malformed data '%s'\n", key_device);
278 }
279
280 // do some safety housekeeping
281 cl->dev[devid].avoid_atomics &= 1;
283 cl->dev[devid].micro_nap = CLAMP(cl->dev[devid].micro_nap, 250, 1000000);
284 if((cl->dev[devid].clroundup_wd < 2) || (cl->dev[devid].clroundup_wd > 512))
285 cl->dev[devid].clroundup_wd = 16;
286 if((cl->dev[devid].clroundup_ht < 2) || (cl->dev[devid].clroundup_ht > 512))
287 cl->dev[devid].clroundup_ht = 16;
288 if(cl->dev[devid].event_handles < 0)
289 cl->dev[devid].event_handles = 0x40961440;
290
291 cl->dev[devid].use_events = (cl->dev[devid].event_handles != 0) ? 1 : 0;
292 cl->dev[devid].disabled &= 1;
293
294 // Also take care of extended device data, these are not only device specific but also depend on the devid
295 g_snprintf(key_device, 254, "%s/%i/%s/id%i/forced_headroom", DT_CLDEVICE_HEAD, devid, cl->dev[devid].cname, devid);
296 if(dt_conf_key_not_empty(key_device))
297 {
298 int forced_headroom = dt_conf_get_int(key_device);
299 if(forced_headroom > 0) cl->dev[devid].forced_headroom = forced_headroom;
300 }
301 else // this is used if updating to 4.0 or fresh installs; see commenting _opencl_get_unused_device_mem()
302 cl->dev[devid].forced_headroom = dt_conf_get_int64("memory_opencl_headroom");
303
305 return !safety_ok;
306}
307
309{
311 if(IS_NULL_PTR(cl)) return 0;
312
313 return cl->num_detected_devs;
314}
315
317{
319 if(IS_NULL_PTR(cl) || detected < 0 || detected >= cl->num_detected_devs) return NULL;
320
321 return cl->detected_devs + detected;
322}
323
324gboolean dt_opencl_detected_device_enabled(const int detected)
325{
327 if(IS_NULL_PTR(device)) return FALSE;
328
329 gchar key[256] = { 0 };
330 g_snprintf(key, sizeof(key), "%s/%d/%s/disabled", DT_CLDEVICE_HEAD, device->config_id,
331 !IS_NULL_PTR(device->cname) ? device->cname : "");
332 const gboolean disabled = dt_conf_key_not_empty(key) ? dt_conf_get_int(key) : (device->disabled & 1);
333
334 return !disabled;
335}
336
337int dt_opencl_set_detected_device_enabled(const int detected, const gboolean enabled)
338{
340 if(IS_NULL_PTR(device)) return -1;
341
342 gchar key[256] = { 0 };
343 g_snprintf(key, sizeof(key), "%s/%d/%s/disabled", DT_CLDEVICE_HEAD, device->config_id,
344 !IS_NULL_PTR(device->cname) ? device->cname : "");
345 dt_conf_set_int(key, enabled ? 0 : 1);
346
348 cl->detected_devs[detected].disabled = enabled ? 0 : 1;
349
350 gboolean opencl_enabled = enabled;
351 if(!opencl_enabled)
352 {
353 // The global OpenCL preference is derived from all detected GPUs. We are looking
354 // for any GPU still enabled before turning OpenCL off globally.
355 for(int dev = 0; dev < cl->num_detected_devs; dev++)
356 {
358 {
359 opencl_enabled = TRUE;
360 break;
361 }
362 }
363 }
364
365 dt_conf_set_bool("opencl", opencl_enabled);
366 return 0;
367}
368
369gboolean dt_opencl_detected_device_pinned_memory(const int detected)
370{
372 if(IS_NULL_PTR(device)) return FALSE;
373
374 gchar key[256] = { 0 };
375 g_snprintf(key, sizeof(key), "%s/%d/%s/pinned_memory", DT_CLDEVICE_HEAD, device->config_id,
376 !IS_NULL_PTR(device->cname) ? device->cname : "");
377 const int pinned_memory = dt_conf_key_not_empty(key) ? dt_conf_get_int(key) : device->pinned_memory;
378
379 return pinned_memory & DT_OPENCL_PINNING_ON;
380}
381
382int dt_opencl_set_detected_device_pinned_memory(const int detected, const gboolean enabled)
383{
385 if(IS_NULL_PTR(device)) return -1;
386
387 gchar key[256] = { 0 };
388 const int pinned_memory = enabled ? DT_OPENCL_PINNING_ON : DT_OPENCL_PINNING_OFF;
389 g_snprintf(key, sizeof(key), "%s/%d/%s/pinned_memory", DT_CLDEVICE_HEAD, device->config_id,
390 !IS_NULL_PTR(device->cname) ? device->cname : "");
391 dt_conf_set_int(key, pinned_memory);
392
394 cl->detected_devs[detected].pinned_memory = pinned_memory;
395 return 0;
396}
397
398size_t dt_opencl_detected_device_headroom(const int detected)
399{
401 if(IS_NULL_PTR(device)) return 0;
402
403 gchar key[256] = { 0 };
404 g_snprintf(key, sizeof(key), "%s/%d/%s/id%d/forced_headroom", DT_CLDEVICE_HEAD, device->config_id,
405 !IS_NULL_PTR(device->cname) ? device->cname : "", device->config_id);
406
407 return dt_conf_key_not_empty(key) ? (size_t)dt_conf_get_int(key) : device->forced_headroom;
408}
409
410int dt_opencl_set_detected_device_headroom(const int detected, const size_t headroom)
411{
413 if(IS_NULL_PTR(device)) return -1;
414
415 gchar key[256] = { 0 };
416 g_snprintf(key, sizeof(key), "%s/%d/%s/id%d/forced_headroom", DT_CLDEVICE_HEAD, device->config_id,
417 !IS_NULL_PTR(device->cname) ? device->cname : "", device->config_id);
418 const int clamped_headroom = (int)MIN(headroom, (size_t)G_MAXINT);
419 dt_conf_set_int(key, clamped_headroom);
420
422 cl->detected_devs[detected].forced_headroom = clamped_headroom;
423 return 0;
424}
425
426// returns 0 if all ok or an error if we failed to init this device
427static int dt_opencl_device_init(dt_opencl_t *cl, const int dev, cl_device_id *devices, const int k)
428{
429 int res;
430 cl_int err;
431 gboolean lock_initialized = FALSE;
432
433 memset(cl->dev[dev].program, 0x0, sizeof(cl_program) * DT_OPENCL_MAX_PROGRAMS);
434 memset(cl->dev[dev].program_used, 0x0, sizeof(int) * DT_OPENCL_MAX_PROGRAMS);
435 memset(cl->dev[dev].kernel, 0x0, sizeof(cl_kernel) * DT_OPENCL_MAX_KERNELS);
436 memset(cl->dev[dev].kernel_used, 0x0, sizeof(int) * DT_OPENCL_MAX_KERNELS);
437 cl->dev[dev].context = NULL;
438 cl->dev[dev].cmd_queue = NULL;
439 cl->dev[dev].eventlist = NULL;
440 cl->dev[dev].eventtags = NULL;
441 cl->dev[dev].numevents = 0;
442 cl->dev[dev].eventsconsolidated = 0;
443 cl->dev[dev].maxevents = 0;
444 cl->dev[dev].maxeventslot = 0;
445 cl->dev[dev].lostevents = 0;
446 cl->dev[dev].totalevents = 0;
447 cl->dev[dev].totalsuccess = 0;
448 cl->dev[dev].totallost = 0;
449 cl->dev[dev].summary = CL_COMPLETE;
450 cl->dev[dev].used_global_mem = 0;
451 cl->dev[dev].nvidia_sm_20 = 0;
452 cl->dev[dev].vendor = NULL;
453 cl->dev[dev].name = NULL;
454 cl->dev[dev].cname = NULL;
455 cl->dev[dev].options = NULL;
456 cl->dev[dev].options_md5 = NULL;
457 cl->dev[dev].memory_in_use = 0;
458 cl->dev[dev].peak_memory = 0;
459 cl->dev[dev].used_available = 0;
460 // setting sane/conservative defaults at first
461 cl->dev[dev].avoid_atomics = 0;
462 cl->dev[dev].micro_nap = 250;
464 cl->dev[dev].clroundup_wd = 16;
465 cl->dev[dev].clroundup_ht = 16;
466 cl->dev[dev].use_events = 1;
467 cl->dev[dev].event_handles = 128;
468 cl->dev[dev].disabled = 0;
469 cl->dev[dev].forced_headroom = 0;
470 cl->dev[dev].runtime_error = 0;
471 cl_device_id devid = cl->dev[dev].devid = devices[k];
472
473 char *infostr = NULL;
474 size_t infostr_size;
475
476 char *cname = NULL;
477 size_t cname_size;
478
479 char *vendor = NULL;
480 size_t vendor_size;
481
482 char *driverversion = NULL;
483 size_t driverversion_size;
484
485 char *deviceversion = NULL;
486 size_t deviceversion_size;
487
488 size_t infoint;
489 size_t *infointtab = NULL;
490 cl_device_type type;
491 cl_bool image_support = 0;
492 cl_bool device_available = 0;
493 cl_uint vendor_id = 0;
494 cl_bool little_endian = 0;
495 cl_platform_id platform_id = 0;
496
497 char *dtcache = calloc(PATH_MAX, sizeof(char));
498 char *cachedir = calloc(PATH_MAX, sizeof(char));
499 char *devname = calloc(DT_OPENCL_CBUFFSIZE, sizeof(char));
500 char *drvversion = calloc(DT_OPENCL_CBUFFSIZE, sizeof(char));
501 char *platform_name = calloc(DT_OPENCL_CBUFFSIZE, sizeof(char));
502 char *platform_vendor = calloc(DT_OPENCL_CBUFFSIZE, sizeof(char));
503
504 char kerneldir[PATH_MAX] = { 0 };
505 char *filename = calloc(PATH_MAX, sizeof(char));
506 char *confentry = calloc(PATH_MAX, sizeof(char));
507 char *binname = calloc(PATH_MAX, sizeof(char));
508 dt_print_nts(DT_DEBUG_OPENCL, "\n[dt_opencl_device_init]\n");
509
510 // test GPU availability, vendor, memory, image support etc:
511 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_AVAILABLE, sizeof(cl_bool), &device_available, NULL);
512
513 err = dt_opencl_get_device_info(cl, devid, CL_DEVICE_VENDOR, (void **)&vendor, &vendor_size);
514 if(err != CL_SUCCESS)
515 {
516 dt_print_nts(DT_DEBUG_OPENCL, " *** could not get vendor name of device %d: %i\n", k, err);
517 res = -1;
518 goto end;
519 }
520
521 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_VENDOR_ID, sizeof(cl_uint), &vendor_id, NULL);
522
523 err = dt_opencl_get_device_info(cl, devid, CL_DEVICE_NAME, (void **)&infostr, &infostr_size);
524 if(err != CL_SUCCESS)
525 {
526 dt_print_nts(DT_DEBUG_OPENCL, " *** could not get device name of device %d: %i\n", k, err);
527 res = -1;
528 goto end;
529 }
530
531 // get the canonical device name
532 cname_size = infostr_size;
533 cname = malloc(cname_size);
534 _ascii_str_canonical(infostr, cname, cname_size);
535 cl->dev[dev].name = strdup(infostr);
536 cl->dev[dev].cname = strdup(cname);
537
538 // take every detected device into account of checksum
539 cl->crc = crc32(cl->crc, (const unsigned char *)infostr, strlen(infostr));
540
541 err = (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform_id, NULL);
542 if(err != CL_SUCCESS)
543 {
544 g_strlcpy(platform_vendor, "no platform id", DT_OPENCL_CBUFFSIZE);
545 g_strlcpy(platform_name, "no platform id", DT_OPENCL_CBUFFSIZE);
546 dt_print_nts(DT_DEBUG_OPENCL, " *** could not get platform id for device `%s' : %i\n", cl->dev[dev].name, err);
547 }
548 else
549 {
550 err = (cl->dlocl->symbols->dt_clGetPlatformInfo)(platform_id, CL_PLATFORM_NAME, DT_OPENCL_CBUFFSIZE, platform_name, NULL);
551 if(err != CL_SUCCESS)
552 {
553 dt_print_nts(DT_DEBUG_OPENCL, " *** could not get platform name for device `%s' : %i\n", cl->dev[dev].name, err);
554 g_strlcpy(platform_name, "???", DT_OPENCL_CBUFFSIZE);
555 }
556
557 err = (cl->dlocl->symbols->dt_clGetPlatformInfo)(platform_id, CL_PLATFORM_VENDOR, DT_OPENCL_CBUFFSIZE, platform_vendor, NULL);
558 if(err != CL_SUCCESS)
559 {
560 dt_print_nts(DT_DEBUG_OPENCL, " *** could not get platform vendor for device `%s' : %i\n", cl->dev[dev].name, err);
561 g_strlcpy(platform_vendor, "???", DT_OPENCL_CBUFFSIZE);
562 }
563 }
564
565 const gboolean newdevice = dt_opencl_read_device_config(dev);
566 dt_print_nts(DT_DEBUG_OPENCL, " DEVICE: %d: '%s'%s\n", k, infostr, (newdevice) ? ", NEW" : "" );
567 dt_print_nts(DT_DEBUG_OPENCL, " CANONICAL NAME: %s\n", cname);
568 dt_print_nts(DT_DEBUG_OPENCL, " PLATFORM NAME & VENDOR: %s, %s\n", platform_name, platform_vendor);
569
570 err = dt_opencl_get_device_info(cl, devid, CL_DRIVER_VERSION, (void **)&driverversion, &driverversion_size);
571 if(err != CL_SUCCESS)
572 {
573 dt_print_nts(DT_DEBUG_OPENCL, " *** driver version not available *** %i\n", err);
574 res = -1;
575 cl->dev[dev].disabled |= 1;
576 goto end;
577 }
578
579 err = dt_opencl_get_device_info(cl, devid, CL_DEVICE_VERSION, (void **)&deviceversion, &deviceversion_size);
580 if(err != CL_SUCCESS)
581 {
582 dt_print_nts(DT_DEBUG_OPENCL, " *** device version not available *** %i\n", err);
583 res = -1;
584 cl->dev[dev].disabled |= 1;
585 goto end;
586 }
587
588 // take every detected device driver into account of checksum
589 cl->crc = crc32(cl->crc, (const unsigned char *)deviceversion, deviceversion_size);
590
591 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL);
592 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL);
593 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t),
594 &(cl->dev[dev].max_image_height), NULL);
595 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t),
596 &(cl->dev[dev].max_image_width), NULL);
597 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong),
598 &(cl->dev[dev].max_mem_alloc), NULL);
599 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool), &little_endian, NULL);
600
601 cl->dev[dev].cltype = (unsigned int)type;
602
603
604 if(!strncasecmp(vendor, "NVIDIA", 6))
605 {
606 // very lame attempt to detect support for atomic float add in global memory.
607 // we need compute model sm_20, but let's try for all nvidia devices :(
609 }
610
611 const gboolean is_cpu_device = (type & CL_DEVICE_TYPE_CPU) == CL_DEVICE_TYPE_CPU;
612
613 // micro_nap can be made less conservative on current systems at least if not on-CPU
614 if(newdevice)
615 cl->dev[dev].micro_nap = (is_cpu_device) ? 1000 : 250;
616
617 dt_print_nts(DT_DEBUG_OPENCL, " DRIVER VERSION: %s\n", driverversion);
618 dt_print_nts(DT_DEBUG_OPENCL, " DEVICE VERSION: %s%s\n", deviceversion,
619 cl->dev[dev].nvidia_sm_20 ? ", SM_20 SUPPORT" : "");
620 dt_print_nts(DT_DEBUG_OPENCL, " DEVICE_TYPE: %s%s%s\n",
621 ((type & CL_DEVICE_TYPE_CPU) == CL_DEVICE_TYPE_CPU) ? "CPU" : "",
622 ((type & CL_DEVICE_TYPE_GPU) == CL_DEVICE_TYPE_GPU) ? "GPU" : "",
623 (type & CL_DEVICE_TYPE_ACCELERATOR) ? ", Accelerator" : "" );
624
625 if(is_cpu_device && newdevice)
626 {
627 dt_print_nts(DT_DEBUG_OPENCL, " *** discarding new device as emulated by CPU ***\n");
628 cl->dev[dev].disabled |= 1;
629 res = -1;
630 goto end;
631 }
632
633 if(!device_available)
634 {
635 dt_print_nts(DT_DEBUG_OPENCL, " *** device is not available ***\n");
636 res = -1;
637 goto end;
638 }
639
640 if(!image_support)
641 {
642 dt_print_nts(DT_DEBUG_OPENCL, " *** The OpenCL driver doesn't provide image support. See also 'clinfo' output ***\n");
643 res = -1;
644 cl->dev[dev].disabled |= 1;
645 goto end;
646 }
647
648 if(!little_endian)
649 {
650 dt_print_nts(DT_DEBUG_OPENCL, " *** device is not little endian ***\n");
651 res = -1;
652 cl->dev[dev].disabled |= 1;
653 goto end;
654 }
655
656 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong),
657 &(cl->dev[dev].max_global_mem), NULL);
658 if(cl->dev[dev].max_global_mem < (uint64_t)512ul * 1024ul * 1024ul)
659 {
660 dt_print_nts(DT_DEBUG_OPENCL, " *** insufficient global memory (%" PRIu64 "MB) ***\n",
661 cl->dev[dev].max_global_mem / 1024 / 1024);
662 res = -1;
663 cl->dev[dev].disabled |= 1;
664 goto end;
665 }
666
667 cl->dev[dev].vendor = strdup(dt_opencl_get_vendor_by_id(vendor_id));
668
669 const gboolean is_blacklisted = dt_opencl_check_driver_blacklist(deviceversion);
670
671 // disable device for now if this is the first time detected and blacklisted too.
672 if(newdevice && is_blacklisted)
673 {
674 // To keep installations we look for the old blacklist conf key
675 const gboolean old_blacklist = dt_conf_get_bool("opencl_disable_drivers_blacklist");
676 cl->dev[dev].disabled |= (old_blacklist) ? 0 : 1;
677 if(cl->dev[dev].disabled)
678 dt_print_nts(DT_DEBUG_OPENCL, " *** new device is blacklisted ***\n");
679 res = -1;
680 goto end;
681 }
682
683 dt_print_nts(DT_DEBUG_OPENCL, " GLOBAL MEM SIZE: %.0f MB\n", (double)cl->dev[dev].max_global_mem / 1024.0 / 1024.0);
684 dt_print_nts(DT_DEBUG_OPENCL, " MAX MEM ALLOC: %.0f MB\n", (double)cl->dev[dev].max_mem_alloc / 1024.0 / 1024.0);
685 dt_print_nts(DT_DEBUG_OPENCL, " MAX IMAGE SIZE: %" G_GSIZE_FORMAT " x %" G_GSIZE_FORMAT "\n", cl->dev[dev].max_image_width, cl->dev[dev].max_image_height);
686 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(infoint), &infoint, NULL);
687 dt_print_nts(DT_DEBUG_OPENCL, " MAX WORK GROUP SIZE: %" G_GSIZE_FORMAT "\n", infoint);
688 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(infoint), &infoint, NULL);
689 dt_print_nts(DT_DEBUG_OPENCL, " MAX WORK ITEM DIMENSIONS: %" G_GSIZE_FORMAT "\n", infoint);
690
691 size_t infointtab_size;
692 err = dt_opencl_get_device_info(cl, devid, CL_DEVICE_MAX_WORK_ITEM_SIZES, (void **)&infointtab, &infointtab_size);
693 if(err == CL_SUCCESS)
694 {
695 dt_print_nts(DT_DEBUG_OPENCL, " MAX WORK ITEM SIZES: [ ");
696 for(size_t i = 0; i < infoint; i++) dt_print_nts(DT_DEBUG_OPENCL, "%" G_GSIZE_FORMAT " ", infointtab[i]);
697 dt_free(infointtab);
699 }
700 else
701 {
702 dt_print_nts(DT_DEBUG_OPENCL, " *** could not get maximum work item sizes ***\n");
703 res = -1;
704 cl->dev[dev].disabled |= 1;
705 goto end;
706 }
707
708 const gboolean pinning = (cl->dev[dev].pinned_memory & DT_OPENCL_PINNING_ON);
709 dt_print_nts(DT_DEBUG_OPENCL, " PINNED MEMORY TRANSFER: %s\n", pinning ? "WANTED" : "NO");
710 dt_print_nts(DT_DEBUG_OPENCL, " FORCED HEADROOM: %" G_GSIZE_FORMAT "\n", cl->dev[dev].forced_headroom);
711 dt_print_nts(DT_DEBUG_OPENCL, " AVOID ATOMICS: %s\n", (cl->dev[dev].avoid_atomics) ? "YES" : "NO");
712 dt_print_nts(DT_DEBUG_OPENCL, " MICRO NAP: %i\n", cl->dev[dev].micro_nap);
713 dt_print_nts(DT_DEBUG_OPENCL, " ROUNDUP WIDTH: %i\n", cl->dev[dev].clroundup_wd);
714 dt_print_nts(DT_DEBUG_OPENCL, " ROUNDUP HEIGHT: %i\n", cl->dev[dev].clroundup_ht);
715 dt_print_nts(DT_DEBUG_OPENCL, " CHECK EVENT HANDLES: %i\n", cl->dev[dev].event_handles);
716 dt_print_nts(DT_DEBUG_OPENCL, " DEFAULT DEVICE: %s\n", (type & CL_DEVICE_TYPE_DEFAULT) ? "YES" : "NO");
717
718 if(type & CL_DEVICE_TYPE_GPU)
719 {
720 dt_opencl_detected_device_t *detected_devs
721 = g_realloc(cl->detected_devs, sizeof(*cl->detected_devs) * (cl->num_detected_devs + 1));
722 if(!IS_NULL_PTR(detected_devs))
723 {
724 cl->detected_devs = detected_devs;
726 detected->config_id = dev;
727 detected->name = g_strdup(cl->dev[dev].name);
728 detected->cname = g_strdup(cl->dev[dev].cname);
729 detected->cltype = cl->dev[dev].cltype;
730 detected->disabled = cl->dev[dev].disabled & 1;
731 detected->pinned_memory = cl->dev[dev].pinned_memory;
732 detected->forced_headroom = cl->dev[dev].forced_headroom;
733 cl->num_detected_devs++;
734 }
735 }
736
737 if(cl->dev[dev].disabled)
738 {
739 dt_print_nts(DT_DEBUG_OPENCL, " *** marked as disabled ***\n");
740 res = -1;
741 goto end;
742 }
743 dt_print_nts(DT_DEBUG_OPENCL, " *** Device enabled ***\n");
744
745 dt_pthread_mutex_init(&cl->dev[dev].lock, NULL);
746 lock_initialized = TRUE;
747
748 cl->dev[dev].context = (cl->dlocl->symbols->dt_clCreateContext)(0, 1, &devid, NULL, NULL, &err);
749 if(err != CL_SUCCESS)
750 {
751 dt_print_nts(DT_DEBUG_OPENCL, " *** could not create context *** %i\n", err);
752 res = -1;
753 goto end;
754 }
755 // create a command queue for first device the context reported
757 cl->dev[dev].context, devid, (darktable.unmuted & DT_DEBUG_PERF) ? CL_QUEUE_PROFILING_ENABLE : 0, &err);
758 if(err != CL_SUCCESS)
759 {
760 dt_print_nts(DT_DEBUG_OPENCL, " *** could not create command queue *** %i\n", err);
761 res = -1;
762 goto end;
763 }
764
765 dt_loc_get_kerneldir(kerneldir, sizeof(kerneldir));
766 dt_print_nts(DT_DEBUG_OPENCL, " KERNEL SOURCE DIRECTORY: %s\n", kerneldir);
767
768 double tstart, tend, tdiff;
769 dt_loc_get_user_cache_dir(dtcache, PATH_MAX * sizeof(char));
770
771 int len = MIN(strlen(infostr),1024 * sizeof(char));;
772 int j = 0;
773 // remove non-alphanumeric chars from device name
774 for(int i = 0; i < len; i++)
775 if(isalnum(infostr[i])) devname[j++] = infostr[i];
776 devname[j] = 0;
777 len = MIN(strlen(driverversion), 1024 * sizeof(char));
778 j = 0;
779 // remove non-alphanumeric chars from driver version
780 for(int i = 0; i < len; i++)
781 if(isalnum(driverversion[i])) drvversion[j++] = driverversion[i];
782 drvversion[j] = 0;
783 snprintf(cachedir, PATH_MAX * sizeof(char), "%s" G_DIR_SEPARATOR_S "cached_kernels_for_%s_%s", dtcache, devname, drvversion);
784
785 dt_print_nts(DT_DEBUG_OPENCL, " KERNEL BUILD DIRECTORY: %s\n", cachedir);
786
787 if(g_mkdir_with_parents(cachedir, 0700) == -1)
788 {
789 dt_print_nts(DT_DEBUG_OPENCL, " *** failed to create kernel directory `%s' ***\n", cachedir);
790 res = -1;
791 goto end;
792 }
793
794 dt_concat_path_file(filename, kerneldir, "programs.conf");
795
796 char *escapedkerneldir = NULL;
797#ifndef __APPLE__
798 escapedkerneldir = g_strdup_printf("\"%s\"", kerneldir);
799#else
800 escapedkerneldir = dt_util_str_replace(kerneldir, " ", "\\ ");
801#endif
802
803 gchar* compile_option_name_cname = g_strdup_printf("%s/%i/%s/building", DT_CLDEVICE_HEAD, dev, cl->dev[dev].cname);
804 const char* compile_opt = NULL;
805
806 if(dt_conf_key_exists(compile_option_name_cname))
807 compile_opt = dt_conf_get_string_const(compile_option_name_cname);
808 else
809 {
810 switch(vendor_id)
811 {
813 compile_opt = DT_OPENCL_DEFAULT_COMPILE_AMD;
814 break;
817 break;
820 break;
821 default:
822 compile_opt = DT_OPENCL_DEFAULT_COMPILE;
823 }
824 }
825 gchar *my_option = g_strdup(compile_opt);
826 dt_conf_set_string(compile_option_name_cname, my_option);
827
828 cl->dev[dev].options = g_strdup_printf("-w %s %s -D%s=1 -I%s",
829 my_option,
830 (cl->dev[dev].nvidia_sm_20 ? " -DNVIDIA_SM_20=1" : ""),
831 dt_opencl_get_vendor_by_id(vendor_id), escapedkerneldir);
832 // Keep kernel checksum stable when the runtime kernel path changes (e.g. AppImage mount point).
833 const char *kerneldir_token = "<ansel-kernels>";
834 char *escapedkerneldir_md5 = NULL;
835#ifndef __APPLE__
836 escapedkerneldir_md5 = g_strdup_printf("\"%s\"", kerneldir_token);
837#else
838 escapedkerneldir_md5 = g_strdup(kerneldir_token);
839#endif
840 cl->dev[dev].options_md5 = g_strdup_printf("-w %s %s -D%s=1 -I%s",
841 my_option,
842 (cl->dev[dev].nvidia_sm_20 ? " -DNVIDIA_SM_20=1" : ""),
843 dt_opencl_get_vendor_by_id(vendor_id), escapedkerneldir_md5);
844
845 dt_print_nts(DT_DEBUG_OPENCL, " CL COMPILER OPTION: %s\n", my_option);
846
847 dt_free(compile_option_name_cname);
848 dt_free(my_option);
849 dt_free(escapedkerneldir);
850 dt_free(escapedkerneldir_md5);
851 escapedkerneldir = NULL;
852
853 const char *clincludes[DT_OPENCL_MAX_INCLUDES] = { "rgb_norms.h", "noise_generator.h", "color_conversion.h", "colorspaces.cl", "colorspace.h", "common.h", NULL };
854 char *includemd5[DT_OPENCL_MAX_INCLUDES] = { NULL };
855 dt_opencl_md5sum(clincludes, includemd5);
856
857 if(newdevice) // so far the device seems to be ok. Make sure to write&export the conf database to
858 {
861 }
862
863 // now load all darktable cl kernels.
864 // TODO: compile as a job?
865 tstart = dt_get_wtime();
866 FILE *f = g_fopen(filename, "rb");
867 if(f)
868 {
869 while(!feof(f))
870 {
871 int prog = -1;
872 gchar *confline_pattern = g_strdup_printf("%%%" G_GSIZE_FORMAT "[^\n]\n", PATH_MAX * sizeof(char) - 1);
873 int rd = fscanf(f, confline_pattern, confentry);
874 dt_free(confline_pattern);
875 if(rd != 1) continue;
876 // remove comments:
877 size_t end = strlen(confentry);
878 for(size_t pos = 0; pos < end; pos++)
879 if(confentry[pos] == '#')
880 {
881 confentry[pos] = '\0';
882 for(int l = pos - 1; l >= 0; l--)
883 {
884 if(confentry[l] == ' ')
885 confentry[l] = '\0';
886 else
887 break;
888 }
889 break;
890 }
891 if(confentry[0] == '\0') continue;
892
893 const char *programname = NULL, *programnumber = NULL;
894 gchar **tokens = g_strsplit_set(confentry, " \t", 2);
895 if(tokens)
896 {
897 programname = tokens[0];
898 if(tokens[0])
899 programnumber = tokens[1]; // if the 0st wasn't NULL then we have at least the terminating NULL in [1]
900 }
901
902 prog = programnumber ? strtol(programnumber, NULL, 10) : -1;
903
904 if(IS_NULL_PTR(programname) || programname[0] == '\0' || prog < 0)
905 {
906 dt_print(DT_DEBUG_OPENCL, "[dt_opencl_device_init] malformed entry in programs.conf `%s'; ignoring it!\n", confentry);
907 continue;
908 }
909 dt_concat_path_file(filename, kerneldir, programname);
910 gchar *program_bin = g_strdup_printf("%s.bin", programname);
911 dt_concat_path_file(binname, cachedir, program_bin);
912 dt_free(program_bin);
913
914 dt_vprint(DT_DEBUG_OPENCL, "[dt_opencl_device_init] testing program `%s' ..\n", programname);
915 int loaded_cached;
916 char md5sum[33];
917 if(dt_opencl_load_program(dev, prog, filename, binname, cachedir, md5sum, includemd5, &loaded_cached))
918 {
919 if(!loaded_cached)
921
922 if(dt_opencl_build_program(dev, prog, binname, cachedir, md5sum, loaded_cached) != CL_SUCCESS)
923 {
924 dt_print(DT_DEBUG_OPENCL, "[dt_opencl_device_init] failed to compile program `%s'!\n", programname);
925 fclose(f);
926 g_strfreev(tokens);
927 res = -1;
928 goto end;
929 }
930 }
931
932 g_strfreev(tokens);
933 }
934
935 fclose(f);
936 tend = dt_get_wtime();
937 tdiff = tend - tstart;
938 dt_print_nts(DT_DEBUG_OPENCL, " KERNEL LOADING TIME: %2.4lf sec\n", tdiff);
939 }
940 else
941 {
942 dt_print_nts(DT_DEBUG_OPENCL, "[dt_opencl_device_init] could not open `%s'!\n", filename);
943 res = -1;
944 goto end;
945 }
946 for(int n = 0; n < DT_OPENCL_MAX_INCLUDES; n++) dt_free(includemd5[n]);
947 res = 0;
948
949end:
950 // we always write the device config to keep track of disabled devices
952
953 if(res != 0)
954 {
955 if(lock_initialized)
956 {
957 for(int n = 0; n < DT_OPENCL_MAX_KERNELS; n++)
958 if(cl->dev[dev].kernel_used[n]) (cl->dlocl->symbols->dt_clReleaseKernel)(cl->dev[dev].kernel[n]);
959 for(int n = 0; n < DT_OPENCL_MAX_PROGRAMS; n++)
960 if(cl->dev[dev].program_used[n]) (cl->dlocl->symbols->dt_clReleaseProgram)(cl->dev[dev].program[n]);
961 if(!IS_NULL_PTR(cl->dev[dev].cmd_queue))
963 if(!IS_NULL_PTR(cl->dev[dev].context))
964 (cl->dlocl->symbols->dt_clReleaseContext)(cl->dev[dev].context);
966 }
967
968 dt_free(cl->dev[dev].vendor);
969 dt_free(cl->dev[dev].name);
970 dt_free(cl->dev[dev].cname);
971 dt_free(cl->dev[dev].options);
972 dt_free(cl->dev[dev].options_md5);
973 }
974
975 dt_free(infostr);
976 dt_free(cname);
977 dt_free(vendor);
978 dt_free(driverversion);
979 dt_free(deviceversion);
980
981 dt_free(dtcache);
982 dt_free(cachedir);
983 dt_free(devname);
984 dt_free(drvversion);
985 dt_free(platform_name);
986 dt_free(platform_vendor);
987
988 dt_free(filename);
989 dt_free(confentry);
990 dt_free(binname);
991
992 return res;
993}
994
995void dt_opencl_init(dt_opencl_t *cl, const gboolean exclude_opencl, const gboolean print_statistics)
996{
998
999 dt_pthread_mutex_init(&cl->lock, NULL);
1000 cl->inited = 0;
1001 cl->enabled = 0;
1002 cl->stopped = 0;
1003 cl->error_count = 0;
1004 cl->print_statistics = print_statistics;
1005
1006 // work-around to fix a bug in some AMD OpenCL compilers, which would fail parsing certain numerical
1007 // constants if locale is different from "C".
1008 // we save the current locale, set locale to "C", and restore the previous setting after OpenCL is
1009 // initialized
1010 char *locale = strdup(setlocale(LC_ALL, NULL));
1011 setlocale(LC_ALL, "C");
1012
1013 cl->crc = 5781;
1014 cl->dlocl = NULL;
1015 cl->dev_priority_image = 0;
1016 cl->dev_priority_preview = 0;
1017 cl->dev_priority_export = 0;
1018 cl->dev_priority_thumbnail = 0;
1019 cl->num_detected_devs = 0;
1020 cl->detected_devs = NULL;
1021
1022 if(exclude_opencl) return;
1023
1024 cl_platform_id *all_platforms = NULL;
1025 cl_uint *all_num_devices = NULL;
1026
1027 char *platform_name = calloc(DT_OPENCL_CBUFFSIZE, sizeof(char));
1028 char *platform_vendor = calloc(DT_OPENCL_CBUFFSIZE, sizeof(char));
1029
1030 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] opencl related configuration options:\n");
1031 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] opencl: %s\n", dt_conf_get_bool("opencl") ? "ON" : "OFF" );
1032 // look for explicit definition of opencl_runtime library in preferences
1033 const char *library = dt_conf_get_string_const("opencl_library");
1034 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] opencl_library: '%s'\n", (strlen(library) == 0) ? "default path" : library);
1035 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] opencl_mandatory_timeout: %d\n",
1036 dt_conf_get_int("opencl_mandatory_timeout"));
1037
1038 // dynamically load opencl runtime
1039 if((cl->dlocl = dt_dlopencl_init(library)) == NULL)
1040 {
1042 "[opencl_init] no working opencl library found. Continue with opencl disabled\n");
1043 goto finally;
1044 }
1045 else
1046 {
1047 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] opencl library '%s' found on your system and loaded\n",
1048 cl->dlocl->library);
1049 }
1050
1051 cl_int err;
1052 all_platforms = malloc(sizeof(cl_platform_id) * DT_OPENCL_MAX_PLATFORMS);
1053 all_num_devices = malloc(sizeof(cl_uint) * DT_OPENCL_MAX_PLATFORMS);
1054 cl_uint num_platforms = DT_OPENCL_MAX_PLATFORMS;
1055 err = (cl->dlocl->symbols->dt_clGetPlatformIDs)(DT_OPENCL_MAX_PLATFORMS, all_platforms, &num_platforms);
1056 if(err != CL_SUCCESS)
1057 {
1058 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] could not get platforms: %i\n", err);
1059 goto finally;
1060 }
1061
1062 if(num_platforms == 0)
1063 {
1064 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] no opencl platform available\n");
1065 goto finally;
1066 }
1067 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] found %d platform%s\n", num_platforms,
1068 num_platforms > 1 ? "s" : "");
1069
1070 for(int n = 0; n < num_platforms; n++)
1071 {
1072 cl_platform_id platform = all_platforms[n];
1073 // get the number of GPU devices available to the platforms
1074 // the other common option is CL_DEVICE_TYPE_GPU/CPU (but the latter doesn't work with the nvidia drivers)
1075 err = (cl->dlocl->symbols->dt_clGetDeviceIDs)(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &(all_num_devices[n]));
1076 if(err != CL_SUCCESS)
1077 {
1078 cl_int errv = (cl->dlocl->symbols->dt_clGetPlatformInfo)(platform, CL_PLATFORM_VENDOR, DT_OPENCL_CBUFFSIZE, platform_vendor, NULL);
1079 cl_int errn = (cl->dlocl->symbols->dt_clGetPlatformInfo)(platform, CL_PLATFORM_NAME, DT_OPENCL_CBUFFSIZE, platform_name, NULL);
1080 if((errn == CL_SUCCESS) && (errv == CL_SUCCESS))
1081 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] no devices found for %s (vendor) - %s (name)\n", platform_vendor, platform_name);
1082 else
1083 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] no devices found for unknown platform\n");
1084
1085 all_num_devices[n] = 0;
1086 }
1087 else
1088 {
1089 char profile[64] = { 0 };
1090 size_t profile_size;
1091 err = (cl->dlocl->symbols->dt_clGetPlatformInfo)(platform, CL_PLATFORM_PROFILE, 64, profile, &profile_size);
1092 if(err != CL_SUCCESS)
1093 {
1094 all_num_devices[n] = 0;
1095 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] could not get profile: %i\n", err);
1096 }
1097 else
1098 {
1099 // fprintf(stderr, "%s\n", profile);
1100 if(strcmp("FULL_PROFILE", profile) != 0)
1101 {
1102 all_num_devices[n] = 0;
1103 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] platform %i is not FULL_PROFILE\n", n);
1104 }
1105 }
1106 }
1107 }
1108
1109 cl_uint num_devices = 0;
1110 for(int n = 0; n < num_platforms; n++) num_devices += all_num_devices[n];
1111
1112 // create the device list
1113 cl_device_id *devices = 0;
1114 if(num_devices)
1115 {
1116 cl->dev = (dt_opencl_device_t *)malloc(sizeof(dt_opencl_device_t) * num_devices);
1117 devices = (cl_device_id *)malloc(sizeof(cl_device_id) * num_devices);
1118 if(IS_NULL_PTR(cl->dev) || IS_NULL_PTR(devices))
1119 {
1120 dt_free(cl->dev);
1121 dt_free(devices);
1122 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] could not allocate memory\n");
1123 goto finally;
1124 }
1125 }
1126
1127 cl_device_id *devs = devices;
1128 for(int n = 0; n < num_platforms; n++)
1129 {
1130 if(all_num_devices[n])
1131 {
1132 cl_platform_id platform = all_platforms[n];
1133 err = (cl->dlocl->symbols->dt_clGetDeviceIDs)(platform, CL_DEVICE_TYPE_ALL, all_num_devices[n], devs,
1134 NULL);
1135 if(err != CL_SUCCESS)
1136 {
1137 num_devices -= all_num_devices[n];
1138 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] could not get devices list: %i\n", err);
1139 }
1140 devs += all_num_devices[n];
1141 }
1142 }
1143 devs = NULL;
1144
1145 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] found %d device%s\n", num_devices, num_devices > 1 ? "s" : "");
1146 if(num_devices == 0)
1147 {
1148 if(devices)
1149 {
1150 dt_free(devices);
1151 }
1152 goto finally;
1153 }
1154
1155 int dev = 0;
1156 for(int k = 0; k < num_devices; k++)
1157 {
1158 const int res = dt_opencl_device_init(cl, dev, devices, k);
1159 if(res != 0)
1160 continue;
1161 // increase dev only if dt_opencl_device_init was successful (res == 0)
1162 ++dev;
1163 }
1164 dt_free(devices);
1165
1166 if(dev > 0)
1167 {
1168 cl->num_devs = dev;
1169 cl->inited = 1;
1170 cl->enabled = dt_conf_get_bool("opencl");
1171 memset(cl->mandatory, 0, sizeof(cl->mandatory));
1172 cl->dev_priority_image = (int *)malloc(sizeof(int) * (dev + 1));
1173 cl->dev_priority_preview = (int *)malloc(sizeof(int) * (dev + 1));
1174 cl->dev_priority_export = (int *)malloc(sizeof(int) * (dev + 1));
1175 cl->dev_priority_thumbnail = (int *)malloc(sizeof(int) * (dev + 1));
1176
1177 // only check successful malloc in debug mode; darktable will crash anyhow sooner or later if mallocs that
1178 // small would fail
1181
1182 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] OpenCL successfully initialized.\n");
1183 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] here are the internal numbers and names of OpenCL devices available to Ansel:\n");
1184 for(int i = 0; i < dev; i++) dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init]\t\t%d\t'%s'\n", i, cl->dev[i].name);
1185 }
1186 else
1187 {
1188 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] no suitable devices found.\n");
1189 }
1190
1191finally:
1192 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] FINALLY: opencl is %sAVAILABLE on this system.\n",
1193 cl->inited ? "" : "NOT ");
1194 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_init] initial status of opencl enabled flag is %s.\n",
1195 cl->enabled ? "ON" : "OFF");
1196
1197 char checksum[64];
1198 snprintf(checksum, sizeof(checksum), "%u", cl->crc);
1199
1200 if(cl->inited)
1201 {
1202 dt_capabilities_add("opencl");
1208 cl->dwt = dt_dwt_init_cl_global();
1212 }
1213
1215
1216 if(!cl->inited)// initialization failed
1217 {
1218 for(int i = 0; cl->dev && i < cl->num_devs; i++) dt_opencl_cleanup_device(cl, i);
1219 }
1220
1221 dt_free(all_num_devices);
1222 dt_free(all_platforms);
1223 dt_free(platform_name);
1224 dt_free(platform_vendor);
1225
1226 if(locale)
1227 {
1228 setlocale(LC_ALL, locale);
1229 dt_free(locale);
1230 }
1231
1232 return;
1233}
1234
1236{
1238 for(int k = 0; k < DT_OPENCL_MAX_KERNELS; k++)
1239 if(cl->dev[i].kernel_used[k]) (cl->dlocl->symbols->dt_clReleaseKernel)(cl->dev[i].kernel[k]);
1240 for(int k = 0; k < DT_OPENCL_MAX_PROGRAMS; k++)
1241 if(cl->dev[i].program_used[k]) (cl->dlocl->symbols->dt_clReleaseProgram)(cl->dev[i].program[k]);
1242 if(!IS_NULL_PTR(cl->dev[i].cmd_queue))
1244 if(!IS_NULL_PTR(cl->dev[i].context))
1246
1248 {
1249 dt_print_nts(DT_DEBUG_OPENCL, " [opencl_summary_statistics] device '%s' (%d): peak memory usage %" G_GSIZE_FORMAT " bytes (%.1f MB)\n",
1250 cl->dev[i].name, i, cl->dev[i].peak_memory, (float)cl->dev[i].peak_memory/(1024*1024));
1251 }
1252
1253 if(cl->print_statistics && cl->dev[i].use_events)
1254 {
1255 if(cl->dev[i].totalevents)
1256 {
1257 dt_print_nts(DT_DEBUG_OPENCL, " [opencl_summary_statistics] device '%s' (%d): %d out of %d events were "
1258 "successful and %d events lost. max event=%d%s\n",
1259 cl->dev[i].name, i, cl->dev[i].totalsuccess, cl->dev[i].totalevents, cl->dev[i].totallost,
1260 cl->dev[i].maxeventslot, (cl->dev[i].maxeventslot > 1024) ? "\n *** Warning, slots > 1024" : "");
1261 }
1262 else
1263 {
1264 dt_print_nts(DT_DEBUG_OPENCL, " [opencl_summary_statistics] device '%s' (%d): NOT utilized\n",
1265 cl->dev[i].name, i);
1266 }
1267 }
1268
1269 if(cl->dev[i].use_events)
1270 {
1272
1273 dt_free(cl->dev[i].eventlist);
1274 dt_free(cl->dev[i].eventtags);
1275 }
1276
1277 dt_free(cl->dev[i].vendor);
1278 dt_free(cl->dev[i].name);
1279 dt_free(cl->dev[i].cname);
1280 dt_free(cl->dev[i].options);
1281 dt_free(cl->dev[i].options_md5);
1282}
1283
1285{
1286 if(cl->inited)
1287 {
1297
1298 for(int i = 0; i < cl->num_devs; i++)
1300
1305 }
1306
1307 if(cl->dlocl)
1308 {
1309 dt_free(cl->dlocl->symbols);
1310 dt_free(cl->dlocl->library);
1311 dt_free(cl->dlocl);
1312 }
1313
1314 for(int i = 0; i < cl->num_detected_devs; i++)
1315 {
1318 }
1320
1321 dt_free(cl->dev);
1323}
1324
1325static const char *dt_opencl_get_vendor_by_id(unsigned int id)
1326{
1327 const char *vendor;
1328
1329 switch(id)
1330 {
1332 vendor = "AMD";
1333 break;
1335 vendor = "NVIDIA";
1336 break;
1338 vendor = "INTEL";
1339 break;
1340 default:
1341 vendor = "UNKNOWN";
1342 }
1343
1344 return vendor;
1345}
1346
1347gboolean dt_opencl_finish(const int devid)
1348{
1350 if(!cl->inited || devid < 0) return FALSE;
1351
1352 cl_int err = (cl->dlocl->symbols->dt_clFinish)(cl->dev[devid].cmd_queue);
1353
1354 // take the opportunity to release some event handles, but without printing
1355 // summary statistics
1356 cl_int success = dt_opencl_events_flush(devid, 0);
1357
1358 return (err == CL_SUCCESS && success == CL_COMPLETE);
1359}
1360
1361int dt_opencl_enqueue_barrier(const int devid)
1362{
1364 if(!cl->inited || devid < 0) return -1;
1365 return (cl->dlocl->symbols->dt_clEnqueueBarrier)(cl->dev[devid].cmd_queue);
1366}
1367
1368static int _take_from_list(int *list, int value)
1369{
1370 int result = -1;
1371
1372 while(*list != -1 && *list != value) list++;
1373 result = *list;
1374
1375 while(*list != -1)
1376 {
1377 *list = *(list + 1);
1378 list++;
1379 }
1380
1381 return result;
1382}
1383
1384
1385static int _device_by_cname(const char *name)
1386{
1388 int devs = cl->num_devs;
1389 char tmp[2048] = { 0 };
1390 int result = -1;
1391
1392 _ascii_str_canonical(name, tmp, sizeof(tmp));
1393
1394 for(int i = 0; i < devs; i++)
1395 {
1396 if(!strcmp(tmp, cl->dev[i].cname))
1397 {
1398 result = i;
1399 break;
1400 }
1401 }
1402
1403 return result;
1404}
1405
1406
1407static char *_ascii_str_canonical(const char *in, char *out, int maxlen)
1408{
1409 if(IS_NULL_PTR(out))
1410 {
1411 maxlen = strlen(in) + 1;
1412 out = malloc(maxlen);
1413 if(IS_NULL_PTR(out)) return NULL;
1414 }
1415
1416 int len = 0;
1417
1418 while(*in != '\0' && len < maxlen - 1)
1419 {
1420 int n = strcspn(in, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
1421 in += n;
1422 if(n != 0) continue;
1423 out[len] = tolower(*in);
1424 len++;
1425 in++;
1426 }
1427 out[len] = '\0';
1428
1429 return out;
1430}
1431
1432// parse a single token of priority string and store priorities in priority_list
1433static void dt_opencl_priority_parse(dt_opencl_t *cl, char *configstr, int *priority_list, int *mandatory)
1434{
1435 int devs = cl->num_devs;
1436 int count = 0;
1437 int *full = malloc(sizeof(int) * (devs + 1));
1438 int mnd = 0;
1439
1440 // NULL or empty configstring?
1441 if(IS_NULL_PTR(configstr) || *configstr == '\0')
1442 {
1443 priority_list[0] = -1;
1444 *mandatory = 0;
1445 dt_free(full);
1446 return;
1447 }
1448
1449 // check if user wants us to force-use opencl device(s)
1450 if(configstr[0] == '+')
1451 {
1452 mnd = 1;
1453 configstr++;
1454 }
1455
1456 // first start with a full list of devices to take from
1457 for(int i = 0; i < devs; i++) full[i] = i;
1458 full[devs] = -1;
1459
1460 gchar **tokens = g_strsplit(configstr, ",", 0);
1461 gchar **tokens_ptr = tokens;
1462
1463 while(!IS_NULL_PTR(tokens) && !IS_NULL_PTR(*tokens_ptr) && count < devs + 1 && full[0] != -1)
1464 {
1465 gchar *str = *tokens_ptr;
1466 int not = 0;
1467 int all = 0;
1468
1469 switch(*str)
1470 {
1471 case '*':
1472 all = 1;
1473 break;
1474 case '!':
1475 not = 1;
1476 while(*str == '!') str++;
1477 break;
1478 }
1479
1480 if(all)
1481 {
1482 // copy all remaining device numbers from full to priority list
1483 for(int i = 0; i < devs && full[i] != -1; i++)
1484 {
1485 priority_list[count] = full[i];
1486 count++;
1487 }
1488 full[0] = -1; // mark full list as empty
1489 }
1490 else if(*str != '\0')
1491 {
1492 char *endptr = NULL;
1493
1494 // first check if str corresponds to an existing canonical device name
1495 long number = _device_by_cname(str);
1496
1497 // if not try to convert string into decimal device number
1498 if(number < 0) number = strtol(str, &endptr, 10);
1499
1500 // still not found or negative number given? set number to -1
1501 if(number < 0 || (number == 0 && endptr == str)) number = -1;
1502
1503 // try to take number out of remaining device list
1504 int dev_number = _take_from_list(full, number);
1505
1506 if(!not&&dev_number != -1)
1507 {
1508 priority_list[count] = dev_number;
1509 count++;
1510 }
1511 }
1512
1513 tokens_ptr++;
1514 }
1515
1516 g_strfreev(tokens);
1517
1518 // terminate priority list with -1
1519 while(count < devs + 1) priority_list[count++] = -1;
1520
1521 // opencl use can only be mandatory if at least one opencl device is given
1522 *mandatory = (priority_list[0] != -1) ? mnd : 0;
1523
1524 dt_free(full);
1525}
1526
1527// set device priorities according to config string
1529{
1531 if(!cl->inited) return;
1532
1533 // Priority parsing iterates over the list of available devices.
1534 // If !cl->inited, that means we have no available device, so empty list.
1535 // Exit early of face a segfault
1536 char *darkroom = dt_conf_get_string("opencl_devid_darkroom");
1537 char *preview = dt_conf_get_string("opencl_devid_preview");
1538 char *export = dt_conf_get_string("opencl_devid_export");
1539 char *thumbnail = dt_conf_get_string("opencl_devid_thumbnail");
1540
1541 dt_opencl_priority_parse(cl, darkroom, cl->dev_priority_image, &cl->mandatory[0]);
1542 dt_opencl_priority_parse(cl, preview, cl->dev_priority_preview, &cl->mandatory[1]);
1543 dt_opencl_priority_parse(cl, export, cl->dev_priority_export, &cl->mandatory[2]);
1544 dt_opencl_priority_parse(cl, thumbnail, cl->dev_priority_thumbnail, &cl->mandatory[3]);
1545
1546 dt_free(darkroom);
1547 dt_free(preview);
1548 dt_free(export);
1549 dt_free(thumbnail);
1550
1551 dt_print_nts(DT_DEBUG_OPENCL, "[dt_opencl_update_priorities] these are your device priorities:\n");
1552 dt_print_nts(DT_DEBUG_OPENCL, "[dt_opencl_update_priorities] \tid |\t\tIMAGE\tPREVIEW\tEXPORT\tTHUMBS\n");
1553 for(int i = 0; i < cl->num_devs; i++)
1554 dt_print_nts(DT_DEBUG_OPENCL, "[dt_opencl_update_priorities]\t%i |\t\t%d\t%d\t%d\t%d\n",
1555 i, cl->dev_priority_image[i],
1557 dt_print_nts(DT_DEBUG_OPENCL, "[dt_opencl_update_priorities] show if opencl use is mandatory for a given pixelpipe:\n");
1558 dt_print_nts(DT_DEBUG_OPENCL, "[dt_opencl_update_priorities] \t\tIMAGE\tPREVIEW\tEXPORT\tTHUMBS\n");
1559 dt_print_nts(DT_DEBUG_OPENCL, "[dt_opencl_update_priorities]\t\t%s\t%s\t%s\t%s\n", cl->mandatory[0] ? "yes" : "no",
1560 cl->mandatory[1] ? "yes" : "no", cl->mandatory[2] ? "yes" : "no", cl->mandatory[3] ? "yes" : "no");
1561}
1562
1563int dt_opencl_lock_device(const int pipetype)
1564{
1566 if(!cl->inited) return -1;
1567
1568
1570
1571 size_t prio_size = sizeof(int) * (cl->num_devs + 1);
1572 int *priority = (int *)malloc(prio_size);
1573 int mandatory;
1574
1575 switch(pipetype)
1576 {
1578 memcpy(priority, cl->dev_priority_image, prio_size);
1579 mandatory = cl->mandatory[0];
1580 break;
1582 memcpy(priority, cl->dev_priority_preview, prio_size);
1583 mandatory = cl->mandatory[1];
1584 break;
1586 memcpy(priority, cl->dev_priority_export, prio_size);
1587 mandatory = cl->mandatory[2];
1588 break;
1590 memcpy(priority, cl->dev_priority_thumbnail, prio_size);
1591 mandatory = cl->mandatory[3];
1592 break;
1593 default:
1594 dt_free(priority);
1595 mandatory = 0;
1596 }
1597
1599
1600 if(priority)
1601 {
1602 const int usec = 5000;
1603 const int nloop = MAX(0, dt_conf_get_int("opencl_mandatory_timeout"));
1604
1605 // check for free opencl device repeatedly if mandatory is TRUE, else give up after first try
1606 for(int n = 0; n < nloop; n++)
1607 {
1608 const int *prio = priority;
1609
1610 while(*prio != -1)
1611 {
1612 if(!dt_pthread_mutex_BAD_trylock(&cl->dev[*prio].lock))
1613 {
1614 int devid = *prio;
1615 dt_free(priority);
1616 return devid;
1617 }
1618 prio++;
1619 }
1620
1621 if(!mandatory)
1622 {
1623 dt_free(priority);
1624 return -1;
1625 }
1626
1627 dt_iop_nap(usec);
1628 }
1629 dt_print(DT_DEBUG_OPENCL, "[opencl_lock_device] reached opencl_mandatory_timeout trying to lock mandatory device, fallback to CPU\n");
1630 }
1631 else
1632 {
1633 // only a fallback if a new pipe type would be added and we forget to take care of it in opencl.c
1634 for(int try_dev = 0; try_dev < cl->num_devs; try_dev++)
1635 {
1636 // get first currently unused processor
1637 if(!dt_pthread_mutex_BAD_trylock(&cl->dev[try_dev].lock)) return try_dev;
1638 }
1639 }
1640
1641 dt_free(priority);
1642
1643 // no free GPU :(
1644 // use CPU processing, if no free device:
1645 return -1;
1646}
1647
1648void dt_opencl_unlock_device(const int dev)
1649{
1651 if(!cl->inited) return;
1652 if(dev < 0 || dev >= cl->num_devs) return;
1654}
1655
1656static FILE *fopen_stat(const char *filename, struct stat *st)
1657{
1658 FILE *f = g_fopen(filename, "rb");
1659 if(IS_NULL_PTR(f))
1660 {
1661 dt_print(DT_DEBUG_OPENCL, "[opencl_fopen_stat] could not open file `%s'!\n", filename);
1662 return NULL;
1663 }
1664 int fd = fileno(f);
1665 if(fstat(fd, st) < 0)
1666 {
1667 dt_print(DT_DEBUG_OPENCL, "[opencl_fopen_stat] could not stat file `%s'!\n", filename);
1668 return NULL;
1669 }
1670 return f;
1671}
1672
1673
1674void dt_opencl_md5sum(const char **files, char **md5sums)
1675{
1676 char kerneldir[PATH_MAX] = { 0 };
1677 char filename[PATH_MAX] = { 0 };
1678 dt_loc_get_kerneldir(kerneldir, sizeof(kerneldir));
1679
1680 for(int n = 0; n < DT_OPENCL_MAX_INCLUDES; n++, files++, md5sums++)
1681 {
1682 if(!*files)
1683 {
1684 *md5sums = NULL;
1685 continue;
1686 }
1687
1688 dt_concat_path_file(filename, kerneldir, *files);
1689
1690 struct stat filestat;
1691 FILE *f = fopen_stat(filename, &filestat);
1692
1693 if(IS_NULL_PTR(f))
1694 {
1695 dt_print(DT_DEBUG_OPENCL, "[opencl_md5sums] could not open file `%s'!\n", filename);
1696 *md5sums = NULL;
1697 continue;
1698 }
1699
1700 size_t filesize = filestat.st_size;
1701 char *file = (char *)malloc(filesize);
1702
1703 if(IS_NULL_PTR(file))
1704 {
1705 dt_print(DT_DEBUG_OPENCL, "[opencl_md5sums] could not allocate buffer for file `%s'!\n", filename);
1706 *md5sums = NULL;
1707 fclose(f);
1708 continue;
1709 }
1710
1711 size_t rd = fread(file, sizeof(char), filesize, f);
1712 fclose(f);
1713
1714 if(rd != filesize)
1715 {
1716 dt_free(file);
1717 dt_print(DT_DEBUG_OPENCL, "[opencl_md5sums] could not read all of file `%s'!\n", filename);
1718 *md5sums = NULL;
1719 continue;
1720 }
1721
1722 *md5sums = g_compute_checksum_for_data(G_CHECKSUM_MD5, (guchar *)file, filesize);
1723
1724 dt_free(file);
1725 }
1726}
1727
1728int dt_opencl_load_program(const int dev, const int prog, const char *filename, const char *binname,
1729 const char *cachedir, char *md5sum, char **includemd5, int *loaded_cached)
1730{
1731 cl_int err;
1733
1734 struct stat filestat, cachedstat;
1735 *loaded_cached = 0;
1736
1737 if(prog < 0 || prog >= DT_OPENCL_MAX_PROGRAMS)
1738 {
1739 dt_print(DT_DEBUG_OPENCL, "[opencl_load_source] invalid program number `%d' of file `%s'!\n", prog,
1740 filename);
1741 return 0;
1742 }
1743
1744 if(cl->dev[dev].program_used[prog])
1745 {
1747 "[opencl_load_source] program number `%d' already in use when loading file `%s'!\n", prog,
1748 filename);
1749 return 0;
1750 }
1751
1752 FILE *f = fopen_stat(filename, &filestat);
1753 if(IS_NULL_PTR(f)) return 0;
1754
1755 size_t filesize = filestat.st_size;
1756 char *file = (char *)malloc(filesize + 2048);
1757 size_t rd = fread(file, sizeof(char), filesize, f);
1758 fclose(f);
1759 if(rd != filesize)
1760 {
1761 dt_free(file);
1762 dt_print(DT_DEBUG_OPENCL, "[opencl_load_source] could not read all of file `%s'!\n", filename);
1763 return 0;
1764 }
1765
1766 char *start = file + filesize;
1767 char *end = start + 2048;
1768 size_t len;
1769
1770 cl_device_id devid = cl->dev[dev].devid;
1771 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DRIVER_VERSION, end - start, start, &len);
1772 start += len;
1773
1774 cl_platform_id platform;
1775 (cl->dlocl->symbols->dt_clGetDeviceInfo)(devid, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL);
1776
1777 (cl->dlocl->symbols->dt_clGetPlatformInfo)(platform, CL_PLATFORM_VERSION, end - start, start, &len);
1778 start += len;
1779
1780 const char *options_md5 = cl->dev[dev].options_md5 ? cl->dev[dev].options_md5 : cl->dev[dev].options;
1781 len = g_strlcpy(start, options_md5, end - start);
1782 start += len;
1783
1784 /* make sure that the md5sums of all the includes are applied as well */
1785 for(int n = 0; n < DT_OPENCL_MAX_INCLUDES; n++)
1786 {
1787 if(!includemd5[n]) continue;
1788 len = g_strlcpy(start, includemd5[n], end - start);
1789 start += len;
1790 }
1791
1792 char *source_md5 = g_compute_checksum_for_data(G_CHECKSUM_MD5, (guchar *)file, start - file);
1793 g_strlcpy(md5sum, source_md5, 33);
1794 dt_free(source_md5);
1795
1796 file[filesize] = '\0';
1797
1798 char linkedfile[PATH_MAX] = { 0 };
1799 ssize_t linkedfile_len = 0;
1800
1801#if defined(_WIN32)
1802 // No symlinks on Windows
1803 // Have to figure out the name using the filename + md5sum
1804 char dup[PATH_MAX] = { 0 };
1805 snprintf(dup, sizeof(dup), "%s.%s", binname, md5sum);
1806 FILE *cached = fopen_stat(dup, &cachedstat);
1807 g_strlcpy(linkedfile, md5sum, sizeof(linkedfile));
1808 linkedfile_len = strlen(md5sum);
1809#else
1810 FILE *cached = fopen_stat(binname, &cachedstat);
1811#endif
1812
1813 if(cached)
1814 {
1815#if !defined(_WIN32)
1816 linkedfile_len = readlink(binname, linkedfile, sizeof(linkedfile) - 1);
1817#endif // !defined(_WIN32)
1818 if(linkedfile_len > 0)
1819 {
1820 linkedfile[linkedfile_len] = '\0';
1821
1822 if(strncmp(linkedfile, md5sum, 33) == 0)
1823 {
1824 // md5sum matches, load cached binary
1825 size_t cached_filesize = cachedstat.st_size;
1826
1827 unsigned char *cached_content = (unsigned char *)malloc(cached_filesize + 1);
1828 rd = fread(cached_content, sizeof(char), cached_filesize, cached);
1829 if(rd != cached_filesize)
1830 {
1831 dt_print(DT_DEBUG_OPENCL, "[opencl_load_program] could not read all of file '%s' MD5: %s!\n", binname, md5sum);
1832 }
1833 else
1834 {
1835 cl->dev[dev].program[prog] = (cl->dlocl->symbols->dt_clCreateProgramWithBinary)(
1836 cl->dev[dev].context, 1, &(cl->dev[dev].devid), &cached_filesize,
1837 (const unsigned char **)&cached_content, NULL, &err);
1838 if(err != CL_SUCCESS)
1839 {
1841 "[opencl_load_program] could not load cached binary program from file '%s' MD5: '%s'! (%i)\n",
1842 binname, md5sum, err);
1843 }
1844 else
1845 {
1846 cl->dev[dev].program_used[prog] = 1;
1847 *loaded_cached = 1;
1848 }
1849 }
1850 dt_free(cached_content);
1851 }
1852 }
1853 fclose(cached);
1854 }
1855
1856
1857 if(*loaded_cached == 0)
1858 {
1859 // if loading cached was unsuccessful for whatever reason,
1860 // try to remove cached binary & link
1861#if !defined(_WIN32)
1862 if(linkedfile_len > 0)
1863 {
1864 char link_dest[PATH_MAX] = { 0 };
1865 dt_concat_path_file(link_dest, cachedir, linkedfile);
1866 g_unlink(link_dest);
1867 }
1868 g_unlink(binname);
1869#else
1870 // delete the file which contains the MD5 name
1871 g_unlink(dup);
1872#endif
1873
1875 "[opencl_load_program] could not load cached binary program, trying to compile source\n");
1876
1877 cl->dev[dev].program[prog] = (cl->dlocl->symbols->dt_clCreateProgramWithSource)(
1878 cl->dev[dev].context, 1, (const char **)&file, &filesize, &err);
1879 dt_free(file);
1880 if((err != CL_SUCCESS) || (cl->dev[dev].program[prog] == NULL))
1881 {
1882 dt_print(DT_DEBUG_OPENCL, "[opencl_load_source] could not create program from file `%s'! (%i)\n",
1883 filename, err);
1884 return 0;
1885 }
1886 else
1887 {
1888 cl->dev[dev].program_used[prog] = 1;
1889 }
1890 }
1891 else
1892 {
1893 dt_free(file);
1894 dt_vprint(DT_DEBUG_OPENCL, "[opencl_load_program] loaded cached binary program from file '%s' MD5: '%s' \n", binname, md5sum);
1895 }
1896
1897 dt_vprint(DT_DEBUG_OPENCL, "[opencl_load_program] successfully loaded program from '%s' MD5: '%s'\n", filename, md5sum);
1898
1899 return 1;
1900}
1901
1902int dt_opencl_build_program(const int dev, const int prog, const char *binname, const char *cachedir,
1903 char *md5sum, int loaded_cached)
1904{
1905 if(prog < 0 || prog >= DT_OPENCL_MAX_PROGRAMS) return -1;
1907 cl_program program = cl->dev[dev].program[prog];
1908 cl_int err = (cl->dlocl->symbols->dt_clBuildProgram)(program, 1, &(cl->dev[dev].devid), cl->dev[dev].options, 0, 0);
1909
1910 if(err != CL_SUCCESS)
1911 dt_print(DT_DEBUG_OPENCL, "[opencl_build_program] could not build program: %i\n", err);
1912 else
1913 dt_vprint(DT_DEBUG_OPENCL, "[opencl_build_program] successfully built program\n");
1914
1915 cl_build_status build_status;
1916 (cl->dlocl->symbols->dt_clGetProgramBuildInfo)(program, cl->dev[dev].devid, CL_PROGRAM_BUILD_STATUS,
1917 sizeof(cl_build_status), &build_status, NULL);
1918 dt_vprint(DT_DEBUG_OPENCL, "[opencl_build_program] BUILD STATUS: %d\n", build_status);
1919
1920 char *build_log;
1921 size_t ret_val_size;
1922 (cl->dlocl->symbols->dt_clGetProgramBuildInfo)(program, cl->dev[dev].devid, CL_PROGRAM_BUILD_LOG, 0, NULL,
1923 &ret_val_size);
1924 if(ret_val_size != SIZE_MAX)
1925 {
1926 build_log = (char *)malloc(sizeof(char) * (ret_val_size + 1));
1927 if(build_log)
1928 {
1929 (cl->dlocl->symbols->dt_clGetProgramBuildInfo)(program, cl->dev[dev].devid, CL_PROGRAM_BUILD_LOG,
1930 ret_val_size, build_log, NULL);
1931
1932 build_log[ret_val_size] = '\0';
1933
1934 dt_vprint(DT_DEBUG_OPENCL, "BUILD LOG:\n");
1935 dt_vprint(DT_DEBUG_OPENCL, "%s\n", build_log);
1936
1937 dt_free(build_log);
1938 }
1939 }
1940
1941 if(err != CL_SUCCESS)
1942 return err;
1943 else
1944 {
1945 if(!loaded_cached)
1946 {
1947 dt_vprint(DT_DEBUG_OPENCL, "[opencl_build_program] saving binary\n");
1948
1949 cl_uint numdev = 0;
1950 err = (cl->dlocl->symbols->dt_clGetProgramInfo)(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint),
1951 &numdev, NULL);
1952 if(err != CL_SUCCESS)
1953 {
1954 dt_print(DT_DEBUG_OPENCL, "[opencl_build_program] CL_PROGRAM_NUM_DEVICES failed: %i\n", err);
1955 return CL_SUCCESS;
1956 }
1957
1958 cl_device_id *devices = malloc(sizeof(cl_device_id) * numdev);
1959 err = (cl->dlocl->symbols->dt_clGetProgramInfo)(program, CL_PROGRAM_DEVICES,
1960 sizeof(cl_device_id) * numdev, devices, NULL);
1961 if(err != CL_SUCCESS)
1962 {
1963 dt_print(DT_DEBUG_OPENCL, "[opencl_build_program] CL_PROGRAM_DEVICES failed: %i\n", err);
1964 dt_free(devices);
1965 return CL_SUCCESS;
1966 }
1967
1968 size_t *binary_sizes = malloc(sizeof(size_t) * numdev);
1969 err = (cl->dlocl->symbols->dt_clGetProgramInfo)(program, CL_PROGRAM_BINARY_SIZES,
1970 sizeof(size_t) * numdev, binary_sizes, NULL);
1971 if(err != CL_SUCCESS)
1972 {
1973 dt_print(DT_DEBUG_OPENCL, "[opencl_build_program] CL_PROGRAM_BINARY_SIZES failed: %i\n", err);
1974 dt_free(binary_sizes);
1975 dt_free(devices);
1976 return CL_SUCCESS;
1977 }
1978
1979 unsigned char **binaries = malloc(sizeof(unsigned char *) * numdev);
1980 for(int i = 0; i < numdev; i++) binaries[i] = (unsigned char *)malloc(binary_sizes[i]);
1981 err = (cl->dlocl->symbols->dt_clGetProgramInfo)(program, CL_PROGRAM_BINARIES,
1982 sizeof(unsigned char *) * numdev, binaries, NULL);
1983 if(err != CL_SUCCESS)
1984 {
1985 dt_print(DT_DEBUG_OPENCL, "[opencl_build_program] CL_PROGRAM_BINARIES failed: %i\n", err);
1986 goto ret;
1987 }
1988
1989 for(int i = 0; i < numdev; i++)
1990 if(cl->dev[dev].devid == devices[i])
1991 {
1992 // save opencl compiled binary as md5sum-named file
1993 char link_dest[PATH_MAX] = { 0 };
1994 snprintf(link_dest, sizeof(link_dest), "%s" G_DIR_SEPARATOR_S "%s", cachedir, md5sum);
1995 FILE *f = g_fopen(link_dest, "wb");
1996 if(IS_NULL_PTR(f)) goto ret;
1997 size_t bytes_written = fwrite(binaries[i], sizeof(char), binary_sizes[i], f);
1998 if(bytes_written != binary_sizes[i]) goto ret;
1999 fclose(f);
2000
2001 // create link (e.g. basic.cl.bin -> f1430102c53867c162bb60af6c163328)
2002 char cwd[PATH_MAX] = { 0 };
2003 if(!getcwd(cwd, sizeof(cwd))) goto ret;
2004 if(chdir(cachedir) != 0) goto ret;
2005 char dup[PATH_MAX] = { 0 };
2006 g_strlcpy(dup, binname, sizeof(dup));
2007 char *bname = basename(dup);
2008#if defined(_WIN32)
2009 //CreateSymbolicLink in Windows requires admin privileges, which we don't want/need
2010 //store has using a simple filerename
2011 char finalfilename[PATH_MAX] = { 0 };
2012 snprintf(finalfilename, sizeof(finalfilename), "%s" G_DIR_SEPARATOR_S "%s.%s", cachedir, bname, md5sum);
2013 rename(link_dest, finalfilename);
2014#else
2015 if(symlink(md5sum, bname) != 0) goto ret;
2016#endif
2017 if(chdir(cwd) != 0) goto ret;
2018 }
2019
2020 ret:
2021 for(int i = 0; i < numdev; i++) dt_free(binaries[i]);
2022 dt_free(binaries);
2023 dt_free(binary_sizes);
2024 dt_free(devices);
2025 }
2026 return CL_SUCCESS;
2027 }
2028}
2029
2030int dt_opencl_create_kernel(const int prog, const char *name)
2031{
2033 if(!cl->inited) return -1;
2034 if(prog < 0 || prog >= DT_OPENCL_MAX_PROGRAMS) return -1;
2036 int k = 0;
2037 for(int dev = 0; dev < cl->num_devs; dev++)
2038 {
2039 cl_int err;
2040 for(; k < DT_OPENCL_MAX_KERNELS; k++)
2041 if(!cl->dev[dev].kernel_used[k])
2042 {
2043 cl->dev[dev].kernel_used[k] = 1;
2044 cl->dev[dev].kernel[k]
2045 = (cl->dlocl->symbols->dt_clCreateKernel)(cl->dev[dev].program[prog], name, &err);
2046 if(err != CL_SUCCESS)
2047 {
2048 dt_print(DT_DEBUG_OPENCL, "[opencl_create_kernel] could not create kernel `%s'! (%i)\n", name, err);
2049 cl->dev[dev].kernel_used[k] = 0;
2050 goto error;
2051 }
2052 else
2053 break;
2054 }
2056 {
2057 dt_vprint(DT_DEBUG_OPENCL, "[opencl_create_kernel] successfully loaded kernel `%s' (%d) for device %d\n",
2058 name, k, dev);
2059 }
2060 else
2061 {
2062 dt_print(DT_DEBUG_OPENCL, "[opencl_create_kernel] too many kernels! can't create kernel `%s'\n", name);
2063 goto error;
2064 }
2065 }
2067 return k;
2068error:
2070 return -1;
2071}
2072
2074{
2076 if(!cl->inited) return;
2077 if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return;
2079 for(int dev = 0; dev < cl->num_devs; dev++)
2080 {
2081 cl->dev[dev].kernel_used[kernel] = 0;
2082 (cl->dlocl->symbols->dt_clReleaseKernel)(cl->dev[dev].kernel[kernel]);
2083 }
2085}
2086
2087int dt_opencl_get_max_work_item_sizes(const int dev, size_t *sizes)
2088{
2090 if(!cl->inited || dev < 0) return -1;
2091 return (cl->dlocl->symbols->dt_clGetDeviceInfo)(cl->dev[dev].devid, CL_DEVICE_MAX_WORK_ITEM_SIZES,
2092 sizeof(size_t) * 3, sizes, NULL);
2093}
2094
2095int dt_opencl_get_work_group_limits(const int dev, size_t *sizes, size_t *workgroupsize,
2096 unsigned long *localmemsize)
2097{
2099 if(!cl->inited || dev < 0) return -1;
2100 cl_ulong lmemsize;
2101 cl_int err = (cl->dlocl->symbols->dt_clGetDeviceInfo)(cl->dev[dev].devid, CL_DEVICE_LOCAL_MEM_SIZE,
2102 sizeof(cl_ulong), &lmemsize, NULL);
2103 if(err != CL_SUCCESS) return err;
2104
2105 *localmemsize = lmemsize;
2106
2107 err = (cl->dlocl->symbols->dt_clGetDeviceInfo)(cl->dev[dev].devid, CL_DEVICE_MAX_WORK_GROUP_SIZE,
2108 sizeof(size_t), workgroupsize, NULL);
2109 if(err != CL_SUCCESS) return err;
2110
2111 return dt_opencl_get_max_work_item_sizes(dev, sizes);
2112}
2113
2114
2115int dt_opencl_get_kernel_work_group_size(const int dev, const int kernel, size_t *kernelworkgroupsize)
2116{
2118 if(!cl->inited || dev < 0) return -1;
2119 if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return -1;
2120
2121 return (cl->dlocl->symbols->dt_clGetKernelWorkGroupInfo)(cl->dev[dev].kernel[kernel], cl->dev[dev].devid,
2122 CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
2123 kernelworkgroupsize, NULL);
2124}
2125
2126
2127int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size,
2128 const void *arg)
2129{
2131 if(!cl->inited || dev < 0) return -1;
2132 if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return -1;
2133 return (cl->dlocl->symbols->dt_clSetKernelArg)(cl->dev[dev].kernel[kernel], num, size, arg);
2134}
2135
2136int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
2137{
2138 return dt_opencl_enqueue_kernel_2d_with_local(dev, kernel, sizes, NULL);
2139}
2140
2141
2142int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes,
2143 const size_t *local)
2144{
2146 if(!cl->inited || dev < 0) return -1;
2147 if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return -1;
2148
2149 char buf[256];
2150 buf[0] = '\0';
2152 (cl->dlocl->symbols->dt_clGetKernelInfo)(cl->dev[dev].kernel[kernel], CL_KERNEL_FUNCTION_NAME, 256, buf, NULL);
2153 cl_event *eventp = dt_opencl_events_get_slot(dev, buf);
2154 cl_int err = (cl->dlocl->symbols->dt_clEnqueueNDRangeKernel)(cl->dev[dev].cmd_queue, cl->dev[dev].kernel[kernel],
2155 2, NULL, sizes, local, 0, NULL, eventp);
2156
2157 if(err != CL_SUCCESS)
2158 dt_print(DT_DEBUG_OPENCL, "[dt_opencl_enqueue_kernel_2d_with_local] kernel %i (%s) on device %d: %i\n", kernel, buf, dev, err);
2159
2160 return err;
2161}
2162
2163int dt_opencl_copy_device_to_host(const int devid, void *host, void *device, const int width,
2164 const int height, const int bpp)
2165{
2166 return dt_opencl_read_host_from_device(devid, host, device, width, height, bpp);
2167}
2168
2169int dt_opencl_read_host_from_device(const int devid, void *host, void *device, const int width,
2170 const int height, const int bpp)
2171{
2172 return dt_opencl_read_host_from_device_rowpitch(devid, host, device, width, height, bpp * width);
2173}
2174
2175int dt_opencl_read_host_from_device_rowpitch(const int devid, void *host, void *device, const int width,
2176 const int height, const int rowpitch)
2177{
2178 if(!darktable.opencl->inited || devid < 0) return -1;
2179 const size_t origin[] = { 0, 0, 0 };
2180 const size_t region[] = { width, height, 1 };
2181 // blocking.
2182 return dt_opencl_read_host_from_device_raw(devid, host, device, origin, region, rowpitch, CL_TRUE);
2183}
2184
2185int dt_opencl_read_host_from_device_non_blocking(const int devid, void *host, void *device, const int width,
2186 const int height, const int bpp)
2187{
2189 bpp * width);
2190}
2191
2192int dt_opencl_read_host_from_device_rowpitch_non_blocking(const int devid, void *host, void *device,
2193 const int width, const int height,
2194 const int rowpitch)
2195{
2196 if(!darktable.opencl->inited || devid < 0) return -1;
2197 const size_t origin[] = { 0, 0, 0 };
2198 const size_t region[] = { width, height, 1 };
2199 // non-blocking.
2200 return dt_opencl_read_host_from_device_raw(devid, host, device, origin, region, rowpitch, CL_FALSE);
2201}
2202
2203
2204int dt_opencl_read_host_from_device_raw(const int devid, void *host, void *device, const size_t *origin,
2205 const size_t *region, const int rowpitch, const int blocking)
2206{
2207 if(!darktable.opencl->inited) return -1;
2208
2209 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Read Image (from device to host)]");
2210
2212 device, blocking ? CL_TRUE : CL_FALSE, origin, region, rowpitch,
2213 0, host, 0, NULL, eventp);
2214}
2215
2216int dt_opencl_write_host_to_device(const int devid, void *host, void *device, const int width,
2217 const int height, const int bpp)
2218{
2219 return dt_opencl_write_host_to_device_rowpitch(devid, host, device, width, height, width * bpp);
2220}
2221
2222int dt_opencl_write_host_to_device_rowpitch(const int devid, void *host, void *device, const int width,
2223 const int height, const int rowpitch)
2224{
2225 if(!darktable.opencl->inited || devid < 0) return -1;
2226 const size_t origin[] = { 0, 0, 0 };
2227 const size_t region[] = { width, height, 1 };
2228 // blocking.
2229 return dt_opencl_write_host_to_device_raw(devid, host, device, origin, region, rowpitch, CL_TRUE);
2230}
2231
2232int dt_opencl_write_host_to_device_non_blocking(const int devid, void *host, void *device, const int width,
2233 const int height, const int bpp)
2234{
2236}
2237
2238int dt_opencl_write_host_to_device_rowpitch_non_blocking(const int devid, void *host, void *device,
2239 const int width, const int height,
2240 const int rowpitch)
2241{
2242 if(!darktable.opencl->inited || devid < 0) return -1;
2243 const size_t origin[] = { 0, 0, 0 };
2244 const size_t region[] = { width, height, 1 };
2245 // non-blocking.
2246 return dt_opencl_write_host_to_device_raw(devid, host, device, origin, region, rowpitch, CL_FALSE);
2247}
2248
2249int dt_opencl_write_host_to_device_raw(const int devid, const void *host, void *device, const size_t *origin,
2250 const size_t *region, const int rowpitch, const int blocking)
2251{
2252 if(!darktable.opencl->inited) return -1;
2253
2254 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Write Image (from host to device)]");
2255
2257 device, blocking ? CL_TRUE : CL_FALSE, origin, region,
2258 rowpitch, 0, host, 0, NULL, eventp);
2259}
2260
2261int dt_opencl_enqueue_copy_image(const int devid, cl_mem src, cl_mem dst, size_t *orig_src, size_t *orig_dst,
2262 size_t *region)
2263{
2264 if(!darktable.opencl->inited || devid < 0) return -1;
2265 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Copy Image (on device)]");
2267 darktable.opencl->dev[devid].cmd_queue, src, dst, orig_src, orig_dst, region, 0, NULL, eventp);
2268 if(err != CL_SUCCESS) dt_print(DT_DEBUG_OPENCL, "[opencl copy_image] could not copy image on device %d: %i\n", devid, err);
2269 return err;
2270}
2271
2272int dt_opencl_enqueue_copy_image_to_buffer(const int devid, cl_mem src_image, cl_mem dst_buffer,
2273 size_t *origin, size_t *region, size_t offset)
2274{
2275 if(!darktable.opencl->inited) return -1;
2276 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Copy Image to Buffer (on device)]");
2278 darktable.opencl->dev[devid].cmd_queue, src_image, dst_buffer, origin, region, offset, 0, NULL, eventp);
2279 if(err != CL_SUCCESS)
2280 dt_print(DT_DEBUG_OPENCL, "[opencl copy_image_to_buffer] could not copy image on device %d: %i\n", devid, err);
2281 return err;
2282}
2283
2284int dt_opencl_enqueue_copy_buffer_to_image(const int devid, cl_mem src_buffer, cl_mem dst_image,
2285 size_t offset, size_t *origin, size_t *region)
2286{
2287 if(!darktable.opencl->inited) return -1;
2288 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Copy Buffer to Image (on device)]");
2290 darktable.opencl->dev[devid].cmd_queue, src_buffer, dst_image, offset, origin, region, 0, NULL, eventp);
2291 if(err != CL_SUCCESS)
2292 dt_print(DT_DEBUG_OPENCL, "[opencl copy_buffer_to_image] could not copy buffer on device %d: %i\n", devid, err);
2293 return err;
2294}
2295
2296int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid, cl_mem src_buffer, cl_mem dst_buffer,
2297 size_t srcoffset, size_t dstoffset, size_t size)
2298{
2299 if(!darktable.opencl->inited) return -1;
2300 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Copy Buffer to Buffer (on device)]");
2302 src_buffer, dst_buffer, srcoffset,
2303 dstoffset, size, 0, NULL, eventp);
2304 if(err != CL_SUCCESS)
2305 dt_print(DT_DEBUG_OPENCL, "[opencl copy_buffer_to_buffer] could not copy buffer on device %d: %i\n", devid, err);
2306 return err;
2307}
2308
2309int dt_opencl_read_buffer_from_device(const int devid, void *host, void *device, const size_t offset,
2310 const size_t size, const int blocking)
2311{
2312 if(!darktable.opencl->inited) return -1;
2313
2314 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Read Buffer (from device to host)]");
2315
2317 darktable.opencl->dev[devid].cmd_queue, device, blocking ? CL_TRUE : CL_FALSE, offset, size, host, 0, NULL, eventp);
2318}
2319
2320int dt_opencl_write_buffer_to_device(const int devid, void *host, void *device, const size_t offset,
2321 const size_t size, const int blocking)
2322{
2323 if(!darktable.opencl->inited) return -1;
2324
2325 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Write Buffer (from host to device)]");
2326
2328 darktable.opencl->dev[devid].cmd_queue, device, blocking ? CL_TRUE : CL_FALSE, offset, size, host, 0, NULL, eventp);
2329}
2330
2331
2332void *dt_opencl_copy_host_to_device_constant(const int devid, const size_t size, void *host)
2333{
2334 if(!darktable.opencl->inited || devid < 0) return NULL;
2335 cl_int err;
2337 darktable.opencl->dev[devid].context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size, host, &err);
2338 if(err != CL_SUCCESS)
2340 "[opencl copy_host_to_device_constant] could not alloc buffer on device %d: %i\n", devid, err);
2341
2343
2344 return dev;
2345}
2346
2347void *dt_opencl_copy_host_to_device(const int devid, void *host, const int width, const int height,
2348 const int bpp)
2349{
2350 return dt_opencl_copy_host_to_device_rowpitch(devid, host, width, height, bpp, 0);
2351}
2352
2353void *dt_opencl_copy_host_to_device_rowpitch(const int devid, void *host, const int width, const int height,
2354 const int bpp, const int rowpitch)
2355{
2356 if(!darktable.opencl->inited || devid < 0) return NULL;
2357 cl_int err;
2358 cl_image_format fmt;
2359 // guess pixel format from bytes per pixel
2360 if(bpp == 4 * sizeof(float))
2361 fmt = (cl_image_format){ CL_RGBA, CL_FLOAT };
2362 else if(bpp == sizeof(float))
2363 fmt = (cl_image_format){ CL_R, CL_FLOAT };
2364 else if(bpp == sizeof(uint16_t))
2365 fmt = (cl_image_format){ CL_R, CL_UNSIGNED_INT16 };
2366 else
2367 return NULL;
2368
2369 // TODO: if fmt = uint16_t, blow up to 4xuint16_t and copy manually!
2371 darktable.opencl->dev[devid].context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &fmt, width, height,
2372 rowpitch, host, &err);
2373 if(err != CL_SUCCESS)
2375 "[opencl copy_host_to_device] could not alloc/copy img buffer on device %d: %i\n", devid, err);
2376
2378
2379 return dev;
2380}
2381
2382
2384{
2385 if(!darktable.opencl->inited) return;
2386
2387 // the OpenCL specs are not absolutely clear if clReleaseMemObject(NULL) is a no-op. we take care of the
2388 // case in a centralized way at this place
2389 if(IS_NULL_PTR(mem)) return;
2390
2392
2394}
2395
2396void *dt_opencl_map_buffer(const int devid, cl_mem buffer, const int blocking, const int flags, size_t offset,
2397 size_t size)
2398{
2399 if(!darktable.opencl->inited) return NULL;
2400 cl_int err;
2401 void *ptr;
2402 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Map Buffer]");
2404 darktable.opencl->dev[devid].cmd_queue, buffer, blocking ? CL_TRUE : CL_FALSE, flags, offset, size, 0, NULL, eventp, &err);
2405 if(err != CL_SUCCESS) dt_print(DT_DEBUG_OPENCL, "[opencl map buffer] could not map buffer on device %d: %i\n", devid, err);
2406 return ptr;
2407}
2408
2409
2410void *dt_opencl_map_image(const int devid, cl_mem buffer, const int blocking, const int flags, size_t width, size_t height, int bpp)
2411{
2412 if(!darktable.opencl->inited) return NULL;
2413 cl_int err;
2414 void *ptr;
2415 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Map Image 2D]");
2416 size_t origin[3] = {0, 0, 0};
2417 size_t region[3] = {width, height, 1};
2418 size_t mapped_row_pitch;
2419
2421 darktable.opencl->dev[devid].cmd_queue, buffer, blocking ? CL_TRUE : CL_FALSE, flags, origin, region,
2422 &mapped_row_pitch, NULL, 0, NULL, eventp, &err);
2423
2424 if(err != CL_SUCCESS)
2425 dt_print(DT_DEBUG_OPENCL, "[opencl map buffer] could not map image on device %d: %i\n", devid, err);
2426 return ptr;
2427}
2428
2429
2430int dt_opencl_unmap_mem_object(const int devid, cl_mem mem_object, void *mapped_ptr)
2431{
2432 if(!darktable.opencl->inited) return -1;
2433 cl_event *eventp = dt_opencl_events_get_slot(devid, "[Unmap Mem Object]");
2435 darktable.opencl->dev[devid].cmd_queue, mem_object, mapped_ptr, 0, NULL, eventp);
2436 if(err != CL_SUCCESS)
2437 dt_print(DT_DEBUG_OPENCL, "[opencl unmap mem object] could not unmap mem object on device %d: %i\n", devid, err);
2438 return err;
2439}
2440
2441static inline void *_dt_opencl_alloc_image2d(const int devid, const int width, const int height,
2442 const cl_mem_flags flags, const cl_image_format fmt, void *host,
2443 const char *const context)
2444{
2445 if(!darktable.opencl->inited || devid < 0) return NULL;
2446 cl_int err;
2447 cl_mem dev = NULL;
2448 for(int attempt = 0; attempt < 2; attempt++)
2449 {
2451 &fmt, width, height, 0, host, &err);
2452 if(err == CL_SUCCESS) break;
2453 if(attempt == 0 && (err == CL_MEM_OBJECT_ALLOCATION_FAILURE || err == CL_OUT_OF_RESOURCES))
2454 {
2456 "[opencl %s] out of memory on device %d, flushing cached pinned buffers and retrying\n",
2457 context, devid);
2459 continue;
2460 }
2461 break;
2462 }
2463
2464 if(err != CL_SUCCESS)
2465 dt_print(DT_DEBUG_OPENCL, "[opencl %s] could not alloc img buffer on device %d: %i\n", context, devid, err);
2466
2467 if(err == CL_SUCCESS) dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
2468 return dev;
2469}
2470
2471void *dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
2472{
2473 const int effective_bpp = DT_OPENCL_BPP_DECODE(bpp);
2474 const gboolean rgba8 = DT_OPENCL_BPP_IS_RGBA8(bpp);
2475 cl_image_format fmt;
2476 // guess pixel format from bytes per pixel (+ optional format tag for ambiguous 4-byte formats)
2477 if(rgba8 && effective_bpp == 4 * sizeof(uint8_t))
2478 fmt = (cl_image_format){ CL_RGBA, CL_UNSIGNED_INT8 };
2479 else if(effective_bpp == 4 * sizeof(float))
2480 fmt = (cl_image_format){ CL_RGBA, CL_FLOAT };
2481 else if(effective_bpp == sizeof(float))
2482 fmt = (cl_image_format){ CL_R, CL_FLOAT };
2483 else if(effective_bpp == sizeof(uint16_t))
2484 fmt = (cl_image_format){ CL_R, CL_UNSIGNED_INT16 };
2485 else if(effective_bpp == sizeof(uint8_t))
2486 fmt = (cl_image_format){ CL_R, CL_UNSIGNED_INT8 };
2487 else
2488 return NULL;
2489
2490 return _dt_opencl_alloc_image2d(devid, width, height, CL_MEM_READ_WRITE, fmt, NULL, "alloc_device");
2491}
2492
2493void *dt_opencl_alloc_device_use_host_pointer(const int devid, const int width, const int height,
2494 const int bpp, void *host, const int flags)
2495{
2496 const int effective_bpp = DT_OPENCL_BPP_DECODE(bpp);
2497 const gboolean rgba8 = DT_OPENCL_BPP_IS_RGBA8(bpp);
2498 cl_image_format fmt;
2499 // guess pixel format from bytes per pixel (+ optional format tag for ambiguous 4-byte formats)
2500 if(rgba8 && effective_bpp == 4 * sizeof(uint8_t))
2501 fmt = (cl_image_format){ CL_RGBA, CL_UNSIGNED_INT8 };
2502 else if(effective_bpp == 4 * sizeof(float))
2503 fmt = (cl_image_format){ CL_RGBA, CL_FLOAT };
2504 else if(effective_bpp == sizeof(float))
2505 fmt = (cl_image_format){ CL_R, CL_FLOAT };
2506 else if(effective_bpp == sizeof(uint16_t))
2507 fmt = (cl_image_format){ CL_R, CL_UNSIGNED_INT16 };
2508 else
2509 return NULL;
2510
2511 return _dt_opencl_alloc_image2d(devid, width, height, flags, fmt, host, "alloc_device_use_host_pointer");
2512}
2513
2514void *dt_opencl_alloc_device_buffer_with_flags(const int devid, const size_t size, const int flags, void *host_ptr)
2515{
2516 if(!darktable.opencl->inited) return NULL;
2517 cl_int err;
2518 cl_mem buf = NULL;
2519 for(int attempt = 0; attempt < 2; attempt++)
2520 {
2522 flags, size, host_ptr, &err);
2523 if(err == CL_SUCCESS) break;
2524 if(attempt == 0 && (err == CL_MEM_OBJECT_ALLOCATION_FAILURE || err == CL_OUT_OF_RESOURCES))
2525 {
2527 "[opencl alloc_device_buffer] out of memory on device %d, flushing cached pinned buffers and retrying\n",
2528 devid);
2530 continue;
2531 }
2532 break;
2533 }
2534 if(err != CL_SUCCESS)
2535 dt_print(DT_DEBUG_OPENCL, "[opencl alloc_device_buffer] could not alloc buffer on device %d: %d\n", devid,
2536 err);
2537
2538 if(err == CL_SUCCESS) dt_opencl_memory_statistics(devid, buf, OPENCL_MEMORY_ADD);
2539
2540 return buf;
2541}
2542
2543
2544void *dt_opencl_alloc_device_buffer(const int devid, const size_t size)
2545{
2546 return dt_opencl_alloc_device_buffer_with_flags(devid, size, CL_MEM_READ_WRITE, NULL);
2547}
2548
2549
2551{
2552 size_t size;
2553 if(IS_NULL_PTR(mem)) return 0;
2554
2555 cl_int err = (darktable.opencl->dlocl->symbols->dt_clGetMemObjectInfo)(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
2556
2557 return (err == CL_SUCCESS) ? size : 0;
2558}
2559
2561{
2562 cl_context context;
2563 if(IS_NULL_PTR(mem)) return -1;
2564
2565 cl_int err = (darktable.opencl->dlocl->symbols->dt_clGetMemObjectInfo)(mem, CL_MEM_CONTEXT, sizeof(context), &context, NULL);
2566 if(err != CL_SUCCESS)
2567 return -1;
2568
2569 for(int devid = 0; devid < darktable.opencl->num_devs; devid++)
2570 {
2571 if(darktable.opencl->dev[devid].context == context)
2572 return devid;
2573 }
2574
2575 return -1;
2576}
2577
2578cl_mem_flags dt_opencl_get_mem_flags(cl_mem mem)
2579{
2580 if(!darktable.opencl->inited || IS_NULL_PTR(mem)) return 0;
2581 cl_mem_flags flags = 0;
2582 cl_int err = (darktable.opencl->dlocl->symbols->dt_clGetMemObjectInfo)(mem, CL_MEM_FLAGS, sizeof(flags), &flags, NULL);
2583 if(err != CL_SUCCESS) return 0;
2584 return flags;
2585}
2586
2588{
2589 size_t size;
2590 if(IS_NULL_PTR(mem)) return 0;
2591
2592 cl_int err = (darktable.opencl->dlocl->symbols->dt_clGetImageInfo)(mem, CL_IMAGE_WIDTH, sizeof(size), &size, NULL);
2593 if(size > INT_MAX) size = 0;
2594
2595 return (err == CL_SUCCESS) ? (int)size : 0;
2596}
2597
2599{
2600 size_t size;
2601 if(IS_NULL_PTR(mem)) return 0;
2602
2603 cl_int err = (darktable.opencl->dlocl->symbols->dt_clGetImageInfo)(mem, CL_IMAGE_HEIGHT, sizeof(size), &size, NULL);
2604 if(size > INT_MAX) size = 0;
2605
2606 return (err == CL_SUCCESS) ? (int)size : 0;
2607}
2608
2610{
2611 size_t size;
2612 if(IS_NULL_PTR(mem)) return 0;
2613
2614 cl_int err = (darktable.opencl->dlocl->symbols->dt_clGetImageInfo)(mem, CL_IMAGE_ELEMENT_SIZE, sizeof(size), &size,
2615 NULL);
2616 if(size > INT_MAX) size = 0;
2617
2618 return (err == CL_SUCCESS) ? (int)size : 0;
2619}
2620
2621void dt_opencl_memory_statistics(int devid, cl_mem mem, dt_opencl_memory_t action)
2622{
2623 if(devid < 0)
2624 devid = dt_opencl_get_mem_context_id(mem);
2625
2626 if(devid < 0)
2627 return;
2628
2629 const size_t size = dt_opencl_get_mem_object_size(mem);
2630 if(action == OPENCL_MEMORY_ADD)
2632 else
2635 ? (darktable.opencl->dev[devid].memory_in_use - size)
2636 : 0;
2637
2640
2643 "[opencl memory] device %d: %" G_GSIZE_FORMAT " bytes (%.1f MB) in use\n", devid, darktable.opencl->dev[devid].memory_in_use,
2644 (float)darktable.opencl->dev[devid].memory_in_use/(1024*1024));
2645}
2646
2647void dt_opencl_check_tuning(const int devid)
2648{
2650 if(!cl->inited || devid < 0) return;
2651
2652 // Apply the headroom read from this device configuration. Older configs without
2653 // a per-device key are migrated from the global default during device init.
2654 size_t headroom = cl->dev[devid].forced_headroom;
2655
2656 cl->dev[devid].used_available = MAX(0ul, cl->dev[devid].max_global_mem - headroom * 1024 * 1024);
2657
2659 "[dt_opencl_check_tuning] use %" G_GSIZE_FORMAT " MiB on device `%s' id=%i\n",
2660 cl->dev[devid].used_available / (1024 * 1024),
2661 cl->dev[devid].name, devid);
2662}
2663
2664cl_ulong dt_opencl_get_device_available(const int devid)
2665{
2666 if(!darktable.opencl->inited || devid < 0) return 0;
2667 const cl_ulong limit = darktable.opencl->dev[devid].used_available;
2668 const size_t in_use = darktable.opencl->dev[devid].memory_in_use;
2669 return (limit > in_use) ? (limit - in_use) : 0;
2670}
2671
2672static cl_ulong _opencl_get_device_memalloc(const int devid)
2673{
2674 return darktable.opencl->dev[devid].max_mem_alloc;
2675}
2676
2677cl_ulong dt_opencl_get_device_memalloc(const int devid)
2678{
2679 if(!darktable.opencl->inited || devid < 0) return 0;
2680 return _opencl_get_device_memalloc(devid);
2681}
2682
2683gboolean dt_opencl_image_fits_device(const int devid, const size_t width, const size_t height, const unsigned bpp,
2684 const float factor, const size_t overhead)
2685{
2687 if(!cl->inited || devid < 0) return FALSE;
2688
2689 const size_t required = width * height * bpp;
2690 const size_t total = (size_t)ceilf((float)required * factor) + overhead;
2691
2692 if(cl->dev[devid].max_image_width < width || cl->dev[devid].max_image_height < height)
2693 return FALSE;
2694
2695 if(_opencl_get_device_memalloc(devid) < required)
2696 {
2698 "[opencl] trying to allocate %" PRIu64 " MiB of memory while the vRAM has %" PRIu64
2699 " MiB total\n",
2700 (uint64_t)(required / (1024 * 1024)),
2701 (uint64_t)(_opencl_get_device_memalloc(devid) / (1024 * 1024)));
2702 return FALSE;
2703 }
2704
2705 if(dt_opencl_get_device_available(devid) >= total)
2706 return TRUE;
2707
2709 "[opencl] trying to allocate %" PRIu64 " MiB of memory while the vRAM has %" PRIu64
2710 " MiB left\n",
2711 (uint64_t)(total / (1024 * 1024)),
2712 (uint64_t)(dt_opencl_get_device_available(devid) / (1024 * 1024)));
2713
2714 return FALSE;
2715}
2716
2718int dt_opencl_dev_roundup_width(int size, const int devid)
2719{
2720 const int roundup = darktable.opencl->dev[devid].clroundup_wd;
2721 return (size % roundup == 0 ? size : (size / roundup + 1) * roundup);
2722}
2723int dt_opencl_dev_roundup_height(int size, const int devid)
2724{
2725 const int roundup = darktable.opencl->dev[devid].clroundup_ht;
2726 return (size % roundup == 0 ? size : (size / roundup + 1) * roundup);
2727}
2728
2731{
2732 return darktable.opencl->inited;
2733}
2734
2735
2738{
2739 if(!darktable.opencl->inited) return FALSE;
2740 return darktable.opencl->enabled;
2741}
2742
2743
2746{
2747 if(!darktable.opencl->inited) return;
2749 dt_conf_set_bool("opencl", FALSE);
2750}
2751
2752
2755{
2757 // FIXME: This pulls in prefs every time the pixelpipe runs. Instead have a callback for DT_SIGNAL_PREFERENCES_CHANGE?
2758 if(!cl->inited) return FALSE;
2759 const int prefs = dt_conf_get_bool("opencl");
2760
2761 if(cl->enabled != prefs)
2762 {
2763 cl->enabled = prefs;
2764 cl->stopped = 0;
2765 cl->error_count = 0;
2766 dt_print(DT_DEBUG_OPENCL, "[opencl_update_enabled] enabled flag set to %s\n", prefs ? "ON" : "OFF");
2767 }
2768
2769 return (cl->enabled && !cl->stopped);
2770}
2771
2772
2775{
2777 dt_print_nts(DT_DEBUG_OPENCL, "[opencl_synchronization_timeout] synchronization timeout set to %d\n", value);
2778}
2779
2788
2789
2793cl_event *dt_opencl_events_get_slot(const int devid, const char *tag)
2794{
2796 if(!cl->inited || devid < 0) return NULL;
2797 if(!cl->dev[devid].use_events) return NULL;
2798
2799 static const cl_event zeroevent[1]; // implicitly initialized to zero
2800 cl_event **eventlist = &(cl->dev[devid].eventlist);
2801 dt_opencl_eventtag_t **eventtags = &(cl->dev[devid].eventtags);
2802 int *numevents = &(cl->dev[devid].numevents);
2803 int *maxevents = &(cl->dev[devid].maxevents);
2804 int *eventsconsolidated = &(cl->dev[devid].eventsconsolidated);
2805 int *lostevents = &(cl->dev[devid].lostevents);
2806 int *totalevents = &(cl->dev[devid].totalevents);
2807 int *totallost = &(cl->dev[devid].totallost);
2808 int *maxeventslot = &(cl->dev[devid].maxeventslot);
2809 // if first time called: allocate initial buffers
2810 if(IS_NULL_PTR(*eventlist))
2811 {
2812 int newevents = DT_OPENCL_EVENTLISTSIZE;
2813 *eventlist = calloc(newevents, sizeof(cl_event));
2814 *eventtags = calloc(newevents, sizeof(dt_opencl_eventtag_t));
2815 if(!*eventlist || !*eventtags)
2816 {
2817 dt_free(*eventlist);
2818 dt_free(*eventtags);
2819 *eventlist = NULL;
2820 *eventtags = NULL;
2821 dt_print(DT_DEBUG_OPENCL, "[dt_opencl_events_get_slot] NO eventlist for device %i\n", devid);
2822 return NULL;
2823 }
2824 *maxevents = newevents;
2825 }
2826
2827 // check if currently highest event slot was actually consumed. If not use it again
2828 if(*numevents > 0 && !memcmp((*eventlist) + *numevents - 1, zeroevent, sizeof(cl_event)))
2829 {
2830 (*lostevents)++;
2831 (*totallost)++;
2832 if(!IS_NULL_PTR(tag))
2833 {
2834 g_strlcpy((*eventtags)[*numevents - 1].tag, tag, DT_OPENCL_EVENTNAMELENGTH);
2835 }
2836 else
2837 {
2838 (*eventtags)[*numevents - 1].tag[0] = '\0';
2839 }
2840
2841 (*totalevents)++;
2842 return (*eventlist) + *numevents - 1;
2843 }
2844
2845 // check if we would exceed the number of available event handles. In that case first flush existing handles
2846 if((*numevents - *eventsconsolidated + 1 > cl->dev[devid].event_handles) || (*numevents == *maxevents))
2847 (void)dt_opencl_events_flush(devid, 0);
2848
2849 // if no more space left in eventlist: grow buffer
2850 if(*numevents == *maxevents)
2851 {
2852 int newevents = *maxevents + DT_OPENCL_EVENTLISTSIZE;
2853 cl_event *neweventlist = calloc(newevents, sizeof(cl_event));
2854 dt_opencl_eventtag_t *neweventtags = calloc(newevents, sizeof(dt_opencl_eventtag_t));
2855 if(!neweventlist || IS_NULL_PTR(neweventtags))
2856 {
2857 dt_print(DT_DEBUG_OPENCL, "[dt_opencl_events_get_slot] NO new eventlist with size %i for device %i\n",
2858 newevents, devid);
2859 dt_free(neweventlist);
2860 dt_free(neweventtags);
2861 return NULL;
2862 }
2863 memcpy(neweventlist, *eventlist, sizeof(cl_event) * *maxevents);
2864 memcpy(neweventtags, *eventtags, sizeof(dt_opencl_eventtag_t) * *maxevents);
2865 dt_free(*eventlist);
2866 dt_free(*eventtags);
2867 *eventlist = neweventlist;
2868 *eventtags = neweventtags;
2869 *maxevents = newevents;
2870 }
2871
2872 // init next event slot and return it
2873 (*numevents)++;
2874 memcpy((*eventlist) + *numevents - 1, zeroevent, sizeof(cl_event));
2875 if(!IS_NULL_PTR(tag))
2876 {
2877 g_strlcpy((*eventtags)[*numevents - 1].tag, tag, DT_OPENCL_EVENTNAMELENGTH);
2878 }
2879 else
2880 {
2881 (*eventtags)[*numevents - 1].tag[0] = '\0';
2882 }
2883
2884 (*totalevents)++;
2885 *maxeventslot = MAX(*maxeventslot, *numevents - 1);
2886 return (*eventlist) + *numevents - 1;
2887}
2888
2889
2891void dt_opencl_events_reset(const int devid)
2892{
2894 if(!cl->inited || devid < 0) return;
2895 if(!cl->dev[devid].use_events) return;
2896
2897 cl_event **eventlist = &(cl->dev[devid].eventlist);
2898 dt_opencl_eventtag_t **eventtags = &(cl->dev[devid].eventtags);
2899 int *numevents = &(cl->dev[devid].numevents);
2900 int *maxevents = &(cl->dev[devid].maxevents);
2901 int *eventsconsolidated = &(cl->dev[devid].eventsconsolidated);
2902 int *lostevents = &(cl->dev[devid].lostevents);
2903 cl_int *summary = &(cl->dev[devid].summary);
2904
2905 if(IS_NULL_PTR(*eventlist) || *numevents == 0) return; // nothing to do
2906
2907 // release all remaining events in eventlist, not to waste resources
2908 for(int k = *eventsconsolidated; k < *numevents; k++)
2909 {
2910 (cl->dlocl->symbols->dt_clReleaseEvent)((*eventlist)[k]);
2911 }
2912
2913 memset(*eventtags, 0, sizeof(dt_opencl_eventtag_t) * *maxevents);
2914 *numevents = 0;
2915 *eventsconsolidated = 0;
2916 *lostevents = 0;
2917 *summary = CL_COMPLETE;
2918 return;
2919}
2920
2921
2924void dt_opencl_events_wait_for(const int devid)
2925{
2927 if(!cl->inited || devid < 0) return;
2928 if(!cl->dev[devid].use_events) return;
2929
2930 static const cl_event zeroevent[1]; // implicitly initialized to zero
2931 cl_event **eventlist = &(cl->dev[devid].eventlist);
2932 int *numevents = &(cl->dev[devid].numevents);
2933 int *lostevents = &(cl->dev[devid].lostevents);
2934 int *totallost = &(cl->dev[devid].totallost);
2935 int *eventsconsolidated = &(cl->dev[devid].eventsconsolidated);
2936
2937 if(IS_NULL_PTR(*eventlist) || *numevents == 0) return; // nothing to do
2938
2939 // check if last event slot was actually used and correct numevents if needed
2940 if(!memcmp((*eventlist) + *numevents - 1, zeroevent, sizeof(cl_event)))
2941 {
2942 (*numevents)--;
2943 (*lostevents)++;
2944 (*totallost)++;
2945 }
2946
2947 if(*numevents == *eventsconsolidated) return; // nothing to do
2948
2949 assert(*numevents > *eventsconsolidated);
2950
2951 // now wait for all remaining events to terminate
2952 // Risk: might never return in case of OpenCL blocks or endless loops
2953 // TODO: run clWaitForEvents in separate thread and implement watchdog timer
2954 cl_int err = (cl->dlocl->symbols->dt_clWaitForEvents)(*numevents - *eventsconsolidated,
2955 (*eventlist) + *eventsconsolidated);
2956 if((err != CL_SUCCESS) && (err != CL_INVALID_VALUE))
2957 dt_vprint(DT_DEBUG_OPENCL, "[dt_opencl_events_wait_for] reported %i for device %i\n",
2958 err, devid);
2959}
2960
2961
2968cl_int dt_opencl_events_flush(const int devid, const int reset)
2969{
2971 if(!cl->inited || devid < 0) return FALSE;
2972 if(!cl->dev[devid].use_events) return FALSE;
2973
2974 cl_event **eventlist = &(cl->dev[devid].eventlist);
2975 dt_opencl_eventtag_t **eventtags = &(cl->dev[devid].eventtags);
2976 int *numevents = &(cl->dev[devid].numevents);
2977 int *eventsconsolidated = &(cl->dev[devid].eventsconsolidated);
2978 int *lostevents = &(cl->dev[devid].lostevents);
2979 int *totalsuccess = &(cl->dev[devid].totalsuccess);
2980
2981 cl_int *summary = &(cl->dev[devid].summary);
2982
2983 if(IS_NULL_PTR(*eventlist) || *numevents == 0) return CL_COMPLETE; // nothing to do, no news is good news
2984
2985 // Wait for command queue to terminate (side effect: might adjust *numevents)
2987
2988 // now check return status and profiling data of all newly terminated events
2989 for(int k = *eventsconsolidated; k < *numevents; k++)
2990 {
2991 cl_int err;
2992 char *tag = (*eventtags)[k].tag;
2993 cl_int *retval = &((*eventtags)[k].retval);
2994
2995 // get return value of event
2996 err = (cl->dlocl->symbols->dt_clGetEventInfo)((*eventlist)[k], CL_EVENT_COMMAND_EXECUTION_STATUS,
2997 sizeof(cl_int), retval, NULL);
2998 if(err != CL_SUCCESS)
2999 {
3000 dt_print(DT_DEBUG_OPENCL, "[opencl_events_flush] could not get event info for '%s': %i\n",
3001 tag[0] == '\0' ? "<?>" : tag, err);
3002 }
3003 else if(*retval != CL_COMPLETE)
3004 {
3005 dt_print(DT_DEBUG_OPENCL, "[opencl_events_flush] execution of '%s' %s: %d\n",
3006 tag[0] == '\0' ? "<?>" : tag, *retval == CL_COMPLETE ? "was successful" : "failed", *retval);
3007 *summary = *retval;
3008 }
3009 else
3010 (*totalsuccess)++;
3011
3013 {
3014 // get profiling info of event (only if darktable was called with '-d perf')
3015 cl_ulong start;
3016 cl_ulong end;
3017 cl_int errs = (cl->dlocl->symbols->dt_clGetEventProfilingInfo)(
3018 (*eventlist)[k], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
3019 cl_int erre = (cl->dlocl->symbols->dt_clGetEventProfilingInfo)((*eventlist)[k], CL_PROFILING_COMMAND_END,
3020 sizeof(cl_ulong), &end, NULL);
3021 if(errs == CL_SUCCESS && erre == CL_SUCCESS)
3022 {
3023 (*eventtags)[k].timelapsed = end - start;
3024 }
3025 else
3026 {
3027 (*eventtags)[k].timelapsed = 0;
3028 (*lostevents)++;
3029 }
3030 }
3031 else
3032 (*eventtags)[k].timelapsed = 0;
3033
3034 // finally release event to be re-used by driver
3035 (cl->dlocl->symbols->dt_clReleaseEvent)((*eventlist)[k]);
3036 (*eventsconsolidated)++;
3037 }
3038
3039 cl_int result = *summary;
3040
3041 // do we want to get rid of all stored info?
3042 if(reset)
3043 {
3044 // output profiling info if wanted
3046
3047 // reset eventlist structures to empty state
3049 }
3050
3051 return result == CL_COMPLETE ? 0 : result;
3052}
3053
3054
3057void dt_opencl_events_profiling(const int devid, const int aggregated)
3058{
3060 if(!cl->inited || devid < 0) return;
3061 if(!cl->dev[devid].use_events) return;
3062
3063 cl_event **eventlist = &(cl->dev[devid].eventlist);
3064 dt_opencl_eventtag_t **eventtags = &(cl->dev[devid].eventtags);
3065 int *numevents = &(cl->dev[devid].numevents);
3066 int *eventsconsolidated = &(cl->dev[devid].eventsconsolidated);
3067 int *lostevents = &(cl->dev[devid].lostevents);
3068
3069 if(IS_NULL_PTR(*eventlist) || *numevents == 0 || IS_NULL_PTR(*eventtags) || *eventsconsolidated == 0)
3070 return; // nothing to do
3071
3072 char **tags = malloc(sizeof(char *) * (*eventsconsolidated + 1));
3073 float *timings = malloc(sizeof(float) * (*eventsconsolidated + 1));
3074 int items = 1;
3075 tags[0] = "";
3076 timings[0] = 0.0f;
3077
3078 // get profiling info and arrange it
3079 for(int k = 0; k < *eventsconsolidated; k++)
3080 {
3081 // if aggregated is TRUE, try to sum up timings for multiple runs of each kernel
3082 if(aggregated)
3083 {
3084 // linear search: this is not efficient at all but acceptable given the limited
3085 // number of events (ca. 10 - 20)
3086 int tagfound = -1;
3087 for(int i = 0; i < items; i++)
3088 {
3089 if(!strncmp(tags[i], (*eventtags)[k].tag, DT_OPENCL_EVENTNAMELENGTH))
3090 {
3091 tagfound = i;
3092 break;
3093 }
3094 }
3095
3096 if(tagfound >= 0) // tag was already detected before
3097 {
3098 // sum up timings
3099 timings[tagfound] += (*eventtags)[k].timelapsed * 1e-9;
3100 }
3101 else // tag is new
3102 {
3103 // make new entry
3104 items++;
3105 tags[items - 1] = (*eventtags)[k].tag;
3106 timings[items - 1] = (*eventtags)[k].timelapsed * 1e-9;
3107 }
3108 }
3109
3110 else // no aggregated info wanted -> arrange event by event
3111 {
3112 items++;
3113 tags[items - 1] = (*eventtags)[k].tag;
3114 timings[items - 1] = (*eventtags)[k].timelapsed * 1e-9;
3115 }
3116 }
3117
3118 // now display profiling info
3120 "[opencl_profiling] profiling device %d ('%s'):\n", devid, cl->dev[devid].name);
3121
3122 float total = 0.0f;
3123 for(int i = 1; i < items; i++)
3124 {
3125 dt_print(DT_DEBUG_OPENCL, "[opencl_profiling] spent %7.4f seconds in %s\n", (double)timings[i],
3126 tags[i][0] == '\0' ? "<?>" : tags[i]);
3127 total += timings[i];
3128 }
3129 // aggregated timing info for items without tag (if any)
3130 if(timings[0] != 0.0f)
3131 {
3132 dt_print(DT_DEBUG_OPENCL, "[opencl_profiling] spent %7.4f seconds (unallocated)\n", (double)timings[0]);
3133 total += timings[0];
3134 }
3135
3137 "[opencl_profiling] spent %7.4f seconds totally in command queue (with %d event%s missing)\n",
3138 (double)total, *lostevents, *lostevents == 1 ? "" : "s");
3139
3140 dt_free(timings);
3141 dt_free(tags);
3142
3143 return;
3144}
3145
3146static int nextpow2(int n)
3147{
3148 int k = 1;
3149 while (k < n)
3150 k <<= 1;
3151 return k;
3152}
3153
3154// utility function to calculate optimal work group dimensions for a given kernel
3155// taking device specific restrictions and local memory limitations into account
3156int dt_opencl_local_buffer_opt(const int devid, const int kernel, dt_opencl_local_buffer_t *factors)
3157{
3159 if(!cl->inited || devid < 0) return FALSE;
3160
3161 size_t maxsizes[3] = { 0 }; // the maximum dimensions for a work group
3162 size_t workgroupsize = 0; // the maximum number of items in a work group
3163 unsigned long localmemsize = 0; // the maximum amount of local memory we can use
3164 size_t kernelworkgroupsize = 0; // the maximum amount of items in work group for this kernel
3165
3166 int *blocksizex = &factors->sizex;
3167 int *blocksizey = &factors->sizey;
3168
3169 // initial values must be supplied in sizex and sizey.
3170 // we make sure that these are a power of 2 and lie within reasonable limits.
3171 *blocksizex = CLAMP(nextpow2(*blocksizex), 1, 1 << 16);
3172 *blocksizey = CLAMP(nextpow2(*blocksizey), 1, 1 << 16);
3173
3174 if(dt_opencl_get_work_group_limits(devid, maxsizes, &workgroupsize, &localmemsize) == CL_SUCCESS
3175 && dt_opencl_get_kernel_work_group_size(devid, kernel, &kernelworkgroupsize) == CL_SUCCESS)
3176 {
3177 while(maxsizes[0] < *blocksizex || maxsizes[1] < *blocksizey
3178 || localmemsize < ((factors->xfactor * (*blocksizex) + factors->xoffset) *
3179 (factors->yfactor * (*blocksizey) + factors->yoffset)) * factors->cellsize + factors->overhead
3180 || workgroupsize < (size_t)(*blocksizex) * (*blocksizey) || kernelworkgroupsize < (size_t)(*blocksizex) * (*blocksizey))
3181 {
3182 if(*blocksizex == 1 && *blocksizey == 1) return FALSE;
3183
3184 if(*blocksizex > *blocksizey)
3185 *blocksizex >>= 1;
3186 else
3187 *blocksizey >>= 1;
3188 }
3189 }
3190 else
3191 {
3192 dt_print(DT_DEBUG_OPENCL, "[dt_opencl_local_buffer_opt] can not identify resource limits for device %d\n", devid);
3193 return FALSE;
3194 }
3195
3196 return TRUE;
3197}
3198
3199
3200#endif
3201
3202// clang-format off
3203// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
3204// vim: shiftwidth=2 expandtab tabstop=2 cindent
3205// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
3206// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
dt_bilateral_cl_global_t * dt_bilateral_init_cl_global()
Definition bilateralcl.c:38
void dt_bilateral_free_cl_global(dt_bilateral_cl_global_t *b)
dt_blendop_cl_global_t * dt_develop_blend_init_cl_global(void)
Definition blend.c:1570
void dt_develop_blend_free_cl_global(dt_blendop_cl_global_t *b)
Definition blend.c:1603
const dt_aligned_pixel_t f
const dt_colormatrix_t dt_aligned_pixel_t out
typedef void((*dt_cache_allocate_t)(void *userdata, dt_cache_entry_t *entry))
char * key
int type
char * name
void dt_conf_set_bool(const char *name, int val)
int dt_conf_get_bool(const char *name)
int dt_conf_key_exists(const char *key)
gchar * dt_conf_get_string(const char *name)
void dt_conf_set_int(const char *name, int val)
int dt_conf_get_int(const char *name)
int64_t dt_conf_get_int64(const char *name)
void dt_conf_set_string(const char *name, const char *val)
const char * dt_conf_get_string_const(const char *name)
void dt_conf_save(dt_conf_t *cf)
gboolean dt_conf_key_not_empty(const char *name)
void reset(dt_view_t *self)
Definition darkroom.c:1266
void dt_vprint(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1567
void dt_concat_path_file(char destination[PATH_MAX], const char path[PATH_MAX], const char *const file)
Definition darktable.c:1889
darktable_t darktable
Definition darktable.c:181
void dt_print_nts(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1555
void dt_capabilities_add(char *capability)
Definition darktable.c:1773
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1542
@ DT_DEBUG_OPENCL
Definition darktable.h:722
@ DT_DEBUG_MEMORY
Definition darktable.h:724
@ DT_DEBUG_PERF
Definition darktable.h:719
#define dt_free(ptr)
Definition darktable.h:456
static const dt_aligned_pixel_simd_t value
Definition darktable.h:577
static double dt_get_wtime(void)
Definition darktable.h:914
#define PATH_MAX
Definition darktable.h:1062
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
dt_dlopencl_t * dt_dlopencl_init(const char *name)
Definition dlopencl.c:63
static int dt_pthread_mutex_BAD_trylock(dt_pthread_mutex_t *mutex)
Definition dtpthread.h:411
static int dt_pthread_mutex_BAD_unlock(dt_pthread_mutex_t *mutex)
Definition dtpthread.h:416
static int dt_pthread_mutex_unlock(dt_pthread_mutex_t *mutex) RELEASE(mutex) NO_THREAD_SAFETY_ANALYSIS
Definition dtpthread.h:374
static int dt_pthread_mutex_init(dt_pthread_mutex_t *mutex, const pthread_mutexattr_t *mutexattr)
Definition dtpthread.h:359
static int dt_pthread_mutex_destroy(dt_pthread_mutex_t *mutex)
Definition dtpthread.h:379
static int dt_pthread_mutex_lock(dt_pthread_mutex_t *mutex) ACQUIRE(mutex) NO_THREAD_SAFETY_ANALYSIS
Definition dtpthread.h:364
dt_dwt_cl_global_t * dt_dwt_init_cl_global()
Definition dwt.c:538
void dt_dwt_free_cl_global(dt_dwt_cl_global_t *g)
Definition dwt.c:551
void dt_loc_get_user_cache_dir(char *cachedir, size_t bufsize)
void dt_loc_get_kerneldir(char *kerneldir, size_t bufsize)
const dt_collection_sort_t items[]
Definition filter.c:95
void dt_gaussian_free_cl_global(dt_gaussian_cl_global_t *g)
Definition gaussian.c:574
dt_gaussian_cl_global_t * dt_gaussian_init_cl_global()
Definition gaussian.c:341
dt_guided_filter_cl_global_t * dt_guided_filter_init_cl_global()
void dt_guided_filter_free_cl_global(dt_guided_filter_cl_global_t *g)
void dt_heal_free_cl_global(dt_heal_cl_global_t *g)
Definition heal.c:422
dt_heal_cl_global_t * dt_heal_init_cl_global()
Definition heal.c:415
int bpp
void dt_iop_nap(int32_t usec)
Definition imageop.c:2899
void dt_interpolation_free_cl_global(dt_interpolation_cl_global_t *g)
dt_interpolation_cl_global_t * dt_interpolation_init_cl_global()
static float kernel(const float *x, const float *y)
void dt_colorspaces_free_cl_global(dt_colorspaces_cl_global_t *g)
dt_colorspaces_cl_global_t * dt_colorspaces_init_cl_global()
dt_local_laplacian_cl_global_t * dt_local_laplacian_init_cl_global()
void dt_local_laplacian_free_cl_global(dt_local_laplacian_cl_global_t *g)
float *const restrict const size_t k
size_t size
Definition mipmap_cache.c:3
dt_mipmap_buffer_dsc_flags flags
Definition mipmap_cache.c:4
static int dt_nvidia_gpu_supports_sm_20(const char *model)
cl_ulong dt_opencl_get_device_available(const int devid)
Definition opencl.c:2664
int dt_opencl_get_kernel_work_group_size(const int dev, const int kernel, size_t *kernelworkgroupsize)
Definition opencl.c:2115
cl_event * dt_opencl_events_get_slot(const int devid, const char *tag)
Definition opencl.c:2793
int dt_opencl_local_buffer_opt(const int devid, const int kernel, dt_opencl_local_buffer_t *factors)
Definition opencl.c:3156
void dt_opencl_unlock_device(const int dev)
Definition opencl.c:1648
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:2136
static void dt_opencl_update_priorities()
Definition opencl.c:1528
void * dt_opencl_alloc_device_buffer(const int devid, const size_t size)
Definition opencl.c:2544
int dt_opencl_read_host_from_device_rowpitch(const int devid, void *host, void *device, const int width, const int height, const int rowpitch)
Definition opencl.c:2175
void * dt_opencl_alloc_device_use_host_pointer(const int devid, const int width, const int height, const int bpp, void *host, const int flags)
Definition opencl.c:2493
size_t dt_opencl_get_mem_object_size(cl_mem mem)
Definition opencl.c:2550
gboolean dt_opencl_is_pinned_memory(cl_mem mem)
Definition opencl.c:190
static char * _ascii_str_canonical(const char *in, char *out, int maxlen)
Definition opencl.c:1407
void dt_opencl_cleanup_device(dt_opencl_t *cl, int i)
Definition opencl.c:1235
int dt_opencl_enqueue_copy_buffer_to_image(const int devid, cl_mem src_buffer, cl_mem dst_image, size_t offset, size_t *origin, size_t *region)
Definition opencl.c:2284
void dt_opencl_init(dt_opencl_t *cl, const gboolean exclude_opencl, const gboolean print_statistics)
Definition opencl.c:995
void dt_opencl_events_reset(const int devid)
Definition opencl.c:2891
cl_int dt_opencl_events_flush(const int devid, const int reset)
Definition opencl.c:2968
int dt_opencl_copy_device_to_host(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2163
int dt_opencl_lock_device(const int pipetype)
Definition opencl.c:1563
gboolean dt_opencl_read_device_config(const int devid)
Definition opencl.c:246
void dt_opencl_check_tuning(const int devid)
Definition opencl.c:2647
void * dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
Definition opencl.c:2471
int dt_opencl_is_inited(void)
Definition opencl.c:2730
int dt_opencl_create_kernel(const int prog, const char *name)
Definition opencl.c:2030
void * dt_opencl_copy_host_to_device_constant(const int devid, const size_t size, void *host)
Definition opencl.c:2332
int dt_opencl_dev_roundup_height(int size, const int devid)
Definition opencl.c:2723
void * dt_opencl_copy_host_to_device_rowpitch(const int devid, void *host, const int width, const int height, const int bpp, const int rowpitch)
Definition opencl.c:2353
int dt_opencl_write_host_to_device_rowpitch(const int devid, void *host, void *device, const int width, const int height, const int rowpitch)
Definition opencl.c:2222
int dt_opencl_dev_roundup_width(int size, const int devid)
Definition opencl.c:2718
void dt_opencl_write_device_config(const int devid)
Definition opencl.c:196
static void dt_opencl_set_synchronization_timeout(int value)
Definition opencl.c:2774
static int dt_opencl_device_init(dt_opencl_t *cl, const int dev, cl_device_id *devices, const int k)
Definition opencl.c:427
int dt_opencl_get_mem_context_id(cl_mem mem)
Definition opencl.c:2560
int dt_opencl_write_host_to_device_non_blocking(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2232
int dt_opencl_load_program(const int dev, const int prog, const char *filename, const char *binname, const char *cachedir, char *md5sum, char **includemd5, int *loaded_cached)
Definition opencl.c:1728
static void * _dt_opencl_alloc_image2d(const int devid, const int width, const int height, const cl_mem_flags flags, const cl_image_format fmt, void *host, const char *const context)
Definition opencl.c:2441
void * dt_opencl_map_buffer(const int devid, cl_mem buffer, const int blocking, const int flags, size_t offset, size_t size)
Definition opencl.c:2396
static void dt_opencl_apply_scheduling_profile()
Definition opencl.c:2781
static int _dt_opencl_get_conf_int(const gchar *key_device, const gchar *conf_name, gboolean *safety_ok)
Definition opencl.c:229
int dt_opencl_get_image_height(cl_mem mem)
Definition opencl.c:2598
int dt_opencl_read_host_from_device_rowpitch_non_blocking(const int devid, void *host, void *device, const int width, const int height, const int rowpitch)
Definition opencl.c:2192
int dt_opencl_write_buffer_to_device(const int devid, void *host, void *device, const size_t offset, const size_t size, const int blocking)
Definition opencl.c:2320
int dt_opencl_micro_nap(const int devid)
Definition opencl.c:177
static cl_ulong _opencl_get_device_memalloc(const int devid)
Definition opencl.c:2672
int dt_opencl_enqueue_copy_image(const int devid, cl_mem src, cl_mem dst, size_t *orig_src, size_t *orig_dst, size_t *region)
Definition opencl.c:2261
static int _take_from_list(int *list, int value)
Definition opencl.c:1368
int dt_opencl_set_detected_device_enabled(const int detected, const gboolean enabled)
Definition opencl.c:337
int dt_opencl_read_buffer_from_device(const int devid, void *host, void *device, const size_t offset, const size_t size, const int blocking)
Definition opencl.c:2309
int dt_opencl_build_program(const int dev, const int prog, const char *binname, const char *cachedir, char *md5sum, int loaded_cached)
Definition opencl.c:1902
int dt_opencl_get_max_work_item_sizes(const int dev, size_t *sizes)
Definition opencl.c:2087
int dt_opencl_enqueue_barrier(const int devid)
Definition opencl.c:1361
static FILE * fopen_stat(const char *filename, struct stat *st)
Definition opencl.c:1656
gboolean dt_opencl_image_fits_device(const int devid, const size_t width, const size_t height, const unsigned bpp, const float factor, const size_t overhead)
Definition opencl.c:2683
int dt_opencl_unmap_mem_object(const int devid, cl_mem mem_object, void *mapped_ptr)
Definition opencl.c:2430
int dt_opencl_read_host_from_device_raw(const int devid, void *host, void *device, const size_t *origin, const size_t *region, const int rowpitch, const int blocking)
Definition opencl.c:2204
gboolean dt_opencl_detected_device_pinned_memory(const int detected)
Definition opencl.c:369
int dt_opencl_set_detected_device_headroom(const int detected, const size_t headroom)
Definition opencl.c:410
int dt_opencl_is_enabled(void)
Definition opencl.c:2737
void dt_opencl_md5sum(const char **files, char **md5sums)
Definition opencl.c:1674
static int nextpow2(int n)
Definition opencl.c:3146
void dt_opencl_free_kernel(const int kernel)
Definition opencl.c:2073
static gboolean _opencl_splash_active
Definition opencl.c:86
gboolean dt_opencl_use_pinned_memory(const int devid)
Definition opencl.c:183
void dt_opencl_cleanup(dt_opencl_t *cl)
Definition opencl.c:1284
int dt_opencl_get_image_width(cl_mem mem)
Definition opencl.c:2587
cl_ulong dt_opencl_get_device_memalloc(const int devid)
Definition opencl.c:2677
void dt_opencl_memory_statistics(int devid, cl_mem mem, dt_opencl_memory_t action)
Definition opencl.c:2621
int dt_opencl_read_host_from_device_non_blocking(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2185
int dt_opencl_get_detected_device_count(void)
Definition opencl.c:308
void * dt_opencl_map_image(const int devid, cl_mem buffer, const int blocking, const int flags, size_t width, size_t height, int bpp)
Definition opencl.c:2410
void * dt_opencl_alloc_device_buffer_with_flags(const int devid, const size_t size, const int flags, void *host_ptr)
Definition opencl.c:2514
int dt_opencl_write_host_to_device_rowpitch_non_blocking(const int devid, void *host, void *device, const int width, const int height, const int rowpitch)
Definition opencl.c:2238
static void dt_opencl_priority_parse(dt_opencl_t *cl, char *configstr, int *priority_list, int *mandatory)
Definition opencl.c:1433
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:2127
gboolean dt_opencl_finish(const int devid)
Definition opencl.c:1347
int dt_opencl_enqueue_copy_image_to_buffer(const int devid, cl_mem src_image, cl_mem dst_buffer, size_t *origin, size_t *region, size_t offset)
Definition opencl.c:2272
static void _opencl_splash_update_compile(const char *programname)
Definition opencl.c:88
int dt_opencl_update_settings(void)
Definition opencl.c:2754
void dt_opencl_events_wait_for(const int devid)
Definition opencl.c:2924
int dt_opencl_set_detected_device_pinned_memory(const int detected, const gboolean enabled)
Definition opencl.c:382
int dt_opencl_read_host_from_device(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2169
void dt_opencl_events_profiling(const int devid, const int aggregated)
Definition opencl.c:3057
int dt_opencl_get_work_group_limits(const int dev, size_t *sizes, size_t *workgroupsize, unsigned long *localmemsize)
Definition opencl.c:2095
int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.c:2142
cl_mem_flags dt_opencl_get_mem_flags(cl_mem mem)
Definition opencl.c:2578
int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid, cl_mem src_buffer, cl_mem dst_buffer, size_t srcoffset, size_t dstoffset, size_t size)
Definition opencl.c:2296
const dt_opencl_detected_device_t * dt_opencl_get_detected_device(const int detected)
Definition opencl.c:316
void * dt_opencl_copy_host_to_device(const int devid, void *host, const int width, const int height, const int bpp)
Definition opencl.c:2347
int dt_opencl_write_host_to_device_raw(const int devid, const void *host, void *device, const size_t *origin, const size_t *region, const int rowpitch, const int blocking)
Definition opencl.c:2249
size_t dt_opencl_detected_device_headroom(const int detected)
Definition opencl.c:398
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2383
static int _device_by_cname(const char *name)
Definition opencl.c:1385
int dt_opencl_get_image_element_size(cl_mem mem)
Definition opencl.c:2609
gboolean dt_opencl_detected_device_enabled(const int detected)
Definition opencl.c:324
static const char * dt_opencl_get_vendor_by_id(unsigned int id)
Definition opencl.c:1325
int dt_opencl_get_device_info(dt_opencl_t *cl, cl_device_id device, cl_device_info param_name, void **param_value, size_t *param_value_size)
Definition opencl.c:114
int dt_opencl_avoid_atomics(const int devid)
Definition opencl.c:171
int dt_opencl_write_host_to_device(const int devid, void *host, void *device, const int width, const int height, const int bpp)
Definition opencl.c:2216
void dt_opencl_disable(void)
Definition opencl.c:2745
#define DT_OPENCL_MAX_INCLUDES
Definition opencl.h:50
dt_opencl_memory_t
Definition opencl.h:96
@ OPENCL_MEMORY_ADD
Definition opencl.h:97
@ OPENCL_MEMORY_SUB
Definition opencl.h:98
#define DT_OPENCL_EVENTLISTSIZE
Definition opencl.h:47
#define DT_OPENCL_BPP_DECODE(bpp)
Definition opencl.h:87
@ DT_OPENCL_PINNING_DISABLED
Definition opencl.h:115
@ DT_OPENCL_PINNING_OFF
Definition opencl.h:113
@ DT_OPENCL_PINNING_ON
Definition opencl.h:114
#define DT_OPENCL_DEFAULT_COMPILE
Definition opencl.h:92
#define DT_OPENCL_DEFAULT_COMPILE_INTEL
Definition opencl.h:89
#define DT_OPENCL_BPP_IS_RGBA8(bpp)
Definition opencl.h:86
#define DT_OPENCL_CBUFFSIZE
Definition opencl.h:54
#define DT_OPENCL_EVENTNAMELENGTH
Definition opencl.h:48
#define DT_OPENCL_VENDOR_INTEL
Definition opencl.h:53
#define DT_OPENCL_MAX_PROGRAMS
Definition opencl.h:45
#define DT_OPENCL_MAX_PLATFORMS
Definition opencl.h:44
#define DT_CLDEVICE_HEAD
Definition opencl.h:93
#define DT_OPENCL_DEFAULT_COMPILE_AMD
Definition opencl.h:90
#define DT_OPENCL_DEFAULT_COMPILE_NVIDIA
Definition opencl.h:91
#define DT_OPENCL_VENDOR_NVIDIA
Definition opencl.h:52
#define DT_OPENCL_VENDOR_AMD
Definition opencl.h:51
#define DT_OPENCL_MAX_KERNELS
Definition opencl.h:46
static gboolean dt_opencl_check_driver_blacklist(const char *device_version)
const float factor
Definition pdf.h:90
@ DT_DEV_PIXELPIPE_THUMBNAIL
Definition pixelpipe.h:41
@ DT_DEV_PIXELPIPE_EXPORT
Definition pixelpipe.h:38
@ DT_DEV_PIXELPIPE_PREVIEW
Definition pixelpipe.h:40
@ DT_DEV_PIXELPIPE_FULL
Definition pixelpipe.h:39
void dt_dev_pixelpipe_cache_flush_clmem(dt_dev_pixelpipe_cache_t *cache, const int devid)
Release cached OpenCL buffers for a single device.
Pixelpipe cache for storing intermediate results in the pixelpipe.
void dt_gui_splash_init(void)
Definition splash.c:509
void dt_gui_splash_updatef(const char *format,...)
Definition splash.c:684
unsigned __int64 uint64_t
Definition strptime.c:75
struct dt_dev_pixelpipe_cache_t * pixelpipe_cache
Definition darktable.h:790
struct dt_gui_gtk_t * gui
Definition darktable.h:775
struct dt_opencl_t * opencl
Definition darktable.h:785
int32_t unmuted
Definition darktable.h:760
struct dt_conf_t * conf
Definition darktable.h:769
dt_clEnqueueReadImage_t dt_clEnqueueReadImage
Definition dlopencl.h:210
dt_clGetEventInfo_t dt_clGetEventInfo
Definition dlopencl.h:195
dt_clReleaseCommandQueue_t dt_clReleaseCommandQueue
Definition dlopencl.h:162
dt_clEnqueueUnmapMemObject_t dt_clEnqueueUnmapMemObject
Definition dlopencl.h:217
dt_clGetKernelInfo_t dt_clGetKernelInfo
Definition dlopencl.h:192
dt_clGetEventProfilingInfo_t dt_clGetEventProfilingInfo
Definition dlopencl.h:201
dt_clCreateCommandQueue_t dt_clCreateCommandQueue
Definition dlopencl.h:160
dt_clCreateImage2D_t dt_clCreateImage2D
Definition dlopencl.h:167
dt_clEnqueueCopyBuffer_t dt_clEnqueueCopyBuffer
Definition dlopencl.h:208
dt_clGetPlatformInfo_t dt_clGetPlatformInfo
Definition dlopencl.h:152
dt_clBuildProgram_t dt_clBuildProgram
Definition dlopencl.h:183
dt_clEnqueueWriteBuffer_t dt_clEnqueueWriteBuffer
Definition dlopencl.h:206
dt_clCreateContext_t dt_clCreateContext
Definition dlopencl.h:155
dt_clGetDeviceIDs_t dt_clGetDeviceIDs
Definition dlopencl.h:153
dt_clGetMemObjectInfo_t dt_clGetMemObjectInfo
Definition dlopencl.h:172
dt_clEnqueueCopyImage_t dt_clEnqueueCopyImage
Definition dlopencl.h:212
dt_clReleaseContext_t dt_clReleaseContext
Definition dlopencl.h:158
dt_clFinish_t dt_clFinish
Definition dlopencl.h:203
dt_clGetDeviceInfo_t dt_clGetDeviceInfo
Definition dlopencl.h:154
dt_clEnqueueCopyImageToBuffer_t dt_clEnqueueCopyImageToBuffer
Definition dlopencl.h:213
dt_clGetImageInfo_t dt_clGetImageInfo
Definition dlopencl.h:173
dt_clCreateBuffer_t dt_clCreateBuffer
Definition dlopencl.h:165
dt_clCreateProgramWithBinary_t dt_clCreateProgramWithBinary
Definition dlopencl.h:180
dt_clReleaseKernel_t dt_clReleaseKernel
Definition dlopencl.h:190
dt_clEnqueueWriteImage_t dt_clEnqueueWriteImage
Definition dlopencl.h:211
dt_clSetKernelArg_t dt_clSetKernelArg
Definition dlopencl.h:191
dt_clReleaseEvent_t dt_clReleaseEvent
Definition dlopencl.h:198
dt_clEnqueueMapImage_t dt_clEnqueueMapImage
Definition dlopencl.h:216
dt_clEnqueueBarrier_t dt_clEnqueueBarrier
Definition dlopencl.h:223
dt_clGetPlatformIDs_t dt_clGetPlatformIDs
Definition dlopencl.h:151
dt_clGetProgramInfo_t dt_clGetProgramInfo
Definition dlopencl.h:185
dt_clGetKernelWorkGroupInfo_t dt_clGetKernelWorkGroupInfo
Definition dlopencl.h:193
dt_clReleaseMemObject_t dt_clReleaseMemObject
Definition dlopencl.h:170
dt_clGetProgramBuildInfo_t dt_clGetProgramBuildInfo
Definition dlopencl.h:186
dt_clReleaseProgram_t dt_clReleaseProgram
Definition dlopencl.h:182
dt_clEnqueueNDRangeKernel_t dt_clEnqueueNDRangeKernel
Definition dlopencl.h:218
dt_clWaitForEvents_t dt_clWaitForEvents
Definition dlopencl.h:194
dt_clEnqueueReadBuffer_t dt_clEnqueueReadBuffer
Definition dlopencl.h:204
dt_clEnqueueCopyBufferToImage_t dt_clEnqueueCopyBufferToImage
Definition dlopencl.h:214
dt_clEnqueueMapBuffer_t dt_clEnqueueMapBuffer
Definition dlopencl.h:215
dt_clCreateKernel_t dt_clCreateKernel
Definition dlopencl.h:187
dt_clCreateProgramWithSource_t dt_clCreateProgramWithSource
Definition dlopencl.h:179
dt_dlopencl_symbols_t * symbols
Definition dlopencl.h:231
char * library
Definition dlopencl.h:232
size_t forced_headroom
Definition opencl.h:204
unsigned int cltype
Definition opencl.h:188
const char * name
Definition opencl.h:149
size_t used_available
Definition opencl.h:156
cl_event * eventlist
Definition opencl.h:137
cl_command_queue cmd_queue
Definition opencl.h:127
cl_context context
Definition opencl.h:126
const char * options_md5
Definition opencl.h:152
cl_ulong max_mem_alloc
Definition opencl.h:130
cl_ulong max_global_mem
Definition opencl.h:131
size_t max_image_width
Definition opencl.h:128
cl_ulong used_global_mem
Definition opencl.h:132
cl_device_id devid
Definition opencl.h:125
int program_used[256]
Definition opencl.h:135
dt_pthread_mutex_t lock
Definition opencl.h:124
size_t peak_memory
Definition opencl.h:155
cl_kernel kernel[512]
Definition opencl.h:134
int kernel_used[512]
Definition opencl.h:136
size_t max_image_height
Definition opencl.h:129
cl_program program[256]
Definition opencl.h:133
size_t memory_in_use
Definition opencl.h:154
const char * vendor
Definition opencl.h:148
dt_opencl_eventtag_t * eventtags
Definition opencl.h:138
const char * options
Definition opencl.h:151
const char * cname
Definition opencl.h:150
const size_t cellsize
Definition opencl.h:287
const size_t overhead
Definition opencl.h:288
int num_devs
Definition opencl.h:236
int print_statistics
Definition opencl.h:233
struct dt_gaussian_cl_global_t * gaussian
Definition opencl.h:257
int * dev_priority_image
Definition opencl.h:242
int error_count
Definition opencl.h:238
int opencl_synchronization_timeout
Definition opencl.h:239
int * dev_priority_preview
Definition opencl.h:243
dt_opencl_device_t * dev
Definition opencl.h:246
struct dt_bilateral_cl_global_t * bilateral
Definition opencl.h:254
int stopped
Definition opencl.h:235
struct dt_colorspaces_cl_global_t * colorspaces
Definition opencl.h:272
struct dt_guided_filter_cl_global_t * guided_filter
Definition opencl.h:275
int enabled
Definition opencl.h:234
struct dt_interpolation_cl_global_t * interpolation
Definition opencl.h:260
struct dt_local_laplacian_cl_global_t * local_laplacian
Definition opencl.h:263
int mandatory[5]
Definition opencl.h:241
dt_pthread_mutex_t lock
Definition opencl.h:231
struct dt_blendop_cl_global_t * blendop
Definition opencl.h:251
struct dt_dwt_cl_global_t * dwt
Definition opencl.h:266
int * dev_priority_export
Definition opencl.h:244
int inited
Definition opencl.h:232
dt_opencl_detected_device_t * detected_devs
Definition opencl.h:247
dt_dlopencl_t * dlocl
Definition opencl.h:248
int num_detected_devs
Definition opencl.h:237
int * dev_priority_thumbnail
Definition opencl.h:245
uint32_t crc
Definition opencl.h:240
struct dt_heal_cl_global_t * heal
Definition opencl.h:269
#define MIN(a, b)
Definition thinplate.c:32
#define MAX(a, b)
Definition thinplate.c:29
gchar * dt_util_str_replace(const gchar *string, const gchar *pattern, const gchar *substitute)
Definition utility.c:136