191 gboolean *
const cache_output,
197 void *cl_mem_input = NULL;
198 void *cl_mem_output = NULL;
199 void *cl_mem_process_input = NULL;
200 void *cl_mem_blend_input = NULL;
201 void *cl_mem_blend_output = NULL;
202 void *cl_mem_process_input_temp = NULL;
203 void *cl_mem_blend_input_temp = NULL;
204 void *cl_mem_blend_output_temp = NULL;
207 gboolean borrowed_cl_mem_input =
FALSE;
219 actual_input_dsc.
bpp);
220 borrowed_cl_mem_input = (!
IS_NULL_PTR(cl_mem_input));
236 &borrowed_cl_mem_input, piece, previous_piece,
tiling,
237 pixelpipe_flow, cache_output,
238 input_entry, output_entry);
246 const float required_factor_cl
253 required_factor_cl,
tiling->overhead);
257 "[dev_pixelpipe] %s pre-check didn't fit on device, flushing cached pinned buffers and retrying\n",
262 required_factor_cl,
tiling->overhead);
266 && (
module->flags() & IOP_FLAGS_PREVIEW_NON_OPENCL))
267 && (fits_on_device || piece->process_tiling_ready);
269 if(!possible_cl || !fits_on_device) *cache_output =
TRUE;
276 if(possible_cl && !fits_on_device)
281 * ceilf(required_factor_cl));
284 const float border =
tiling->overlap + 1;
285 const gboolean possible = (cl_px > dx * border) || (cl_px > dy * border) || (cl_px > border * border);
289 "[dt_dev_pixelpipe_process_rec] CL: tiling impossible in module `%s'. avail=%.1fM, requ=%.1fM (%ix%i). overlap=%i\n",
290 module->name(), cl_px / 1e6f, dx * dy / 1e6f, (
int)dx, (
int)dy, (
int)
tiling->overlap);
297 input_entry, output_entry))
301 if(!possible_cl)
goto error;
309 &locked_input_entry, NULL))
312 cl_mem_process_input = cl_mem_input;
316 "output", output_entry,
320 const int cst_before_cl = process_input_dsc.
cst;
325 module,
"module input colorspace temp",
333 &process_input_dsc.
cst, work_profile))
335 cl_mem_process_input = cl_mem_process_input_temp;
341 const int cst_after_cl = process_input_dsc.
cst;
346 cst_before_cl, cst_after_cl);
348 if(!module->process_cl(module, pipe, piece, cl_mem_process_input, cl_mem_output))
357 = (
module->dev->gui_attached && (module == module->dev->gui_module) && (pipe == module->dev->pipe))
358 ? module->request_mask_display
359 : DT_DEV_PIXELPIPE_DISPLAY_NONE;
362 cl_mem_blend_input = cl_mem_process_input;
363 cl_mem_blend_output = cl_mem_output;
364 blend_input_dsc = process_input_dsc;
365 blend_output_dsc = piece->
dsc_out;
370 const int blend_in_before = blend_input_dsc.
cst;
374 module,
"blend input colorspace temp",
375 cl_mem_process_input);
380 cl_mem_process_input, cl_mem_blend_input_temp,
382 blend_input_dsc.
cst, blend_cst,
383 &blend_input_dsc.
cst, work_profile);
384 cl_mem_blend_input = cl_mem_blend_input_temp;
386 const int blend_in_after = blend_input_dsc.
cst;
388 &process_input_dsc, &blend_input_dsc,
390 process_input_dsc.
bpp, blend_input_dsc.
bpp,
391 blend_in_before, blend_in_after);
392 const int blend_out_before = blend_output_dsc.
cst;
397 "blend output colorspace temp", cl_mem_output);
404 &blend_output_dsc.
cst, work_profile);
405 cl_mem_blend_output = cl_mem_blend_output_temp;
407 const int blend_out_after = blend_output_dsc.
cst;
409 &piece->
dsc_out, &blend_output_dsc,
412 blend_out_before, blend_out_after);
428 size_t origin[] = { 0, 0, 0 };
431 region) != CL_SUCCESS)
459 const float *module_input = input;
460 const float *blend_input = input;
461 float *module_input_temp = NULL;
462 float *blend_input_temp = NULL;
463 gboolean input_locked =
FALSE;
465 if(borrowed_cl_mem_input)
469 borrowed_cl_mem_input =
FALSE;
487 &process_input_dsc.
cst, work_profile);
489 input_locked =
FALSE;
490 module_input = module_input_temp;
504 int fail = !
module->process_tiling_cl(module, pipe, piece, module_input, output, piece->dsc_in.bpp);
518 blend_input = module_input;
519 blend_input_dsc = process_input_dsc;
520 void *blend_output = output;
521 blend_output_dsc = piece->
dsc_out;
524 = (
module->dev->gui_attached && (module == module->dev->gui_module) && (pipe == module->dev->pipe))
525 ? module->request_mask_display
526 : DT_DEV_PIXELPIPE_DISPLAY_NONE;
547 &blend_input_dsc.
cst, work_profile);
548 blend_input = blend_input_temp;
552 input_locked =
FALSE;
558 float *blend_output_temp
571 &blend_output_dsc.
cst, work_profile);
572 blend_output = blend_output_temp;
584 memcpy(output, blend_output,
591 &blend_output_dsc.
cst, work_profile);
597 if(blend_output != output)
611 if(locked_input_entry)
617 if(borrowed_cl_mem_input)
621 borrowed_cl_mem_input =
FALSE;
648 if(locked_input_entry)
658 const size_t required_mib
662 dt_control_log(_(
"OpenCL failed for module `%s`: image buffer needs %" G_GSIZE_FORMAT
663 " MiB but device limit is %" G_GSIZE_FORMAT
" MiB; falling back to CPU"),
664 module->name(), required_mib, max_alloc_mib);
666 cache_output, cpu_input_entry, output_entry);
672 "[dev_pixelpipe] %s GPU error fallback will reuse host input\n",
678 cpu_input_entry, output_entry))
680 if(borrowed_cl_mem_input)
684 borrowed_cl_mem_input =
FALSE;
695 "[dev_pixelpipe] %s CPU fallback has no input buffer (cache allocation failed?)\n",
700 if(borrowed_cl_mem_input)
710 cache_output, cpu_input_entry, output_entry);