diffuse_8c_source.html

/*

   This file is part of the Ansel project.

   Copyright (C) 2021-2023, 2025-2026 Aurélien PIERRE.

   Copyright (C) 2021 Chris Elston.

   Copyright (C) 2021 Hubert Kowalski.

   Copyright (C) 2021 luzpaz.

   Copyright (C) 2021-2022 Pascal Obry.

   Copyright (C) 2021-2022 quovadit.

   Copyright (C) 2021 Ralf Brown.

   Copyright (C) 2021-2022 Sakari Kapanen.

   Copyright (C) 2021 Victor Forsiuk.

   Copyright (C) 2022 Diederik Ter Rahe.

   Copyright (C) 2022 Hanno Schwalm.

   Copyright (C) 2022 Martin Bařinka.

   Copyright (C) 2022 Philipp Lutz.

   Copyright (C) 2023, 2025 Guillaume Stutin.

   Copyright (C) 2023 Luca Zulberti.

   Copyright (C) 2024 Alynx Zhou.


   Ansel is free software: you can redistribute it and/or modify

   it under the terms of the GNU General Public License as published by

   the Free Software Foundation, either version 3 of the License, or

   (at your option) any later version.


   Ansel is distributed in the hope that it will be useful,

   but WITHOUT ANY WARRANTY; without even the implied warranty of

   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

   GNU General Public License for more details.


   You should have received a copy of the GNU General Public License

   along with Ansel.  If not, see <http://www.gnu.org/licenses/>.

*/


#ifdef HAVE_CONFIG_H

#include "config.h"

#endif

#include "bauhaus/bauhaus.h"

#include "common/bspline.h"

#include "common/darktable.h"

#include "common/dwt.h"

#include "common/gaussian.h"

#include "common/image.h"

#include "common/imagebuf.h"

#include "common/iop_profile.h"

#include "common/opencl.h"

#include "control/control.h"

#include "develop/develop.h"

#include "develop/imageop_gui.h"

#include "develop/imageop_math.h"

#include "develop/noise_generator.h"

#include "develop/openmp_maths.h"

#include "develop/tiling.h"

#include "dtgtk/button.h"

#include "dtgtk/drawingarea.h"

#include "dtgtk/expander.h"

#include "dtgtk/paint.h"


#include "gui/gtk.h"

#include "gui/presets.h"

#include "iop/iop_api.h"


// Set to one to output intermediate image steps as PFM in /tmp

#define DEBUG_DUMP_PFM 0


// Diffuse v3 adds a new parameter that allows more aggressive "sharpening"

// (and mathematically-accurate multiscale handling...)

// on coarse scales, ensuring each HF details band is normalized to the same

// energy. This makes the `radius span` parameter much more impactful.

#define DIFFUSE_V3 0


#if DIFFUSE_V3

DT_MODULE_INTROSPECTION(3, dt_iop_diffuse_params_t)

#else

DT_MODULE_INTROSPECTION(2, dt_iop_diffuse_params_t)

#endif


#define MAX_NUM_SCALES 10


typedef struct dt_iop_diffuse_params_t

{

  // global parameters

  int iterations;           // $MIN: 0    $MAX: 500  $DEFAULT: 1  $DESCRIPTION: "iterations"

  float sharpness;          // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "sharpness"

  int radius;               // $MIN: 0    $MAX: 2048 $DEFAULT: 8  $DESCRIPTION: "radius span"

  float regularization;     // $MIN: 0.   $MAX: 6.   $DEFAULT: 0. $DESCRIPTION: "edge sensitivity"

  float variance_threshold; // $MIN: -2.  $MAX: 2.   $DEFAULT: 0. $DESCRIPTION: "edge threshold"


  float anisotropy_first;   // $MIN: -10. $MAX: 10.  $DEFAULT: 0. $DESCRIPTION: "1st order anisotropy"

  float anisotropy_second;  // $MIN: -10. $MAX: 10.  $DEFAULT: 0. $DESCRIPTION: "2nd order anisotropy"

  float anisotropy_third;   // $MIN: -10. $MAX: 10.  $DEFAULT: 0. $DESCRIPTION: "3rd order anisotropy"

  float anisotropy_fourth;  // $MIN: -10. $MAX: 10.  $DEFAULT: 0. $DESCRIPTION: "4th order anisotropy"


  float threshold;          // $MIN: 0.   $MAX: 8.   $DEFAULT: 0. $DESCRIPTION: "luminance masking threshold"


  float first;              // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "1st order speed"

  float second;             // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "2nd order speed"

  float third;              // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "3rd order speed"

  float fourth;             // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "4th order speed"


  // v2

  int radius_center;        // $MIN: 0    $MAX: 1024 $DEFAULT: 0  $DESCRIPTION: "central radius"


  // new versions add params mandatorily at the end, so we can memcpy old parameters at the beginning


  // v3 : Ansel 1.0

  // bool normalize_band_energy; // $DEFAULT: FALSE $DESCRIPTION: "normalize coarse scales"

  // When disabled, this will boost coarse scale sharpening by a lot.

  // There is no reason to enable it for new edits,

  // it's there to keep compatiblity with old edits.


} dt_iop_diffuse_params_t;


typedef struct dt_iop_diffuse_gui_data_t

{

  GtkWidget *iterations, *fourth, *third, *second, *radius, *radius_center, *sharpness, *threshold, *regularization, *first,

      *anisotropy_first, *anisotropy_second, *anisotropy_third, *anisotropy_fourth, *regularization_first, *variance_threshold;

} dt_iop_diffuse_gui_data_t;


typedef struct dt_iop_diffuse_global_data_t

{

  int kernel_filmic_bspline_vertical;

  int kernel_filmic_bspline_horizontal;

  int kernel_filmic_bspline_vertical_local;

  int kernel_filmic_bspline_horizontal_local;

  int kernel_filmic_wavelets_detail;


  int kernel_diffuse_build_mask;

  int kernel_diffuse_inpaint_mask;

  int kernel_diffuse_pde;

} dt_iop_diffuse_global_data_t;


// only copy params struct to avoid a commit_params()

typedef struct dt_iop_diffuse_params_t dt_iop_diffuse_data_t;


void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *params,

                  dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)

{

  memcpy(piece->data, params, self->params_size);

  piece->cache_output_on_ram = TRUE;

}


typedef enum dt_isotropy_t

{

  DT_ISOTROPY_ISOTROPE = 0, // diffuse in all directions with same intensity

  DT_ISOTROPY_ISOPHOTE = 1, // diffuse more in the isophote direction (orthogonal to gradient)

  DT_ISOTROPY_GRADIENT = 2  // diffuse more in the gradient direction

} dt_isotropy_t;


__OMP_DECLARE_SIMD__()


static inline dt_isotropy_t check_isotropy_mode(const float anisotropy)

{

  // user param is negative, positive or zero. The sign encodes the direction of diffusion, the magnitude encodes the ratio of anisotropy

  // ultimately, the anisotropy factor needs to be positive before going into the exponential

  if(anisotropy == 0.f)

    return DT_ISOTROPY_ISOTROPE;

  else if(anisotropy > 0.f)

    return DT_ISOTROPY_ISOPHOTE;

  else

    return DT_ISOTROPY_GRADIENT; // if(anisotropy > 0.f)

}


const char *name()

{

  return _("diffuse or _sharpen");

}


const char *aliases()

{

  return _("diffusion|deconvolution|blur|sharpening");

}


const char **description(struct dt_iop_module_t *self)

{

  return dt_iop_set_description(self,

                                _("simulate directional diffusion of light with heat transfer model\n"

                                  "to apply an iterative edge-oriented blur,\n"

                                  "inpaint damaged parts of the image,"

                                  "or to remove blur with blind deconvolution."),

                                _("corrective and creative"),

                                _("linear, RGB, scene-referred"),

                                _("linear, RGB"),

                                _("linear, RGB, scene-referred"));

}


int default_group()

{

  return IOP_GROUP_SHARPNESS;

}


int flags()

{

  return IOP_FLAGS_INCLUDE_IN_STYLES | IOP_FLAGS_SUPPORTS_BLENDING | IOP_FLAGS_ALLOW_TILING;

}


int default_colorspace(dt_iop_module_t *self, dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece)

{

  return IOP_CS_RGB;

}


int legacy_params(dt_iop_module_t *self, const void *const old_params, const int old_version, void *new_params,

                  const int new_version)

{

  if(old_version == 1 && new_version == 2)

  {

    typedef struct dt_iop_diffuse_params_v1_t

    {

      // global parameters

      int iterations;

      float sharpness;

      int radius;

      float regularization;

      float variance_threshold;


      float anisotropy_first;

      float anisotropy_second;

      float anisotropy_third;

      float anisotropy_fourth;


      float threshold;


      float first;

      float second;

      float third;

      float fourth;

    } dt_iop_diffuse_params_v1_t;


    dt_iop_diffuse_params_v1_t *o = (dt_iop_diffuse_params_v1_t *)old_params;

    dt_iop_diffuse_params_t *n = (dt_iop_diffuse_params_t *)new_params;

    dt_iop_diffuse_params_t *d = (dt_iop_diffuse_params_t *)self->default_params;


    *n = *d; // start with a fresh copy of default parameters


    // copy common parameters

    memcpy(n, o, sizeof(dt_iop_diffuse_params_v1_t));


    // init only new parameters

    n->radius_center = 0;


#if !DIFFUSE_V3

    // When version 3 will be out, we need to handle v1 -> v2 -> v3 conversion,

    // so don't return just yet.

    return 0;

#endif

  }


#if DIFFUSE_V3

  if(old_version == 2 && new_version == 3)

  {

    typedef struct dt_iop_diffuse_params_v2_t

    {

      // global parameters

      int iterations;           // $MIN: 0    $MAX: 500  $DEFAULT: 1  $DESCRIPTION: "iterations"

      float sharpness;          // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "sharpness"

      int radius;               // $MIN: 0    $MAX: 2048 $DEFAULT: 8  $DESCRIPTION: "radius span"

      float regularization;     // $MIN: 0.   $MAX: 8.   $DEFAULT: 0. $DESCRIPTION: "edge sensitivity"

      float variance_threshold; // $MIN: -3.  $MAX: 3.   $DEFAULT: 0. $DESCRIPTION: "edge threshold"


      float anisotropy_first;   // $MIN: -100. $MAX: 100.  $DEFAULT: 0. $DESCRIPTION: "1st order anisotropy"

      float anisotropy_second;  // $MIN: -100. $MAX: 100.  $DEFAULT: 0. $DESCRIPTION: "2nd order anisotropy"

      float anisotropy_third;   // $MIN: -100. $MAX: 100.  $DEFAULT: 0. $DESCRIPTION: "3rd order anisotropy"

      float anisotropy_fourth;  // $MIN: -100. $MAX: 100.  $DEFAULT: 0. $DESCRIPTION: "4th order anisotropy"


      float threshold;          // $MIN: 0.   $MAX: 8.   $DEFAULT: 0. $DESCRIPTION: "luminance masking threshold"


      float first;              // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "1st order speed"

      float second;             // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "2nd order speed"

      float third;              // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "3rd order speed"

      float fourth;             // $MIN: -1.  $MAX: 1.   $DEFAULT: 0. $DESCRIPTION: "4th order speed"


      // v2

      int radius_center;        // $MIN: 0    $MAX: 1024 $DEFAULT: 0  $DESCRIPTION: "central radius"


    } dt_iop_diffuse_params_v2_t;


    dt_iop_diffuse_params_v2_t *o = (dt_iop_diffuse_params_v2_t *)old_params;

    dt_iop_diffuse_params_t *n = (dt_iop_diffuse_params_t *)new_params;

    dt_iop_diffuse_params_t *d = (dt_iop_diffuse_params_t *)self->default_params;


    *n = *d; // start with a fresh copy of default parameters


    // copy common parameters

    memcpy(n, o, sizeof(dt_iop_diffuse_params_v2_t));


    // init only new parameters

    n->normalize_band_energy = 1; // legacy compatiblity


    return 0;

  }

#endif


  return 1;

}


void init_presets(dt_iop_module_so_t *self)

{

  dt_iop_diffuse_params_t p;

  memset(&p, 0, sizeof(p));

  p.radius_center = 0;


  // deblurring presets

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = +0.f;

  p.regularization = 1.f;


  p.anisotropy_first = +2.f;

  p.anisotropy_second = 0.f;

  p.anisotropy_third = +2.f;

  p.anisotropy_fourth = 0.f;


  p.first = -0.25f;

  p.second = +0.125f;

  p.third = -0.125f;

  p.fourth = +0.0625f;


  p.radius = 8;

  p.iterations = 8;

  dt_gui_presets_add_generic(_("lens deblur: soft"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);


  p.radius = 10;

  p.iterations = 16;

  dt_gui_presets_add_generic(_("lens deblur: medium"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);


  p.radius = 12;

  p.iterations = 24;

  dt_gui_presets_add_generic(_("lens deblur: hard"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);


  p.iterations = 10;

  p.radius = 512;

  p.sharpness = 0.f;

  p.variance_threshold = 0.f;

  p.regularization = 2.5f;


  p.first = -0.20f;

  p.second = +0.10f;

  p.third = -0.20f;

  p.fourth = +0.10f;


  p.anisotropy_first = 2.f;

  p.anisotropy_second = 0.f;

  p.anisotropy_third = 2.f;

  p.anisotropy_fourth = 0.f;


  dt_gui_presets_add_generic(_("dehaze"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);


  p.iterations = 32;

  p.sharpness = 0.f;

  p.threshold = 0.f;

  p.variance_threshold = -0.f;

  p.regularization = 4.f;


  p.anisotropy_first = +2.f;

  p.anisotropy_second = 0.f;

  p.anisotropy_third = +2.f;

  p.anisotropy_fourth = 0.f;


  p.radius = 1;

  p.radius_center = 2;


  p.first = +0.06f;

  p.second = 0.f;

  p.third = +0.06f;

  p.fourth = 0.f;

  dt_gui_presets_add_generic(_("denoise: fine"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);


  p.radius = 3;

  p.radius_center = 4;


  p.first = +0.05f;

  p.second = 0.f;

  p.third = +0.05f;

  p.fourth = 0.f;

  dt_gui_presets_add_generic(_("denoise: medium"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);


  p.radius = 6;

  p.radius_center = 8;


  p.first = +0.04f;

  p.second = 0.f;

  p.third = +0.04f;

  p.fourth = 0.f;

  dt_gui_presets_add_generic(_("denoise: coarse"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);


  p.radius_center = 0;


  p.iterations = 2;

  p.radius = 32;

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = 0.f;

  p.regularization = 4.f;


  p.anisotropy_first = +4.f;

  p.anisotropy_second = +4.f;

  p.anisotropy_third = +4.f;

  p.anisotropy_fourth = +4.f;


  p.first = +1.f;

  p.second = +1.f;

  p.third = +1.f;

  p.fourth = +1.f;

  dt_gui_presets_add_generic(_("surface blur"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);


  p.iterations = 1;

  p.radius = 32;

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = 0.f;

  p.regularization = 0.f;


  p.anisotropy_first = 0.f;

  p.anisotropy_second = 0.f;

  p.anisotropy_third = 0.f;

  p.anisotropy_fourth = 0.f;


  p.first = +0.5f;

  p.second = +0.5f;

  p.third = +0.5f;

  p.fourth = +0.5f;

  dt_gui_presets_add_generic(_("bloom"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);


  p.iterations = 1;

  p.radius = 4;

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = 0.f;

  p.regularization = 1.f;


  p.anisotropy_first = +1.f;

  p.anisotropy_second = +1.f;

  p.anisotropy_third = +1.f;

  p.anisotropy_fourth = +1.f;


  p.first = -0.25f;

  p.second = -0.25f;

  p.third = -0.25f;

  p.fourth = -0.25f;

  dt_gui_presets_add_generic(_("sharpen demosaicing (no AA filter)"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);


  p.radius = 8;

  dt_gui_presets_add_generic(_("sharpen demosaicing (AA filter)"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);


  p.iterations = 4;

  p.radius = 64;

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = 0.f;

  p.regularization = 2.f;


  p.anisotropy_first = 0.f;

  p.anisotropy_second = 0.f;

  p.anisotropy_third = +4.f;

  p.anisotropy_fourth = +4.f;


  p.first = 0.f;

  p.second = 0.f;

  p.third = +0.5f;

  p.fourth = +0.5f;

  dt_gui_presets_add_generic(_("simulate watercolor"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);


  p.iterations = 50;

  p.radius = 64;

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = 0.f;

  p.regularization = 4.f;


  p.anisotropy_first = -5.f;

  p.anisotropy_second = -5.f;

  p.anisotropy_third = -5.f;

  p.anisotropy_fourth = -5.f;


  p.first = -1.f;

  p.second = -1.f;

  p.third = -1.f;

  p.fourth = -1.f;

  dt_gui_presets_add_generic(_("simulate line drawing"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);


  // local contrast

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = 0.f;


  p.anisotropy_first = -2.5f;

  p.anisotropy_second = 0.f;

  p.anisotropy_third = 0.f;

  p.anisotropy_fourth = -2.5f;


  p.first = -0.50f;

  p.second = 0.f;

  p.third = 0.f;

  p.fourth = -0.50f;


  p.iterations = 10;

  p.radius = 333;

  p.radius_center = 512;

  p.regularization = 0.1f;

  dt_gui_presets_add_generic(_("add local contrast"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);


  p.iterations = 32;

  p.radius = 4;

  p.radius_center = 0;

  p.sharpness = 0.0f;

  p.threshold = 1.41f;

  p.variance_threshold = 0.f;

  p.regularization = 0.f;


  p.anisotropy_first = +0.f;

  p.anisotropy_second = +0.f;

  p.anisotropy_third = +0.f;

  p.anisotropy_fourth = +2.f;


  p.first = +0.0f;

  p.second = +0.0f;

  p.third = +0.0f;

  p.fourth = +0.5f;

  dt_gui_presets_add_generic(_("inpaint highlights"), self->op, self->version(), &p, sizeof(p), 1, DEVELOP_BLEND_CS_RGB_SCENE);


  // fast presets for slow hardware

  p.radius_center = 0;

  p.radius = 128;

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = 0.f;

  p.regularization = 0.f;


  p.anisotropy_first = 0.f;

  p.anisotropy_second = 0.f;

  p.anisotropy_third = 5.f;

  p.anisotropy_fourth = 0.f;


  p.first = 0.f;

  p.second = 0.f;

  p.third = -0.50f;

  p.fourth = 0.f;


  p.iterations = 1;

  dt_gui_presets_add_generic(_("fast sharpness"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);


  p.radius_center = 512;

  p.radius = 512;

  p.sharpness = 0.0f;

  p.threshold = 0.0f;

  p.variance_threshold = 0.f;

  p.regularization = 0.f;


  p.anisotropy_first = 0.f;

  p.anisotropy_second = 0.f;

  p.anisotropy_third = 5.f;

  p.anisotropy_fourth = 0.f;


  p.first = 0.f;

  p.second = 0.f;

  p.third = -0.50f;

  p.fourth = 0.f;


  p.iterations = 1;

  dt_gui_presets_add_generic(_("fast local contrast"), self->op, self->version(), &p, sizeof(p), 1,

                             DEVELOP_BLEND_CS_RGB_SCENE);

}


void tiling_callback(struct dt_iop_module_t *self, const struct dt_dev_pixelpipe_t *pipe, const struct dt_dev_pixelpipe_iop_t *piece, struct dt_develop_tiling_t *tiling)

{

  const dt_iop_roi_t *const roi_in = &piece->roi_in;

  dt_iop_diffuse_data_t *data = (dt_iop_diffuse_data_t *)piece->data;


  const float zoom = dt_dev_get_module_scale(pipe, roi_in);

  const float final_radius = (data->radius + data->radius_center) * 2.f / zoom;

  const int diffusion_scales = num_steps_to_reach_equivalent_sigma(B_SPLINE_SIGMA, final_radius);

  const int scales = CLAMP(diffusion_scales, 1, MAX_NUM_SCALES);

  const int max_filter_radius = (1 << scales);


  // Account for the exact full-frame buffers kept alive by the CPU/OpenCL paths:

  // one borrowed input, one output, two temp ping-pong buffers, two low-pass ping-pong

  // buffers, one stored detail buffer per wavelet scale, and one 8-bit mask.

  tiling->factor = 6.0625f + scales;

  tiling->factor_cl = 6.0625f + scales;


  tiling->maxbuf = 1.0f;

  tiling->maxbuf_cl = 1.0f;

  tiling->overhead = 0;

  tiling->overlap = max_filter_radius;

  tiling->xalign = 1;

  tiling->yalign = 1;

  return;

}


__DT_CLONE_TARGETS__


static inline void init_reconstruct(float *const restrict reconstructed, const size_t width,

                                    const size_t height)

{

// init the reconstructed buffer with non-clipped and partially clipped pixels

  __OMP_PARALLEL_FOR_SIMD__(aligned(reconstructed:64))

  for(size_t k = 0; k < height * width * 4; k++) reconstructed[k] = 0.f;


}


// Discretization parameters for the Partial Derivative Equation solver

#define H 1         // spatial step

#define KAPPA 0.25f // 0.25 if h = 1, 1 if h = 2


static inline __attribute__((always_inline)) void find_gradients(const dt_aligned_pixel_simd_t pixels[9],

                                                                 dt_aligned_pixel_simd_t xy[2])

{

  // Compute the gradient with centered finite differences in a 3x3 stencil

  // warning : x is vertical, y is horizontal

  const dt_aligned_pixel_simd_t half = dt_simd_set1(0.5f);

  xy[0] = (pixels[7] - pixels[1]) * half;

  xy[1] = (pixels[5] - pixels[3]) * half;

}


static inline __attribute__((always_inline)) void find_laplacians(const dt_aligned_pixel_simd_t pixels[9],

                                                                  dt_aligned_pixel_simd_t xy[2])

{

  // Compute the laplacian with centered finite differences in a 3x3 stencil

  // warning : x is vertical, y is horizontal

  const dt_aligned_pixel_simd_t two = dt_simd_set1(2.f);

  xy[0] = (pixels[7] + pixels[1]) - two * pixels[4];

  xy[1] = (pixels[5] + pixels[3]) - two * pixels[4];

}


static inline __attribute__((always_inline)) void rotation_matrix_isophote(

    const dt_aligned_pixel_simd_t c2, const dt_aligned_pixel_simd_t cos_theta_sin_theta,

    const dt_aligned_pixel_simd_t cos_theta2, const dt_aligned_pixel_simd_t sin_theta2,

    dt_aligned_pixel_simd_t a[2][2])

{

  // Write the coefficients of a square symmetrical matrice of rotation of the gradient :

  // [[ a11, a12 ],

  //  [ a12, a22 ]]

  // taken from https://www.researchgate.net/publication/220663968

  // c dampens the gradient direction

  a[0][0] = cos_theta2 + c2 * sin_theta2;

  a[1][1] = c2 * cos_theta2 + sin_theta2;

  a[0][1] = a[1][0] = (c2 - dt_simd_set1(1.f)) * cos_theta_sin_theta;

}


static inline __attribute__((always_inline)) void rotation_matrix_gradient(

    const dt_aligned_pixel_simd_t c2, const dt_aligned_pixel_simd_t cos_theta_sin_theta,

    const dt_aligned_pixel_simd_t cos_theta2, const dt_aligned_pixel_simd_t sin_theta2,

    dt_aligned_pixel_simd_t a[2][2])

{

  // Write the coefficients of a square symmetrical matrice of rotation of the gradient :

  // [[ a11, a12 ],

  //  [ a12, a22 ]]

  // based on https://www.researchgate.net/publication/220663968 and inverted

  // c dampens the isophote direction

  a[0][0] = c2 * cos_theta2 + sin_theta2;

  a[1][1] = cos_theta2 + c2 * sin_theta2;

  a[0][1] = a[1][0] = (dt_simd_set1(1.f) - c2) * cos_theta_sin_theta;

}


static inline __attribute__((always_inline)) void build_matrix(const dt_aligned_pixel_simd_t a[2][2],

                                                               dt_aligned_pixel_simd_t kernel[9])

{

  const dt_aligned_pixel_simd_t half = dt_simd_set1(0.5f);

  const dt_aligned_pixel_simd_t minus_two = dt_simd_set1(-2.f);

  const dt_aligned_pixel_simd_t b11 = a[0][1] * half;

  const dt_aligned_pixel_simd_t b13 = -b11;

  const dt_aligned_pixel_simd_t b22 = minus_two * (a[0][0] + a[1][1]);


  // build the kernel of rotated anisotropic laplacian

  // from https://www.researchgate.net/publication/220663968 :

  // [ [ a12 / 2,  a22,            -a12 / 2 ],

  //   [ a11,      -2 (a11 + a22), a11      ],

  //   [ -a12 / 2,   a22,          a12 / 2  ] ]

  // N.B. we have flipped the signs of the a12 terms

  // compared to the paper. There's probably a mismatch

  // of coordinate convention between the paper and the

  // original derivation of this convolution mask

  // (Witkin 1991, https://doi.org/10.1145/127719.122750).

  kernel[0] = b11;

  kernel[1] = a[1][1];

  kernel[2] = b13;

  kernel[3] = a[0][0];

  kernel[4] = b22;

  kernel[5] = a[0][0];

  kernel[6] = b13;

  kernel[7] = a[1][1];

  kernel[8] = b11;

}


static inline __attribute__((always_inline)) void isotrope_laplacian(dt_aligned_pixel_simd_t kernel[9])

{

  // see in https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Second-order-isotropic-finite-differences

  // for references (Oono & Puri)

  const dt_aligned_pixel_simd_t corner = dt_simd_set1(0.25f);

  const dt_aligned_pixel_simd_t edge = dt_simd_set1(0.5f);

  const dt_aligned_pixel_simd_t center = dt_simd_set1(-3.f);

  kernel[0] = corner;

  kernel[1] = edge;

  kernel[2] = corner;

  kernel[3] = edge;

  kernel[4] = center;

  kernel[5] = edge;

  kernel[6] = corner;

  kernel[7] = edge;

  kernel[8] = corner;

}


static inline __attribute__((always_inline)) void compute_kernel(

    const dt_aligned_pixel_simd_t c2, const dt_aligned_pixel_simd_t cos_theta_sin_theta,

    const dt_aligned_pixel_simd_t cos_theta2, const dt_aligned_pixel_simd_t sin_theta2,

    const dt_isotropy_t isotropy_type, dt_aligned_pixel_simd_t kernel[9])

{

  // Build the matrix of rotation with anisotropy


  switch(isotropy_type)

  {

    case(DT_ISOTROPY_ISOTROPE):

    default:

    {

      isotrope_laplacian(kernel);

      break;

    }

    case(DT_ISOTROPY_ISOPHOTE):

    {

      dt_aligned_pixel_simd_t a[2][2] = { { dt_simd_set1(0.f) } };

      rotation_matrix_isophote(c2, cos_theta_sin_theta, cos_theta2, sin_theta2, a);

      build_matrix(a, kernel);

      break;

    }

    case(DT_ISOTROPY_GRADIENT):

    {

      dt_aligned_pixel_simd_t a[2][2] = { { dt_simd_set1(0.f) } };

      rotation_matrix_gradient(c2, cos_theta_sin_theta, cos_theta2, sin_theta2, a);

      build_matrix(a, kernel);

      break;

    }

  }

}


__DT_CLONE_TARGETS__


static inline void heat_PDE_diffusion(const float *const restrict high_freq, const float *const restrict low_freq,

                                      const uint8_t *const restrict mask, const int has_mask,

                                      float *const restrict output, const size_t width,

                                      const size_t height, const dt_aligned_pixel_simd_t anisotropy,

                                      const dt_isotropy_t isotropy_type[4],

                                      const float variance_threshold, const int mult,

                                      const float normalized_regularization,

                                      const dt_aligned_pixel_simd_t ABCD, const float strength,

                                      const int use_nontemporal)

{

  // Simultaneous inpainting for image structure and texture using anisotropic heat transfer model

  // https://www.researchgate.net/publication/220663968

  // modified as follow :

  //  * apply it in a multi-scale wavelet setup : we basically solve it twice, on the wavelets LF and HF layers.

  //  * replace the manual texture direction/distance selection by an automatic detection similar to the structure one,

  //  * generalize the framework for isotropic diffusion and anisotropic weighted on the isophote direction

  //  * add an HF-band energy regularization to better avoid edges.

  // The sharpness setting mimics the contrast equalizer effect by simply multiplying the HF by some gain.


  float *const restrict out = DT_IS_ALIGNED(output);

  const float *const restrict LF = DT_IS_ALIGNED(low_freq);

  const float *const restrict HF = DT_IS_ALIGNED(high_freq);

  const dt_aligned_pixel_simd_t zero = dt_simd_set1(0.f);

  const dt_aligned_pixel_simd_t flt_min = dt_simd_set1(1e-8f);

  const dt_aligned_pixel_simd_t variance_threshold_v = dt_simd_set1(variance_threshold);

  const dt_aligned_pixel_simd_t normalized_regularization_v = dt_simd_set1(normalized_regularization);

  const dt_aligned_pixel_simd_t strength_v = dt_simd_set1(strength);


  __OMP_PARALLEL_FOR__()

  for(size_t row = 0; row < height; ++row)

  {

    // interleave the order in which we process the rows so that we minimize cache misses

    const size_t i = dwt_interleave_rows(row, height, mult);

    // compute the 'above' and 'below' coordinates, clamping them to the image, once for the entire row

    const size_t i_neighbours[3]

      = { MAX((int)(i - mult * H), (int)0) * width,            // x - mult

          i * width,                                           // x

          MIN((int)(i + mult * H), (int)height - 1) * width }; // x + mult

    for(size_t j = 0; j < width; ++j)

    {

      const size_t idx = (i * width + j);

      const size_t index = idx * 4;

      const uint8_t opacity = (has_mask) ? mask[idx] : 1;


      if(opacity)

      {

        // non-local neighbours coordinates

        const size_t j_neighbours[3]

          = { MAX((int)(j - mult * H), (int)0),            // y - mult

              j,                                          // y

              MIN((int)(j + mult * H), (int)width - 1) }; // y + mult


        // fetch non-local pixels and store them locally and contiguously

        dt_aligned_pixel_simd_t neighbour_pixel_HF[9];

        dt_aligned_pixel_simd_t neighbour_pixel_LF[9];

        dt_aligned_pixel_simd_t energy = zero;


        for(size_t ii = 0; ii < 3; ii++)

          for(size_t jj = 0; jj < 3; jj++)

          {

            const size_t neighbor = 4 * (i_neighbours[ii] + j_neighbours[jj]);

            const dt_aligned_pixel_simd_t hf_value = dt_load_simd_aligned(HF + neighbor);

            const dt_aligned_pixel_simd_t lf_value = dt_load_simd_aligned(LF + neighbor);

            neighbour_pixel_HF[3 * ii + jj] = hf_value;

            neighbour_pixel_LF[3 * ii + jj] = lf_value;

            // Clamp LF to a strictly positive floor to avoid divide-by-zero in

            // the HF/LF energy estimate without branching per channel.

            const dt_aligned_pixel_simd_t safe_lf = dt_simd_max_zero(lf_value - flt_min) + flt_min;

            const dt_aligned_pixel_simd_t ratio = hf_value / safe_lf;

            energy += ratio * ratio;

          }


        // normalized_regularization already folds together the user

        // regularization, the 3x3-support averaging factor, the physical blur

        // radius carried by the current wavelet band and its scale normalization.

        energy = dt_simd_max_zero(variance_threshold_v + energy * normalized_regularization_v - flt_min) + flt_min;


        // build the local anisotropic convolution filters for gradients and laplacians

        dt_aligned_pixel_simd_t lf_gradient[2], hf_gradient[2]; // x, y for each channel

        find_gradients(neighbour_pixel_LF, lf_gradient);

        find_gradients(neighbour_pixel_HF, hf_gradient);


        // c² in https://www.researchgate.net/publication/220663968

        dt_aligned_pixel_simd_t c2[4];

        dt_aligned_pixel_simd_t grad_x = lf_gradient[0];

        dt_aligned_pixel_simd_t grad_y = lf_gradient[1];

        dt_aligned_pixel_simd_t c2_first = zero;

        dt_aligned_pixel_simd_t c2_third = zero;

        dt_aligned_pixel_simd_t cos_theta_grad_sq = zero;

        dt_aligned_pixel_simd_t sin_theta_grad_sq = zero;

        dt_aligned_pixel_simd_t cos_theta_sin_theta_grad = zero;

        for_each_channel(c)

        {

          const float magnitude_grad = dt_fast_hypotf(grad_x[c], grad_y[c]);

          c2_first[c] = -magnitude_grad * anisotropy[0];

          c2_third[c] = -magnitude_grad * anisotropy[2];

          // Compute cos/sin(arg(grad)) with a branchless normalization, forcing

          // arg(grad)=0 when magnitude is zero.

          const float nonzero = (magnitude_grad != 0.f);

          const float inv_mag = 1.f / (magnitude_grad + (1.f - nonzero));

          grad_x[c] = grad_x[c] * inv_mag + (1.f - nonzero); // cos(0)

          grad_y[c] = grad_y[c] * inv_mag;                  // sin(0)

          // Warning : now gradient = { cos(arg(grad)) , sin(arg(grad)) }

          cos_theta_grad_sq[c] = sqf(grad_x[c]);

          sin_theta_grad_sq[c] = sqf(grad_y[c]);

          cos_theta_sin_theta_grad[c] = grad_x[c] * grad_y[c];

        }


        c2[0] = c2_first;

        c2[2] = c2_third;

        dt_aligned_pixel_simd_t lapl_x = hf_gradient[0];

        dt_aligned_pixel_simd_t lapl_y = hf_gradient[1];

        dt_aligned_pixel_simd_t c2_second = zero;

        dt_aligned_pixel_simd_t c2_fourth = zero;

        dt_aligned_pixel_simd_t cos_theta_lapl_sq = zero;

        dt_aligned_pixel_simd_t sin_theta_lapl_sq = zero;

        dt_aligned_pixel_simd_t cos_theta_sin_theta_lapl = zero;

        for_each_channel(c)

        {

          const float magnitude_lapl = dt_fast_hypotf(lapl_x[c], lapl_y[c]);

          c2_second[c] = -magnitude_lapl * anisotropy[1];

          c2_fourth[c] = -magnitude_lapl * anisotropy[3];

          // Compute cos/sin(arg(lapl)) with a branchless normalization, forcing

          // arg(lapl)=0 when magnitude is zero.

          const float nonzero = (magnitude_lapl != 0.f);

          const float inv_mag = 1.f / (magnitude_lapl + (1.f - nonzero));

          lapl_x[c] = lapl_x[c] * inv_mag + (1.f - nonzero); // cos(0)

          lapl_y[c] = lapl_y[c] * inv_mag;                  // sin(0)

          // Warning : now laplacian = { cos(arg(lapl)) , sin(arg(lapl)) }

          cos_theta_lapl_sq[c] = sqf(lapl_x[c]);

          sin_theta_lapl_sq[c] = sqf(lapl_y[c]);

          cos_theta_sin_theta_lapl[c] = lapl_x[c] * lapl_y[c];

        }

        c2[1] = c2_second;

        c2[3] = c2_fourth;


        // elements of c2 need to be expf(mag*anistropy), but we haven't applied the expf() yet.  Do that now.

        for(size_t k = 0; k < 4; k++)

          for_each_channel(c) c2[k][c] = dt_fast_expf(c2[k][c]);


        dt_aligned_pixel_simd_t kern_first[9], kern_second[9], kern_third[9], kern_fourth[9];

        compute_kernel(c2[0], cos_theta_sin_theta_grad, cos_theta_grad_sq, sin_theta_grad_sq, isotropy_type[0],

                       kern_first);

        compute_kernel(c2[1], cos_theta_sin_theta_lapl, cos_theta_lapl_sq, sin_theta_lapl_sq, isotropy_type[1],

                       kern_second);

        compute_kernel(c2[2], cos_theta_sin_theta_grad, cos_theta_grad_sq, sin_theta_grad_sq, isotropy_type[2],

                       kern_third);

        compute_kernel(c2[3], cos_theta_sin_theta_lapl, cos_theta_lapl_sq, sin_theta_lapl_sq, isotropy_type[3],

                       kern_fourth);


        dt_aligned_pixel_simd_t derivatives[4] = { zero, zero, zero, zero };

        // Convolve filters and accumulate the local HF band energy over the

        // current 3x3 support. This is not a statistical variance estimator:

        // HF is a band-pass residual, so we normalize each sample by the

        // corresponding LF value before squaring it, then normalize the summed

        // ratio by the physical kernel-variance increment of the current

        // wavelet band.

        for(size_t k = 0; k < 9; k++)

        {

          derivatives[0] = kern_first[k] * neighbour_pixel_LF[k] + derivatives[0];

          derivatives[1] = kern_second[k] * neighbour_pixel_LF[k] + derivatives[1];

          derivatives[2] = kern_third[k] * neighbour_pixel_HF[k] + derivatives[2];

          derivatives[3] = kern_fourth[k] * neighbour_pixel_HF[k] + derivatives[3];

        }


        // compute the update

        dt_aligned_pixel_simd_t update = derivatives[0] * ABCD[0];

        update = derivatives[1] * ABCD[1] + update;

        update = derivatives[2] * ABCD[2] + update;

        update = derivatives[3] * ABCD[3] + update;

        const dt_aligned_pixel_simd_t acc = neighbour_pixel_HF[4] * strength_v + update / energy;


        if(use_nontemporal)

          dt_store_simd_nontemporal(out + index, dt_simd_max_zero(acc + neighbour_pixel_LF[4]));

        else

          dt_store_simd_aligned(out + index, dt_simd_max_zero(acc + neighbour_pixel_LF[4]));

      }

      else

      {

        // only copy input to output, do nothing

        if(use_nontemporal)

          dt_store_simd_nontemporal(out + index, dt_simd_max_zero(dt_load_simd_aligned(HF + index)

                                                                  + dt_load_simd_aligned(LF + index)));

        else

          dt_store_simd_aligned(out + index, dt_simd_max_zero(dt_load_simd_aligned(HF + index)

                                                              + dt_load_simd_aligned(LF + index)));

      }

    }

  }


  if(use_nontemporal)

    dt_omploop_sfence();  // ensure the final nontemporal writeback completes before the caller reads out

}


static inline float compute_anisotropy_factor(const float user_param)

{

  // compute the inverse of the K param in c evaluation from

  // https://www.researchgate.net/publication/220663968

  // but in a perceptually-even way, for better GUI interaction

  return sqf(user_param);

}


#if DEBUG_DUMP_PFM

__DT_CLONE_TARGETS__

static void dump_PFM(const char *filename, const float* out, const uint32_t w, const uint32_t h)

{

  FILE *f = g_fopen(filename, "wb");

  fprintf(f, "PF\n%d %d\n-1.0\n", w, h);

  for(int j = h - 1 ; j >= 0 ; j--)

    for(int i = 0 ; i < w ; i++)

      for(int c = 0 ; c < 3 ; c++)

        fwrite(out + (j * w + i) * 4 + c, 1, sizeof(float), f);

  fclose(f);

}

#endif


__DT_CLONE_TARGETS__


static inline int wavelets_process(const float *const restrict in, float *const restrict reconstructed,

                                   const uint8_t *const restrict mask, const size_t width,

                                   const size_t height, const dt_iop_diffuse_data_t *const data,

                                   const float zoom, const int scales,

                                   const int has_mask,

                                   float *const restrict HF[MAX_NUM_SCALES],

                                   float *const restrict LF_odd,

                                   float *const restrict LF_even)

{

  const dt_aligned_pixel_simd_t anisotropy

      = { compute_anisotropy_factor(data->anisotropy_first),

          compute_anisotropy_factor(data->anisotropy_second),

          compute_anisotropy_factor(data->anisotropy_third),

          compute_anisotropy_factor(data->anisotropy_fourth) };


  const dt_isotropy_t DT_ALIGNED_PIXEL isotropy_type[4]

      = { check_isotropy_mode(data->anisotropy_first),

          check_isotropy_mode(data->anisotropy_second),

          check_isotropy_mode(data->anisotropy_third),

          check_isotropy_mode(data->anisotropy_fourth) };


  const float regularization = powf(10.f, data->regularization) - 1.f;

  const float variance_threshold = powf(10.f, data->variance_threshold);


  // À trous decimated wavelet decompose

  // there is a paper from a guy we know that explains it : https://jo.dreggn.org/home/2010_atrous.pdf

  // the wavelets decomposition here is the same as the equalizer/atrous module,

  float *restrict residual; // will store the temp buffer containing the last step of blur

  // allocate a one-row temporary buffer for the decomposition

  size_t padded_size;

  float *const tempbuf = dt_pixelpipe_cache_alloc_perthread_float(4 * width, &padded_size); //TODO: alloc in caller

  if(IS_NULL_PTR(tempbuf)) return 1;


  for(int s = 0; s < scales; ++s)

  {

    const int mult = 1 << s;


    const float *restrict buffer_in;

    float *restrict buffer_out;


    if(s == 0)

    {

      buffer_in = in;

      buffer_out = LF_odd;

    }

    else if(s % 2 != 0)

    {

      buffer_in = LF_odd;

      buffer_out = LF_even;

    }

    else

    {

      buffer_in = LF_even;

      buffer_out = LF_odd;

    }


    decompose_2D_Bspline(buffer_in, HF[s], buffer_out, width, height, mult, tempbuf, padded_size);


    residual = buffer_out;


#if DEBUG_DUMP_PFM

    char name[64];

    sprintf(name, "/tmp/scale-input-%i.pfm", s);

    dump_PFM(name, buffer_in, width, height);


    sprintf(name, "/tmp/scale-blur-%i.pfm", s);

    dump_PFM(name, buffer_out, width, height);

#endif

  }

  dt_pixelpipe_cache_free_align(tempbuf);


  // will store the temp buffer NOT containing the last step of blur

  float *restrict temp = (residual == LF_even) ? LF_odd : LF_even;

  int count = 0;


  for(int s = scales - 1; s > -1; --s)

  {

    const int mult = 1 << s;

    const float current_radius = equivalent_sigma_at_step(B_SPLINE_SIGMA, s);

    const float real_radius = current_radius * zoom;


#if DIFFUSE_V3

    const float normalized_regularization =

      (data->normalize_band_energy)

        ? regularization * sqf(real_radius) / 9.f

        : regularization / 9.f;

#else

    const float normalized_regularization = regularization / 9.f * sqf(real_radius);

#endif


    const float norm = expf(-sqf(real_radius - (float)data->radius_center) / sqf(data->radius));


    const dt_aligned_pixel_simd_t ABCD = { data->first * KAPPA * norm,

                                      data->second * KAPPA * norm,

                                      data->third * KAPPA * norm,

                                      data->fourth * KAPPA * norm };

    const float strength = data->sharpness * norm + 1.f;


    const float *restrict buffer_in;

    float *restrict buffer_out;


    if(count == 0)

    {

      buffer_in = residual;

      buffer_out = temp;

    }

    else if(count % 2 != 0)

    {

      buffer_in = temp;

      buffer_out = residual;

    }

    else

    {

      buffer_in = residual;

      buffer_out = temp;

    }


    if(s == 0) buffer_out = reconstructed;


    heat_PDE_diffusion(HF[s], buffer_in, mask, has_mask, buffer_out, width, height,

                       anisotropy, isotropy_type, variance_threshold, mult,

                       normalized_regularization, ABCD, strength, (s == 0));


    count++;

  }


  return 0;

}


__DT_CLONE_TARGETS__


static inline void build_mask(const float *const restrict input, uint8_t *const restrict mask,

                              const float threshold, const size_t width, const size_t height)

{

  __OMP_PARALLEL_FOR_SIMD__(aligned(mask, input : 64))

  for(size_t k = 0; k < height * width * 4; k += 4)

  {

    // TRUE if any channel is above threshold

    mask[k / 4] = (input[k] > threshold || input[k + 1] > threshold || input[k + 2] > threshold);

  }


}


__DT_CLONE_TARGETS__


static inline void inpaint_mask(float *const restrict inpainted, const float *const restrict original,

                                const uint8_t *const restrict mask, const size_t width,

                                const size_t height)

{

  // init the reconstruction with noise inside the masked areas

  __OMP_PARALLEL_FOR__()

  for(size_t k = 0; k < height * width * 4; k += 4)

  {

    if(mask[k / 4])

    {

      const uint32_t i = k / width;

      const uint32_t j = k - i;

      uint32_t DT_ALIGNED_ARRAY state[4]

          = { splitmix32(j + 1), splitmix32((uint64_t)(j + 1) * (i + 3)),

              splitmix32(1337), splitmix32(666) };

      xoshiro128plus(state);

      xoshiro128plus(state);

      xoshiro128plus(state);

      xoshiro128plus(state);


      for_four_channels(c, aligned(inpainted, original, state:64))

        inpainted[k + c] = fabsf(gaussian_noise(original[k + c], original[k + c], i % 2 || j % 2, state));

    }

    else

    {

      for_four_channels(c, aligned(original, inpainted:64))

        inpainted[k + c] = original[k + c];

    }

  }


}


__DT_CLONE_TARGETS__


int process(dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece,

            const void *const restrict ivoid, void *const restrict ovoid)

{

  const dt_iop_roi_t *const roi_in = &piece->roi_in;

  const dt_iop_roi_t *const roi_out = &piece->roi_out;

  const dt_iop_diffuse_data_t *const data = (dt_iop_diffuse_data_t *)piece->data;


  float *restrict in = DT_IS_ALIGNED((float *const restrict)ivoid);

  float *const restrict out = DT_IS_ALIGNED((float *const restrict)ovoid);


  float *const restrict temp1 = dt_pixelpipe_cache_alloc_align_float((size_t)roi_out->width * roi_out->height * 4, pipe);

  float *const restrict temp2 = dt_pixelpipe_cache_alloc_align_float((size_t)roi_out->width * roi_out->height * 4, pipe);


  float *restrict temp_in = NULL;

  float *restrict temp_out = NULL;

  int err = 0;


  uint8_t *const restrict mask = dt_pixelpipe_cache_alloc_align(

      sizeof(uint8_t) * roi_out->width * roi_out->height,

      pipe);


  const float zoom = dt_dev_get_module_scale(pipe, roi_in);

  const float final_radius = (data->radius + data->radius_center) * 2.f / zoom;

  // No legacy iteration remap is applied here anymore. The current solver uses

  // the historical a-trous band order and kernel-variance increments exactly,

  // so any extra factor would be content-dependent and belong to pixel math,

  // not to the user parameter itself.

  const int iterations = MAX((int)ceilf((float)data->iterations), 1);

  const int diffusion_scales = num_steps_to_reach_equivalent_sigma(B_SPLINE_SIGMA, final_radius);

  const int scales = CLAMP(diffusion_scales, 1, MAX_NUM_SCALES);


  gboolean out_of_memory = (IS_NULL_PTR(temp1)) || (IS_NULL_PTR(temp2));

  // One full-resolution buffer per stored wavelet band.

  float *restrict HF[MAX_NUM_SCALES] = { NULL };

  for(int s = 0; s < scales; s++)

  {

    HF[s] = dt_pixelpipe_cache_alloc_align_float(roi_out->width * roi_out->height * 4, pipe);

    if(!HF[s]) out_of_memory = TRUE;

  }

  // Two ping-pong low-pass buffers reused by the decomposition/synthesis.

  float *const restrict LF_odd = dt_pixelpipe_cache_alloc_align_float(roi_out->width * roi_out->height * 4, pipe);

  float *const restrict LF_even = dt_pixelpipe_cache_alloc_align_float(roi_out->width * roi_out->height * 4, pipe);


  // PAUSE !

  // check that all buffers exist before processing,

  // because we use a lot of memory here.

  if(IS_NULL_PTR(mask) || IS_NULL_PTR(temp1) || IS_NULL_PTR(temp2) || IS_NULL_PTR(LF_odd) || IS_NULL_PTR(LF_even) || out_of_memory)

  {

    err = 1;

    goto error;

  }


  const int has_mask = (data->threshold > 0.f);


  if(has_mask)

  {

    // build a boolean mask, TRUE where image is above threshold, FALSE otherwise

    build_mask(in, mask, data->threshold, roi_out->width, roi_out->height);


    // init the inpainting area with noise

    inpaint_mask(temp1, in, mask, roi_out->width, roi_out->height);


    in = temp1;

  }


  for(int it = 0; it < iterations; it++)

  {

    if(it == 0)

    {

      temp_in = in;

      temp_out = temp2;

    }

    else if(it % 2 == 0)

    {

      temp_in = temp1;

      temp_out = temp2;

    }

    else

    {

      temp_in = temp2;

      temp_out = temp1;

    }


    if(it == (int)iterations - 1)

      temp_out = out;


    if(wavelets_process(temp_in, temp_out, mask, roi_out->width, roi_out->height,

                        data, zoom, scales, has_mask, HF, LF_odd, LF_even))

    {

      err = 1;

      goto error;

    }

  }


error:

  dt_pixelpipe_cache_free_align(mask);

  dt_pixelpipe_cache_free_align(temp1);

  dt_pixelpipe_cache_free_align(temp2);

  dt_pixelpipe_cache_free_align(LF_even);

  dt_pixelpipe_cache_free_align(LF_odd);

  for(int s = 0; s < scales; s++)

    if(HF[s]) dt_pixelpipe_cache_free_align(HF[s]);

  return err;

}


#if HAVE_OPENCL


static inline cl_int wavelets_process_cl(const int devid, cl_mem in, cl_mem reconstructed, cl_mem mask,

                                         const size_t sizes[3], const int width, const int height,

                                         const dt_iop_diffuse_data_t *const data,

                                         dt_iop_diffuse_global_data_t *const gd,

                                         const float zoom, const int scales,

                                         const int has_mask,

                                         cl_mem HF[MAX_NUM_SCALES],

                                         cl_mem LF_odd,

                                         cl_mem LF_even)

{

  cl_int err = -999;


  const dt_aligned_pixel_simd_t anisotropy

      = { compute_anisotropy_factor(data->anisotropy_first),

          compute_anisotropy_factor(data->anisotropy_second),

          compute_anisotropy_factor(data->anisotropy_third),

          compute_anisotropy_factor(data->anisotropy_fourth) };


  /*

  fprintf(stdout, "anisotropy : %f ; %f ; %f ; %f \n",

                  anisotropy[0], anisotropy[1], anisotropy[2], anisotropy[3]);

  */


  const dt_isotropy_t DT_ALIGNED_PIXEL isotropy_type[4]

      = { check_isotropy_mode(data->anisotropy_first),

          check_isotropy_mode(data->anisotropy_second),

          check_isotropy_mode(data->anisotropy_third),

          check_isotropy_mode(data->anisotropy_fourth) };


  /*

  fprintf(stdout, "type : %d ; %d ; %d ; %d \n",

                  isotropy_type[0], isotropy_type[1], isotropy_type[2], isotropy_type[3]);

  */


  const float regularization = powf(10.f, data->regularization) - 1.f;

  const float variance_threshold = powf(10.f, data->variance_threshold);

  // Same a-trous decomposition as the CPU path, mirrored in OpenCL.

  cl_mem residual;


  for(int s = 0; s < scales; ++s)

  {

    const int mult = 1 << s;


    cl_mem buffer_in;

    cl_mem buffer_out;


    if(s == 0)

    {

      buffer_in = in;

      buffer_out = LF_odd;

    }

    else if(s % 2 != 0)

    {

      buffer_in = LF_odd;

      buffer_out = LF_even;

    }

    else

    {

      buffer_in = LF_even;

      buffer_out = LF_odd;

    }


    // Compute wavelets low-frequency scales

    const int clamp_lf = 1;

    int hblocksize;

    dt_opencl_local_buffer_t hlocopt = (dt_opencl_local_buffer_t){ .xoffset = 2 * mult, .xfactor = 1,

                                                                    .yoffset = 0, .yfactor = 1,

                                                                    .cellsize = 4 * sizeof(float), .overhead = 0,

                                                                    .sizex = 1 << 16, .sizey = 1 };

    if(dt_opencl_local_buffer_opt(devid, gd->kernel_filmic_bspline_horizontal_local, &hlocopt))

      hblocksize = hlocopt.sizex;

    else

      hblocksize = 1;


    // Keep the same separable order as the CPU path: vertical pass first,

    // store its intermediate into HF[s], then horizontal pass builds LF.

    int vblocksize;

    dt_opencl_local_buffer_t vlocopt = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1,

                                                                    .yoffset = 2 * mult, .yfactor = 1,

                                                                    .cellsize = 4 * sizeof(float), .overhead = 0,

                                                                    .sizex = 1, .sizey = 1 << 16 };

    if(dt_opencl_local_buffer_opt(devid, gd->kernel_filmic_bspline_vertical_local, &vlocopt))

      vblocksize = vlocopt.sizey;

    else

      vblocksize = 1;


    if(vblocksize > 1)

    {

      const size_t vertical_sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUP(height, vblocksize), 1 };

      const size_t vertical_local[3] = { 1, vblocksize, 1 };

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 0, sizeof(cl_mem), (void *)&buffer_in);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 1, sizeof(cl_mem), (void *)&HF[s]);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 2, sizeof(int), (void *)&width);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 3, sizeof(int), (void *)&height);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 4, sizeof(int), (void *)&mult);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 5, sizeof(int), (void *)&clamp_lf);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical_local, 6,

                               (vblocksize + 4 * mult) * 4 * sizeof(float), NULL);

      err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_filmic_bspline_vertical_local,

                                                   vertical_sizes, vertical_local);

    }

    else

    {

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 0, sizeof(cl_mem), (void *)&buffer_in);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 1, sizeof(cl_mem), (void *)&HF[s]);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 2, sizeof(int), (void *)&width);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 3, sizeof(int), (void *)&height);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 4, sizeof(int), (void *)&mult);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_vertical, 5, sizeof(int), (void *)&clamp_lf);

      err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_filmic_bspline_vertical, sizes);

    }

    if(err != CL_SUCCESS) return err;


    if(hblocksize > 1)

    {

      const size_t horizontal_sizes[3] = { ROUNDUP(width, hblocksize), ROUNDUPDHT(height, devid), 1 };

      const size_t horizontal_local[3] = { hblocksize, 1, 1 };

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 0, sizeof(cl_mem), (void *)&HF[s]);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 1, sizeof(cl_mem), (void *)&buffer_out);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 2, sizeof(int), (void *)&width);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 3, sizeof(int), (void *)&height);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 4, sizeof(int), (void *)&mult);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 5, sizeof(int), (void *)&clamp_lf);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal_local, 6,

                               (hblocksize + 4 * mult) * 4 * sizeof(float), NULL);

      err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_filmic_bspline_horizontal_local,

                                                   horizontal_sizes, horizontal_local);

    }

    else

    {

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 0, sizeof(cl_mem), (void *)&HF[s]);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 1, sizeof(cl_mem), (void *)&buffer_out);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 2, sizeof(int), (void *)&width);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 3, sizeof(int), (void *)&height);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 4, sizeof(int), (void *)&mult);

      dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_bspline_horizontal, 5, sizeof(int), (void *)&clamp_lf);

      err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_filmic_bspline_horizontal, sizes);

    }

    if(err != CL_SUCCESS) return err;


    // Compute wavelets high-frequency scales and backup the maximum of texture over the RGB channels

    // Note : HF = detail - LF

    dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 0, sizeof(cl_mem), (void *)&buffer_in);

    dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 1, sizeof(cl_mem), (void *)&buffer_out);

    dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 2, sizeof(cl_mem), (void *)&HF[s]);

    dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 3, sizeof(int), (void *)&width);

    dt_opencl_set_kernel_arg(devid, gd->kernel_filmic_wavelets_detail, 4, sizeof(int), (void *)&height);

    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_filmic_wavelets_detail, sizes);

    if(err != CL_SUCCESS) return err;


    residual = buffer_out;

  }


  // Ping-pong low-pass buffer not currently holding the coarsest residual.

  cl_mem temp = (residual == LF_even) ? LF_odd : LF_even;

  int count = 0;


  for(int s = scales - 1; s > -1; --s)

  {

    const int mult = 1 << s;

    const float current_radius = equivalent_sigma_at_step(B_SPLINE_SIGMA, s);

    const float real_radius = current_radius * zoom;


#if DIFFUSE_V3

    const float normalized_regularization =

      (data->normalize_band_energy)

        ? regularization * sqf(real_radius) / 9.f

        : regularization / 9.f;

#else

    const float normalized_regularization = regularization / 9.f * sqf(real_radius);

#endif


    const float norm = expf(-sqf(real_radius - (float)data->radius_center) / sqf(data->radius));


    const dt_aligned_pixel_simd_t ABCD = { data->first * KAPPA * norm,

                                      data->second * KAPPA * norm,

                                      data->third * KAPPA * norm,

                                      data->fourth * KAPPA * norm };

    const float strength = data->sharpness * norm + 1.f;


    cl_mem buffer_in;

    cl_mem buffer_out;


    if(count == 0)

    {

      buffer_in = residual;

      buffer_out = temp;

    }

    else if(count % 2 != 0)

    {

      buffer_in = temp;

      buffer_out = residual;

    }

    else

    {

      buffer_in = residual;

      buffer_out = temp;

    }


    if(s == 0) buffer_out = reconstructed;


    // Compute wavelets low-frequency scales

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 0, sizeof(cl_mem), (void *)&HF[s]);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 1, sizeof(cl_mem), (void *)&buffer_in);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 2, sizeof(cl_mem), (void *)&mask);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 3, sizeof(int), (void *)&has_mask);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 4, sizeof(cl_mem), (void *)&buffer_out);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 5, sizeof(int), (void *)&width);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 6, sizeof(int), (void *)&height);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 7, 4 * sizeof(float), (void *)&anisotropy);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 8, 4 * sizeof(dt_isotropy_t), (void *)&isotropy_type);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 9, sizeof(float), (void *)&normalized_regularization);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 10, sizeof(float), (void *)&variance_threshold);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 11, sizeof(int), (void *)&mult);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 12, 4 * sizeof(float), (void *)&ABCD);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_pde, 13, sizeof(float), (void *)&strength);

    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_diffuse_pde, sizes);

    if(err != CL_SUCCESS) return err;


    count++;

  }


  return err;

}


int process_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out)

{

  const dt_iop_roi_t *const roi_in = &piece->roi_in;

  const dt_iop_roi_t *const roi_out = &piece->roi_out;

  const dt_iop_diffuse_data_t *const data = (dt_iop_diffuse_data_t *)piece->data;

  dt_iop_diffuse_global_data_t *const gd = (dt_iop_diffuse_global_data_t *)self->global_data;


  int out_of_memory = FALSE;


  cl_int err = -999;


  const int devid = pipe->devid;

  const int width = roi_in->width;

  const int height = roi_in->height;


  size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };


  cl_mem in = dev_in;


  cl_mem temp1 = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);

  cl_mem temp2 = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);


  cl_mem temp_in = NULL;

  cl_mem temp_out = NULL;


  cl_mem mask = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(uint8_t));


  const float zoom = dt_dev_get_module_scale(pipe, roi_in);

  const float final_radius = (data->radius + data->radius_center) * 2.f / zoom;

  // See the CPU path above: iterations stay in user space because the current

  // solver already matches the historical a-trous band ordering and kernel

  // variance increments. There is no content-independent remap left to apply.

  const int iterations = MAX((int)ceilf((float)data->iterations), 1);

  const int diffusion_scales = num_steps_to_reach_equivalent_sigma(B_SPLINE_SIGMA, final_radius);

  const int scales = CLAMP(diffusion_scales, 1, MAX_NUM_SCALES);

  // One device buffer per stored wavelet band.

  cl_mem HF[MAX_NUM_SCALES] = { NULL };

  for(int s = 0; s < scales; s++)

  {

    HF[s] = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);

    if(!HF[s]) out_of_memory = TRUE;

  }

  // Two low-pass ping-pong buffers reused across all scales.

  cl_mem LF_even = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);

  cl_mem LF_odd = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4);


  // PAUSE !

  // check that all buffers exist before processing,

  // because we use a lot of memory here.

  if(IS_NULL_PTR(mask) || IS_NULL_PTR(temp1) || IS_NULL_PTR(temp2) || IS_NULL_PTR(LF_odd) || IS_NULL_PTR(LF_even) || out_of_memory)

  {

    err = CL_MEM_OBJECT_ALLOCATION_FAILURE;

    goto error;

  }


  const int has_mask = (data->threshold > 0.f);


  if(has_mask)

  {

    // build a boolean mask, TRUE where image is above threshold, FALSE otherwise

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 0, sizeof(cl_mem), (void *)&in);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 1, sizeof(cl_mem), (void *)&mask);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 2, sizeof(float), (void *)&data->threshold);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 3, sizeof(int), (void *)&roi_out->width);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_build_mask, 4, sizeof(int), (void *)&roi_out->height);

    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_diffuse_build_mask, sizes);

    if(err != CL_SUCCESS) goto error;


    // init the inpainting area with noise

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 0, sizeof(cl_mem), (void *)&temp1);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 1, sizeof(cl_mem), (void *)&in);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 2, sizeof(cl_mem), (void *)&mask);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 3, sizeof(int), (void *)&roi_out->width);

    dt_opencl_set_kernel_arg(devid, gd->kernel_diffuse_inpaint_mask, 4, sizeof(int), (void *)&roi_out->height);

    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_diffuse_inpaint_mask, sizes);

    if(err != CL_SUCCESS) goto error;


    in = temp1;

  }


  for(int it = 0; it < iterations; it++)

  {

    if(it == 0)

    {

      temp_in = in;

      temp_out = temp2;

    }

    else if(it % 2 == 0)

    {

      temp_in = temp1;

      temp_out = temp2;

    }

    else

    {

      temp_in = temp2;

      temp_out = temp1;

    }


    if(it == (int)iterations - 1) temp_out = dev_out;

    err = wavelets_process_cl(devid, temp_in, temp_out, mask, sizes, width, height,

                              data, gd, zoom, scales, has_mask, HF, LF_odd, LF_even);

    if(err != CL_SUCCESS) goto error;

  }


  // cleanup and exit on success

  dt_opencl_release_mem_object(mask);

  dt_opencl_release_mem_object(temp1);

  dt_opencl_release_mem_object(temp2);

  dt_opencl_release_mem_object(LF_even);

  dt_opencl_release_mem_object(LF_odd);

  for(int s = 0; s < scales; s++) dt_opencl_release_mem_object(HF[s]);

  return TRUE;


error:

  dt_opencl_release_mem_object(temp1);

  dt_opencl_release_mem_object(temp2);

  dt_opencl_release_mem_object(mask);

  dt_opencl_release_mem_object(LF_even);

  dt_opencl_release_mem_object(LF_odd);

  for(int s = 0; s < scales; s++) dt_opencl_release_mem_object(HF[s]);


  dt_print(DT_DEBUG_OPENCL, "[opencl_diffuse] couldn't enqueue kernel! %d\n", err);

  return FALSE;

}


void init_global(dt_iop_module_so_t *module)

{

  const int program = 33; // diffuse.cl in programs.conf

  dt_iop_diffuse_global_data_t *gd = (dt_iop_diffuse_global_data_t *)malloc(sizeof(dt_iop_diffuse_global_data_t));


  module->data = gd;

  gd->kernel_diffuse_build_mask = dt_opencl_create_kernel(program, "build_mask");

  gd->kernel_diffuse_inpaint_mask = dt_opencl_create_kernel(program, "inpaint_mask");

  gd->kernel_diffuse_pde = dt_opencl_create_kernel(program, "diffuse_pde");


  const int wavelets = 35; // bspline.cl, from programs.conf

  gd->kernel_filmic_bspline_horizontal = dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_horizontal");

  gd->kernel_filmic_bspline_vertical = dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_vertical");

  gd->kernel_filmic_bspline_horizontal_local = dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_horizontal_local");

  gd->kernel_filmic_bspline_vertical_local = dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_vertical_local");

  gd->kernel_filmic_wavelets_detail = dt_opencl_create_kernel(wavelets, "wavelets_detail_level");

}


void cleanup_global(dt_iop_module_so_t *module)

{

  dt_iop_diffuse_global_data_t *gd = (dt_iop_diffuse_global_data_t *)module->data;

  dt_opencl_free_kernel(gd->kernel_diffuse_build_mask);

  dt_opencl_free_kernel(gd->kernel_diffuse_inpaint_mask);

  dt_opencl_free_kernel(gd->kernel_diffuse_pde);


  dt_opencl_free_kernel(gd->kernel_filmic_bspline_vertical);

  dt_opencl_free_kernel(gd->kernel_filmic_bspline_horizontal);

  dt_opencl_free_kernel(gd->kernel_filmic_bspline_vertical_local);

  dt_opencl_free_kernel(gd->kernel_filmic_bspline_horizontal_local);

  dt_opencl_free_kernel(gd->kernel_filmic_wavelets_detail);

  dt_free(module->data);

}


#endif


void gui_init(struct dt_iop_module_t *self)

{

  dt_iop_diffuse_gui_data_t *g = IOP_GUI_ALLOC(diffuse);

  self->widget = gtk_box_new(GTK_ORIENTATION_VERTICAL, DT_GUI_BOX_SPACING);


  gtk_box_pack_start(GTK_BOX(self->widget), dt_ui_section_label_new(_("properties")), FALSE, FALSE, 0);


  g->iterations = dt_bauhaus_slider_from_params(self, "iterations");

  dt_bauhaus_slider_set_soft_range(g->iterations, 1., 128);

  gtk_widget_set_tooltip_text(g->iterations,

                              _("more iterations make the effect stronger but the module slower.\n"

                                "this is analogous to giving more time to the diffusion reaction.\n"

                                "if you plan on sharpening or inpainting, \n"

                                "more iterations help reconstruction."));


  g->radius_center = dt_bauhaus_slider_from_params(self, "radius_center");

  dt_bauhaus_slider_set_soft_range(g->radius_center, 0., 512.);

  dt_bauhaus_slider_set_format(g->radius_center, " px");

  gtk_widget_set_tooltip_text(

      g->radius_center, _("main scale of the diffusion.\n"

                          "zero makes diffusion act on the finest details more heavily.\n"

                          "non-zero defines the size of the details to diffuse heavily.\n"

                          "for deblurring and denoising, set to zero.\n"

                          "increase to act on local contrast instead."));


  g->radius = dt_bauhaus_slider_from_params(self, "radius");

  dt_bauhaus_slider_set_soft_range(g->radius, 1., 512.);

  dt_bauhaus_slider_set_format(g->radius, " px");

  gtk_widget_set_tooltip_text(

      g->radius, _("width of the diffusion around the central radius.\n"

                   "high values diffuse on a large band of radii.\n"

                   "low values diffuse closer to the central radius.\n"

                   "if you plan on deblurring, \n"

                   "the radius should be around the width of your lens blur."));


  GtkWidget *label_speed = dt_ui_section_label_new(_("speed (sharpen \342\206\224 diffuse)"));

  gtk_box_pack_start(GTK_BOX(self->widget), label_speed, FALSE, FALSE, 0);


  g->first = dt_bauhaus_slider_from_params(self, "first");

  dt_bauhaus_slider_set_digits(g->first, 4);

  dt_bauhaus_slider_set_format(g->first, "%");

  gtk_widget_set_tooltip_text(g->first, _("diffusion speed of low-frequency wavelet layers\n"

                  "in the direction of 1st order anisotropy (set below).\n\n"

                  "negative values sharpen, \n"

                  "positive values diffuse and blur, \n"

                  "zero does nothing."));


  g->second = dt_bauhaus_slider_from_params(self, "second");

  dt_bauhaus_slider_set_digits(g->second, 4);

  dt_bauhaus_slider_set_format(g->second, "%");

  gtk_widget_set_tooltip_text(g->second, _("diffusion speed of low-frequency wavelet layers\n"

                  "in the direction of 2nd order anisotropy (set below).\n\n"

                  "negative values sharpen, \n"

                  "positive values diffuse and blur, \n"

                  "zero does nothing."));


  g->third = dt_bauhaus_slider_from_params(self, "third");

  dt_bauhaus_slider_set_digits(g->third, 4);

  dt_bauhaus_slider_set_format(g->third, "%");

  gtk_widget_set_tooltip_text(g->third, _("diffusion speed of high-frequency wavelet layers\n"

                  "in the direction of 3rd order anisotropy (set below).\n\n"

                  "negative values sharpen, \n"

                  "positive values diffuse and blur, \n"

                  "zero does nothing."));


  g->fourth = dt_bauhaus_slider_from_params(self, "fourth");

  dt_bauhaus_slider_set_digits(g->fourth, 4);

  dt_bauhaus_slider_set_format(g->fourth, "%");

  gtk_widget_set_tooltip_text(g->fourth, _("diffusion speed of high-frequency wavelet layers\n"

                  "in the direction of 4th order anisotropy (set below).\n\n"

                  "negative values sharpen, \n"

                  "positive values diffuse and blur, \n"

                  "zero does nothing."));


  GtkWidget *label_direction = dt_ui_section_label_new(_("direction"));

  gtk_box_pack_start(GTK_BOX(self->widget), label_direction, FALSE, FALSE, 0);


  g->anisotropy_first = dt_bauhaus_slider_from_params(self, "anisotropy_first");

  dt_bauhaus_slider_set_digits(g->anisotropy_first, 4);

  dt_bauhaus_slider_set_format(g->anisotropy_first, "%");

  gtk_widget_set_tooltip_text(g->anisotropy_first, _("direction of 1st order speed (set above).\n\n"

                  "negative values follow gradients more closely, \n"

                  "positive values rather avoid edges (isophotes), \n"

                  "zero affects both equally (isotropic)."));


  g->anisotropy_second = dt_bauhaus_slider_from_params(self, "anisotropy_second");

  dt_bauhaus_slider_set_digits(g->anisotropy_second, 4);

  dt_bauhaus_slider_set_format(g->anisotropy_second, "%");

  gtk_widget_set_tooltip_text(g->anisotropy_second,_("direction of 2nd order speed (set above).\n\n"

                  "negative values follow gradients more closely, \n"

                  "positive values rather avoid edges (isophotes), \n"

                  "zero affects both equally (isotropic)."));


  g->anisotropy_third = dt_bauhaus_slider_from_params(self, "anisotropy_third");

  dt_bauhaus_slider_set_digits(g->anisotropy_third, 4);

  dt_bauhaus_slider_set_format(g->anisotropy_third, "%");

  gtk_widget_set_tooltip_text(g->anisotropy_third,_("direction of 3rd order speed (set above).\n\n"

                  "negative values follow gradients more closely, \n"

                  "positive values rather avoid edges (isophotes), \n"

                  "zero affects both equally (isotropic)."));


  g->anisotropy_fourth = dt_bauhaus_slider_from_params(self, "anisotropy_fourth");

  dt_bauhaus_slider_set_digits(g->anisotropy_fourth, 4);

  dt_bauhaus_slider_set_format(g->anisotropy_fourth, "%");

  gtk_widget_set_tooltip_text(g->anisotropy_fourth,_("direction of 4th order speed (set above).\n\n"

                  "negative values follow gradients more closely, \n"

                  "positive values rather avoid edges (isophotes), \n"

                  "zero affects both equally (isotropic)."));


  gtk_box_pack_start(GTK_BOX(self->widget), dt_ui_section_label_new(_("edge management")), FALSE, FALSE, 0);


  g->sharpness = dt_bauhaus_slider_from_params(self, "sharpness");

  dt_bauhaus_slider_set_format(g->sharpness, "%");

  gtk_widget_set_tooltip_text(g->sharpness,

                              _("increase or decrease the sharpness of the highest frequencies.\n"

                              "can be used to keep details after blooming,\n"

                              "for standalone sharpening set speed to negative values."));


  g->regularization = dt_bauhaus_slider_from_params(self, "regularization");

  gtk_widget_set_tooltip_text(g->regularization,

                              _("define the sensitivity of the variance penalty for edges.\n"

                                "increase to exclude more edges from diffusion,\n"

                                "if fringes or halos appear."));


  g->variance_threshold = dt_bauhaus_slider_from_params(self, "variance_threshold");

  gtk_widget_set_tooltip_text(g->variance_threshold,

                              _("define the variance threshold between edge amplification and penalty.\n"

                                "decrease if you want pixels on smooth surfaces get a boost,\n"

                                "increase if you see noise appear on smooth surfaces or\n"

                                "if dark areas seem oversharpened compared to bright areas."));


  gtk_box_pack_start(GTK_BOX(self->widget), dt_ui_section_label_new(_("diffusion spatiality")), FALSE, FALSE, 0);


  g->threshold = dt_bauhaus_slider_from_params(self, "threshold");

  dt_bauhaus_slider_set_format(g->threshold, "%");

  dt_bauhaus_slider_set_digits(g->threshold, 2);

  gtk_widget_set_tooltip_text(g->threshold,

                              _("luminance threshold for the mask.\n"

                                "0. disables the luminance masking and applies the module on the whole image.\n"

                                "any higher value excludes pixels with luminance lower than the threshold.\n"

                                "this can be used to inpaint highlights."));

}


// clang-format off

// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py

// vim: shiftwidth=2 expandtab tabstop=2 cindent

// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;

// clang-format on

error
static void error(char *msg)
Definition ashift_lsd.c:202

TRUE
#define TRUE
Definition ashift_lsd.c:162

FALSE
#define FALSE
Definition ashift_lsd.c:158

dt_bauhaus_slider_set_soft_range
void dt_bauhaus_slider_set_soft_range(GtkWidget *widget, float soft_min, float soft_max)
Definition bauhaus.c:1647

dt_bauhaus_slider_set_digits
void dt_bauhaus_slider_set_digits(GtkWidget *widget, int val)
Definition bauhaus.c:3534

dt_bauhaus_slider_set_format
void dt_bauhaus_slider_set_format(GtkWidget *widget, const char *format)
Definition bauhaus.c:3598

bauhaus.h

width
int width
Definition bilateral.h:1

height
int height
Definition bilateral.h:1

DEVELOP_BLEND_CS_RGB_SCENE
@ DEVELOP_BLEND_CS_RGB_SCENE
Definition blend.h:60

bspline.h

B_SPLINE_SIGMA
#define B_SPLINE_SIGMA
Definition bspline.h:34

num_steps_to_reach_equivalent_sigma
static unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
Definition bspline.h:61

equivalent_sigma_at_step
static float equivalent_sigma_at_step(const float sigma, const unsigned int s)
Definition bspline.h:48

decompose_2D_Bspline
static void decompose_2D_Bspline(const float *const restrict in, float *const restrict HF, float *const restrict LF, const size_t width, const size_t height, const int mult, float *const tempbuf, size_t padded_size)
Definition bspline.h:347

button.h

p
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
Definition chromatic_adaptation.h:309

dt_simd_set1
return vector dt_simd_set1(valid ?(scaling+NORM_MIN) :NORM_MIN)

IOP_CS_RGB
@ IOP_CS_RGB
Definition color_conversion.h:34

i
const float i
Definition colorspaces_inline_conversions.h:440

g
const float g
Definition colorspaces_inline_conversions.h:674

f
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:102

threshold
const float threshold
Definition colorspaces_inline_conversions.h:176

d
const float d
Definition colorspaces_inline_conversions.h:680

out
const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:42

n
const float n
Definition colorspaces_inline_conversions.h:678

dt_store_simd_aligned
dt_store_simd_aligned(out, dt_mat3x4_mul_vec4(vin, dt_colormatrix_row_to_simd(matrix, 0), dt_colormatrix_row_to_simd(matrix, 1), dt_colormatrix_row_to_simd(matrix, 2)))

row
static const int row
Definition colorspaces_inline_conversions.h:35

strength
static float strength(float value, float strength)
Definition colorzones.c:420

control.h

dt_print
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1542

darktable.h

DT_ALIGNED_PIXEL
#define DT_ALIGNED_PIXEL
Definition darktable.h:389

DT_DEBUG_OPENCL
@ DT_DEBUG_OPENCL
Definition darktable.h:722

DT_ALIGNED_ARRAY
#define DT_ALIGNED_ARRAY
Definition darktable.h:388

DT_IS_ALIGNED
#define DT_IS_ALIGNED(x)
Definition darktable.h:371

for_each_channel
#define for_each_channel(_var,...)
Definition darktable.h:662

__attribute__
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
Definition darktable.h:524

dt_pixelpipe_cache_alloc_align
#define dt_pixelpipe_cache_alloc_align(size, pipe)
Definition darktable.h:437

dt_free
#define dt_free(ptr)
Definition darktable.h:456

DT_MODULE_INTROSPECTION
#define DT_MODULE_INTROSPECTION(MODVER, PARAMSTYPE)
Definition darktable.h:151

__OMP_DECLARE_SIMD__
#define __OMP_DECLARE_SIMD__(...)
Definition darktable.h:263

dt_pixelpipe_cache_free_align
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453

dt_pixelpipe_cache_alloc_align_float
#define dt_pixelpipe_cache_alloc_align_float(pixels, pipe)
Definition darktable.h:442

__DT_CLONE_TARGETS__
#define __DT_CLONE_TARGETS__
Definition darktable.h:367

for_four_channels
#define for_four_channels(_var,...)
Definition darktable.h:664

__OMP_PARALLEL_FOR__
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258

__OMP_PARALLEL_FOR_SIMD__
#define __OMP_PARALLEL_FOR_SIMD__(...)
Definition darktable.h:259

dt_pixelpipe_cache_alloc_perthread_float
#define dt_pixelpipe_cache_alloc_perthread_float(n, padded_size)
Definition darktable.h:1030

IS_NULL_PTR
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281

splitmix32
static unsigned int splitmix32(const unsigned long seed)
Definition data/kernels/noise_generator.h:32

xoshiro128plus
static float xoshiro128plus(uint state[4])
Definition data/kernels/noise_generator.h:49

dt_iop_params_t
void dt_iop_params_t
Definition dev_history.h:41

develop.h

description
const char ** description(struct dt_iop_module_t *self)
Definition diffuse.c:176

default_group
int default_group()
Definition diffuse.c:189

wavelets_process
static __DT_CLONE_TARGETS__ int wavelets_process(const float *const restrict in, float *const restrict reconstructed, const uint8_t *const restrict mask, const size_t width, const size_t height, const dt_iop_diffuse_data_t *const data, const float zoom, const int scales, const int has_mask, float *const restrict HF[10], float *const restrict LF_odd, float *const restrict LF_even)
Definition diffuse.c:968

process
__DT_CLONE_TARGETS__ int process(dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, const void *const restrict ivoid, void *const restrict ovoid)
Definition diffuse.c:1145

commit_params
void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *params, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)
Definition diffuse.c:136

dt_isotropy_t
dt_isotropy_t
Definition diffuse.c:145

DT_ISOTROPY_ISOPHOTE
@ DT_ISOTROPY_ISOPHOTE
Definition diffuse.c:147

DT_ISOTROPY_ISOTROPE
@ DT_ISOTROPY_ISOTROPE
Definition diffuse.c:146

DT_ISOTROPY_GRADIENT
@ DT_ISOTROPY_GRADIENT
Definition diffuse.c:148

compute_anisotropy_factor
static float compute_anisotropy_factor(const float user_param)
Definition diffuse.c:945

aliases
const char * aliases()
Definition diffuse.c:171

build_mask
static __DT_CLONE_TARGETS__ void build_mask(const float *const restrict input, uint8_t *const restrict mask, const float threshold, const size_t width, const size_t height)
Definition diffuse.c:1099

heat_PDE_diffusion
static __DT_CLONE_TARGETS__ void heat_PDE_diffusion(const float *const restrict high_freq, const float *const restrict low_freq, const uint8_t *const restrict mask, const int has_mask, float *const restrict output, const size_t width, const size_t height, const dt_aligned_pixel_simd_t anisotropy, const dt_isotropy_t isotropy_type[4], const float variance_threshold, const int mult, const float normalized_regularization, const dt_aligned_pixel_simd_t ABCD, const float strength, const int use_nontemporal)
Definition diffuse.c:750

name
const char * name()
Definition diffuse.c:166

gui_init
void gui_init(struct dt_iop_module_t *self)
Definition diffuse.c:1637

tiling_callback
void tiling_callback(struct dt_iop_module_t *self, const struct dt_dev_pixelpipe_t *pipe, const struct dt_dev_pixelpipe_iop_t *piece, struct dt_develop_tiling_t *tiling)
Definition diffuse.c:575

cleanup_global
void cleanup_global(dt_iop_module_so_t *module)
Definition diffuse.c:1620

wavelets_process_cl
static cl_int wavelets_process_cl(const int devid, cl_mem in, cl_mem reconstructed, cl_mem mask, const size_t sizes[3], const int width, const int height, const dt_iop_diffuse_data_t *const data, dt_iop_diffuse_global_data_t *const gd, const float zoom, const int scales, const int has_mask, cl_mem HF[10], cl_mem LF_odd, cl_mem LF_even)
Definition diffuse.c:1251

default_colorspace
int default_colorspace(dt_iop_module_t *self, dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece)
Definition diffuse.c:199

flags
int flags()
Definition diffuse.c:194

init_presets
void init_presets(dt_iop_module_so_t *self)
Definition diffuse.c:298

H
#define H
Definition diffuse.c:613

MAX_NUM_SCALES
#define MAX_NUM_SCALES
Definition diffuse.c:77

init_reconstruct
static __DT_CLONE_TARGETS__ void init_reconstruct(float *const restrict reconstructed, const size_t width, const size_t height)
Definition diffuse.c:602

KAPPA
#define KAPPA
Definition diffuse.c:614

check_isotropy_mode
static dt_isotropy_t check_isotropy_mode(const float anisotropy)
Definition diffuse.c:153

init_global
void init_global(dt_iop_module_so_t *module)
Definition diffuse.c:1601

process_cl
int process_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out)
Definition diffuse.c:1476

legacy_params
int legacy_params(dt_iop_module_t *self, const void *const old_params, const int old_version, void *new_params, const int new_version)
Definition diffuse.c:204

inpaint_mask
static __DT_CLONE_TARGETS__ void inpaint_mask(float *const restrict inpainted, const float *const restrict original, const uint8_t *const restrict mask, const size_t width, const size_t height)
Definition diffuse.c:1112

drawingarea.h

paint.h

dwt.h

dwt_interleave_rows
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:93

expander.h

gaussian.h

gtk.h

dt_ui_section_label_new
static GtkWidget * dt_ui_section_label_new(const gchar *str)
Definition gtk.h:451

DT_GUI_BOX_SPACING
#define DT_GUI_BOX_SPACING
Definition gtk.h:109

dt_gui_presets_add_generic
void dt_gui_presets_add_generic(const char *name, dt_dev_operation_t op, const int32_t version, const void *params, const int32_t params_size, const int32_t enabled, const dt_develop_blend_colorspace_t blend_cst)
Definition gui/presets.c:188

presets.h

image.h

imagebuf.h

dt_iop_set_description
const char ** dt_iop_set_description(dt_iop_module_t *module, const char *main_text, const char *purpose, const char *input, const char *process, const char *output)
Definition imageop.c:3141

dt_dev_get_module_scale
float dt_dev_get_module_scale(const dt_dev_pixelpipe_t *const pipe, const dt_iop_roi_t *const roi_in)
Definition imageop.c:131

dt_omploop_sfence
#define dt_omploop_sfence()
Definition imageop.h:702

IOP_FLAGS_INCLUDE_IN_STYLES
@ IOP_FLAGS_INCLUDE_IN_STYLES
Definition imageop.h:166

IOP_FLAGS_SUPPORTS_BLENDING
@ IOP_FLAGS_SUPPORTS_BLENDING
Definition imageop.h:167

IOP_FLAGS_ALLOW_TILING
@ IOP_FLAGS_ALLOW_TILING
Definition imageop.h:169

IOP_GROUP_SHARPNESS
@ IOP_GROUP_SHARPNESS
Definition imageop.h:141

IOP_GUI_ALLOC
#define IOP_GUI_ALLOC(module)
Definition imageop.h:599

dt_bauhaus_slider_from_params
GtkWidget * dt_bauhaus_slider_from_params(dt_iop_module_t *self, const char *param)
Definition imageop_gui.c:77

imageop_gui.h

imageop_math.h

ovoid
void *const ovoid
Definition imageop_math.h:178

kernel
static float kernel(const float *x, const float *y)
Definition iop/colorchecker.c:469

iop_api.h

iop_profile.h

ABCD
#define ABCD(A, B, C, D)

k
float *const restrict const size_t k
Definition luminance_mask.h:78

derive_filmic_v6_gamut_mapping.c
c
Definition derive_filmic_v6_gamut_mapping.py:35

tiling
Definition tiling.py:1

dt_opencl_local_buffer_opt
int dt_opencl_local_buffer_opt(const int devid, const int kernel, dt_opencl_local_buffer_t *factors)
Definition opencl.c:3156

dt_opencl_enqueue_kernel_2d
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:2136

dt_opencl_alloc_device
void * dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
Definition opencl.c:2471

dt_opencl_create_kernel
int dt_opencl_create_kernel(const int prog, const char *name)
Definition opencl.c:2030

dt_opencl_free_kernel
void dt_opencl_free_kernel(const int kernel)
Definition opencl.c:2073

dt_opencl_set_kernel_arg
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:2127

dt_opencl_enqueue_kernel_2d_with_local
int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.c:2142

dt_opencl_release_mem_object
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2383

opencl.h

ROUNDUP
#define ROUNDUP(a, n)
Definition opencl.h:78

ROUNDUPDHT
#define ROUNDUPDHT(a, b)
Definition opencl.h:82

ROUNDUPDWD
#define ROUNDUPDWD(a, b)
Definition opencl.h:81

openmp_maths.h

GtkWidget
struct _GtkWidget GtkWidget
Definition splash.h:29

noise_generator.h

state
const float uint32_t state[4]
Definition src/develop/noise_generator.h:72

uint64_t
unsigned __int64 uint64_t
Definition strptime.c:75

dt_dev_pixelpipe_iop_t
Definition pixelpipe_hb.h:96

dt_dev_pixelpipe_iop_t::data
struct dt_iop_module_t *void * data
Definition pixelpipe_hb.h:97

dt_dev_pixelpipe_iop_t::roi_in
dt_iop_roi_t roi_in
Definition pixelpipe_hb.h:132

dt_dev_pixelpipe_iop_t::roi_out
dt_iop_roi_t roi_out
Definition pixelpipe_hb.h:132

dt_dev_pixelpipe_iop_t::cache_output_on_ram
gboolean cache_output_on_ram
Definition pixelpipe_hb.h:159

dt_dev_pixelpipe_t
Definition pixelpipe_hb.h:217

dt_dev_pixelpipe_t::devid
int devid
Definition pixelpipe_hb.h:316

dt_develop_tiling_t
Definition tiling.h:39

dt_iop_diffuse_global_data_t
Definition diffuse.c:120

dt_iop_diffuse_global_data_t::kernel_filmic_wavelets_detail
int kernel_filmic_wavelets_detail
Definition diffuse.c:125

dt_iop_diffuse_global_data_t::kernel_diffuse_build_mask
int kernel_diffuse_build_mask
Definition diffuse.c:127

dt_iop_diffuse_global_data_t::kernel_diffuse_pde
int kernel_diffuse_pde
Definition diffuse.c:129

dt_iop_diffuse_global_data_t::kernel_diffuse_inpaint_mask
int kernel_diffuse_inpaint_mask
Definition diffuse.c:128

dt_iop_diffuse_global_data_t::kernel_filmic_bspline_vertical
int kernel_filmic_bspline_vertical
Definition diffuse.c:121

dt_iop_diffuse_global_data_t::kernel_filmic_bspline_horizontal_local
int kernel_filmic_bspline_horizontal_local
Definition diffuse.c:124

dt_iop_diffuse_global_data_t::kernel_filmic_bspline_horizontal
int kernel_filmic_bspline_horizontal
Definition diffuse.c:122

dt_iop_diffuse_global_data_t::kernel_filmic_bspline_vertical_local
int kernel_filmic_bspline_vertical_local
Definition diffuse.c:123

dt_iop_diffuse_gui_data_t
Definition diffuse.c:114

dt_iop_diffuse_gui_data_t::fourth
GtkWidget * fourth
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::third
GtkWidget * third
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::iterations
GtkWidget * iterations
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::anisotropy_third
GtkWidget * anisotropy_third
Definition diffuse.c:116

dt_iop_diffuse_gui_data_t::anisotropy_first
GtkWidget * anisotropy_first
Definition diffuse.c:116

dt_iop_diffuse_gui_data_t::sharpness
GtkWidget * sharpness
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::regularization
GtkWidget * regularization
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::radius
GtkWidget * radius
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::regularization_first
GtkWidget * regularization_first
Definition diffuse.c:116

dt_iop_diffuse_gui_data_t::anisotropy_second
GtkWidget * anisotropy_second
Definition diffuse.c:116

dt_iop_diffuse_gui_data_t::second
GtkWidget * second
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::radius_center
GtkWidget * radius_center
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::threshold
GtkWidget * threshold
Definition diffuse.c:115

dt_iop_diffuse_gui_data_t::variance_threshold
GtkWidget * variance_threshold
Definition diffuse.c:116

dt_iop_diffuse_gui_data_t::anisotropy_fourth
GtkWidget * anisotropy_fourth
Definition diffuse.c:116

dt_iop_diffuse_gui_data_t::first
GtkWidget * first
Definition diffuse.c:115

dt_iop_diffuse_params_t
Definition diffuse.c:79

dt_iop_diffuse_params_t::sharpness
float sharpness
Definition diffuse.c:82

dt_iop_diffuse_params_t::anisotropy_first
float anisotropy_first
Definition diffuse.c:87

dt_iop_diffuse_params_t::regularization
float regularization
Definition diffuse.c:84

dt_iop_diffuse_params_t::iterations
int iterations
Definition diffuse.c:81

dt_iop_diffuse_params_t::fourth
float fourth
Definition diffuse.c:97

dt_iop_diffuse_params_t::radius
int radius
Definition diffuse.c:83

dt_iop_diffuse_params_t::anisotropy_fourth
float anisotropy_fourth
Definition diffuse.c:90

dt_iop_diffuse_params_t::second
float second
Definition diffuse.c:95

dt_iop_diffuse_params_t::first
float first
Definition diffuse.c:94

dt_iop_diffuse_params_t::radius_center
int radius_center
Definition diffuse.c:100

dt_iop_diffuse_params_t::third
float third
Definition diffuse.c:96

dt_iop_diffuse_params_t::variance_threshold
float variance_threshold
Definition diffuse.c:85

dt_iop_diffuse_params_t::anisotropy_third
float anisotropy_third
Definition diffuse.c:89

dt_iop_diffuse_params_t::threshold
float threshold
Definition diffuse.c:92

dt_iop_diffuse_params_t::anisotropy_second
float anisotropy_second
Definition diffuse.c:88

dt_iop_module_so_t
Definition imageop.h:220

dt_iop_module_so_t::op
GModule *dt_dev_operation_t op
Definition imageop.h:230

dt_iop_module_so_t::data
dt_iop_global_data_t * data
Definition imageop.h:233

dt_iop_module_t
Definition imageop.h:246

dt_iop_module_t::default_params
dt_iop_params_t * default_params
Definition imageop.h:307

dt_iop_module_t::widget
GtkWidget * widget
Definition imageop.h:337

dt_iop_module_t::global_data
dt_iop_global_data_t * global_data
Definition imageop.h:314

dt_iop_module_t::params_size
int32_t params_size
Definition imageop.h:309

dt_iop_roi_t
Region of interest passed through the pixelpipe.
Definition imageop.h:72

dt_iop_roi_t::width
int width
Definition imageop.h:73

dt_iop_roi_t::height
int height
Definition imageop.h:73

dt_opencl_local_buffer_t
Definition opencl.h:282

dt_opencl_local_buffer_t::xoffset
const int xoffset
Definition opencl.h:283

dt_opencl_local_buffer_t::sizey
int sizey
Definition opencl.h:290

dt_opencl_local_buffer_t::sizex
int sizex
Definition opencl.h:289

c2
#define c2
Definition colorspaces_inline_conversions.h:796

MIN
#define MIN(a, b)
Definition thinplate.c:32

MAX
#define MAX(a, b)
Definition thinplate.c:29

tiling.h