bspline_8h_source.html

#pragma once


#include "common/darktable.h"

#include "common/dwt.h"

#include "develop/openmp_maths.h"

#include "math.h"


// B-spline filter: number of taps in the 1D kernel
#define BSPLINE_FSIZE 5

// Standard deviation of the Gaussian that the B-spline best approximates:
// see https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/
#define B_SPLINE_SIGMA 1.0553651328015339f


// Scaling coefficient applied to a wavelet scale of standard deviation `sigma`
// so that it approximates a laplacian: 2 / sigma^2.
// see https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Scaling-coefficient
static inline float normalize_laplacian(const float sigma)
{
  const float variance = sqf(sigma);
  return 2.f / variance;
}


// Normalization scaling of the wavelet to approximate a laplacian,
// stored as a constant for sigma = B_SPLINE_SIGMA.
// NOTE(review): numerically this value is about 2 * sqrt(pi) / B_SPLINE_SIGMA^2, whereas
// normalize_laplacian(B_SPLINE_SIGMA) = 2 / B_SPLINE_SIGMA^2 evaluates to about 1.7957.
// Confirm which normalization is intended before using the two interchangeably.
#define B_SPLINE_TO_LAPLACIAN 3.182727439285017f

// B_SPLINE_TO_LAPLACIAN squared
#define B_SPLINE_TO_LAPLACIAN_2 10.129753952777762f // square


// Equivalent standard deviation obtained after stacking several gaussian blurs on top of
// each other, where the blur applied at step k has standard deviation 2^k * sigma.
// The first step is s = 0, for which the result is sigma itself.
// see
// https://eng.aurelienpierre.com/2021/03/rotation-invariant-laplacian-for-2d-grids/#Multi-scale-iterative-scheme
static inline float equivalent_sigma_at_step(const float sigma, const unsigned int s)
{
  // Start from step 0 and accumulate the variances scale by scale, in increasing order
  float stacked = sigma;
  unsigned int step = 0;

  while(step < s)
  {
    ++step;
    stacked = sqrtf(sqf(stacked) + sqf(exp2f((float)step) * sigma));
  }

  return stacked;
}


static inline unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)

{

  // The inverse of the above : compute the number of scales needed to reach the desired equivalent sigma_final

  // after sequential blurs of constant sigma_filter

  unsigned int s = 0;

  float radius = sigma_filter;

  while(radius < sigma_final)

  {

    ++s;

    radius = sqrtf(sqf(radius) + sqf((float)(1 << s) * sigma_filter));

  }

  return s + 1;

}


#ifdef _OPENMP
#pragma omp declare simd aligned(buf, indices, result:64)
#endif
// Weighted sum of BSPLINE_FSIZE pixels with the B-spline kernel (1, 4, 6, 4, 1) / 16.
// For every channel c, the samples are read at buf[indices[k] + c], k = 0..4, and the
// filtered value is written to result[c].
// When clip_negatives is set, the result is clamped from below at 0.
static inline void sparse_scalar_product(const dt_aligned_pixel_t buf, const size_t indices[BSPLINE_FSIZE],
                                         dt_aligned_pixel_t result, const gboolean clip_negatives)
{
  static const float weights[BSPLINE_FSIZE]
      = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f };

  if(!clip_negatives)
  {
    // plain convolution: keep the result as is
    for_each_channel(ch, aligned(buf,indices,result))
    {
      result[ch] = weights[0] * buf[indices[0] + ch] +
                   weights[1] * buf[indices[1] + ch] +
                   weights[2] * buf[indices[2] + ch] +
                   weights[3] * buf[indices[3] + ch] +
                   weights[4] * buf[indices[4] + ch];
    }
    return;
  }

  // same convolution, with negative results replaced by zero
  for_each_channel(ch, aligned(buf,indices,result))
  {
    const float blurred = weights[0] * buf[indices[0] + ch] +
                          weights[1] * buf[indices[1] + ch] +
                          weights[2] * buf[indices[2] + ch] +
                          weights[3] * buf[indices[3] + ch] +
                          weights[4] * buf[indices[4] + ch];
    result[ch] = MAX(0.0f, blurred);
  }
}


#ifdef _OPENMP

#pragma omp declare simd aligned(in, temp)

#endif


static inline void _bspline_vertical_pass(const float *const restrict in, float *const restrict temp,

                                          size_t row, size_t width, size_t height, int mult, const gboolean clip_negatives)

{

  size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];

  // compute the index offsets of the pixels of interest; since the offsets are the same for the entire row,

  // we only need to do this once and can then process the entire row

  indices[0] = 4 * width * MAX((int)row - 2 * mult, 0);

  indices[1] = 4 * width * MAX((int)row - mult, 0);

  indices[2] = 4 * width * row;

  indices[3] = 4 * width * MIN(row + mult, height-1);

  indices[4] = 4 * width * MIN(row + 2 * mult, height-1);

  for(size_t j = 0; j < width; j++)

  {

    // Compute the vertical blur of the current pixel and store it in the temp buffer for the row

    sparse_scalar_product(in + j * 4, indices, temp + j * 4, clip_negatives);

  }

}


#ifdef _OPENMP

#pragma omp declare simd aligned(temp, out)

#endif


static inline void _bspline_horizontal(const float *const restrict temp, float *const restrict out,

                                       size_t col, size_t width, int mult, const gboolean clip_negatives)

{

  // Compute the array indices of the pixels of interest; since the offsets will change near the ends of

  // the row, we need to recompute for each pixel

  size_t DT_ALIGNED_ARRAY indices[BSPLINE_FSIZE];

  indices[0] = 4 * MAX((int)col - 2 * mult, 0);

  indices[1] = 4 * MAX((int)col - mult,  0);

  indices[2] = 4 * col;

  indices[3] = 4 * MIN(col + mult, width-1);

  indices[4] = 4 * MIN(col + 2 * mult, width-1);

  // Compute the horizontal blur of the already vertically-blurred pixel and store the result at the proper

  //  row/column location in the output buffer

  sparse_scalar_product(temp, indices, out, clip_negatives);

}


#ifdef _OPENMP
#pragma omp declare simd aligned(in, out:64) aligned(tempbuf:16)
#endif
// À-trous B-spline interpolation/blur shifted by mult.
// `in` and `out` are width x height images of 4-float pixels. Each thread uses the
// 4 * width floats of `tempbuf` starting at offset 4 * width * (its thread number) as a
// one-row scratch buffer. When clip_negatives is set, negative results are clamped to 0.
inline static void blur_2D_Bspline(const float *const restrict in, float *const restrict out,
                                   float *const restrict tempbuf,
                                   const size_t width, const size_t height, const int mult, const gboolean clip_negatives)
{
#ifdef _OPENMP
#pragma omp parallel for default(none) \
    dt_omp_firstprivate(width, height, mult)  \
    dt_omp_sharedconst(out, in, tempbuf, clip_negatives) \
    schedule(static)
#endif
  for(size_t row = 0; row < height; row++)
  {
    // thread-private scratch space holding one row of pixels
    float *const scratch = tempbuf + 4 * width * dt_get_thread_num();

    // rows are visited in interleaved order so that we minimize cache misses
    const size_t y = dwt_interleave_rows(row, height, mult);

    // first pass: blur along the columns, for all the pixels of row y
    _bspline_vertical_pass(in, scratch, y, width, height, mult, clip_negatives);

    // second pass: blur the vertically-blurred row along its length, straight into the output
    float *const out_row = out + y * width * 4;
    for(size_t x = 0; x < width; x++)
    {
      _bspline_horizontal(scratch, out_row + x * 4, x, width, mult, clip_negatives);
    }
  }
}


#ifdef _OPENMP
#pragma omp declare simd aligned(in, HF, LF:64) aligned(tempbuf:16)
#endif
// Blur and compute the wavelet at once.
// `in`, `HF` and `LF` are width x height images of 4-float pixels: LF receives the B-spline
// blur of `in` at scale `mult` (negative values always clipped), HF receives in - LF.
// `tempbuf` and `padded_size` are handed to dt_get_perthread() to obtain the per-thread
// one-row scratch buffer.
inline static void decompose_2D_Bspline(const float *const restrict in,
                                        float *const restrict HF,
                                        float *const restrict LF,
                                        const size_t width, const size_t height, const int mult,
                                        float *const tempbuf, size_t padded_size)
{
#ifdef _OPENMP
#pragma omp parallel for default(none) \
    dt_omp_firstprivate(width, height, mult, padded_size) \
    dt_omp_sharedconst(in, HF, LF, tempbuf)  \
    schedule(static)
#endif
  for(size_t row = 0; row < height; row++)
  {
    // thread-private scratch space holding one row of pixels
    float *restrict DT_ALIGNED_ARRAY const scratch = dt_get_perthread(tempbuf, padded_size);

    // rows are visited in interleaved order so that we minimize cache misses
    const size_t y = dwt_interleave_rows(row, height, mult);

    // first pass: blur along the columns, for all the pixels of row y (always clip negatives)
    _bspline_vertical_pass(in, scratch, y, width, height, mult, TRUE);

    for(size_t x = 0; x < width; x++)
    {
      const size_t px = 4U * (y * width + x);

      // second pass: horizontal blur gives the low-frequency pixel (always clip negatives)
      _bspline_horizontal(scratch, LF + px, x, width, mult, TRUE);

      // the high-frequency pixel is what remains of the input once the low frequencies are removed
      for_four_channels(c)
        HF[px + c] = in[px + c] - LF[px + c];
    }
  }
}


// clang-format off

// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py

// vim: shiftwidth=2 expandtab tabstop=2 cindent

// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;

// clang-format on

TRUE
#define TRUE
Definition ashift_lsd.c:151

width
int width
Definition bilateral.h:1

height
int height
Definition bilateral.h:1

num_steps_to_reach_equivalent_sigma
static unsigned int num_steps_to_reach_equivalent_sigma(const float sigma_filter, const float sigma_final)
Definition bspline.h:40

_bspline_horizontal
static void _bspline_horizontal(const float *const restrict temp, float *const restrict out, size_t col, size_t width, int mult, const gboolean clip_negatives)
Definition bspline.h:117

sparse_scalar_product
static void sparse_scalar_product(const dt_aligned_pixel_t buf, const size_t indices[5], dt_aligned_pixel_t result, const gboolean clip_negatives)
Definition bspline.h:58

equivalent_sigma_at_step
static float equivalent_sigma_at_step(const float sigma, const unsigned int s)
Definition bspline.h:27

blur_2D_Bspline
static void blur_2D_Bspline(const float *const restrict in, float *const restrict out, float *const restrict tempbuf, const size_t width, const size_t height, const int mult, const gboolean clip_negatives)
Definition bspline.h:136

BSPLINE_FSIZE
#define BSPLINE_FSIZE
Definition bspline.h:9

normalize_laplacian
static float normalize_laplacian(const float sigma)
Definition bspline.h:15

decompose_2D_Bspline
static void decompose_2D_Bspline(const float *const restrict in, float *const restrict HF, float *const restrict LF, const size_t width, const size_t height, const int mult, float *const tempbuf, size_t padded_size)
Definition bspline.h:166

_bspline_vertical_pass
static void _bspline_vertical_pass(const float *const restrict in, float *const restrict temp, size_t row, size_t width, size_t height, int mult, const gboolean clip_negatives)
Definition bspline.h:96

darktable.h

DT_ALIGNED_ARRAY
#define DT_ALIGNED_ARRAY
Definition darktable.h:270

for_each_channel
#define for_each_channel(_var,...)
Definition darktable.h:411

dt_get_thread_num
static int dt_get_thread_num()
Definition darktable.h:227

dt_get_perthread
#define dt_get_perthread(buf, padsize)
Definition darktable.h:797

for_four_channels
#define for_four_channels(_var,...)
Definition darktable.h:413

dwt.h

dwt_interleave_rows
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:89

math.h

sqf
static float sqf(const float x)
Definition math.h:215

openmp_maths.h

MIN
#define MIN(a, b)
Definition thinplate.c:23

MAX
#define MAX(a, b)
Definition thinplate.c:20