eaw_8c_source.html

/*

    This file is part of darktable,

    Copyright (C) 2020-2021 Hubert Kowalski.

    Copyright (C) 2020-2021 Ralf Brown.

    Copyright (C) 2021 parafin.

    Copyright (C) 2021 Pascal Obry.

    Copyright (C) 2022 Martin Bařinka.


    darktable is free software: you can redistribute it and/or modify

    it under the terms of the GNU General Public License as published by

    the Free Software Foundation, either version 3 of the License, or

    (at your option) any later version.


    darktable is distributed in the hope that it will be useful,

    but WITHOUT ANY WARRANTY; without even the implied warranty of

    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    GNU General Public License for more details.


    You should have received a copy of the GNU General Public License

    along with darktable.  If not, see <http://www.gnu.org/licenses/>.

*/


#include "common/eaw.h"

#include "common/darktable.h"

#include "common/math.h"

#include "control/control.h"     // needed by dwt.h

#include "common/dwt.h"          // for dwt_interleave_rows

#include <math.h>


/* Compute the per-channel similarity weight between two pixels.
 *
 * c1, c2:   the two pixels to compare (indexed per channel via for_each_channel).
 * sharpen:  scale factor applied to the squared differences before exponentiation.
 * weight:   output; [0] is derived from channel 0 alone, [1] and [2] share one
 *           value derived from the summed squared differences of channels 1 and 2,
 *           and [3] is always 1.
 *
 * The weights are dt_fast_expf(-sharpen * d2), so they shrink as the pixels differ
 * more.  NOTE(review): channel 0 is presumably lightness and channels 1/2 chroma
 * (the original locals were named wl/wc) -- confirm against the callers.
 */
static inline void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
{
  dt_aligned_pixel_t sq_diff;
  for_each_channel(c)
  {
    const float delta = c1[c] - c2[c];
    sq_diff[c] = delta * delta;
  }

  const float w_first = dt_fast_expf(-sharpen * sq_diff[0]);
  const float w_pair = dt_fast_expf(-sharpen * (sq_diff[1] + sq_diff[2]));

  weight[0] = w_first;
  weight[1] = w_pair;
  weight[2] = w_pair;
  weight[3] = 1.0f;
}


/* Accumulate the contribution of neighbour pixel px2 to the running totals of the
 * centre pixel px (edge-aware variant).
 *
 * Expects these names in the enclosing scope: filter[5], px, px2, sharpen, sum, wgt.
 * The spatial filter coefficient filter[ii]*filter[jj] is multiplied by the
 * per-channel similarity weight from weight(); the product is added to wgt[] and
 * the correspondingly weighted neighbour value is added to sum[].
 */
#define SUM_PIXEL_CONTRIBUTION(ii, jj)                                                                       \
  do                                                                                                         \
  {                                                                                                          \
    const float f = filter[(ii)] * filter[(jj)];                                                             \
    dt_aligned_pixel_t wp;                                                                                   \
    weight(px, px2, sharpen, wp);                                                                            \
    for_four_channels(c,aligned(px2))                                                                        \
    {                                                                                                        \
      const float w = f * wp[c];                                                                             \
      wgt[c] += w;                                                                                           \
      sum[c] += w * px2[c];                                                                                  \
    }                                                                                                        \
  } while(0)


/* Declare and zero the per-pixel accumulators used by SUM_PIXEL_CONTRIBUTION and
 * SUM_PIXEL_EPILOGUE: 'sum' collects weighted neighbour values, 'wgt' the weights. */
#define SUM_PIXEL_PROLOGUE                                                                                   \
  dt_aligned_pixel_t sum = { 0.0f };                                                                         \
  dt_aligned_pixel_t wgt = { 0.0f };


/* Finish one output pixel: normalise the accumulated sum by the accumulated weight,
 * store it as the coarse value, store (input - coarse) as the detail value, and
 * advance the px / pdetail / pcoarse cursors to the next pixel (4 floats each).
 *
 * Expects sum, wgt, px, pdetail and pcoarse in the enclosing scope. */
#define SUM_PIXEL_EPILOGUE                                                                                   \
  for_each_channel(c)                                                                                        \
  {                                                                                                          \
    const float coarse = sum[c] / wgt[c];                                                                    \
    pcoarse[c] = coarse;                                                                                     \
    pdetail[c] = px[c] - coarse;                                                                             \
  }                                                                                                          \
  px += 4;                                                                                                   \
  pdetail += 4;                                                                                              \
  pcoarse += 4;


/* Split one wavelet scale of a 4-floats-per-pixel image into a coarse and a detail layer.
 *
 * For every pixel a 5x5 neighbourhood, sampled with a spacing of (1 << scale)
 * pixels, is averaged using the separable kernel [1 4 6 4 1]/16 multiplied by the
 * per-channel similarity weights returned by weight().  Neighbours falling outside
 * the image are replaced by the nearest pixel inside it.
 *
 * out:      receives the weighted average (coarse layer), width*height*4 floats.
 * in:       source image, width*height*4 floats.
 * detail:   receives in - out for every channel written by SUM_PIXEL_EPILOGUE.
 * scale:    wavelet scale; the sampling distance is 1 << scale.
 * sharpen:  forwarded to weight().
 * width, height: image dimensions in pixels.
 *
 * Rows are visited in the order given by dwt_interleave_rows().
 *
 * The coordinates in the slow (bounds-checked) loops are clamped on BOTH sides and
 * the left-edge loop never runs past the row.  This matters for images narrower
 * than 4 * (1 << scale) pixels, where a one-sided clamp would read (and for
 * width < 2 * (1 << scale) also write) outside the buffers.  For wider images the
 * result is unchanged.
 */
void eaw_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail,
                   const int scale, const float sharpen, const int32_t width, const int32_t height)
{
  const int mult = 1 << scale;
  static const float filter[5] = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f };
  const int boundary = 2 * mult;

  __OMP_PARALLEL_FOR__()
  for(int rowid = 0; rowid < height; rowid++)
  {
    const size_t j = dwt_interleave_rows(rowid, height, mult);
    const int row = (int)j; // signed copy so that offsets above the image stay negative
    const float *px = in + (size_t)4 * j * width;
    const float *px2;
    float *pdetail = detail + (size_t)4 * j * width;
    float *pcoarse = out + (size_t)4 * j * width;

    // for the first and last 'boundary' rows, we have to perform boundary tests for the entire row;
    //   for the central bulk, we only need to use those slower versions on the leftmost and rightmost pixels
    const int edge_row = (row < boundary) || (row >= height - boundary);
    // never let the left-edge loop run beyond the end of a row narrower than 'boundary'
    const int lbound = edge_row ? width - boundary : (boundary < width ? boundary : width);

    /* The first "2*mult" pixels need a boundary check because we might try to access past the left edge,
     * which requires nearest pixel interpolation */
    int i;
    for(i = 0; i < lbound; i++)
    {
      SUM_PIXEL_PROLOGUE;
      for(int jj = 0; jj < 5; jj++)
      {
        const int y = row + mult * (jj - 2);
        const int clamp_y = CLAMP(y, 0, height - 1);
        for(int ii = 0; ii < 5; ii++)
        {
          const int x = i + mult * (ii - 2);
          // on narrow images the right edge can be crossed here as well, so clamp both sides
          const int clamp_x = CLAMP(x, 0, width - 1);
          px2 = in + (size_t)4 * ((size_t)clamp_y * width + clamp_x);
          SUM_PIXEL_CONTRIBUTION(ii, jj);
        }
      }
      SUM_PIXEL_EPILOGUE;
    }

    /* For pixels [2*mult, width-2*mult), we don't need to do any boundary checks.
     * This loop only runs on non-edge rows, where j >= boundary. */
    for( ; i < width - boundary; i++)
    {
      SUM_PIXEL_PROLOGUE;
      px2 = in + (size_t)4 * ((size_t)(i - boundary) + (j - boundary) * width);
      for(int jj = 0; jj < 5; jj++)
      {
        for(int ii = 0; ii < 5; ii++)
        {
          SUM_PIXEL_CONTRIBUTION(ii, jj);
          px2 += (size_t)4 * mult;
        }
        // we stepped 5*mult pixels to the right; move to the same start column 'mult' rows further down
        px2 += (size_t)4 * (width - 5) * mult;
      }
      SUM_PIXEL_EPILOGUE;
    }

    /* Last 2*mult pixels in the row require the boundary check again */
    for( ; i < width; i++)
    {
      SUM_PIXEL_PROLOGUE;
      for(int jj = 0; jj < 5; jj++)
      {
        const int y = row + mult * (jj - 2);
        const int clamp_y = CLAMP(y, 0, height - 1);
        for(int ii = 0; ii < 5; ii++)
        {
          const int x = i + mult * (ii - 2);
          // on narrow images the left edge can be crossed here as well, so clamp both sides
          const int clamp_x = CLAMP(x, 0, width - 1);
          px2 = in + (size_t)4 * ((size_t)clamp_y * width + clamp_x);
          SUM_PIXEL_CONTRIBUTION(ii, jj);
        }
      }
      SUM_PIXEL_EPILOGUE;
    }
  }
}


/* Recombine a coarse layer with a thresholded and boosted detail layer.
 *
 * For every pixel and each of its 4 channels:
 *   out = in + boost[c] * shrink(detail, threshold[c])
 * where shrink() moves the detail value towards zero by threshold[c] and yields
 * zero when |detail| does not exceed the threshold.
 *
 * out, in:           result and coarse input, width*height*4 floats (not declared
 *                    restrict, so the signature permits them to alias).
 * detail:            detail coefficients, width*height*4 floats.
 * threshold, boost:  per-channel parameters, read at indices 0..3.
 * width, height:     image dimensions in pixels.
 */
void eaw_synthesize(float *const out, const float *const in, const float *const restrict detail,
                    const float *const restrict threshold, const float *const restrict boost,
                    const int32_t width, const int32_t height)
{
  const size_t npixels = (size_t)width * height;

  __OMP_PARALLEL_FOR__()
  for(size_t k = 0; k < npixels; k++)
  {
    __OMP_SIMD__(simdlen(4) aligned(detail, in, out, threshold, boost))
    for(size_t c = 0; c < 4; c++)
    {
      const size_t idx = 4 * k + c;
      const float d = detail[idx];
      // Soft-threshold the detail: this is copysignf(fmaxf(0, |d| - threshold), d), but copysignf
      // does not vectorize, whereas adding up the two clamped alternatives gives exactly the same
      // result and DOES vectorize.
      const float positive_part = MAX(d - threshold[c], 0.0f);
      const float negative_part = MIN(d + threshold[c], 0.0f);
      const float amount = positive_part + negative_part;
      out[idx] = in[idx] + (boost[c] * amount);
    }
  }
}


// =====================================================================================

// begin wavelet code from denoiseprofile.c

// =====================================================================================


static inline float dn_weight(const float *c1, const float *c2, const float inv_sigma2)

{

  // 3d distance based on color

  dt_aligned_pixel_t sqr;

  for_each_channel(c)

  {

    const float diff = c1[c] - c2[c];

    sqr[c] = diff * diff;

  }

  const float dot = (sqr[0] + sqr[1] + sqr[2]) * inv_sigma2;

  const float var

      = 0.02f; // FIXME: this should ideally depend on the image before noise stabilizing transforms!

  const float off2 = 9.0f; // (3 sigma)^2

  return fast_mexp2f(MAX(0, dot * var - off2));

}


/* Wrapper that lets a 4-float pixel be passed and returned by value; it is the
 * operand type of the 'vsum' OpenMP reduction and of the sum_sq accumulator. */
typedef struct _aligned_pixel
{
  dt_aligned_pixel_t v; // the four channel values
} _aligned_pixel;


#ifdef _OPENMP
/* Channel-wise sum of two wrapped pixels; combiner for the 'vsum' reduction. */
static inline _aligned_pixel add_float4(_aligned_pixel total, _aligned_pixel addend)
{
  for_four_channels(c)
    total.v[c] += addend.v[c];
  return total;
}

/* User-defined OpenMP reduction 'vsum' over _aligned_pixel: each private copy starts
 * at zero and partial results are merged with add_float4(). */
#pragma omp declare reduction(vsum:_aligned_pixel:omp_out=add_float4(omp_out,omp_in)) initializer(omp_priv = { .v = { 0.0f, 0.0f, 0.0f, 0.0f } })
#endif


#undef SUM_PIXEL_CONTRIBUTION
/* Accumulate the contribution of neighbour pixel px2 to the running totals of the
 * centre pixel px (denoise variant).
 *
 * Expects these names in the enclosing scope: filter[5], px, px2, inv_sigma2, sum, wgt.
 * A single scalar weight -- filter[ii]*filter[jj] times dn_weight() -- is applied
 * to every channel visited by for_each_channel.
 */
#define SUM_PIXEL_CONTRIBUTION(ii, jj)                                                                       \
  do                                                                                                         \
  {                                                                                                          \
    const float f = filter[(ii)] * filter[(jj)];                                                             \
    const float wp = dn_weight(px, px2, inv_sigma2);                                                         \
    const float w = f * wp;                                                                                  \
    for_each_channel(c,aligned(px2))                                                                         \
    {                                                                                                        \
      wgt[c] += w;                                                                                           \
      sum[c] += w * px2[c];                                                                                  \
    }                                                                                                        \
  } while(0)


#undef SUM_PIXEL_EPILOGUE
/* Finish one output pixel (denoise variant): normalise the accumulated sum by the
 * accumulated weight, store it as the coarse value, store (input - coarse) as the
 * detail value, add the squared detail to sum_sq, and advance the px / pdetail /
 * pcoarse cursors to the next pixel (4 floats each).
 *
 * Expects sum, wgt, px, pdetail, pcoarse and sum_sq in the enclosing scope. */
#define SUM_PIXEL_EPILOGUE                                                                                   \
  for_each_channel(c)                                                                                        \
  {                                                                                                          \
    const float coarse = sum[c] / wgt[c];                                                                    \
    const float det = px[c] - coarse;                                                                        \
    pcoarse[c] = coarse;                                                                                     \
    pdetail[c] = det;                                                                                        \
    sum_sq.v[c] += (det * det);                                                                              \
  }                                                                                                          \
  px += 4;                                                                                                   \
  pdetail += 4;                                                                                              \
  pcoarse += 4;


/* Split one wavelet scale of a 4-floats-per-pixel image into a coarse and a detail
 * layer (denoise variant) and report the summed squared detail per channel.
 *
 * For every pixel a 5x5 neighbourhood, sampled with a spacing of (1 << scale)
 * pixels, is averaged using the separable kernel [1 4 6 4 1]/16 multiplied by the
 * scalar similarity weight returned by dn_weight().  Neighbours falling outside the
 * image are replaced by the nearest pixel inside it.
 *
 * out:          receives the weighted average (coarse layer), width*height*4 floats.
 * in:           source image, width*height*4 floats.
 * detail:       receives in - out for every channel written by SUM_PIXEL_EPILOGUE.
 * sum_squared:  output; per-channel sum over all pixels of detail*detail, written
 *               for the channels visited by for_each_channel.
 * scale:        wavelet scale; the sampling distance is 1 << scale.
 * inv_sigma2:   forwarded to dn_weight().
 * width, height: image dimensions in pixels.
 *
 * Rows are visited in the order given by dwt_interleave_rows().
 *
 * The coordinates in the slow (bounds-checked) loops are clamped on BOTH sides and
 * the left-edge loop never runs past the row.  This matters for images narrower
 * than 4 * (1 << scale) pixels, where a one-sided clamp would read (and for
 * width < 2 * (1 << scale) also write) outside the buffers.  For wider images the
 * result is unchanged.
 */
void eaw_dn_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail,
                      dt_aligned_pixel_t sum_squared, const int scale, const float inv_sigma2,
                      const int32_t width, const int32_t height)
{
  const int mult = 1u << scale;
  static const float filter[5] = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f };
  const int boundary = 2 * mult;

  // accumulated by SUM_PIXEL_EPILOGUE; merged across threads by the 'vsum' reduction
  _aligned_pixel sum_sq = { .v = { 0.0f } };

#if !(defined(__apple_build_version__) && __apple_build_version__ < 11030000) //makes Xcode 11.3.1 compiler crash
__OMP_PARALLEL_FOR__(reduction(vsum: sum_sq) )
#endif
  for(int rowid = 0; rowid < height; rowid++)
  {
    const size_t j = dwt_interleave_rows(rowid, height, mult);
    const int row = (int)j; // signed copy so that offsets above the image stay negative
    const float *px = in + (size_t)4 * j * width;
    const float *px2;
    float *pdetail = detail + (size_t)4 * j * width;
    float *pcoarse = out + (size_t)4 * j * width;

    // for the first and last 'boundary' rows, we have to perform boundary tests for the entire row;
    //   for the central bulk, we only need to use those slower versions on the leftmost and rightmost pixels
    const int edge_row = (row < boundary) || (row >= height - boundary);
    // never let the left-edge loop run beyond the end of a row narrower than 'boundary'
    const int lbound = edge_row ? width - boundary : (boundary < width ? boundary : width);

    /* The first "2*mult" pixels need a boundary check because we might try to access past the left edge,
     * which requires nearest pixel interpolation */
    int i;
    for(i = 0; i < lbound; i++)
    {
      SUM_PIXEL_PROLOGUE;
      for(int jj = 0; jj < 5; jj++)
      {
        const int y = row + mult * (jj - 2);
        const int clamp_y = CLAMP(y, 0, height - 1);
        for(int ii = 0; ii < 5; ii++)
        {
          const int x = i + mult * (ii - 2);
          // on narrow images the right edge can be crossed here as well, so clamp both sides
          const int clamp_x = CLAMP(x, 0, width - 1);
          px2 = in + (size_t)4 * ((size_t)clamp_y * width + clamp_x);
          SUM_PIXEL_CONTRIBUTION(ii, jj);
        }
      }
      SUM_PIXEL_EPILOGUE;
    }

    /* For pixels [2*mult, width-2*mult), we don't need to do any boundary checks.
     * This loop only runs on non-edge rows, where j >= boundary. */
    for( ; i < width - boundary; i++)
    {
      SUM_PIXEL_PROLOGUE;
      px2 = in + (size_t)4 * ((size_t)(i - boundary) + (j - boundary) * width);
      for(int jj = 0; jj < 5; jj++)
      {
        for(int ii = 0; ii < 5; ii++)
        {
          SUM_PIXEL_CONTRIBUTION(ii, jj);
          px2 += (size_t)4 * mult;
        }
        // we stepped 5*mult pixels to the right; move to the same start column 'mult' rows further down
        px2 += (size_t)4 * (width - 5) * mult;
      }
      SUM_PIXEL_EPILOGUE;
    }

    /* Last 2*mult pixels in the row require the boundary check again */
    for( ; i < width; i++)
    {
      SUM_PIXEL_PROLOGUE;
      for(int jj = 0; jj < 5; jj++)
      {
        const int y = row + mult * (jj - 2);
        const int clamp_y = CLAMP(y, 0, height - 1);
        for(int ii = 0; ii < 5; ii++)
        {
          const int x = i + mult * (ii - 2);
          // on narrow images the left edge can be crossed here as well, so clamp both sides
          const int clamp_x = CLAMP(x, 0, width - 1);
          px2 = in + (size_t)4 * ((size_t)clamp_y * width + clamp_x);
          SUM_PIXEL_CONTRIBUTION(ii, jj);
        }
      }
      SUM_PIXEL_EPILOGUE;
    }
  }

  // hand the reduced per-channel totals back to the caller
  for_each_channel(c)
    sum_squared[c] = sum_sq.v[c];
}


// Drop the pixel-accumulation helper macros so they are no longer defined past this point.
#undef SUM_PIXEL_CONTRIBUTION
#undef SUM_PIXEL_PROLOGUE
#undef SUM_PIXEL_EPILOGUE


// clang-format off

// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py

// vim: shiftwidth=2 expandtab tabstop=2 cindent

// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;

// clang-format on


width
int width
Definition bilateral.h:1

height
int height
Definition bilateral.h:1

i
const float i
Definition colorspaces_inline_conversions.h:440

threshold
const float threshold
Definition colorspaces_inline_conversions.h:176

out
const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:42

control.h

darktable.h

__OMP_SIMD__
#define __OMP_SIMD__(...)
Definition darktable.h:262

for_each_channel
#define for_each_channel(_var,...)
Definition darktable.h:662

for_four_channels
#define for_four_channels(_var,...)
Definition darktable.h:664

__OMP_PARALLEL_FOR__
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258

dwt.h

dwt_interleave_rows
static int dwt_interleave_rows(const int rowid, const int height, const int stride)
Definition dwt.h:93

SUM_PIXEL_CONTRIBUTION
#define SUM_PIXEL_CONTRIBUTION(ii, jj)
Definition eaw.c:46

SUM_PIXEL_PROLOGUE
#define SUM_PIXEL_PROLOGUE
Definition eaw.c:63

eaw_decompose
void eaw_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail, const int scale, const float sharpen, const int32_t width, const int32_t height)
Definition eaw.c:80

SUM_PIXEL_EPILOGUE
#define SUM_PIXEL_EPILOGUE
Definition eaw.c:67

weight
static void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
Definition eaw.c:30

_aligned_pixel
struct _aligned_pixel _aligned_pixel

dn_weight
static float dn_weight(const float *c1, const float *c2, const float inv_sigma2)
Definition eaw.c:182

eaw_dn_decompose
void eaw_dn_decompose(float *const restrict out, const float *const restrict in, float *const restrict detail, dt_aligned_pixel_t sum_squared, const int scale, const float inv_sigma2, const int32_t width, const int32_t height)
Definition eaw.c:243

eaw_synthesize
void eaw_synthesize(float *const out, const float *const in, const float *const restrict detail, const float *const restrict threshold, const float *const restrict boost, const int32_t width, const int32_t height)
Definition eaw.c:158

eaw.h

x
static const float x
Definition iop_profile.h:235

k
float *const restrict const size_t k
Definition luminance_mask.h:78

math.h

fast_mexp2f
static float fast_mexp2f(const float x)
Definition math.h:304

derive_filmic_v6_gamut_mapping.c
c
Definition derive_filmic_v6_gamut_mapping.py:35

dt_aligned_pixel_t
float dt_aligned_pixel_t[4]
Definition noiseprofile.c:28

_aligned_pixel
Definition eaw.c:198

_aligned_pixel::v
dt_aligned_pixel_t v
Definition eaw.c:200

c2
#define c2
Definition colorspaces_inline_conversions.h:796

c1
#define c1
Definition colorspaces_inline_conversions.h:795

MIN
#define MIN(a, b)
Definition thinplate.c:32

MAX
#define MAX(a, b)
Definition thinplate.c:29