Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
locallaplacian.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2016-2017 johannes hanika.
4 Copyright (C) 2016 Maximilian Trescher.
5 Copyright (C) 2017, 2019 luzpaz.
6 Copyright (C) 2017 Peter Budai.
7 Copyright (C) 2017 Ulrich Pegelow.
8 Copyright (C) 2019 Andreas Schneider.
9 Copyright (C) 2019-2020, 2025-2026 Aurélien PIERRE.
10 Copyright (C) 2019 Roman Lebedev.
11 Copyright (C) 2020 Hubert Kowalski.
12 Copyright (C) 2020-2021 Pascal Obry.
13 Copyright (C) 2020-2021 Ralf Brown.
14 Copyright (C) 2021 Chris Elston.
15 Copyright (C) 2021 Hanno Schwalm.
16 Copyright (C) 2022 Martin Bařinka.
17
18 darktable is free software: you can redistribute it and/or modify
19 it under the terms of the GNU General Public License as published by
20 the Free Software Foundation, either version 3 of the License, or
21 (at your option) any later version.
22
23 darktable is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 GNU General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with darktable. If not, see <http://www.gnu.org/licenses/>.
30*/
31
32#include "common/darktable.h"
34#include "common/math.h"
35
36#include <string.h>
37#include <stdint.h>
38#include <stdlib.h>
39#include <assert.h>
40#include <stdio.h>
41
42// the maximum number of levels for the gaussian pyramid
43#define max_levels 30
44// the number of segments for the piecewise linear interpolation
45#define num_gamma 6
46
47//#define DEBUG_DUMP
48
49// downsample width/height to given level
50static inline int dl(int size, const int level)
51{
52 for(int l=0;l<level;l++)
53 size = (size-1)/2+1;
54 return size;
55}
56
57#ifdef DEBUG_DUMP
58static void dump_PFM(const char *filename, const float* out, const uint32_t w, const uint32_t h)
59{
60 FILE *f = g_fopen(filename, "wb");
61 fprintf(f, "PF\n%d %d\n-1.0\n", w, h);
62 for(int j=0;j<h;j++)
63 for(int i=0;i<w;i++)
64 for(int c=0;c<3;c++)
65 fwrite(out + w*j+i, 1, sizeof(float), f);
66 fclose(f);
67}
68#define debug_dump_PFM dump_PFM
69#else
70#define debug_dump_PFM(f,b,w,h)
71#endif
72
73// needs a boundary of 1 or 2px around i,j or else it will crash.
74// (translates to a 1px boundary around the corresponding pixel in the coarse buffer)
75// more precisely, 1<=i<wd-1 for even wd and
76// 1<=i<wd-2 for odd wd (j likewise with ht)
77static inline float ll_expand_gaussian(
78 const float *const coarse,
79 const int i,
80 const int j,
81 const int wd,
82 const int ht)
83{
84 assert(i > 0);
85 assert(i < wd-1);
86 assert(j > 0);
87 assert(j < ht-1);
88 assert(j/2 + 1 < (ht-1)/2+1);
89 assert(i/2 + 1 < (wd-1)/2+1);
90 const int cw = (wd-1)/2+1;
91 const int ind = (j/2)*cw+i/2;
92 // case 0: case 1: case 2: case 3:
93 // x . x . x x . x . x x . x . x x . x . x
94 // . . . . . . . . . . . .[.]. . .[.]. . .
95 // x .[x]. x x[.]x . x x . x . x x . x . x
96 // . . . . . . . . . . . . . . . . . . . .
97 // x . x . x x . x . x x . x . x x . x . x
98 switch((i&1) + 2*(j&1))
99 {
100 case 0: // both are even, 3x3 stencil
101 return 4./256. * (
102 6.0f*(coarse[ind-cw] + coarse[ind-1] + 6.0f*coarse[ind] + coarse[ind+1] + coarse[ind+cw])
103 + coarse[ind-cw-1] + coarse[ind-cw+1] + coarse[ind+cw-1] + coarse[ind+cw+1]);
104 case 1: // i is odd, 2x3 stencil
105 return 4./256. * (
106 24.0*(coarse[ind] + coarse[ind+1]) +
107 4.0*(coarse[ind-cw] + coarse[ind-cw+1] + coarse[ind+cw] + coarse[ind+cw+1]));
108 case 2: // j is odd, 3x2 stencil
109 return 4./256. * (
110 24.0*(coarse[ind] + coarse[ind+cw]) +
111 4.0*(coarse[ind-1] + coarse[ind+1] + coarse[ind+cw-1] + coarse[ind+cw+1]));
112 default: // case 3: // both are odd, 2x2 stencil
113 return .25f * (coarse[ind] + coarse[ind+1] + coarse[ind+cw] + coarse[ind+cw+1]);
114 }
115}
116
117// helper to fill in one pixel boundary by copying it
118static inline void ll_fill_boundary1(
119 float *const input,
120 const int wd,
121 const int ht)
122{
123 for(int j=1;j<ht-1;j++) input[j*wd] = input[j*wd+1];
124 for(int j=1;j<ht-1;j++) input[j*wd+wd-1] = input[j*wd+wd-2];
125 memcpy(input, input+wd, sizeof(float)*wd);
126 memcpy(input+wd*(ht-1), input+wd*(ht-2), sizeof(float)*wd);
127}
128
129// helper to fill in two pixels boundary by copying it
130static inline void ll_fill_boundary2(
131 float *const input,
132 const int wd,
133 const int ht)
134{
135 for(int j=1;j<ht-1;j++) input[j*wd] = input[j*wd+1];
136 if(wd & 1) for(int j=1;j<ht-1;j++) input[j*wd+wd-1] = input[j*wd+wd-2];
137 else for(int j=1;j<ht-1;j++) input[j*wd+wd-1] = input[j*wd+wd-2] = input[j*wd+wd-3];
138 memcpy(input, input+wd, sizeof(float)*wd);
139 if(!(ht & 1)) memcpy(input+wd*(ht-2), input+wd*(ht-3), sizeof(float)*wd);
140 memcpy(input+wd*(ht-1), input+wd*(ht-2), sizeof(float)*wd);
141}
142
144 float *buf, // the buffer to be padded
145 const uint32_t w, // width of a line
146 const uint32_t h, // total height, including top and bottom padding
147 const uint32_t padding) // number of lines of padding on each side
148{
150 for(int j=0;j<padding;j++)
151 {
152 memcpy(buf + w*j, buf+padding*w, sizeof(float)*w);
153 memcpy(buf + w*(h-padding+j), buf+w*(h-padding-1), sizeof(float)*w);
154 }
155}
156
157static inline void gauss_expand(
158 const float *const input, // coarse input
159 float *const fine, // upsampled, blurry output
160 const int wd, // fine res
161 const int ht)
162{
163 __OMP_PARALLEL_FOR__(collapse(2))
164 for(int j=1;j<((ht-1)&~1);j++) // even ht: two px boundary. odd ht: one px.
165 for(int i=1;i<((wd-1)&~1);i++)
166 fine[j*wd+i] = ll_expand_gaussian(input, i, j, wd, ht);
167 ll_fill_boundary2(fine, wd, ht);
168}
169
170static inline void gauss_reduce(
171 const float *const input, // fine input buffer
172 float *const coarse, // coarse scale, blurred input buf
173 const int wd, // fine res
174 const int ht)
175{
176 // blur, store only coarse res
177 const int cw = (wd-1)/2+1, ch = (ht-1)/2+1;
178
179 // this is the scalar (non-simd) code:
180 const float w[5] = { 1.f/16.f, 4.f/16.f, 6.f/16.f, 4.f/16.f, 1.f/16.f };
181 memset(coarse, 0, sizeof(float)*cw*ch);
182 // direct 5x5 stencil only on required pixels:
183#ifdef _OPENMP
184 // DON'T parallelize the very smallest levels of the pyramid, as the threading overhead
185 // is greater than the time needed to do it sequentially
186#pragma omp parallel for default(firstprivate) if (ch*cw>500) \
187 collapse(2)
188#endif
189 for(int j=1;j<ch-1;j++)
190 for(int i=1;i<cw-1;i++)
191 {
192 for(int jj=-2;jj<=2;jj++)
193 for(int ii=-2;ii<=2;ii++)
194 coarse[j*cw+i] += input[(2*j+jj)*wd+2*i+ii] * w[ii+2] * w[jj+2];
195 }
196 ll_fill_boundary1(coarse, cw, ch);
197}
198
199// allocate output buffer with monochrome brightness channel from input, padded
200// up by max_supp on all four sides, dimensions written to wd2 ht2
201static inline float *ll_pad_input(
202 const float *const input,
203 const int wd,
204 const int ht,
205 const int max_supp,
206 int *wd2,
207 int *ht2,
209{
210 const int stride = 4;
211 *wd2 = 2*max_supp + wd;
212 *ht2 = 2*max_supp + ht;
213 float *const out = dt_pixelpipe_cache_alloc_align_float_cache((size_t) *wd2 * *ht2, 0);
214 if(IS_NULL_PTR(out)) return NULL;
215
216 if(b && b->mode == 2)
217 { // pad by preview buffer
218 __OMP_PARALLEL_FOR__(collapse(2)) // fill regular pixels:
219 for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
220 out[(j+max_supp)**wd2+i+max_supp] = input[stride*(wd*j+i)] * 0.01f; // L -> [0,1]
221
222 // for all out of roi pixels on the boundary we wish to pad:
223 // compute coordinate in full image.
224 // if not out of buf:
225 // compute padded preview pixel coordinate (clamp to padded preview buffer size)
226 // else
227 // pad as usual (hi-res sample and hold)
228#define LL_FILL(fallback) do {\
229 float isx = ((i - max_supp) + b->roi->x)/b->roi->scale;\
230 float isy = ((j - max_supp) + b->roi->y)/b->roi->scale;\
231 if(isx < 0 || isy >= b->buf->width\
232 || isy < 0 || isy >= b->buf->height)\
233 out[*wd2*j+i] = (fallback);\
234 else\
235 {\
236 int px = CLAMP(isx / (float)b->buf->width * b->wd + (b->pwd-b->wd)/2, 0, b->pwd-1);\
237 int py = CLAMP(isy / (float)b->buf->height * b->ht + (b->pht-b->ht)/2, 0, b->pht-1);\
238 /* TODO: linear interpolation?*/\
239 out[*wd2*j+i] = b->pad0[b->pwd*py+px];\
240 } } while(0)
241 __OMP_PARALLEL_FOR__(collapse(2)) // left border
242 for(int j=max_supp;j<*ht2-max_supp;j++) for(int i=0;i<max_supp;i++)
243 LL_FILL(input[stride*wd*(j-max_supp)]* 0.01f);
244 __OMP_PARALLEL_FOR__(collapse(2)) // right border
245 for(int j=max_supp;j<*ht2-max_supp;j++) for(int i=wd+max_supp;i<*wd2;i++)
246 LL_FILL(input[stride*((j-max_supp)*wd+wd-1)] * 0.01f);
247 __OMP_PARALLEL_FOR__(collapse(2)) // top border
248 for(int j=0;j<max_supp;j++) for(int i=0;i<*wd2;i++)
249 LL_FILL(out[*wd2*max_supp+i]);
250 __OMP_PARALLEL_FOR__(collapse(2)) // bottom border
251 for(int j=max_supp+ht;j<*ht2;j++) for(int i=0;i<*wd2;i++)
252 LL_FILL(out[*wd2*(max_supp+ht-1)+i]);
253#undef LL_FILL
254 }
255 else
256 { // pad by replication:
258 for(int j=0;j<ht;j++)
259 {
260 for(int i=0;i<max_supp;i++)
261 out[(j+max_supp)**wd2+i] = input[stride*wd*j]* 0.01f; // L -> [0,1]
262 for(int i=0;i<wd;i++)
263 out[(j+max_supp)**wd2+i+max_supp] = input[stride*(wd*j+i)] * 0.01f; // L -> [0,1]
264 for(int i=wd+max_supp;i<*wd2;i++)
265 out[(j+max_supp)**wd2+i] = input[stride*(j*wd+wd-1)] * 0.01f; // L -> [0,1]
266 }
267 pad_by_replication(out, *wd2, *ht2, max_supp);
268 }
269#ifdef DEBUG_DUMP
270 if(b && b->mode == 2)
271 {
272 dump_PFM("/tmp/padded.pfm",out,*wd2,*ht2);
273 }
274#endif
275 return out;
276}
277
278
279static inline float ll_laplacian(
280 const float *const coarse, // coarse res gaussian
281 const float *const fine, // fine res gaussian
282 const int i, // fine index
283 const int j,
284 const int wd, // fine width
285 const int ht) // fine height
286{
287 const float c = ll_expand_gaussian(coarse,
288 CLAMPS(i, 1, ((wd-1)&~1)-1), CLAMPS(j, 1, ((ht-1)&~1)-1), wd, ht);
289 return fine[j*wd+i] - c;
290}
291
292static inline float curve_scalar(
293 const float x,
294 const float g,
295 const float sigma,
296 const float shadows,
297 const float highlights,
298 const float clarity)
299{
300 const float c = x-g;
301 float val;
302 // blend in via quadratic bezier
303 if (c > 2*sigma) val = g + sigma + shadows * (c-sigma);
304 else if(c < -2*sigma) val = g - sigma + highlights * (c+sigma);
305 else if(c > 0.0f)
306 { // shadow contrast
307 const float t = CLAMPS(c / (2.0f*sigma), 0.0f, 1.0f);
308 const float t2 = t * t;
309 const float mt = 1.0f-t;
310 val = g + sigma * 2.0f*mt*t + t2*(sigma + sigma*shadows);
311 }
312 else
313 { // highlight contrast
314 const float t = CLAMPS(-c / (2.0f*sigma), 0.0f, 1.0f);
315 const float t2 = t * t;
316 const float mt = 1.0f-t;
317 val = g - sigma * 2.0f*mt*t + t2*(- sigma - sigma*highlights);
318 }
319 // midtone local contrast
320 val += clarity * c * expf(-c*c/(2.0*sigma*sigma/3.0f));
321 return val;
322}
323
324// scalar version
326 float *const out,
327 const float *const in,
328 const uint32_t w,
329 const uint32_t h,
330 const uint32_t padding,
331 const float g,
332 const float sigma,
333 const float shadows,
334 const float highlights,
335 const float clarity)
336{
338 for(uint32_t j=padding;j<h-padding;j++)
339 {
340 const float *in2 = in + j*w + padding;
341 float *out2 = out + j*w + padding;
342 for(uint32_t i=padding;i<w-padding;i++)
343 (*out2++) = curve_scalar(*(in2++), g, sigma, shadows, highlights, clarity);
344 out2 = out + j*w;
345 for(int i=0;i<padding;i++) out2[i] = out2[padding];
346 for(int i=w-padding;i<w;i++) out2[i] = out2[w-padding-1];
347 }
348 pad_by_replication(out, w, h, padding);
349}
350
352 const float *const input, // input buffer in some Labx or yuvx format
353 float *const out, // output buffer with colour
354 const int wd, // width and
355 const int ht, // height of the input buffer
356 const float sigma, // user param: separate shadows/mid-tones/highlights
357 const float shadows, // user param: lift shadows
358 const float highlights, // user param: compress highlights
359 const float clarity, // user param: increase clarity/local contrast
360 const int use_sse2, // flag whether to use SSE version
362{
363 if(wd <= 1 || ht <= 1) return 0;
364
365 // don't divide by 2 more often than we can:
366 const int num_levels = MIN(max_levels, 31-__builtin_clz(MIN(wd,ht)));
367 int last_level = num_levels-1;
368 if(b && b->mode == 2) // higher number here makes it less prone to aliasing and slower.
369 last_level = num_levels > 4 ? 4 : num_levels-1;
370 const int max_supp = 1<<last_level;
371 int w, h;
372 int err = 0;
373 float *padded[max_levels] = {0};
374 if(b && b->mode == 2)
375 padded[0] = ll_pad_input(input, wd, ht, max_supp, &w, &h, b);
376 else
377 padded[0] = ll_pad_input(input, wd, ht, max_supp, &w, &h, 0);
378 if(padded[0] == NULL)
379 {
380 err = 1;
381 goto error;
382 }
383
384 // allocate pyramid pointers for padded input
385 for(int l=1;l<=last_level;l++)
386 {
387 padded[l] = dt_pixelpipe_cache_alloc_align_float_cache((size_t)dl(w,l) * dl(h,l), 0);
388 if(padded[l] == NULL)
389 {
390 err = 1;
391 goto error;
392 }
393 }
394
395 // allocate pyramid pointers for output
396 float *output[max_levels] = {0};
397 for(int l=0;l<=last_level;l++)
398 {
399 output[l] = dt_pixelpipe_cache_alloc_align_float_cache((size_t)dl(w,l) * dl(h,l), 0);
400 if(output[l] == NULL)
401 {
402 err = 1;
403 goto error;
404 }
405 }
406
407 // create gauss pyramid of padded input, write coarse directly to output
408 for(int l=1;l<last_level;l++)
409 gauss_reduce(padded[l-1], padded[l], dl(w,l-1), dl(h,l-1));
410 gauss_reduce(padded[last_level-1], output[last_level], dl(w,last_level-1), dl(h,last_level-1));
411
412 // evenly sample brightness [0,1]:
413 float gamma[num_gamma] = {0.0f};
414 for(int k=0;k<num_gamma;k++) gamma[k] = (k+.5f)/(float)num_gamma;
415 // for(int k=0;k<num_gamma;k++) gamma[k] = k/(num_gamma-1.0f);
416
417 // allocate memory for intermediate laplacian pyramids
418 float *buf[num_gamma][max_levels] = {{0}};
419 for(int k=0;k<num_gamma;k++) for(int l=0;l<=last_level;l++)
420 {
421 buf[k][l] = dt_pixelpipe_cache_alloc_align_float_cache((size_t)dl(w,l)*dl(h,l), 0);
422 if(buf[k][l] == NULL)
423 {
424 err = 1;
425 goto error;
426 }
427 }
428
429 // the paper says remapping only level 3 not 0 does the trick, too
430 // (but i really like the additional octave of sharpness we get,
431 // willing to pay the cost).
432 for(int k=0;k<num_gamma;k++)
433 { // process images
434 apply_curve(buf[k][0], padded[0], w, h, max_supp, gamma[k], sigma, shadows, highlights, clarity);
435
436 // create gaussian pyramids
437 for(int l=1;l<=last_level;l++)
438 gauss_reduce(buf[k][l-1], buf[k][l], dl(w,l-1), dl(h,l-1));
439 }
440
441 // resample output[last_level] from preview
442 // requires to transform from padded/downsampled to full image and then
443 // to padded/downsampled in preview
444 if(b && b->mode == 2)
445 {
446 const float isize = powf(2.0f, last_level) / b->roi->scale; // pixel size of coarsest level in image space
447 const float psize = isize / b->buf->width * b->wd; // pixel footprint rescaled to preview buffer
448 const float pl = log2f(psize); // mip level in preview buffer
449 const int pl0 = CLAMP((int)pl, 0, b->num_levels-1), pl1 = CLAMP((int)(pl+1), 0, b->num_levels-1);
450 const float weight = CLAMP(pl-pl0, 0, 1); // weight between mip levels
451 const float mul0 = 1.0/powf(2.0f, pl0);
452 const float mul1 = 1.0/powf(2.0f, pl1);
453 const float mul = powf(2.0f, last_level);
454 const int pw = dl(w,last_level), ph = dl(h,last_level);
455 const int pw0 = dl(b->pwd, pl0), ph0 = dl(b->pht, pl0);
456 const int pw1 = dl(b->pwd, pl1), ph1 = dl(b->pht, pl1);
457 debug_dump_PFM("/tmp/coarse.pfm", b->output[pl0], pw0, ph0);
458 debug_dump_PFM("/tmp/oldcoarse.pfm", output[last_level], pw, ph);
459#ifdef _OPENMP
460#pragma omp parallel for collapse(2) default(shared)
461#endif
462 for(int j=0;j<ph;j++) for(int i=0;i<pw;i++)
463 {
464 // image coordinates in full buffer
465 float ix = ((i*mul - max_supp) + b->roi->x)/b->roi->scale;
466 float iy = ((j*mul - max_supp) + b->roi->y)/b->roi->scale;
467 // coordinates in padded preview buffer (
468 float px = CLAMP(ix / (float)b->buf->width * b->wd + (b->pwd-b->wd)/2.0f, 0, b->pwd);
469 float py = CLAMP(iy / (float)b->buf->height * b->ht + (b->pht-b->ht)/2.0f, 0, b->pht);
470 // trilinear lookup:
471 int px0 = CLAMP(px*mul0, 0, pw0-1);
472 int py0 = CLAMP(py*mul0, 0, ph0-1);
473 int px1 = CLAMP(px*mul1, 0, pw1-1);
474 int py1 = CLAMP(py*mul1, 0, ph1-1);
475#if 1
476 float f0x = CLAMP(px*mul0 - px0, 0.0f, 1.0f);
477 float f0y = CLAMP(py*mul0 - py0, 0.0f, 1.0f);
478 float f1x = CLAMP(px*mul1 - px1, 0.0f, 1.0f);
479 float f1y = CLAMP(py*mul1 - py1, 0.0f, 1.0f);
480 float c0 =
481 (1.0f-f0x)*(1.0f-f0y)*b->output[pl0][CLAMP(py0 , 0, ph0-1)*pw0 + CLAMP(px0 , 0, pw0-1)]+
482 ( f0x)*(1.0f-f0y)*b->output[pl0][CLAMP(py0 , 0, ph0-1)*pw0 + CLAMP(px0+1, 0, pw0-1)]+
483 (1.0f-f0x)*( f0y)*b->output[pl0][CLAMP(py0+1, 0, ph0-1)*pw0 + CLAMP(px0 , 0, pw0-1)]+
484 ( f0x)*( f0y)*b->output[pl0][CLAMP(py0+1, 0, ph0-1)*pw0 + CLAMP(px0+1, 0, pw0-1)];
485 float c1 =
486 (1.0f-f1x)*(1.0f-f1y)*b->output[pl1][CLAMP(py1 , 0, ph1-1)*pw1 + CLAMP(px1 , 0, pw1-1)]+
487 ( f1x)*(1.0f-f1y)*b->output[pl1][CLAMP(py1 , 0, ph1-1)*pw1 + CLAMP(px1+1, 0, pw1-1)]+
488 (1.0f-f1x)*( f1y)*b->output[pl1][CLAMP(py1+1, 0, ph1-1)*pw1 + CLAMP(px1 , 0, pw1-1)]+
489 ( f1x)*( f1y)*b->output[pl1][CLAMP(py1+1, 0, ph1-1)*pw1 + CLAMP(px1+1, 0, pw1-1)];
490#else
491 float c0 = b->output[pl0][py0*pw0 + px0];
492 float c1 = b->output[pl1][py1*pw1 + px1];
493#endif
494 output[last_level][j*pw+i] = weight * c1 + (1.0f-weight) * c0;
495 }
496 debug_dump_PFM("/tmp/newcoarse.pfm", output[last_level], pw, ph);
497 }
498
499 // assemble output pyramid coarse to fine
500 for(int l=last_level-1;l >= 0; l--)
501 {
502 const int pw = dl(w,l), ph = dl(h,l);
503
504 gauss_expand(output[l+1], output[l], pw, ph);
505 // go through all coefficients in the upsampled gauss buffer:
506 __OMP_PARALLEL_FOR__(collapse(2))
507 for(int j=0;j<ph;j++) for(int i=0;i<pw;i++)
508 {
509 const float v = padded[l][j*pw+i];
510 int hi = 1;
511 for(;hi<num_gamma-1 && gamma[hi] <= v;hi++);
512 int lo = hi-1;
513 const float a = CLAMPS((v - gamma[lo])/(gamma[hi]-gamma[lo]), 0.0f, 1.0f);
514 const float l0 = ll_laplacian(buf[lo][l+1], buf[lo][l], i, j, pw, ph);
515 const float l1 = ll_laplacian(buf[hi][l+1], buf[hi][l], i, j, pw, ph);
516 output[l][j*pw+i] += l0 * (1.0f-a) + l1 * a;
517 // we could do this to save on memory (no need for finest buf[][]).
518 // unfortunately it results in a quite noticeable loss of sharpness, i think
519 // the extra level is worth it.
520 // else if(l == 0) // use finest scale from input to not amplify noise (and use less memory)
521 // output[l][j*pw+i] += ll_laplacian(padded[l+1], padded[l], i, j, pw, ph);
522 }
523 }
524 __OMP_PARALLEL_FOR__(collapse(2))
525 for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
526 {
527 out[4*(j*wd+i)+0] = 100.0f * output[0][(j+max_supp)*w+max_supp+i]; // [0,1] -> L
528 out[4*(j*wd+i)+1] = input[4*(j*wd+i)+1]; // copy original colour channels
529 out[4*(j*wd+i)+2] = input[4*(j*wd+i)+2];
530 }
531 if(b && b->mode == 1)
532 { // output the buffers for later re-use
533 b->pad0 = padded[0];
534 b->wd = wd;
535 b->ht = ht;
536 b->pwd = w;
537 b->pht = h;
538 b->num_levels = num_levels;
539 for(int l=0;l<num_levels;l++) b->output[l] = output[l];
540 }
541
542error:;
543 // free all buffers except the ones passed out for preview rendering
544 const int keep_preview = (b && b->mode == 1 && err == 0);
545 for(int l=0;l<max_levels;l++)
546 {
547 if(!keep_preview || l)
549 if(!keep_preview)
551 for(int k=0; k<num_gamma;k++)
553 }
554 return err;
555}
556
557
558size_t local_laplacian_memory_use(const int width, // width of input image
559 const int height) // height of input image
560{
561 const int num_levels = MIN(max_levels, 31-__builtin_clz(MIN(width,height)));
562 const int max_supp = 1<<(num_levels-1);
563 const int paddwd = width + 2*max_supp;
564 const int paddht = height + 2*max_supp;
565
566 size_t memory_use = 0;
567
568 for(int l=0;l<num_levels;l++)
569 memory_use += sizeof(float) * (2 + num_gamma) * dl(paddwd, l) * dl(paddht, l);
570
571 return memory_use;
572}
573
574size_t local_laplacian_singlebuffer_size(const int width, // width of input image
575 const int height) // height of input image
576{
577 const int num_levels = MIN(max_levels, 31-__builtin_clz(MIN(width,height)));
578 const int max_supp = 1<<(num_levels-1);
579 const int paddwd = width + 2*max_supp;
580 const int paddht = height + 2*max_supp;
581
582 return sizeof(float) * dl(paddwd, 0) * dl(paddht, 0);
583}
584// clang-format off
585// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
586// vim: shiftwidth=2 expandtab tabstop=2 cindent
587// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
588// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
const dt_aligned_pixel_t f
const dt_colormatrix_t dt_aligned_pixel_t out
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:447
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
static void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
Definition eaw.c:30
static const float x
const int t
const float l1
const float v
size_t local_laplacian_memory_use(const int width, const int height)
static float * ll_pad_input(const float *const input, const int wd, const int ht, const int max_supp, int *wd2, int *ht2, local_laplacian_boundary_t *b)
static void pad_by_replication(float *buf, const uint32_t w, const uint32_t h, const uint32_t padding)
#define debug_dump_PFM(f, b, w, h)
#define LL_FILL(fallback)
static float ll_expand_gaussian(const float *const coarse, const int i, const int j, const int wd, const int ht)
#define num_gamma
int local_laplacian_internal(const float *const input, float *const out, const int wd, const int ht, const float sigma, const float shadows, const float highlights, const float clarity, const int use_sse2, local_laplacian_boundary_t *b)
static void gauss_reduce(const float *const input, float *const coarse, const int wd, const int ht)
#define max_levels
static int dl(int size, const int level)
static void ll_fill_boundary1(float *const input, const int wd, const int ht)
static void gauss_expand(const float *const input, float *const fine, const int wd, const int ht)
size_t local_laplacian_singlebuffer_size(const int width, const int height)
static float ll_laplacian(const float *const coarse, const float *const fine, const int i, const int j, const int wd, const int ht)
static float curve_scalar(const float x, const float g, const float sigma, const float shadows, const float highlights, const float clarity)
static void ll_fill_boundary2(float *const input, const int wd, const int ht)
void apply_curve(float *const out, const float *const in, const uint32_t w, const uint32_t h, const uint32_t padding, const float g, const float sigma, const float shadows, const float highlights, const float clarity)
float *const restrict const size_t k
float *const restrict const size_t const size_t ch
#define CLAMPS(A, L, H)
Definition math.h:76
size_t size
Definition mipmap_cache.c:3
const float sigma
#define MIN(a, b)
Definition thinplate.c:32