Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
lmmse.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2021 Hanno Schwalm.
4 Copyright (C) 2021 luzpaz.
5 Copyright (C) 2021 Pascal Obry.
6 Copyright (C) 2022 Martin Bařinka.
7 Copyright (C) 2023, 2026 Aurélien PIERRE.
8
9 darktable is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13
14 darktable is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with darktable. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23/*
24 The lmmse code base used for the darktable port has been taken from rawtherapee derived librtprocess.
25 Adapt for dt and tiling - hanno schwalm 06/2021
26
27 LSMME demosaicing algorithm
28 L. Zhang and X. Wu,
29 Color demozaicing via directional Linear Minimum Mean Square-error Estimation,
30 IEEE Trans. on Image Processing, vol. 14, pp. 2167-2178, Dec. 2005.
31
32 Adapted to RawTherapee by Jacques Desmis 3/2013
33 Improved speed and reduced memory consumption by Ingo Weyrich 2/2015
34*/
35
36/*
37 Refinement based on EECI demosaicing algorithm by L. Chang and Y.P. Tan
38 Paul Lee
39 Adapted for RawTherapee - Jacques Desmis 04/2013
40*/
41
42/* Why tiling?
43 The internal tiling vastly reduces memory footprint and allows data processing to be done mostly
44 with in-cache data thus increasing performance.
45
46 The performance has been tested on a E-2288G for 45mpix images, tiling improves performance > 2-fold.
47 times in sec: basic (0.5->0.15), median (0.6->0.18), 3xmedian (0.8->0.22), 3xmedian + 2x refine (1.2->0.30)
48 The default is now 2 times slower than RCD and 2 times faster than AMaZE
49*/
50
51#ifndef LMMSE_GRP
52 #define LMMSE_GRP 136
53#endif
54
55#ifdef __GNUC__
56 #pragma GCC push_options
57 #pragma GCC optimize ("fast-math", "fp-contract=fast", "finite-math-only", "no-math-errno")
58#endif
59
60#define LMMSE_OVERLAP 8
61#define BORDER_AROUND 4
62#define LMMSE_TILESIZE (LMMSE_GRP - 2 * BORDER_AROUND)
63#define LMMSE_TILEVALID (LMMSE_TILESIZE - 2 * LMMSE_OVERLAP)
64#define w1 (LMMSE_GRP)
65#define w2 (LMMSE_GRP * 2)
66#define w3 (LMMSE_GRP * 3)
67#define w4 (LMMSE_GRP * 4)
68
69static INLINE float limf(float x, float min, float max)
70{
71 return fmaxf(min, fminf(x, max));
72}
73
74static INLINE float median3f(float x0, float x1, float x2)
75{
76 return fmaxf(fminf(x0,x1), fminf(x2, fmaxf(x0,x1)));
77}
78
79static INLINE float median9f(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7, float a8)
80{
81 float tmp;
82 tmp = fminf(a1, a2);
83 a2 = fmaxf(a1, a2);
84 a1 = tmp;
85 tmp = fminf(a4, a5);
86 a5 = fmaxf(a4, a5);
87 a4 = tmp;
88 tmp = fminf(a7, a8);
89 a8 = fmaxf(a7, a8);
90 a7 = tmp;
91 tmp = fminf(a0, a1);
92 a1 = fmaxf(a0, a1);
93 a0 = tmp;
94 tmp = fminf(a3, a4);
95 a4 = fmaxf(a3, a4);
96 a3 = tmp;
97 tmp = fminf(a6, a7);
98 a7 = fmaxf(a6, a7);
99 a6 = tmp;
100 tmp = fminf(a1, a2);
101 a2 = fmaxf(a1, a2);
102 a1 = tmp;
103 tmp = fminf(a4, a5);
104 a5 = fminf(a4, a5);
105 a4 = tmp;
106 tmp = fminf(a7, a8);
107 a8 = fmaxf(a7, a8);
108 a3 = fmaxf(a0, a3);
109 a5 = fminf(a5, a8);
110 a7 = fmaxf(a4, tmp);
111 tmp = fminf(a4, tmp);
112 a6 = fmaxf(a3, a6);
113 a4 = fmaxf(a1, tmp);
114 a2 = fminf(a2, a5);
115 a4 = fminf(a4, a7);
116 tmp = fminf(a4, a2);
117 a2 = fmaxf(a4, a2);
118 a4 = fmaxf(a6, tmp);
119 return fminf(a4, a2);
120}
121
122static INLINE float calc_gamma(float val, float *table)
123{
124 const float index = val * 65535.0f;
125 if(index < 0.0f) return 0.0f;
126 if(index > 65534.99f) return 1.0f;
127 const int idx = (int)index;
128
129 const float diff = index - (float)idx;
130 const float p1 = table[idx];
131 const float p2 = table[idx+1] - p1;
132 return (p1 + p2 * diff);
133}
134
135#ifdef _OPENMP
136 #pragma omp declare simd aligned(in, out, gamma_in, gamma_out)
137#endif
138static void lmmse_demosaic(const dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out,
139 const dt_iop_roi_t *const roi_in, const uint32_t filters, const uint32_t mode, float *const restrict gamma_in, float *const restrict gamma_out)
140{
141 const int width = roi_in->width;
142 const int height = roi_in->height;
143
144 if((width < 16) || (height < 16))
145 {
146 dt_control_log(_("[lmmse_demosaic] too small area"));
147 return;
148 }
149
150 float h0 = 1.0f;
151 float h1 = expf( -1.0f / 8.0f);
152 float h2 = expf( -4.0f / 8.0f);
153 float h3 = expf( -9.0f / 8.0f);
154 float h4 = expf(-16.0f / 8.0f);
155 float hs = h0 + 2.0f * (h1 + h2 + h3 + h4);
156 h0 /= hs;
157 h1 /= hs;
158 h2 /= hs;
159 h3 /= hs;
160 h4 /= hs;
161
162 // median filter iterations
163 const int medians = (mode < 2) ? mode : 3;
164 // refinement steps
165 const int refine = (mode > 2) ? mode - 2 : 0;
166
167 const float scaler = fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
168 const float revscaler = 1.0f / scaler;
169
170 const int num_vertical = 1 + (height - 2 * LMMSE_OVERLAP -1) / LMMSE_TILEVALID;
171 const int num_horizontal = 1 + (width - 2 * LMMSE_OVERLAP -1) / LMMSE_TILEVALID;
172#ifdef _OPENMP
173 #pragma omp parallel \
174 dt_omp_firstprivate(width, height, out, in, scaler, revscaler, filters)
175#endif
176 {
177 float *qix[6];
179
180 qix[0] = buffer;
181 for(int i = 1; i < 6; i++)
182 {
183 qix[i] = qix[i - 1] + LMMSE_GRP * LMMSE_GRP;
184 }
185 memset_zero(buffer, sizeof(float) * LMMSE_GRP * LMMSE_GRP * 6);
186
187#ifdef _OPENMP
188 #pragma omp for schedule(simd:dynamic, 6) collapse(2)
189#endif
190 for(int tile_vertical = 0; tile_vertical < num_vertical; tile_vertical++)
191 {
192 for(int tile_horizontal = 0; tile_horizontal < num_horizontal; tile_horizontal++)
193 {
194 const int rowStart = tile_vertical * LMMSE_TILEVALID;
195 const int rowEnd = MIN(rowStart + LMMSE_TILESIZE, height);
196
197 const int colStart = tile_horizontal * LMMSE_TILEVALID;
198 const int colEnd = MIN(colStart + LMMSE_TILESIZE, width);
199
200 const int tileRows = MIN(rowEnd - rowStart, LMMSE_TILESIZE);
201 const int tileCols = MIN(colEnd - colStart, LMMSE_TILESIZE);
202
203 // index limit; normally is LMMSE_GRP but maybe missing bottom lines or right columns for outermost tile
204 const int last_rr = tileRows + 2 * BORDER_AROUND;
205 const int last_cc = tileCols + 2 * BORDER_AROUND;
206
207 for(int rrr = BORDER_AROUND, row = rowStart; rrr < tileRows + BORDER_AROUND; rrr++, row++)
208 {
209 float *cfa = qix[5] + rrr * LMMSE_GRP + BORDER_AROUND;
210 int idx = row * width + colStart;
211 for(int ccc = BORDER_AROUND, col = colStart; ccc < tileCols + BORDER_AROUND; ccc++, col++, cfa++, idx++)
212 {
213 cfa[0] = calc_gamma(revscaler * in[idx], gamma_in);
214 }
215 }
216
217 // G-R(B)
218 for(int rr = 2; rr < last_rr - 2; rr++)
219 {
220 // G-R(B) at R(B) location
221 for(int cc = 2 + (FC(rr, 2, filters) & 1); cc < last_cc - 2; cc += 2)
222 {
223 float *cfa = qix[5] + rr * LMMSE_GRP + cc;
224 const float v0 = 0.0625f * (cfa[-w1 - 1] + cfa[-w1 + 1] + cfa[w1 - 1] + cfa[w1 + 1]) + 0.25f * cfa[0];
225 // horizontal
226 float *hdiff = qix[0] + rr * LMMSE_GRP + cc;
227 hdiff[0] = -0.25f * (cfa[ -2] + cfa[ 2]) + 0.5f * (cfa[ -1] + cfa[0] + cfa[ 1]);
228 const float Y0 = v0 + 0.5f * hdiff[0];
229 hdiff[0] = (cfa[0] > 1.75f * Y0) ? median3f(hdiff[0], cfa[ -1], cfa[ 1]) : limf(hdiff[0], 0.0f, 1.0f);
230 hdiff[0] -= cfa[0];
231
232 // vertical
233 float *vdiff = qix[1] + rr * LMMSE_GRP + cc;
234 vdiff[0] = -0.25f * (cfa[-w2] + cfa[w2]) + 0.5f * (cfa[-w1] + cfa[0] + cfa[w1]);
235 const float Y1 = v0 + 0.5f * vdiff[0];
236 vdiff[0] = (cfa[0] > 1.75f * Y1) ? median3f(vdiff[0], cfa[-w1], cfa[w1]) : limf(vdiff[0], 0.0f, 1.0f);
237 vdiff[0] -= cfa[0];
238 }
239
240 // G-R(B) at G location
241 for(int ccc = 2 + (FC(rr, 3, filters) & 1); ccc < last_cc - 2; ccc += 2)
242 {
243 float *cfa = qix[5] + rr * LMMSE_GRP + ccc;
244 float *hdiff = qix[0] + rr * LMMSE_GRP + ccc;
245 float *vdiff = qix[1] + rr * LMMSE_GRP + ccc;
246 hdiff[0] = 0.25f * (cfa[ -2] + cfa[ 2]) - 0.5f * (cfa[ -1] + cfa[0] + cfa[ 1]);
247 vdiff[0] = 0.25f * (cfa[-w2] + cfa[w2]) - 0.5f * (cfa[-w1] + cfa[0] + cfa[w1]);
248 hdiff[0] = limf(hdiff[0], -1.0f, 0.0f) + cfa[0];
249 vdiff[0] = limf(vdiff[0], -1.0f, 0.0f) + cfa[0];
250 }
251 }
252
253 // apply low pass filter on differential colors
254 for (int rr = 4; rr < last_rr - 4; rr++)
255 {
256 for(int cc = 4; cc < last_cc - 4; cc++)
257 {
258 float *hdiff = qix[0] + rr * LMMSE_GRP + cc;
259 float *vdiff = qix[1] + rr * LMMSE_GRP + cc;
260 float *hlp = qix[2] + rr * LMMSE_GRP + cc;
261 float *vlp = qix[3] + rr * LMMSE_GRP + cc;
262 hlp[0] = h0 * hdiff[0] + h1 * (hdiff[ -1] + hdiff[ 1]) + h2 * (hdiff[ -2] + hdiff[ 2]) + h3 * (hdiff[ -3] + hdiff[ 3]) + h4 * (hdiff[ -4] + hdiff[ 4]);
263 vlp[0] = h0 * vdiff[0] + h1 * (vdiff[-w1] + vdiff[w1]) + h2 * (vdiff[-w2] + vdiff[w2]) + h3 * (vdiff[-w3] + vdiff[w3]) + h4 * (vdiff[-w4] + vdiff[w4]);
264 }
265 }
266
267 for(int rr = 4; rr < last_rr - 4; rr++)
268 {
269 for(int cc = 4 + (FC(rr, 4, filters) & 1); cc < last_cc - 4; cc += 2)
270 {
271 float *hdiff = qix[0] + rr * LMMSE_GRP + cc;
272 float *vdiff = qix[1] + rr * LMMSE_GRP + cc;
273 float *hlp = qix[2] + rr * LMMSE_GRP + cc;
274 float *vlp = qix[3] + rr * LMMSE_GRP + cc;
275 float *interp = qix[4] + rr * LMMSE_GRP + cc;
276 // horizontal
277 float p1 = hlp[-4];
278 float p2 = hlp[-3];
279 float p3 = hlp[-2];
280 float p4 = hlp[-1];
281 float p5 = hlp[ 0];
282 float p6 = hlp[ 1];
283 float p7 = hlp[ 2];
284 float p8 = hlp[ 3];
285 float p9 = hlp[ 4];
286 float mu = (p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9) / 9.0f;
287 float vx = 1e-7f + sqf(p1 - mu) + sqf(p2 - mu) + sqf(p3 - mu) + sqf(p4 - mu) + sqf(p5 - mu) + sqf(p6 - mu) + sqf(p7 - mu) + sqf(p8 - mu) + sqf(p9 - mu);
288 p1 -= hdiff[-4];
289 p2 -= hdiff[-3];
290 p3 -= hdiff[-2];
291 p4 -= hdiff[-1];
292 p5 -= hdiff[ 0];
293 p6 -= hdiff[ 1];
294 p7 -= hdiff[ 2];
295 p8 -= hdiff[ 3];
296 p9 -= hdiff[ 4];
297 float vn = 1e-7f + sqf(p1) + sqf(p2) + sqf(p3) + sqf(p4) + sqf(p5) + sqf(p6) + sqf(p7) + sqf(p8) + sqf(p9);
298 float xh = (hdiff[0] * vx + hlp[0] * vn) / (vx + vn);
299 float vh = vx * vn / (vx + vn);
300
301 // vertical
302 p1 = vlp[-w4];
303 p2 = vlp[-w3];
304 p3 = vlp[-w2];
305 p4 = vlp[-w1];
306 p5 = vlp[ 0];
307 p6 = vlp[ w1];
308 p7 = vlp[ w2];
309 p8 = vlp[ w3];
310 p9 = vlp[ w4];
311 mu = (p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9) / 9.0f;
312 vx = 1e-7f + sqf(p1 - mu) + sqf(p2 - mu) + sqf(p3 - mu) + sqf(p4 - mu) + sqf(p5 - mu) + sqf(p6 - mu) + sqf(p7 - mu) + sqf(p8 - mu) + sqf(p9 - mu);
313 p1 -= vdiff[-w4];
314 p2 -= vdiff[-w3];
315 p3 -= vdiff[-w2];
316 p4 -= vdiff[-w1];
317 p5 -= vdiff[ 0];
318 p6 -= vdiff[ w1];
319 p7 -= vdiff[ w2];
320 p8 -= vdiff[ w3];
321 p9 -= vdiff[ w4];
322 vn = 1e-7f + sqf(p1) + sqf(p2) + sqf(p3) + sqf(p4) + sqf(p5) + sqf(p6) + sqf(p7) + sqf(p8) + sqf(p9);
323 float xv = (vdiff[0] * vx + vlp[0] * vn) / (vx + vn);
324 float vv = vx * vn / (vx + vn);
325 // interpolated G-R(B)
326 interp[0] = (xh * vv + xv * vh) / (vh + vv);
327 }
328 }
329
330 // copy CFA values
331 for(int rr = 0, row_in = rowStart - BORDER_AROUND; rr < last_rr; rr++, row_in++)
332 {
333 for(int cc = 0, col_in = colStart - BORDER_AROUND; cc < last_cc; cc++, col_in++)
334 {
335 const int c = FC(rr, cc, filters);
336 const gboolean inside = ((row_in >= 0) && (row_in < height) && (col_in >= 0) && (col_in < width));
337 float *colc = qix[c] + rr * LMMSE_GRP + cc;
338 colc[0] = (inside) ? qix[5][rr * LMMSE_GRP + cc] : 0.0f;
339 if(c != 1)
340 {
341 float *col1 = qix[1] + rr * LMMSE_GRP + cc;
342 float *interp = qix[4] + rr * LMMSE_GRP + cc;
343 col1[0] = (inside) ? colc[0] + interp[0] : 0.0f;
344 }
345 }
346 }
347
348 // bilinear interpolation for R/B
349 // interpolate R/B at G location
350 for(int rr = 1; rr < last_rr - 1; rr++)
351 {
352 for(int cc = 1 + (FC(rr, 2, filters) & 1), c = FC(rr, cc + 1, filters); cc < last_cc - 1; cc += 2)
353 {
354 float *colc = qix[c] + rr * LMMSE_GRP + cc;
355 float *col1 = qix[1] + rr * LMMSE_GRP + cc;
356 colc[0] = col1[0] + 0.5f * (colc[ -1] - col1[ -1] + colc[ 1] - col1[ 1]);
357 c = 2 - c;
358 colc = qix[c] + rr * LMMSE_GRP + cc;
359 colc[0] = col1[0] + 0.5f * (colc[-w1] - col1[-w1] + colc[w1] - col1[w1]);
360 c = 2 - c;
361 }
362 }
363
364 // interpolate R/B at B/R location
365 for(int rr = 1; rr < last_rr - 1; rr++)
366 {
367 for(int cc = 1 + (FC(rr, 1, filters) & 1), c = 2 - FC(rr, cc, filters); cc < last_cc - 1; cc += 2)
368 {
369 float *colc = qix[c] + rr * LMMSE_GRP + cc;
370 float *col1 = qix[1] + rr * LMMSE_GRP + cc;
371 colc[0] = col1[0] + 0.25f * (colc[-w1] - col1[-w1] + colc[ -1] - col1[ -1] + colc[ 1] - col1[ 1] + colc[ w1] - col1[ w1]);
372 }
373 }
374
375 // for the median and refine corrections we need to specify other loop bounds
376 // for inner vs outer tiles
377 const int ccmin = (tile_horizontal == 0) ? 6 : 0 ;
378 const int ccmax = last_cc - ((tile_horizontal == num_horizontal - 1) ? 6 : 0);
379 const int rrmin = (tile_vertical == 0) ? 6 : 0 ;
380 const int rrmax = last_rr - ((tile_vertical == num_vertical - 1) ? 6 : 0);
381
382 // median filter/
383 for(int pass = 0; pass < medians; pass++)
384 {
385 // Apply 3x3 median filter
386 // Compute median(R-G) and median(B-G)
387 for(int rr = 1; rr < last_rr - 1; rr++)
388 {
389 for(int c = 0; c < 3; c += 2)
390 {
391 const int d = c + 3 - (c == 0 ? 0 : 1);
392 for(int cc = 1; cc < last_cc - 1; cc++)
393 {
394 float *corr = qix[d] + rr * LMMSE_GRP + cc;
395 float *colc = qix[c] + rr * LMMSE_GRP + cc;
396 float *col1 = qix[1] + rr * LMMSE_GRP + cc;
397 // Assign 3x3 differential color values
398 corr[0] = median9f(colc[-w1-1] - col1[-w1-1],
399 colc[-w1 ] - col1[-w1 ],
400 colc[-w1+1] - col1[-w1+1],
401 colc[ -1] - col1[ -1],
402 colc[ 0] - col1[ 0],
403 colc[ 1] - col1[ 1],
404 colc[ w1-1] - col1[ w1-1],
405 colc[ w1 ] - col1[ w1 ],
406 colc[ w1+1] - col1[ w1+1]);
407 }
408 }
409 }
410
411 // red/blue at GREEN pixel locations & red/blue and green at BLUE/RED pixel locations
412 for(int rr = rrmin; rr < rrmax - 1; rr++)
413 {
414 float *col0 = qix[0] + rr * LMMSE_GRP + ccmin;
415 float *col1 = qix[1] + rr * LMMSE_GRP + ccmin;
416 float *col2 = qix[2] + rr * LMMSE_GRP + ccmin;
417 float *corr3 = qix[3] + rr * LMMSE_GRP + ccmin;
418 float *corr4 = qix[4] + rr * LMMSE_GRP + ccmin;
419 int c0 = FC(rr, 0, filters);
420 int c1 = FC(rr, 1, filters);
421
422 if(c0 == 1)
423 {
424 c1 = 2 - c1;
425 const int d = c1 + 3 - (c1 == 0 ? 0 : 1);
426 int cc;
427 float *col_c1 = qix[c1] + rr * LMMSE_GRP + ccmin;
428 float *corr_d = qix[d] + rr * LMMSE_GRP + ccmin;
429 for(cc = ccmin; cc < ccmax - 1; cc += 2)
430 {
431 col0[0] = col1[0] + corr3[0];
432 col2[0] = col1[0] + corr4[0];
433 col0++;
434 col1++;
435 col2++;
436 corr3++;
437 corr4++;
438 col_c1++;
439 corr_d++;
440 col_c1[0] = col1[0] + corr_d[0];
441 col1[0] = 0.5f * (col0[0] - corr3[0] + col2[0] - corr4[0]);
442 col0++;
443 col1++;
444 col2++;
445 corr3++;
446 corr4++;
447 col_c1++;
448 corr_d++;
449 }
450
451 if(cc < ccmax)
452 { // remaining pixel, only if width is odd
453 col0[0] = col1[0] + corr3[0];
454 col2[0] = col1[0] + corr4[0];
455 }
456 }
457 else
458 {
459 c0 = 2 - c0;
460 const int d = c0 + 3 - (c0 == 0 ? 0 : 1);
461 float *col_c0 = qix[c0] + rr * LMMSE_GRP + ccmin;
462 float *corr_d = qix[d] + rr * LMMSE_GRP + ccmin;
463 int cc;
464 for(cc = ccmin; cc < ccmax - 1; cc += 2)
465 {
466 col_c0[0] = col1[0] + corr_d[0];
467 col1[0] = 0.5f * (col0[0] - corr3[0] + col2[0] - corr4[0]);
468 col0++;
469 col1++;
470 col2++;
471 corr3++;
472 corr4++;
473 col_c0++;
474 corr_d++;
475 col0[0] = col1[0] + corr3[0];
476 col2[0] = col1[0] + corr4[0];
477 col0++;
478 col1++;
479 col2++;
480 corr3++;
481 corr4++;
482 col_c0++;
483 corr_d++;
484 }
485
486 if(cc < ccmax)
487 { // remaining pixel, only if width is odd
488 col_c0[0] = col1[0] + corr_d[0];
489 col1[0] = 0.5f * (col0[0] - corr3[0] + col2[0] - corr4[0]);
490 }
491 }
492 }
493 }
494
495 // we fill the non-approximated color channels from gamma corrected cfa data
496 for(int rrr = 4; rrr < last_rr - 4; rrr++)
497 {
498 for(int ccc = 4; ccc < last_cc - 4; ccc++)
499 {
500 const int idx = rrr * LMMSE_GRP + ccc;
501 const int c = FC(rrr, ccc, filters);
502 qix[c][idx] = qix[5][idx];
503 }
504 }
505
506 // As we have the color channels fully available we can do the refinements here in tiled code
507 for(int step = 0; step < refine; step++)
508 {
509 // Reinforce interpolated green pixels on RED/BLUE pixel locations
510 for(int rr = rrmin + 2; rr < rrmax - 2; rr++)
511 {
512 for(int cc = ccmin + 2 + (FC(rr, 2, filters) & 1), c = FC(rr, cc, filters); cc < ccmax - 2; cc += 2)
513 {
514 float *rgb1 = qix[1] + rr * LMMSE_GRP + cc;
515 float *rgbc = qix[c] + rr * LMMSE_GRP + cc;
516
517 const float dL = 1.0f / (1.0f + fabsf(rgbc[ -2] - rgbc[0]) + fabsf(rgb1[ 1] - rgb1[ -1]));
518 const float dR = 1.0f / (1.0f + fabsf(rgbc[ 2] - rgbc[0]) + fabsf(rgb1[ 1] - rgb1[ -1]));
519 const float dU = 1.0f / (1.0f + fabsf(rgbc[-w2] - rgbc[0]) + fabsf(rgb1[w1] - rgb1[-w1]));
520 const float dD = 1.0f / (1.0f + fabsf(rgbc[ w2] - rgbc[0]) + fabsf(rgb1[w1] - rgb1[-w1]));
521 rgb1[0] = (rgbc[0] + ((rgb1[-1] - rgbc[-1]) * dL + (rgb1[1] - rgbc[1]) * dR + (rgb1[-w1] - rgbc[-w1]) * dU + (rgb1[w1] - rgbc[w1]) * dD ) / (dL + dR + dU + dD));
522 }
523 }
524 // Reinforce interpolated red/blue pixels on GREEN pixel locations
525 for(int rr = rrmin + 2; rr < rrmax - 2; rr++)
526 {
527 for(int cc = ccmin + 2 + (FC(rr, 3, filters) & 1), c = FC(rr, cc + 1, filters); cc < ccmax - 2; cc += 2)
528 {
529 for(int i = 0; i < 2; c = 2 - c, i++)
530 {
531 float *rgb1 = qix[1] + rr * LMMSE_GRP + cc;
532 float *rgbc = qix[c] + rr * LMMSE_GRP + cc;
533
534 const float dL = 1.0f / (1.0f + fabsf(rgb1[ -2] - rgb1[0]) + fabsf(rgbc[ 1] - rgbc[ -1]));
535 const float dR = 1.0f / (1.0f + fabsf(rgb1[ 2] - rgb1[0]) + fabsf(rgbc[ 1] - rgbc[ -1]));
536 const float dU = 1.0f / (1.0f + fabsf(rgb1[-w2] - rgb1[0]) + fabsf(rgbc[w1] - rgbc[-w1]));
537 const float dD = 1.0f / (1.0f + fabsf(rgb1[ w2] - rgb1[0]) + fabsf(rgbc[w1] - rgbc[-w1]));
538 rgbc[0] = (rgb1[0] - ((rgb1[-1] - rgbc[-1]) * dL + (rgb1[1] - rgbc[1]) * dR + (rgb1[-w1] - rgbc[-w1]) * dU + (rgb1[w1] - rgbc[w1]) * dD ) / (dL + dR + dU + dD));
539 }
540 }
541 }
542 // Reinforce integrated red/blue pixels on BLUE/RED pixel locations
543 for(int rr = rrmin + 2; rr < rrmax - 2; rr++)
544 {
545 for(int cc = ccmin + 2 + (FC(rr, 2, filters) & 1), c = 2 - FC(rr, cc, filters); cc < ccmax - 2; cc += 2)
546 {
547 const int d = 2 - c;
548 float *rgb1 = qix[1] + rr * LMMSE_GRP + cc;
549 float *rgbc = qix[c] + rr * LMMSE_GRP + cc;
550 float *rgbd = qix[d] + rr * LMMSE_GRP + cc;
551
552 const float dL = 1.0f / (1.0f + fabsf(rgbd[ -2] - rgbd[0]) + fabsf(rgb1[ 1] - rgb1[ -1]));
553 const float dR = 1.0f / (1.0f + fabsf(rgbd[ 2] - rgbd[0]) + fabsf(rgb1[ 1] - rgb1[ -1]));
554 const float dU = 1.0f / (1.0f + fabsf(rgbd[-w2] - rgbd[0]) + fabsf(rgb1[w1] - rgb1[-w1]));
555 const float dD = 1.0f / (1.0f + fabsf(rgbd[ w2] - rgbd[0]) + fabsf(rgb1[w1] - rgb1[-w1]));
556 rgbc[0] = (rgb1[0] - ((rgb1[-1] - rgbc[-1]) * dL + (rgb1[1] - rgbc[1]) * dR + (rgb1[-w1] - rgbc[-w1]) * dU + (rgb1[w1] - rgbc[w1]) * dD ) / (dL + dR + dU + dD));
557 }
558 }
559 }
560
561 // write result to out
562 // For the outermost tiles in all directions we also write the otherwise overlapped area
563 const int first_vertical = rowStart + ((tile_vertical == 0) ? 0 : LMMSE_OVERLAP);
564 const int last_vertical = rowEnd - ((tile_vertical == num_vertical - 1) ? 0 : LMMSE_OVERLAP);
565 const int first_horizontal = colStart + ((tile_horizontal == 0) ? 0 : LMMSE_OVERLAP);
566 const int last_horizontal = colEnd - ((tile_horizontal == num_horizontal - 1) ? 0 : LMMSE_OVERLAP);
567 for(int row = first_vertical, rr = row - rowStart + BORDER_AROUND; row < last_vertical; row++, rr++)
568 {
569 float *dest = out + 4 * (row * width + first_horizontal);
570 const int idx = rr * LMMSE_GRP + first_horizontal - colStart + BORDER_AROUND;
571 float *col0 = qix[0] + idx;
572 float *col1 = qix[1] + idx;
573 float *col2 = qix[2] + idx;
574 for(int col = first_horizontal; col < last_horizontal; col++, dest +=4, col0++, col1++, col2++)
575 {
576 dest[0] = scaler * calc_gamma(col0[0], gamma_out);
577 dest[1] = scaler * calc_gamma(col1[0], gamma_out);
578 dest[2] = scaler * calc_gamma(col2[0], gamma_out);
579 dest[3] = 0.0f;
580 }
581 }
582 }
583 }
585 }
586}
587
588// revert specific aggressive optimizing
589#ifdef __GNUC__
590 #pragma GCC pop_options
591#endif
592
593#undef LMMSE_TILESIZE
594#undef LMMSE_OVERLAP
595#undef BORDER_AROUND
596#undef LMMSE_TILEVALID
597#undef w1
598#undef w2
599#undef w3
600#undef w4
601
602// clang-format off
603// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
604// vim: shiftwidth=2 expandtab tabstop=2 cindent
605// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
606// clang-format on
static int refine(struct point *reg, int *reg_size, image_double modgrad, double reg_angle, double prec, double p, struct rect *rec, image_char used, image_double angles, double density_th)
Definition ashift_lsd.c:2006
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
#define INLINE
Definition cacorrect.c:167
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
const float d
Definition colorspaces_inline_conversions.h:931
static const float const float const float min
Definition colorspaces_inline_conversions.h:667
const float max
Definition colorspaces_inline_conversions.h:721
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
static const int row
Definition colorspaces_inline_conversions.h:175
void dt_control_log(const char *msg,...)
Definition control.c:530
static void memset_zero(void *const buffer, size_t size)
Set the memory buffer to zero as a pack of unsigned char.
Definition darktable.h:813
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:371
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:377
static int FC(const int row, const int col, const unsigned int filters)
Definition data/kernels/common.h:47
static const float x
Definition iop_profile.h:239
static INLINE float calc_gamma(float val, float *table)
Definition lmmse.c:122
static INLINE float median3f(float x0, float x1, float x2)
Definition lmmse.c:74
#define w2
Definition lmmse.c:65
#define LMMSE_TILEVALID
Definition lmmse.c:63
#define BORDER_AROUND
Definition lmmse.c:61
#define LMMSE_OVERLAP
Definition lmmse.c:60
#define LMMSE_GRP
Definition lmmse.c:52
static INLINE float median9f(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7, float a8)
Definition lmmse.c:79
#define w1
Definition lmmse.c:64
#define w4
Definition lmmse.c:67
static void lmmse_demosaic(const dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters, const uint32_t mode, float *const restrict gamma_in, float *const restrict gamma_out)
Definition lmmse.c:138
#define w3
Definition lmmse.c:66
static INLINE float limf(float x, float min, float max)
Definition lmmse.c:69
#define LMMSE_TILESIZE
Definition lmmse.c:62
static float sqf(const float x)
Definition math.h:223
Definition pixelpipe_hb.h:95
dt_iop_buffer_dsc_t dsc_in
Definition pixelpipe_hb.h:140
dt_aligned_pixel_t processed_maximum
Definition develop/format.h:73
Definition imageop.h:67
int width
Definition imageop.h:68
int height
Definition imageop.h:68
#define c1
Definition colorspaces_inline_conversions.h:1054
#define MIN(a, b)
Definition thinplate.c:32