Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
rcd.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2020-2021 Hanno Schwalm.
4 Copyright (C) 2021 Marco.
5 Copyright (C) 2021 Pascal Obry.
6 Copyright (C) 2021 Ralf Brown.
7 Copyright (C) 2022 Martin Bařinka.
8 Copyright (C) 2023, 2025-2026 Aurélien PIERRE.
9
10 darktable is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 darktable is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with darktable. If not, see <http://www.gnu.org/licenses/>.
22*/
23
24
25/*
26* RATIO CORRECTED DEMOSAICING
27* Luis Sanz Rodríguez (luis.sanz.rodriguez(at)gmail(dot)com)
28*
29* Release 2.3 @ 171125
30*
31* Original code from https://github.com/LuisSR/RCD-Demosaicing
32* Licensed under the GNU GPL version 3
33*/
34
35/*
36* The tiling coding was done by Ingo Weyrich (heckflosse67@gmx.de) from rawtherapee
37*/
38
39/*
40* Luis Sanz Rodríguez significantly optimised the v 2.3 code and simplified the directional
41* coefficients in an exact, shorter and more performant formula.
42* In cooperation with Ingo Weyrich and Luis Sanz Rodríguez this has been tuned for performance.
43* Hanno Schwalm (hanno@schwalm-bremen.de)
44*/
45
46/* Some notes about the algorithm
47* 1. The calculated data at the tiling borders RCD_BORDER must be at least 9 to be stable. Why does 8 **not** work?
48* 2. For the outermost tiles we only have to discard a 6 pixel border region interpolated otherwise.
49* 3. The tilesize has a significant influence on performance, the default is a good guess for modern
50* x86/64 machines, tested on Xeon E-2288G, i5-8250U.
51*/
52
53#ifndef RCD_TILESIZE
54 #define RCD_TILESIZE 112
55#endif
56
57/* We don't want to use the -Ofast option in dt as it effects are not well specified and there have been issues
58 leading to crashes.
59 But we can use the 'fast-math' option in code sections if input data and algorithms are well defined.
60
61 We have defined input data and make sure there are no divide-by-zero or overflows by chosen eps
62 Reordering of instructions might lead to a slight loss of presision whigh is not significant here.
63 Not necessary in this code section
64 threadsafe handling of errno
65 signed zero handling
66 handling of math interrupts
67 handling of rounding
68 handling of overflows
69
70 The 'fp-contract=fast' option enables fused multiply&add if available
71*/
72
73#ifdef __GNUC__
74 #pragma GCC push_options
75 #pragma GCC optimize ("fast-math", "fp-contract=fast", "finite-math-only", "no-math-errno")
76#endif
77
78#define RCD_BORDER 9 // avoid tile-overlap errors
79#define RCD_MARGIN 6 // for the outermost tiles we can have a smaller outer border
80#define RCD_TILEVALID (RCD_TILESIZE - 2 * RCD_BORDER)
81#define w1 RCD_TILESIZE
82#define w2 (2 * RCD_TILESIZE)
83#define w3 (3 * RCD_TILESIZE)
84#define w4 (4 * RCD_TILESIZE)
85
86#define eps 1e-5f // Tolerance to avoid dividing by zero
87#define epssq 1e-10f
88
89// We might have negative data in input and also want to normalise
90static INLINE float safe_in(float a, float scale)
91{
92 return fmaxf(0.0f, a) * scale;
93}
94
96static void rcd_ppg_border(float *const out, const float *const in, const int width, const int height, const uint32_t filters, const int margin)
97{
98 const int border = margin + 3;
99 // write approximatad 3-pixel border region to out
100 float sum[8];
101 for(int j = 0; j < height; j++)
102 {
103 for(int i = 0; i < width; i++)
104 {
105 if(i == 3 && j >= 3 && j < height - 3) i = width - 3;
106 if(i == width) break;
107 memset(sum, 0, sizeof(float) * 8);
108 for(int y = j - 1; y != j + 2; y++)
109 {
110 for(int x = i - 1; x != i + 2; x++)
111 {
112 if((y >= 0) && (x >= 0) && (y < height) && (x < width))
113 {
114 const int f = FC(y, x, filters);
115 sum[f] += fmaxf(0.0f, in[(size_t)y * width + x]);
116 sum[f + 4]++;
117 }
118 }
119 }
120 const int f = FC(j, i, filters);
121 for(int c = 0; c < 3; c++)
122 {
123 if(c != f && sum[c + 4] > 0.0f)
124 out[4 * ((size_t)j * width + i) + c] = sum[c] / sum[c + 4];
125 else
126 out[4 * ((size_t)j * width + i) + c] = fmaxf(0.0f, in[(size_t)j * width + i]);
127 }
128 }
129 }
130
131 const float *input = in;
132
133#ifdef _OPENMP
134#pragma omp parallel for default(none) \
135 dt_omp_firstprivate(filters, out, width, height, border) \
136 shared(input) \
137 schedule(static)
138#endif
139 for(int j = 3; j < height - 3; j++)
140 {
141 float *buf = out + (size_t)4 * width * j + 4 * 3;
142 const float *buf_in = input + (size_t)width * j + 3;
143 for(int i = 3; i < width - 3; i++)
144 {
145 if(i == border && j >= border && j < height - border)
146 {
147 i = width - border;
148 buf = out + (size_t)4 * width * j + 4 * i;
149 buf_in = input + (size_t)width * j + i;
150 }
151 if(i == width) break;
152
153 const int c = FC(j, i, filters);
154 dt_aligned_pixel_t color;
155 const float pc = fmaxf(0.0f, buf_in[0]);
156 if(c == 0 || c == 2)
157 {
158 color[c] = pc;
159 const float pym = fmaxf(0.0f, buf_in[-width * 1]);
160 const float pym2 = fmaxf(0.0f, buf_in[-width * 2]);
161 const float pym3 = fmaxf(0.0f, buf_in[-width * 3]);
162 const float pyM = fmaxf(0.0f, buf_in[+width * 1]);
163 const float pyM2 = fmaxf(0.0f, buf_in[+width * 2]);
164 const float pyM3 = fmaxf(0.0f, buf_in[+width * 3]);
165 const float pxm = fmaxf(0.0f, buf_in[-1]);
166 const float pxm2 = fmaxf(0.0f, buf_in[-2]);
167 const float pxm3 = fmaxf(0.0f, buf_in[-3]);
168 const float pxM = fmaxf(0.0f, buf_in[+1]);
169 const float pxM2 = fmaxf(0.0f, buf_in[+2]);
170 const float pxM3 = fmaxf(0.0f, buf_in[+3]);
171
172 const float guessx = (pxm + pc + pxM) * 2.0f - pxM2 - pxm2;
173 const float diffx = (fabsf(pxm2 - pc) + fabsf(pxM2 - pc) + fabsf(pxm - pxM)) * 3.0f
174 + (fabsf(pxM3 - pxM) + fabsf(pxm3 - pxm)) * 2.0f;
175 const float guessy = (pym + pc + pyM) * 2.0f - pyM2 - pym2;
176 const float diffy = (fabsf(pym2 - pc) + fabsf(pyM2 - pc) + fabsf(pym - pyM)) * 3.0f
177 + (fabsf(pyM3 - pyM) + fabsf(pym3 - pym)) * 2.0f;
178 if(diffx > diffy)
179 {
180 // use guessy
181 const float m = fminf(pym, pyM);
182 const float M = fmaxf(pym, pyM);
183 color[1] = fmaxf(fminf(guessy * .25f, M), m);
184 }
185 else
186 {
187 const float m = fminf(pxm, pxM);
188 const float M = fmaxf(pxm, pxM);
189 color[1] = fmaxf(fminf(guessx * .25f, M), m);
190 }
191 }
192 else
193 color[1] = pc;
194
195 color[3] = 0.0f;
196 memcpy(buf, color, sizeof(float) * 4);
197 buf += 4;
198 buf_in++;
199 }
200 }
201// for all pixels: interpolate colors into float array
202#ifdef _OPENMP
203#pragma omp parallel for default(none) \
204 dt_omp_firstprivate(filters, out, width, height, margin) \
205 schedule(static)
206#endif
207 for(int j = 1; j < height - 1; j++)
208 {
209 float *buf = out + (size_t)4 * width * j + 4;
210 for(int i = 1; i < width - 1; i++)
211 {
212 if(i == margin && j >= margin && j < height - margin)
213 {
214 i = width - margin;
215 buf = out + (size_t)4 * (width * j + i);
216 }
217 const int c = FC(j, i, filters);
218 dt_aligned_pixel_t color = { buf[0], buf[1], buf[2], buf[3] };
219 const int linesize = 4 * width;
220 // fill all four pixels with correctly interpolated stuff: r/b for green1/2
221 // b for r and r for b
222 if(__builtin_expect(c & 1, 1)) // c == 1 || c == 3)
223 {
224 // calculate red and blue for green pixels:
225 // need 4-nbhood:
226 const float *nt = buf - linesize;
227 const float *nb = buf + linesize;
228 const float *nl = buf - 4;
229 const float *nr = buf + 4;
230 if(FC(j, i + 1, filters) == 0) // red nb in same row
231 {
232 color[2] = (nt[2] + nb[2] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
233 color[0] = (nl[0] + nr[0] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
234 }
235 else
236 {
237 // blue nb
238 color[0] = (nt[0] + nb[0] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
239 color[2] = (nl[2] + nr[2] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
240 }
241 }
242 else
243 {
244 // get 4-star-nbhood:
245 const float *ntl = buf - 4 - linesize;
246 const float *ntr = buf + 4 - linesize;
247 const float *nbl = buf - 4 + linesize;
248 const float *nbr = buf + 4 + linesize;
249
250 if(c == 0)
251 {
252 // red pixel, fill blue:
253 const float diff1 = fabsf(ntl[2] - nbr[2]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
254 const float guess1 = ntl[2] + nbr[2] + 2.0f * color[1] - ntl[1] - nbr[1];
255 const float diff2 = fabsf(ntr[2] - nbl[2]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
256 const float guess2 = ntr[2] + nbl[2] + 2.0f * color[1] - ntr[1] - nbl[1];
257 if(diff1 > diff2)
258 color[2] = guess2 * .5f;
259 else if(diff1 < diff2)
260 color[2] = guess1 * .5f;
261 else
262 color[2] = (guess1 + guess2) * .25f;
263 }
264 else // c == 2, blue pixel, fill red:
265 {
266 const float diff1 = fabsf(ntl[0] - nbr[0]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
267 const float guess1 = ntl[0] + nbr[0] + 2.0f * color[1] - ntl[1] - nbr[1];
268 const float diff2 = fabsf(ntr[0] - nbl[0]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
269 const float guess2 = ntr[0] + nbl[0] + 2.0f * color[1] - ntr[1] - nbl[1];
270 if(diff1 > diff2)
271 color[0] = guess2 * .5f;
272 else if(diff1 < diff2)
273 color[0] = guess1 * .5f;
274 else
275 color[0] = (guess1 + guess2) * .25f;
276 }
277 }
278 memcpy(buf, color, sizeof(float) * 4);
279 buf += 4;
280 }
281 }
282}
283
284#ifdef _OPENMP
285 #pragma omp declare simd aligned(in, out)
286#endif
287static void rcd_demosaic(const dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out,
288 const dt_iop_roi_t *const roi_in, const uint32_t filters)
289{
290 const int width = roi_in->width;
291 const int height = roi_in->height;
292
293 if((width < 16) || (height < 16))
294 {
295 dt_control_log(_("[rcd_demosaic] too small area"));
296 return;
297 }
298
299 rcd_ppg_border(out, in, width, height, filters, RCD_MARGIN);
300
301 const float scaler = fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
302 const float revscaler = 1.0f / scaler;
303
304 const int num_vertical = 1 + (height - 2 * RCD_BORDER -1) / RCD_TILEVALID;
305 const int num_horizontal = 1 + (width - 2 * RCD_BORDER -1) / RCD_TILEVALID;
306
307#ifdef _OPENMP
308 #pragma omp parallel \
309 dt_omp_firstprivate(width, height, filters, out, in, scaler, revscaler)
310#endif
311 {
312 // FIXME: CRITICAL: need to handle the case where we couldn't alloc the memory,
313 // but in a parallel section, that's not going to be trivial.
314
316 // ensure that border elements which are read but never actually set below are zeroed out
317 memset(VH_Dir, 0, sizeof(*VH_Dir) * RCD_TILESIZE * RCD_TILESIZE);
320 float *P_CDiff_Hpf = dt_pixelpipe_cache_alloc_align_float_cache((size_t) RCD_TILESIZE * RCD_TILESIZE / 2, 0);
321 float *Q_CDiff_Hpf = dt_pixelpipe_cache_alloc_align_float_cache((size_t) RCD_TILESIZE * RCD_TILESIZE / 2, 0);
322
323 float (*rgb)[RCD_TILESIZE * RCD_TILESIZE] =
325
326 // No overlapping use so re-use same buffer
327 float *lpf = PQ_Dir;
328
329 // There has been a discussion about the schedule strategy, at least on the tested machines the
330 // dynamic scheduling seems to be slightly faster.
331#ifdef _OPENMP
332 #pragma omp for schedule(simd:dynamic, 6) collapse(2) nowait
333#endif
334 for(int tile_vertical = 0; tile_vertical < num_vertical; tile_vertical++)
335 {
336 for(int tile_horizontal = 0; tile_horizontal < num_horizontal; tile_horizontal++)
337 {
338 const int rowStart = tile_vertical * RCD_TILEVALID;
339 const int rowEnd = MIN(rowStart + RCD_TILESIZE, height);
340
341 const int colStart = tile_horizontal * RCD_TILEVALID;
342 const int colEnd = MIN(colStart + RCD_TILESIZE, width);
343
344 const int tileRows = MIN(rowEnd - rowStart, RCD_TILESIZE);
345 const int tileCols = MIN(colEnd - colStart, RCD_TILESIZE);
346
347 if (rowStart + RCD_TILESIZE > height || colStart + RCD_TILESIZE > width)
348 {
349 // VH_Dir is only filled for (4,4)..(height-4,width-4), but the refinement code reads (3,3)...(h-3,w-3),
350 // so we need to ensure that the border is zeroed for partial tiles to get consistent results
351 memset(VH_Dir, 0, sizeof(*VH_Dir) * RCD_TILESIZE * RCD_TILESIZE);
352 // TODO: figure out what part of rgb is being accessed without initialization on partial tiles
353 memset(rgb, 0, sizeof(float) * 3 * RCD_TILESIZE * RCD_TILESIZE);
354 }
355 // Step 0: fill data and make sure data are not negative.
356 for(int row = rowStart; row < rowEnd; row++)
357 {
358 const int c0 = FC(row, colStart, filters);
359 const int c1 = FC(row, colStart + 1, filters);
360 for(int col = colStart, indx = (row - rowStart) * RCD_TILESIZE, in_indx = row * width + colStart; col < colEnd; col++, indx++, in_indx++)
361 {
362 cfa[indx] = rgb[c0][indx] = rgb[c1][indx] = safe_in(in[in_indx], revscaler);
363 }
364 }
365
366 // STEP 1: Find vertical and horizontal interpolation directions
367 float bufferV[3][RCD_TILESIZE - 8];
368 // Step 1.1: Calculate the square of the vertical and horizontal color difference high pass filter
369 for(int row = 3; row < MIN(tileRows - 3, 5); row++ )
370 {
371 for(int col = 4, indx = row * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
372 {
373 bufferV[row - 3][col - 4] = sqf((cfa[indx - w3] - cfa[indx - w1] - cfa[indx + w1] + cfa[indx + w3]) - 3.0f * (cfa[indx - w2] + cfa[indx + w2]) + 6.0f * cfa[indx]);
374 }
375 }
376
377 // Step 1.2: Obtain the vertical and horizontal directional discrimination strength
378 float DT_ALIGNED_PIXEL bufferH[RCD_TILESIZE];
379 // We start with V0, V1 and V2 pointing to row -1, row and row +1
380 // After row is processed V0 must point to the old V1, V1 must point to the old V2 and V2 must point to the old V0
381 // because the old V0 is not used anymore and will be filled with row + 1 data in next iteration
382 float* V0 = bufferV[0];
383 float* V1 = bufferV[1];
384 float* V2 = bufferV[2];
385 for(int row = 4; row < tileRows - 4; row++ )
386 {
387 for(int col = 3, indx = row * RCD_TILESIZE + col; col < tileCols - 3; col++, indx++)
388 {
389 bufferH[col - 3] = sqf((cfa[indx - 3] - cfa[indx - 1] - cfa[indx + 1] + cfa[indx + 3]) - 3.0f * (cfa[indx - 2] + cfa[indx + 2]) + 6.0f * cfa[indx]);
390 }
391 for(int col = 4, indx = (row + 1) * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++)
392 {
393 V2[col - 4] = sqf((cfa[indx - w3] - cfa[indx - w1] - cfa[indx + w1] + cfa[indx + w3]) - 3.0f * (cfa[indx - w2] + cfa[indx + w2]) + 6.0f * cfa[indx]);
394 }
395 for(int col = 4, indx = row * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
396 {
397 const float V_Stat = fmaxf(epssq, V0[col - 4] + V1[col - 4] + V2[col - 4]);
398 const float H_Stat = fmaxf(epssq, bufferH[col - 4] + bufferH[col - 3] + bufferH[col - 2]);
399 VH_Dir[indx] = V_Stat / ( V_Stat + H_Stat );
400 }
401 // rolling the line pointers
402 float* tmp = V0; V0 = V1; V1 = V2; V2 = tmp;
403 }
404
405 // STEP 2: Calculate the low pass filter
406 // Step 2.1: Low pass filter incorporating green, red and blue local samples from the raw data
407 for(int row = 2; row < tileRows - 2; row++)
408 {
409 for(int col = 2 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, lp_indx = indx / 2; col < tileCols - 2; col += 2, indx +=2, lp_indx++)
410 {
411 lpf[lp_indx] = cfa[indx]
412 + 0.5f * (cfa[indx - w1] + cfa[indx + w1] + cfa[indx - 1] + cfa[indx + 1])
413 + 0.25f * (cfa[indx - w1 - 1] + cfa[indx - w1 + 1] + cfa[indx + w1 - 1] + cfa[indx + w1 + 1]);
414 }
415 }
416
417 // STEP 3: Populate the green channel
418 // Step 3.1: Populate the green channel at blue and red CFA positions
419 for(int row = 4; row < tileRows - 4; row++)
420 {
421 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, lpindx = indx / 2; col < tileCols - 4; col += 2, indx += 2, lpindx++)
422 {
423 const float cfai = cfa[indx];
424
425 // Cardinal gradients
426 const float N_Grad = eps + fabs(cfa[indx - w1] - cfa[indx + w1]) + fabs(cfai - cfa[indx - w2]) + fabs(cfa[indx - w1] - cfa[indx - w3]) + fabs(cfa[indx - w2] - cfa[indx - w4]);
427 const float S_Grad = eps + fabs(cfa[indx - w1] - cfa[indx + w1]) + fabs(cfai - cfa[indx + w2]) + fabs(cfa[indx + w1] - cfa[indx + w3]) + fabs(cfa[indx + w2] - cfa[indx + w4]);
428 const float W_Grad = eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx - 2]) + fabs(cfa[indx - 1] - cfa[indx - 3]) + fabs(cfa[indx - 2] - cfa[indx - 4]);
429 const float E_Grad = eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx + 2]) + fabs(cfa[indx + 1] - cfa[indx + 3]) + fabs(cfa[indx + 2] - cfa[indx + 4]);
430
431 // Cardinal pixel estimations
432 const float lpfi = lpf[lpindx];
433 const float N_Est = cfa[indx - w1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx - w1]);
434 const float S_Est = cfa[indx + w1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx + w1]);
435 const float W_Est = cfa[indx - 1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx - 1]);
436 const float E_Est = cfa[indx + 1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx + 1]);
437
438 // Vertical and horizontal estimations
439 const float V_Est = (S_Grad * N_Est + N_Grad * S_Est) / (N_Grad + S_Grad);
440 const float H_Est = (W_Grad * E_Est + E_Grad * W_Est) / (E_Grad + W_Grad);
441
442 // G@B and G@R interpolation
443 // Refined vertical and horizontal local discrimination
444 const float VH_Central_Value = VH_Dir[indx];
445 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx - w1 - 1] + VH_Dir[indx - w1 + 1] + VH_Dir[indx + w1 - 1] + VH_Dir[indx + w1 + 1]);
446 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value)) ? VH_Neighbourhood_Value : VH_Central_Value;
447
448 rgb[1][indx] = intp(VH_Disc, H_Est, V_Est);
449 }
450 }
451
452 // STEP 4: Populate the red and blue channels
453
454 // Step 4.0: Calculate the square of the P/Q diagonals color difference high pass filter
455 for(int row = 3; row < tileRows - 3; row++)
456 {
457 for(int col = 3, indx = row * RCD_TILESIZE + col, indx2 = indx / 2; col < tileCols - 3; col+=2, indx+=2, indx2++)
458 {
459 P_CDiff_Hpf[indx2] = sqf((cfa[indx - w3 - 3] - cfa[indx - w1 - 1] - cfa[indx + w1 + 1] + cfa[indx + w3 + 3]) - 3.0f * (cfa[indx - w2 - 2] + cfa[indx + w2 + 2]) + 6.0f * cfa[indx]);
460 Q_CDiff_Hpf[indx2] = sqf((cfa[indx - w3 + 3] - cfa[indx - w1 + 1] - cfa[indx + w1 - 1] + cfa[indx + w3 - 3]) - 3.0f * (cfa[indx - w2 + 2] + cfa[indx + w2 - 2]) + 6.0f * cfa[indx]);
461 }
462 }
463 // Step 4.1: Obtain the P/Q diagonals directional discrimination strength
464 for(int row = 4; row < tileRows - 4; row++)
465 {
466 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, indx2 = indx / 2, indx3 = (indx - w1 - 1) / 2, indx4 = (indx + w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ )
467 {
468 const float P_Stat = fmaxf(epssq, P_CDiff_Hpf[indx3] + P_CDiff_Hpf[indx2] + P_CDiff_Hpf[indx4 + 1]);
469 const float Q_Stat = fmaxf(epssq, Q_CDiff_Hpf[indx3 + 1] + Q_CDiff_Hpf[indx2] + Q_CDiff_Hpf[indx4]);
470 PQ_Dir[indx2] = P_Stat / (P_Stat + Q_Stat);
471 }
472 }
473
474 // Step 4.2: Populate the red and blue channels at blue and red CFA positions
475 for(int row = 4; row < tileRows - 4; row++)
476 {
477 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, c = 2 - FC(row, col, filters), pqindx = indx / 2, pqindx2 = (indx - w1 - 1) / 2, pqindx3 = (indx + w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, pqindx++, pqindx2++, pqindx3++)
478 {
479 // Refined P/Q diagonal local discrimination
480 const float PQ_Central_Value = PQ_Dir[pqindx];
481 const float PQ_Neighbourhood_Value = 0.25f * (PQ_Dir[pqindx2] + PQ_Dir[pqindx2 + 1] + PQ_Dir[pqindx3] + PQ_Dir[pqindx3 + 1]);
482
483 const float PQ_Disc = (fabs(0.5f - PQ_Central_Value) < fabs(0.5f - PQ_Neighbourhood_Value)) ? PQ_Neighbourhood_Value : PQ_Central_Value;
484
485 // Diagonal gradients
486 const float NW_Grad = eps + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx + w1 + 1]) + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx - w3 - 3]) + fabs(rgb[1][indx] - rgb[1][indx - w2 - 2]);
487 const float NE_Grad = eps + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx + w1 - 1]) + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx - w3 + 3]) + fabs(rgb[1][indx] - rgb[1][indx - w2 + 2]);
488 const float SW_Grad = eps + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx + w1 - 1]) + fabs(rgb[c][indx + w1 - 1] - rgb[c][indx + w3 - 3]) + fabs(rgb[1][indx] - rgb[1][indx + w2 - 2]);
489 const float SE_Grad = eps + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx + w1 + 1]) + fabs(rgb[c][indx + w1 + 1] - rgb[c][indx + w3 + 3]) + fabs(rgb[1][indx] - rgb[1][indx + w2 + 2]);
490
491 // Diagonal colour differences
492 const float NW_Est = rgb[c][indx - w1 - 1] - rgb[1][indx - w1 - 1];
493 const float NE_Est = rgb[c][indx - w1 + 1] - rgb[1][indx - w1 + 1];
494 const float SW_Est = rgb[c][indx + w1 - 1] - rgb[1][indx + w1 - 1];
495 const float SE_Est = rgb[c][indx + w1 + 1] - rgb[1][indx + w1 + 1];
496
497 // P/Q estimations
498 const float P_Est = (NW_Grad * SE_Est + SE_Grad * NW_Est) / (NW_Grad + SE_Grad);
499 const float Q_Est = (NE_Grad * SW_Est + SW_Grad * NE_Est) / (NE_Grad + SW_Grad);
500
501 // R@B and B@R interpolation
502 rgb[c][indx] = rgb[1][indx] + intp(PQ_Disc, Q_Est, P_Est);
503 }
504 }
505
506 // Step 4.3: Populate the red and blue channels at green CFA positions
507 for(int row = 4; row < tileRows - 4; row++)
508 {
509 for(int col = 4 + (FC(row, 1, filters) & 1), indx = row * RCD_TILESIZE + col; col < tileCols - 4; col += 2, indx +=2)
510 {
511 // Refined vertical and horizontal local discrimination
512 const float VH_Central_Value = VH_Dir[indx];
513 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx - w1 - 1] + VH_Dir[indx - w1 + 1] + VH_Dir[indx + w1 - 1] + VH_Dir[indx + w1 + 1]);
514 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value) ) ? VH_Neighbourhood_Value : VH_Central_Value;
515 const float rgb1 = rgb[1][indx];
516 const float N1 = eps + fabs(rgb1 - rgb[1][indx - w2]);
517 const float S1 = eps + fabs(rgb1 - rgb[1][indx + w2]);
518 const float W1 = eps + fabs(rgb1 - rgb[1][indx - 2]);
519 const float E1 = eps + fabs(rgb1 - rgb[1][indx + 2]);
520
521 const float rgb1mw1 = rgb[1][indx - w1];
522 const float rgb1pw1 = rgb[1][indx + w1];
523 const float rgb1m1 = rgb[1][indx - 1];
524 const float rgb1p1 = rgb[1][indx + 1];
525
526 for(int c = 0; c <= 2; c += 2)
527 {
528 const float SNabs = fabs(rgb[c][indx - w1] - rgb[c][indx + w1]);
529 const float EWabs = fabs(rgb[c][indx - 1] - rgb[c][indx + 1]);
530
531 // Cardinal gradients
532 const float N_Grad = N1 + SNabs + fabs(rgb[c][indx - w1] - rgb[c][indx - w3]);
533 const float S_Grad = S1 + SNabs + fabs(rgb[c][indx + w1] - rgb[c][indx + w3]);
534 const float W_Grad = W1 + EWabs + fabs(rgb[c][indx - 1] - rgb[c][indx - 3]);
535 const float E_Grad = E1 + EWabs + fabs(rgb[c][indx + 1] - rgb[c][indx + 3]);
536
537 // Cardinal colour differences
538 const float N_Est = rgb[c][indx - w1] - rgb1mw1;
539 const float S_Est = rgb[c][indx + w1] - rgb1pw1;
540 const float W_Est = rgb[c][indx - 1] - rgb1m1;
541 const float E_Est = rgb[c][indx + 1] - rgb1p1;
542
543 // Vertical and horizontal estimations
544 const float V_Est = (N_Grad * S_Est + S_Grad * N_Est) / (N_Grad + S_Grad);
545 const float H_Est = (E_Grad * W_Est + W_Grad * E_Est) / (E_Grad + W_Grad);
546
547 // R@G and B@G interpolation
548 rgb[c][indx] = rgb1 + intp(VH_Disc, H_Est, V_Est);
549 }
550 }
551 }
552
553 // For the outermost tiles in all directions we can use a smaller border margin
554 const int first_vertical = rowStart + ((tile_vertical == 0) ? RCD_MARGIN : RCD_BORDER);
555 const int last_vertical = rowEnd - ((tile_vertical == num_vertical - 1) ? RCD_MARGIN : RCD_BORDER);
556 const int first_horizontal = colStart + ((tile_horizontal == 0) ? RCD_MARGIN : RCD_BORDER);
557 const int last_horizontal = colEnd - ((tile_horizontal == num_horizontal - 1) ? RCD_MARGIN : RCD_BORDER);
558 for(int row = first_vertical; row < last_vertical; row++)
559 {
560 for(int col = first_horizontal, idx = (row - rowStart) * RCD_TILESIZE + col - colStart, o_idx = (row * width + col) * 4; col < last_horizontal; col++, o_idx += 4, idx++)
561 {
562 out[o_idx] = scaler * fmaxf(0.0f, rgb[0][idx]);
563 out[o_idx+1] = scaler * fmaxf(0.0f, rgb[1][idx]);
564 out[o_idx+2] = scaler * fmaxf(0.0f, rgb[2][idx]);
565 out[o_idx+3] = 0.0f;
566 }
567 }
568 }
569 }
576 }
577}
578
579#ifdef HAVE_OPENCL
580
581static int process_rcd_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe,
582 const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
583 const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
584 const gboolean smooth)
585{
588
589 const int devid = pipe->devid;
590
591 cl_mem dev_aux = NULL;
592 cl_mem dev_tmp = NULL;
593 cl_mem dev_green_eq = NULL;
594 cl_mem cfa = NULL;
595 cl_mem rgb0 = NULL;
596 cl_mem rgb1 = NULL;
597 cl_mem rgb2 = NULL;
598 cl_mem VH_dir = NULL;
599 cl_mem PQ_dir = NULL;
600 cl_mem VP_diff = NULL;
601 cl_mem HQ_diff = NULL;
602
603 cl_int err = -999;
604
605 int width = roi_out->width;
606 int height = roi_out->height;
607
608 // green equilibration
609 if(data->green_eq != DT_IOP_GREEN_EQ_NO)
610 {
611 dev_green_eq = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float));
612 if(dev_green_eq == NULL) goto error;
613 if(!green_equilibration_cl(self, pipe, piece, dev_in, dev_green_eq, roi_in)) goto error;
614 dev_in = dev_green_eq;
615 }
616
617 // need to reserve scaled auxiliary buffer or use dev_out
618 dev_aux = dev_out;
619
620 dev_tmp = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float) * 4);
621 if(dev_tmp == NULL) goto error;
622
623 {
624 const int myborder = 3;
625 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
626 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 0, sizeof(cl_mem), &dev_in);
627 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 1, sizeof(cl_mem), &dev_tmp);
628 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 2, sizeof(int), (void *)&width);
629 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 3, sizeof(int), (void *)&height);
630 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 4, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
631 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 5, sizeof(int), (void *)&myborder);
633 if(err != CL_SUCCESS) goto error;
634 }
635
636 {
637 dt_opencl_local_buffer_t locopt
638 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
639 .cellsize = sizeof(float) * 1, .overhead = 0,
640 .sizex = 64, .sizey = 64 };
641
642 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_rcd_border_green, &locopt)) goto error;
643 const int myborder = 32;
644 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
645 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
646 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 0, sizeof(cl_mem), &dev_in);
647 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 1, sizeof(cl_mem), &dev_tmp);
648 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 2, sizeof(int), &width);
649 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 3, sizeof(int), &height);
650 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 4, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
651 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 5, sizeof(float) * (locopt.sizex + 2*3) * (locopt.sizey + 2*3), NULL);
652 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 6, sizeof(int), (void *)&myborder);
654 if(err != CL_SUCCESS) goto error;
655 }
656
657 {
658 dt_opencl_local_buffer_t locopt
659 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
660 .cellsize = 4 * sizeof(float), .overhead = 0,
661 .sizex = 64, .sizey = 64 };
662
663 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_rcd_border_redblue, &locopt)) goto error;
664 const int myborder = 16;
665 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
666 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
667 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 0, sizeof(cl_mem), &dev_tmp);
668 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 1, sizeof(cl_mem), &dev_aux);
669 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 2, sizeof(int), &width);
670 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 3, sizeof(int), &height);
671 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 4, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
672 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 5, sizeof(float) * 4 * (locopt.sizex + 2) * (locopt.sizey + 2), NULL);
673 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 6, sizeof(int), (void *)&myborder);
675 if(err != CL_SUCCESS) goto error;
676 }
678 dev_tmp = NULL;
679
680 cfa = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
681 if(cfa == NULL) goto error;
682 VH_dir = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
683 if(VH_dir == NULL) goto error;
684 PQ_dir = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
685 if(PQ_dir == NULL) goto error;
686 VP_diff = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
687 if(VP_diff == NULL) goto error;
688 HQ_diff = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
689 if(HQ_diff == NULL) goto error;
690 rgb0 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
691 if(rgb0 == NULL) goto error;
692 rgb1 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
693 if(rgb1 == NULL) goto error;
694 rgb2 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
695 if(rgb2 == NULL) goto error;
696
697 {
698 // populate data
699 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
700 const float scaler = 1.0f / fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
701 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 0, sizeof(cl_mem), &dev_in);
702 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 1, sizeof(cl_mem), &cfa);
703 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 2, sizeof(cl_mem), &rgb0);
704 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 3, sizeof(cl_mem), &rgb1);
705 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 4, sizeof(cl_mem), &rgb2);
706 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 5, sizeof(int), &width);
707 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 6, sizeof(int), &height);
708 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 7, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
709 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 8, sizeof(float), &scaler);
710 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_populate, sizes);
711 if(err != CL_SUCCESS) goto error;
712 }
713
714 {
715 // Step 1.1: Calculate a squared vertical and horizontal high pass filter on color differences
716 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
717 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 0, sizeof(cl_mem), &cfa);
718 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 1, sizeof(cl_mem), &VP_diff);
719 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 2, sizeof(cl_mem), &HQ_diff);
720 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 3, sizeof(int), &width);
721 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 4, sizeof(int), &height);
722 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_1_1, sizes);
723 if(err != CL_SUCCESS) goto error;
724 }
725
726 {
727 // Step 1.2: Calculate vertical and horizontal local discrimination
728 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
729 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 0, sizeof(cl_mem), &VH_dir);
730 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 1, sizeof(cl_mem), &VP_diff);
731 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 2, sizeof(cl_mem), &HQ_diff);
732 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 3, sizeof(int), &width);
733 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 4, sizeof(int), &height);
734 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_1_2, sizes);
735 if(err != CL_SUCCESS) goto error;
736 }
737
738 {
739 // Step 2.1: Low pass filter incorporating green, red and blue local samples from the raw data
740 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
741 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 0, sizeof(cl_mem), &PQ_dir); // double-use PQ_dir also for lpf
742 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 1, sizeof(cl_mem), &cfa);
743 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 2, sizeof(int), &width);
744 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 3, sizeof(int), &height);
745 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 4, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
746 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_2_1, sizes);
747 if(err != CL_SUCCESS) goto error;
748 }
749
750 {
751 // Step 3.1: Populate the green channel at blue and red CFA positions
752 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
753 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 0, sizeof(cl_mem), &PQ_dir); // double-use PQ_dir also for lpf
754 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 1, sizeof(cl_mem), &cfa);
755 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 2, sizeof(cl_mem), &rgb1);
756 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 3, sizeof(cl_mem), &VH_dir);
757 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 4, sizeof(int), &width);
758 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 5, sizeof(int), &height);
759 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 6, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
760 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_3_1, sizes);
761 if(err != CL_SUCCESS) goto error;
762 }
763
764 {
765 // Step 4.1: Calculate a squared P/Q diagonals high pass filter on color differences
766 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
767 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 0, sizeof(cl_mem), &cfa);
768 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 1, sizeof(cl_mem), &VP_diff);
769 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 2, sizeof(cl_mem), &HQ_diff);
770 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 3, sizeof(int), &width);
771 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 4, sizeof(int), &height);
772 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 5, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
773 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_4_1, sizes);
774 if(err != CL_SUCCESS) goto error;
775 }
776
777 {
778 // Step 4.2: Calculate P/Q diagonal local discrimination
779 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
780 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 0, sizeof(cl_mem), &PQ_dir);
781 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 1, sizeof(cl_mem), &VP_diff);
782 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 2, sizeof(cl_mem), &HQ_diff);
783 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 3, sizeof(int), &width);
784 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 4, sizeof(int), &height);
785 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 5, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
786 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_4_2, sizes);
787 if(err != CL_SUCCESS) goto error;
788 }
789
790 {
791 // Step 4.3: Populate the red and blue channels at blue and red CFA positions
792 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
793 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 0, sizeof(cl_mem), &PQ_dir);
794 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 1, sizeof(cl_mem), &rgb0);
795 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 2, sizeof(cl_mem), &rgb1);
796 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 3, sizeof(cl_mem), &rgb2);
797 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 4, sizeof(int), &width);
798 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 5, sizeof(int), &height);
799 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 6, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
800 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_5_1, sizes);
801 if(err != CL_SUCCESS) goto error;
802 }
803
804 {
805 // Step 5.2: Populate the red and blue channels at green CFA positions
806 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
807 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 0, sizeof(cl_mem), &VH_dir);
808 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 1, sizeof(cl_mem), &rgb0);
809 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 2, sizeof(cl_mem), &rgb1);
810 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 3, sizeof(cl_mem), &rgb2);
811 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 4, sizeof(int), &width);
812 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 5, sizeof(int), &height);
813 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 6, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
814 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_5_2, sizes);
815 if(err != CL_SUCCESS) goto error;
816 }
817 const float scaler = fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
818
819 {
820 // write output
821 const int myborder = 6;
822 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
823 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 0, sizeof(cl_mem), &dev_aux);
824 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 1, sizeof(cl_mem), &rgb0);
825 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 2, sizeof(cl_mem), &rgb1);
826 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 3, sizeof(cl_mem), &rgb2);
827 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 4, sizeof(int), &width);
828 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 5, sizeof(int), &height);
829 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 6, sizeof(float), &scaler);
830 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 7, sizeof(int), (void *)&myborder);
832 if(err != CL_SUCCESS) goto error;
833 }
834
843 dt_opencl_release_mem_object(dev_green_eq);
844 dev_green_eq = cfa = rgb0 = rgb1 = rgb2 = VH_dir = PQ_dir = VP_diff = HQ_diff = NULL;
845
846 dt_dev_write_rawdetail_mask_cl(pipe, piece, dev_aux, roi_in, DT_DEV_DETAIL_MASK_DEMOSAIC);
847
848 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
849 dev_aux = NULL;
850
851 // color smoothing
852 if((data->color_smoothing) && smooth)
853 {
854 if(!color_smoothing_cl(self, pipe, piece, dev_out, dev_out, roi_out, data->color_smoothing))
855 goto error;
856 }
857
858 return TRUE;
859
860error:
861 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
862 dt_opencl_release_mem_object(dev_green_eq);
872 dev_aux = dev_green_eq = dev_tmp = cfa = rgb0 = rgb1 = rgb2 = VH_dir = PQ_dir = VP_diff = HQ_diff = NULL;
873 dt_print(DT_DEBUG_OPENCL, "[opencl_demosaic] rcd couldn't enqueue kernel! %d\n", err);
874 return FALSE;
875}
876#endif //HAVE_OPENCL
877
878// revert rcd specific aggressive optimizing
879#ifdef __GNUC__
880 #pragma GCC pop_options
881#endif
882
883#undef RCD_BORDER
884#undef RCD_MARGIN
885#undef RCD_TILEVALID
886#undef w1
887#undef w2
888#undef w3
889#undef w4
890#undef eps
891#undef epssq
892//#undef RCD_TILESIZE
893
894
895// clang-format off
896// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
897// vim: shiftwidth=2 expandtab tabstop=2 cindent
898// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
899// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
#define m
Definition basecurve.c:277
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
#define INLINE
Definition cacorrect.c:167
static float intp(const float a, const float b, const float c)
Definition cacorrect.c:180
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:256
static const dt_colormatrix_t M
Definition colorspaces_inline_conversions.h:933
const float a
Definition colorspaces_inline_conversions.h:1292
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
static const int row
Definition colorspaces_inline_conversions.h:175
static dt_aligned_pixel_t rgb
Definition colorspaces_inline_conversions.h:530
void dt_control_log(const char *msg,...)
Definition control.c:530
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1530
#define DT_ALIGNED_PIXEL
Definition darktable.h:313
@ DT_DEBUG_OPENCL
Definition darktable.h:642
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:371
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:377
static int FC(const int row, const int col, const unsigned int filters)
Definition data/kernels/common.h:47
@ DT_IOP_GREEN_EQ_NO
Definition demosaic.c:132
@ DT_DEV_DETAIL_MASK_DEMOSAIC
Definition develop.h:140
static const float x
Definition iop_profile.h:239
static float sqf(const float x)
Definition math.h:223
static int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.h:574
static int dt_opencl_set_kernel_arg(const int dev, const int kernel, const size_t size, const void *arg)
Definition opencl.h:570
static void dt_opencl_release_mem_object(void *mem)
Definition opencl.h:619
static int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.h:578
static INLINE float safe_in(float a, float scale)
Definition rcd.c:90
#define w2
Definition rcd.c:82
#define RCD_TILESIZE
Definition rcd.c:54
static void rcd_ppg_border(float *const out, const float *const in, const int width, const int height, const uint32_t filters, const int margin)
Definition rcd.c:96
#define w1
Definition rcd.c:81
#define w4
Definition rcd.c:84
#define eps
Definition rcd.c:86
#define w3
Definition rcd.c:83
#define epssq
Definition rcd.c:87
static void rcd_demosaic(const dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters)
Definition rcd.c:287
#define RCD_BORDER
Definition rcd.c:78
#define RCD_TILEVALID
Definition rcd.c:80
#define RCD_MARGIN
Definition rcd.c:79
Definition pixelpipe_hb.h:95
dt_iop_buffer_dsc_t dsc_in
Definition pixelpipe_hb.h:140
struct dt_iop_module_t *void * data
Definition pixelpipe_hb.h:96
Definition pixelpipe_hb.h:216
dt_aligned_pixel_t processed_maximum
Definition develop/format.h:73
Definition demosaic.c:217
Definition demosaic.c:159
int kernel_rcd_step_5_1
Definition demosaic.c:206
int kernel_rcd_write_output
Definition demosaic.c:199
int kernel_rcd_step_1_1
Definition demosaic.c:200
int kernel_rcd_step_4_2
Definition demosaic.c:205
int kernel_rcd_step_5_2
Definition demosaic.c:207
int kernel_rcd_step_4_1
Definition demosaic.c:204
int kernel_rcd_border_redblue
Definition demosaic.c:208
int kernel_rcd_step_3_1
Definition demosaic.c:203
int kernel_rcd_step_1_2
Definition demosaic.c:201
int kernel_rcd_border_green
Definition demosaic.c:209
int kernel_rcd_step_2_1
Definition demosaic.c:202
int kernel_rcd_populate
Definition demosaic.c:198
int kernel_border_interpolate
Definition demosaic.c:172
Definition imageop.h:216
Definition imageop.h:67
int width
Definition imageop.h:68
int height
Definition imageop.h:68
#define c1
Definition colorspaces_inline_conversions.h:1054
#define MIN(a, b)
Definition thinplate.c:32