Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
rcd.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2010-2021 darktable developers.
4
5 darktable is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 darktable is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with darktable. If not, see <http://www.gnu.org/licenses/>.
17*/
18
19
20/*
21* RATIO CORRECTED DEMOSAICING
22* Luis Sanz Rodríguez (luis.sanz.rodriguez(at)gmail(dot)com)
23*
24* Release 2.3 @ 171125
25*
26* Original code from https://github.com/LuisSR/RCD-Demosaicing
27* Licensed under the GNU GPL version 3
28*/
29
30/*
31* The tiling coding was done by Ingo Weyrich (heckflosse67@gmx.de) from rawtherapee
32*/
33
34/*
35* Luis Sanz Rodríguez significantly optimised the v 2.3 code and simplified the directional
36* coefficients in an exact, shorter and more performant formula.
37* In cooperation with Ingo Weyrich and Luis Sanz Rodríguez this has been tuned for performance.
38* Hanno Schwalm (hanno@schwalm-bremen.de)
39*/
40
41/* Some notes about the algorithm
42* 1. The calculated data at the tiling borders RCD_BORDER must be at least 9 to be stable. Why does 8 **not** work?
43* 2. For the outermost tiles we only have to discard a 6 pixel border region interpolated otherwise.
44* 3. The tilesize has a significant influence on performance, the default is a good guess for modern
45* x86/64 machines, tested on Xeon E-2288G, i5-8250U.
46*/
47
48#ifndef RCD_TILESIZE
49 #define RCD_TILESIZE 112
50#endif
51
52/* We don't want to use the -Ofast option in dt as it effects are not well specified and there have been issues
53 leading to crashes.
54 But we can use the 'fast-math' option in code sections if input data and algorithms are well defined.
55
56 We have defined input data and make sure there are no divide-by-zero or overflows by chosen eps
57 Reordering of instructions might lead to a slight loss of presision whigh is not significant here.
58 Not necessary in this code section
59 threadsafe handling of errno
60 signed zero handling
61 handling of math interrupts
62 handling of rounding
63 handling of overflows
64
65 The 'fp-contract=fast' option enables fused multiply&add if available
66*/
67
68#ifdef __GNUC__
69 #pragma GCC push_options
70 #pragma GCC optimize ("fast-math", "fp-contract=fast", "finite-math-only", "no-math-errno")
71#endif
72
73#define RCD_BORDER 9 // avoid tile-overlap errors
74#define RCD_MARGIN 6 // for the outermost tiles we can have a smaller outer border
75#define RCD_TILEVALID (RCD_TILESIZE - 2 * RCD_BORDER)
76#define w1 RCD_TILESIZE
77#define w2 (2 * RCD_TILESIZE)
78#define w3 (3 * RCD_TILESIZE)
79#define w4 (4 * RCD_TILESIZE)
80
81#define eps 1e-5f // Tolerance to avoid dividing by zero
82#define epssq 1e-10f
83
84// We might have negative data in input and also want to normalise
85static INLINE float safe_in(float a, float scale)
86{
87 return fmaxf(0.0f, a) * scale;
88}
89
91static void rcd_ppg_border(float *const out, const float *const in, const int width, const int height, const uint32_t filters, const int margin)
92{
93 const int border = margin + 3;
94 // write approximatad 3-pixel border region to out
95 float sum[8];
96 for(int j = 0; j < height; j++)
97 {
98 for(int i = 0; i < width; i++)
99 {
100 if(i == 3 && j >= 3 && j < height - 3) i = width - 3;
101 if(i == width) break;
102 memset(sum, 0, sizeof(float) * 8);
103 for(int y = j - 1; y != j + 2; y++)
104 {
105 for(int x = i - 1; x != i + 2; x++)
106 {
107 if((y >= 0) && (x >= 0) && (y < height) && (x < width))
108 {
109 const int f = FC(y, x, filters);
110 sum[f] += fmaxf(0.0f, in[(size_t)y * width + x]);
111 sum[f + 4]++;
112 }
113 }
114 }
115 const int f = FC(j, i, filters);
116 for(int c = 0; c < 3; c++)
117 {
118 if(c != f && sum[c + 4] > 0.0f)
119 out[4 * ((size_t)j * width + i) + c] = sum[c] / sum[c + 4];
120 else
121 out[4 * ((size_t)j * width + i) + c] = fmaxf(0.0f, in[(size_t)j * width + i]);
122 }
123 }
124 }
125
126 const float *input = in;
127
128#ifdef _OPENMP
129#pragma omp parallel for default(none) \
130 dt_omp_firstprivate(filters, out, width, height, border) \
131 shared(input) \
132 schedule(static)
133#endif
134 for(int j = 3; j < height - 3; j++)
135 {
136 float *buf = out + (size_t)4 * width * j + 4 * 3;
137 const float *buf_in = input + (size_t)width * j + 3;
138 for(int i = 3; i < width - 3; i++)
139 {
140 if(i == border && j >= border && j < height - border)
141 {
142 i = width - border;
143 buf = out + (size_t)4 * width * j + 4 * i;
144 buf_in = input + (size_t)width * j + i;
145 }
146 if(i == width) break;
147
148 const int c = FC(j, i, filters);
149 dt_aligned_pixel_t color;
150 const float pc = fmaxf(0.0f, buf_in[0]);
151 if(c == 0 || c == 2)
152 {
153 color[c] = pc;
154 const float pym = fmaxf(0.0f, buf_in[-width * 1]);
155 const float pym2 = fmaxf(0.0f, buf_in[-width * 2]);
156 const float pym3 = fmaxf(0.0f, buf_in[-width * 3]);
157 const float pyM = fmaxf(0.0f, buf_in[+width * 1]);
158 const float pyM2 = fmaxf(0.0f, buf_in[+width * 2]);
159 const float pyM3 = fmaxf(0.0f, buf_in[+width * 3]);
160 const float pxm = fmaxf(0.0f, buf_in[-1]);
161 const float pxm2 = fmaxf(0.0f, buf_in[-2]);
162 const float pxm3 = fmaxf(0.0f, buf_in[-3]);
163 const float pxM = fmaxf(0.0f, buf_in[+1]);
164 const float pxM2 = fmaxf(0.0f, buf_in[+2]);
165 const float pxM3 = fmaxf(0.0f, buf_in[+3]);
166
167 const float guessx = (pxm + pc + pxM) * 2.0f - pxM2 - pxm2;
168 const float diffx = (fabsf(pxm2 - pc) + fabsf(pxM2 - pc) + fabsf(pxm - pxM)) * 3.0f
169 + (fabsf(pxM3 - pxM) + fabsf(pxm3 - pxm)) * 2.0f;
170 const float guessy = (pym + pc + pyM) * 2.0f - pyM2 - pym2;
171 const float diffy = (fabsf(pym2 - pc) + fabsf(pyM2 - pc) + fabsf(pym - pyM)) * 3.0f
172 + (fabsf(pyM3 - pyM) + fabsf(pym3 - pym)) * 2.0f;
173 if(diffx > diffy)
174 {
175 // use guessy
176 const float m = fminf(pym, pyM);
177 const float M = fmaxf(pym, pyM);
178 color[1] = fmaxf(fminf(guessy * .25f, M), m);
179 }
180 else
181 {
182 const float m = fminf(pxm, pxM);
183 const float M = fmaxf(pxm, pxM);
184 color[1] = fmaxf(fminf(guessx * .25f, M), m);
185 }
186 }
187 else
188 color[1] = pc;
189
190 color[3] = 0.0f;
191 memcpy(buf, color, sizeof(float) * 4);
192 buf += 4;
193 buf_in++;
194 }
195 }
196// for all pixels: interpolate colors into float array
197#ifdef _OPENMP
198#pragma omp parallel for default(none) \
199 dt_omp_firstprivate(filters, out, width, height, margin) \
200 schedule(static)
201#endif
202 for(int j = 1; j < height - 1; j++)
203 {
204 float *buf = out + (size_t)4 * width * j + 4;
205 for(int i = 1; i < width - 1; i++)
206 {
207 if(i == margin && j >= margin && j < height - margin)
208 {
209 i = width - margin;
210 buf = out + (size_t)4 * (width * j + i);
211 }
212 const int c = FC(j, i, filters);
213 dt_aligned_pixel_t color = { buf[0], buf[1], buf[2], buf[3] };
214 const int linesize = 4 * width;
215 // fill all four pixels with correctly interpolated stuff: r/b for green1/2
216 // b for r and r for b
217 if(__builtin_expect(c & 1, 1)) // c == 1 || c == 3)
218 {
219 // calculate red and blue for green pixels:
220 // need 4-nbhood:
221 const float *nt = buf - linesize;
222 const float *nb = buf + linesize;
223 const float *nl = buf - 4;
224 const float *nr = buf + 4;
225 if(FC(j, i + 1, filters) == 0) // red nb in same row
226 {
227 color[2] = (nt[2] + nb[2] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
228 color[0] = (nl[0] + nr[0] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
229 }
230 else
231 {
232 // blue nb
233 color[0] = (nt[0] + nb[0] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
234 color[2] = (nl[2] + nr[2] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
235 }
236 }
237 else
238 {
239 // get 4-star-nbhood:
240 const float *ntl = buf - 4 - linesize;
241 const float *ntr = buf + 4 - linesize;
242 const float *nbl = buf - 4 + linesize;
243 const float *nbr = buf + 4 + linesize;
244
245 if(c == 0)
246 {
247 // red pixel, fill blue:
248 const float diff1 = fabsf(ntl[2] - nbr[2]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
249 const float guess1 = ntl[2] + nbr[2] + 2.0f * color[1] - ntl[1] - nbr[1];
250 const float diff2 = fabsf(ntr[2] - nbl[2]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
251 const float guess2 = ntr[2] + nbl[2] + 2.0f * color[1] - ntr[1] - nbl[1];
252 if(diff1 > diff2)
253 color[2] = guess2 * .5f;
254 else if(diff1 < diff2)
255 color[2] = guess1 * .5f;
256 else
257 color[2] = (guess1 + guess2) * .25f;
258 }
259 else // c == 2, blue pixel, fill red:
260 {
261 const float diff1 = fabsf(ntl[0] - nbr[0]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
262 const float guess1 = ntl[0] + nbr[0] + 2.0f * color[1] - ntl[1] - nbr[1];
263 const float diff2 = fabsf(ntr[0] - nbl[0]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
264 const float guess2 = ntr[0] + nbl[0] + 2.0f * color[1] - ntr[1] - nbl[1];
265 if(diff1 > diff2)
266 color[0] = guess2 * .5f;
267 else if(diff1 < diff2)
268 color[0] = guess1 * .5f;
269 else
270 color[0] = (guess1 + guess2) * .25f;
271 }
272 }
273 memcpy(buf, color, sizeof(float) * 4);
274 buf += 4;
275 }
276 }
277}
278
279#ifdef _OPENMP
280 #pragma omp declare simd aligned(in, out)
281#endif
282static void rcd_demosaic(dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out,
283 const dt_iop_roi_t *const roi_in, const uint32_t filters)
284{
285 const int width = roi_in->width;
286 const int height = roi_in->height;
287
288 if((width < 16) || (height < 16))
289 {
290 dt_control_log(_("[rcd_demosaic] too small area"));
291 return;
292 }
293
294 rcd_ppg_border(out, in, width, height, filters, RCD_MARGIN);
295
296 const float scaler = fmaxf(piece->pipe->dsc.processed_maximum[0], fmaxf(piece->pipe->dsc.processed_maximum[1], piece->pipe->dsc.processed_maximum[2]));
297 const float revscaler = 1.0f / scaler;
298
299 const int num_vertical = 1 + (height - 2 * RCD_BORDER -1) / RCD_TILEVALID;
300 const int num_horizontal = 1 + (width - 2 * RCD_BORDER -1) / RCD_TILEVALID;
301
302#ifdef _OPENMP
303 #pragma omp parallel \
304 dt_omp_firstprivate(width, height, filters, out, in, scaler, revscaler)
305#endif
306 {
307 // FIXME: CRITICAL: need to handle the case where we couldn't alloc the memory,
308 // but in a parallel section, that's not going to be trivial.
309
310 float *const VH_Dir = dt_alloc_align_float((size_t) RCD_TILESIZE * RCD_TILESIZE);
311 // ensure that border elements which are read but never actually set below are zeroed out
312 memset(VH_Dir, 0, sizeof(*VH_Dir) * RCD_TILESIZE * RCD_TILESIZE);
313 float *const PQ_Dir = dt_alloc_align_float((size_t) RCD_TILESIZE * RCD_TILESIZE / 2);
314 float *const cfa = dt_alloc_align_float((size_t) RCD_TILESIZE * RCD_TILESIZE);
315 float *const P_CDiff_Hpf = dt_alloc_align_float((size_t) RCD_TILESIZE * RCD_TILESIZE / 2);
316 float *const Q_CDiff_Hpf = dt_alloc_align_float((size_t) RCD_TILESIZE * RCD_TILESIZE / 2);
317
318 float (*const rgb)[RCD_TILESIZE * RCD_TILESIZE] = (void *)dt_alloc_align_float((size_t)3 * RCD_TILESIZE * RCD_TILESIZE);
319
320 // No overlapping use so re-use same buffer
321 float *const lpf = PQ_Dir;
322
323 // There has been a discussion about the schedule strategy, at least on the tested machines the
324 // dynamic scheduling seems to be slightly faster.
325#ifdef _OPENMP
326 #pragma omp for schedule(simd:dynamic, 6) collapse(2) nowait
327#endif
328 for(int tile_vertical = 0; tile_vertical < num_vertical; tile_vertical++)
329 {
330 for(int tile_horizontal = 0; tile_horizontal < num_horizontal; tile_horizontal++)
331 {
332 const int rowStart = tile_vertical * RCD_TILEVALID;
333 const int rowEnd = MIN(rowStart + RCD_TILESIZE, height);
334
335 const int colStart = tile_horizontal * RCD_TILEVALID;
336 const int colEnd = MIN(colStart + RCD_TILESIZE, width);
337
338 const int tileRows = MIN(rowEnd - rowStart, RCD_TILESIZE);
339 const int tileCols = MIN(colEnd - colStart, RCD_TILESIZE);
340
341 if (rowStart + RCD_TILESIZE > height || colStart + RCD_TILESIZE > width)
342 {
343 // VH_Dir is only filled for (4,4)..(height-4,width-4), but the refinement code reads (3,3)...(h-3,w-3),
344 // so we need to ensure that the border is zeroed for partial tiles to get consistent results
345 memset(VH_Dir, 0, sizeof(*VH_Dir) * RCD_TILESIZE * RCD_TILESIZE);
346 // TODO: figure out what part of rgb is being accessed without initialization on partial tiles
347 memset(rgb, 0, sizeof(float) * 3 * RCD_TILESIZE * RCD_TILESIZE);
348 }
349 // Step 0: fill data and make sure data are not negative.
350 for(int row = rowStart; row < rowEnd; row++)
351 {
352 const int c0 = FC(row, colStart, filters);
353 const int c1 = FC(row, colStart + 1, filters);
354 for(int col = colStart, indx = (row - rowStart) * RCD_TILESIZE, in_indx = row * width + colStart; col < colEnd; col++, indx++, in_indx++)
355 {
356 cfa[indx] = rgb[c0][indx] = rgb[c1][indx] = safe_in(in[in_indx], revscaler);
357 }
358 }
359
360 // STEP 1: Find vertical and horizontal interpolation directions
361 float bufferV[3][RCD_TILESIZE - 8];
362 // Step 1.1: Calculate the square of the vertical and horizontal color difference high pass filter
363 for(int row = 3; row < MIN(tileRows - 3, 5); row++ )
364 {
365 for(int col = 4, indx = row * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
366 {
367 bufferV[row - 3][col - 4] = sqf((cfa[indx - w3] - cfa[indx - w1] - cfa[indx + w1] + cfa[indx + w3]) - 3.0f * (cfa[indx - w2] + cfa[indx + w2]) + 6.0f * cfa[indx]);
368 }
369 }
370
371 // Step 1.2: Obtain the vertical and horizontal directional discrimination strength
372 float DT_ALIGNED_PIXEL bufferH[RCD_TILESIZE];
373 // We start with V0, V1 and V2 pointing to row -1, row and row +1
374 // After row is processed V0 must point to the old V1, V1 must point to the old V2 and V2 must point to the old V0
375 // because the old V0 is not used anymore and will be filled with row + 1 data in next iteration
376 float* V0 = bufferV[0];
377 float* V1 = bufferV[1];
378 float* V2 = bufferV[2];
379 for(int row = 4; row < tileRows - 4; row++ )
380 {
381 for(int col = 3, indx = row * RCD_TILESIZE + col; col < tileCols - 3; col++, indx++)
382 {
383 bufferH[col - 3] = sqf((cfa[indx - 3] - cfa[indx - 1] - cfa[indx + 1] + cfa[indx + 3]) - 3.0f * (cfa[indx - 2] + cfa[indx + 2]) + 6.0f * cfa[indx]);
384 }
385 for(int col = 4, indx = (row + 1) * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++)
386 {
387 V2[col - 4] = sqf((cfa[indx - w3] - cfa[indx - w1] - cfa[indx + w1] + cfa[indx + w3]) - 3.0f * (cfa[indx - w2] + cfa[indx + w2]) + 6.0f * cfa[indx]);
388 }
389 for(int col = 4, indx = row * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
390 {
391 const float V_Stat = fmaxf(epssq, V0[col - 4] + V1[col - 4] + V2[col - 4]);
392 const float H_Stat = fmaxf(epssq, bufferH[col - 4] + bufferH[col - 3] + bufferH[col - 2]);
393 VH_Dir[indx] = V_Stat / ( V_Stat + H_Stat );
394 }
395 // rolling the line pointers
396 float* tmp = V0; V0 = V1; V1 = V2; V2 = tmp;
397 }
398
399 // STEP 2: Calculate the low pass filter
400 // Step 2.1: Low pass filter incorporating green, red and blue local samples from the raw data
401 for(int row = 2; row < tileRows - 2; row++)
402 {
403 for(int col = 2 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, lp_indx = indx / 2; col < tileCols - 2; col += 2, indx +=2, lp_indx++)
404 {
405 lpf[lp_indx] = cfa[indx]
406 + 0.5f * (cfa[indx - w1] + cfa[indx + w1] + cfa[indx - 1] + cfa[indx + 1])
407 + 0.25f * (cfa[indx - w1 - 1] + cfa[indx - w1 + 1] + cfa[indx + w1 - 1] + cfa[indx + w1 + 1]);
408 }
409 }
410
411 // STEP 3: Populate the green channel
412 // Step 3.1: Populate the green channel at blue and red CFA positions
413 for(int row = 4; row < tileRows - 4; row++)
414 {
415 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, lpindx = indx / 2; col < tileCols - 4; col += 2, indx += 2, lpindx++)
416 {
417 const float cfai = cfa[indx];
418
419 // Cardinal gradients
420 const float N_Grad = eps + fabs(cfa[indx - w1] - cfa[indx + w1]) + fabs(cfai - cfa[indx - w2]) + fabs(cfa[indx - w1] - cfa[indx - w3]) + fabs(cfa[indx - w2] - cfa[indx - w4]);
421 const float S_Grad = eps + fabs(cfa[indx - w1] - cfa[indx + w1]) + fabs(cfai - cfa[indx + w2]) + fabs(cfa[indx + w1] - cfa[indx + w3]) + fabs(cfa[indx + w2] - cfa[indx + w4]);
422 const float W_Grad = eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx - 2]) + fabs(cfa[indx - 1] - cfa[indx - 3]) + fabs(cfa[indx - 2] - cfa[indx - 4]);
423 const float E_Grad = eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx + 2]) + fabs(cfa[indx + 1] - cfa[indx + 3]) + fabs(cfa[indx + 2] - cfa[indx + 4]);
424
425 // Cardinal pixel estimations
426 const float lpfi = lpf[lpindx];
427 const float N_Est = cfa[indx - w1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx - w1]);
428 const float S_Est = cfa[indx + w1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx + w1]);
429 const float W_Est = cfa[indx - 1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx - 1]);
430 const float E_Est = cfa[indx + 1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx + 1]);
431
432 // Vertical and horizontal estimations
433 const float V_Est = (S_Grad * N_Est + N_Grad * S_Est) / (N_Grad + S_Grad);
434 const float H_Est = (W_Grad * E_Est + E_Grad * W_Est) / (E_Grad + W_Grad);
435
436 // G@B and G@R interpolation
437 // Refined vertical and horizontal local discrimination
438 const float VH_Central_Value = VH_Dir[indx];
439 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx - w1 - 1] + VH_Dir[indx - w1 + 1] + VH_Dir[indx + w1 - 1] + VH_Dir[indx + w1 + 1]);
440 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value)) ? VH_Neighbourhood_Value : VH_Central_Value;
441
442 rgb[1][indx] = intp(VH_Disc, H_Est, V_Est);
443 }
444 }
445
446 // STEP 4: Populate the red and blue channels
447
448 // Step 4.0: Calculate the square of the P/Q diagonals color difference high pass filter
449 for(int row = 3; row < tileRows - 3; row++)
450 {
451 for(int col = 3, indx = row * RCD_TILESIZE + col, indx2 = indx / 2; col < tileCols - 3; col+=2, indx+=2, indx2++)
452 {
453 P_CDiff_Hpf[indx2] = sqf((cfa[indx - w3 - 3] - cfa[indx - w1 - 1] - cfa[indx + w1 + 1] + cfa[indx + w3 + 3]) - 3.0f * (cfa[indx - w2 - 2] + cfa[indx + w2 + 2]) + 6.0f * cfa[indx]);
454 Q_CDiff_Hpf[indx2] = sqf((cfa[indx - w3 + 3] - cfa[indx - w1 + 1] - cfa[indx + w1 - 1] + cfa[indx + w3 - 3]) - 3.0f * (cfa[indx - w2 + 2] + cfa[indx + w2 - 2]) + 6.0f * cfa[indx]);
455 }
456 }
457 // Step 4.1: Obtain the P/Q diagonals directional discrimination strength
458 for(int row = 4; row < tileRows - 4; row++)
459 {
460 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, indx2 = indx / 2, indx3 = (indx - w1 - 1) / 2, indx4 = (indx + w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ )
461 {
462 const float P_Stat = fmaxf(epssq, P_CDiff_Hpf[indx3] + P_CDiff_Hpf[indx2] + P_CDiff_Hpf[indx4 + 1]);
463 const float Q_Stat = fmaxf(epssq, Q_CDiff_Hpf[indx3 + 1] + Q_CDiff_Hpf[indx2] + Q_CDiff_Hpf[indx4]);
464 PQ_Dir[indx2] = P_Stat / (P_Stat + Q_Stat);
465 }
466 }
467
468 // Step 4.2: Populate the red and blue channels at blue and red CFA positions
469 for(int row = 4; row < tileRows - 4; row++)
470 {
471 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, c = 2 - FC(row, col, filters), pqindx = indx / 2, pqindx2 = (indx - w1 - 1) / 2, pqindx3 = (indx + w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, pqindx++, pqindx2++, pqindx3++)
472 {
473 // Refined P/Q diagonal local discrimination
474 const float PQ_Central_Value = PQ_Dir[pqindx];
475 const float PQ_Neighbourhood_Value = 0.25f * (PQ_Dir[pqindx2] + PQ_Dir[pqindx2 + 1] + PQ_Dir[pqindx3] + PQ_Dir[pqindx3 + 1]);
476
477 const float PQ_Disc = (fabs(0.5f - PQ_Central_Value) < fabs(0.5f - PQ_Neighbourhood_Value)) ? PQ_Neighbourhood_Value : PQ_Central_Value;
478
479 // Diagonal gradients
480 const float NW_Grad = eps + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx + w1 + 1]) + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx - w3 - 3]) + fabs(rgb[1][indx] - rgb[1][indx - w2 - 2]);
481 const float NE_Grad = eps + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx + w1 - 1]) + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx - w3 + 3]) + fabs(rgb[1][indx] - rgb[1][indx - w2 + 2]);
482 const float SW_Grad = eps + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx + w1 - 1]) + fabs(rgb[c][indx + w1 - 1] - rgb[c][indx + w3 - 3]) + fabs(rgb[1][indx] - rgb[1][indx + w2 - 2]);
483 const float SE_Grad = eps + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx + w1 + 1]) + fabs(rgb[c][indx + w1 + 1] - rgb[c][indx + w3 + 3]) + fabs(rgb[1][indx] - rgb[1][indx + w2 + 2]);
484
485 // Diagonal colour differences
486 const float NW_Est = rgb[c][indx - w1 - 1] - rgb[1][indx - w1 - 1];
487 const float NE_Est = rgb[c][indx - w1 + 1] - rgb[1][indx - w1 + 1];
488 const float SW_Est = rgb[c][indx + w1 - 1] - rgb[1][indx + w1 - 1];
489 const float SE_Est = rgb[c][indx + w1 + 1] - rgb[1][indx + w1 + 1];
490
491 // P/Q estimations
492 const float P_Est = (NW_Grad * SE_Est + SE_Grad * NW_Est) / (NW_Grad + SE_Grad);
493 const float Q_Est = (NE_Grad * SW_Est + SW_Grad * NE_Est) / (NE_Grad + SW_Grad);
494
495 // R@B and B@R interpolation
496 rgb[c][indx] = rgb[1][indx] + intp(PQ_Disc, Q_Est, P_Est);
497 }
498 }
499
500 // Step 4.3: Populate the red and blue channels at green CFA positions
501 for(int row = 4; row < tileRows - 4; row++)
502 {
503 for(int col = 4 + (FC(row, 1, filters) & 1), indx = row * RCD_TILESIZE + col; col < tileCols - 4; col += 2, indx +=2)
504 {
505 // Refined vertical and horizontal local discrimination
506 const float VH_Central_Value = VH_Dir[indx];
507 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx - w1 - 1] + VH_Dir[indx - w1 + 1] + VH_Dir[indx + w1 - 1] + VH_Dir[indx + w1 + 1]);
508 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value) ) ? VH_Neighbourhood_Value : VH_Central_Value;
509 const float rgb1 = rgb[1][indx];
510 const float N1 = eps + fabs(rgb1 - rgb[1][indx - w2]);
511 const float S1 = eps + fabs(rgb1 - rgb[1][indx + w2]);
512 const float W1 = eps + fabs(rgb1 - rgb[1][indx - 2]);
513 const float E1 = eps + fabs(rgb1 - rgb[1][indx + 2]);
514
515 const float rgb1mw1 = rgb[1][indx - w1];
516 const float rgb1pw1 = rgb[1][indx + w1];
517 const float rgb1m1 = rgb[1][indx - 1];
518 const float rgb1p1 = rgb[1][indx + 1];
519
520 for(int c = 0; c <= 2; c += 2)
521 {
522 const float SNabs = fabs(rgb[c][indx - w1] - rgb[c][indx + w1]);
523 const float EWabs = fabs(rgb[c][indx - 1] - rgb[c][indx + 1]);
524
525 // Cardinal gradients
526 const float N_Grad = N1 + SNabs + fabs(rgb[c][indx - w1] - rgb[c][indx - w3]);
527 const float S_Grad = S1 + SNabs + fabs(rgb[c][indx + w1] - rgb[c][indx + w3]);
528 const float W_Grad = W1 + EWabs + fabs(rgb[c][indx - 1] - rgb[c][indx - 3]);
529 const float E_Grad = E1 + EWabs + fabs(rgb[c][indx + 1] - rgb[c][indx + 3]);
530
531 // Cardinal colour differences
532 const float N_Est = rgb[c][indx - w1] - rgb1mw1;
533 const float S_Est = rgb[c][indx + w1] - rgb1pw1;
534 const float W_Est = rgb[c][indx - 1] - rgb1m1;
535 const float E_Est = rgb[c][indx + 1] - rgb1p1;
536
537 // Vertical and horizontal estimations
538 const float V_Est = (N_Grad * S_Est + S_Grad * N_Est) / (N_Grad + S_Grad);
539 const float H_Est = (E_Grad * W_Est + W_Grad * E_Est) / (E_Grad + W_Grad);
540
541 // R@G and B@G interpolation
542 rgb[c][indx] = rgb1 + intp(VH_Disc, H_Est, V_Est);
543 }
544 }
545 }
546
547 // For the outermost tiles in all directions we can use a smaller border margin
548 const int first_vertical = rowStart + ((tile_vertical == 0) ? RCD_MARGIN : RCD_BORDER);
549 const int last_vertical = rowEnd - ((tile_vertical == num_vertical - 1) ? RCD_MARGIN : RCD_BORDER);
550 const int first_horizontal = colStart + ((tile_horizontal == 0) ? RCD_MARGIN : RCD_BORDER);
551 const int last_horizontal = colEnd - ((tile_horizontal == num_horizontal - 1) ? RCD_MARGIN : RCD_BORDER);
552 for(int row = first_vertical; row < last_vertical; row++)
553 {
554 for(int col = first_horizontal, idx = (row - rowStart) * RCD_TILESIZE + col - colStart, o_idx = (row * width + col) * 4; col < last_horizontal; col++, o_idx += 4, idx++)
555 {
556 out[o_idx] = scaler * fmaxf(0.0f, rgb[0][idx]);
557 out[o_idx+1] = scaler * fmaxf(0.0f, rgb[1][idx]);
558 out[o_idx+2] = scaler * fmaxf(0.0f, rgb[2][idx]);
559 out[o_idx+3] = 0.0f;
560 }
561 }
562 }
563 }
564 dt_free_align(cfa);
565 dt_free_align(rgb);
566 dt_free_align(VH_Dir);
567 dt_free_align(PQ_Dir);
568 dt_free_align(P_CDiff_Hpf);
569 dt_free_align(Q_CDiff_Hpf);
570 }
571}
572
573#ifdef HAVE_OPENCL
574
575static int process_rcd_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in,
576 cl_mem dev_out, const dt_iop_roi_t *const roi_in,
577 const dt_iop_roi_t *const roi_out, const gboolean smooth)
578{
581 const dt_image_t *img = &self->dev->image_storage;
582
583 const int devid = piece->pipe->devid;
584 const int qual_flags = demosaic_qual_flags(piece, img, roi_out);
585
586 cl_mem dev_aux = NULL;
587 cl_mem dev_tmp = NULL;
588 cl_mem dev_green_eq = NULL;
589 cl_mem cfa = NULL;
590 cl_mem rgb0 = NULL;
591 cl_mem rgb1 = NULL;
592 cl_mem rgb2 = NULL;
593 cl_mem VH_dir = NULL;
594 cl_mem PQ_dir = NULL;
595 cl_mem VP_diff = NULL;
596 cl_mem HQ_diff = NULL;
597
598 cl_int err = -999;
599
600 if(qual_flags & DEMOSAIC_FULL_SCALE)
601 {
602 // Full demosaic and then scaling if needed
603 const int scaled = (roi_out->width != roi_in->width || roi_out->height != roi_in->height);
604
605 int width = roi_out->width;
606 int height = roi_out->height;
607
608 // green equilibration
609 if(data->green_eq != DT_IOP_GREEN_EQ_NO)
610 {
611 dev_green_eq = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float));
612 if(dev_green_eq == NULL) goto error;
613 if(!green_equilibration_cl(self, piece, dev_in, dev_green_eq, roi_in)) goto error;
614 dev_in = dev_green_eq;
615 }
616
617 // need to reserve scaled auxiliary buffer or use dev_out
618 if(scaled)
619 {
620 dev_aux = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float) * 4);
621 if(dev_aux == NULL) goto error;
622 width = roi_in->width;
623 height = roi_in->height;
624 }
625 else
626 dev_aux = dev_out;
627
628 dev_tmp = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float) * 4);
629 if(dev_tmp == NULL) goto error;
630
631 {
632 const int myborder = 3;
633 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
634 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 0, sizeof(cl_mem), &dev_in);
635 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 1, sizeof(cl_mem), &dev_tmp);
636 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 2, sizeof(int), (void *)&width);
637 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 3, sizeof(int), (void *)&height);
638 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 4, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
639 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 5, sizeof(int), (void *)&myborder);
641 if(err != CL_SUCCESS) goto error;
642 }
643
644 {
645 dt_opencl_local_buffer_t locopt
646 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
647 .cellsize = sizeof(float) * 1, .overhead = 0,
648 .sizex = 64, .sizey = 64 };
649
650 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_rcd_border_green, &locopt)) goto error;
651 const int myborder = 32;
652 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
653 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
654 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 0, sizeof(cl_mem), &dev_in);
655 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 1, sizeof(cl_mem), &dev_tmp);
656 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 2, sizeof(int), &width);
657 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 3, sizeof(int), &height);
658 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 4, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
659 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 5, sizeof(float) * (locopt.sizex + 2*3) * (locopt.sizey + 2*3), NULL);
660 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 6, sizeof(int), (void *)&myborder);
662 if(err != CL_SUCCESS) goto error;
663 }
664
665 {
666 dt_opencl_local_buffer_t locopt
667 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
668 .cellsize = 4 * sizeof(float), .overhead = 0,
669 .sizex = 64, .sizey = 64 };
670
671 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_rcd_border_redblue, &locopt)) goto error;
672 const int myborder = 16;
673 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
674 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
675 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 0, sizeof(cl_mem), &dev_tmp);
676 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 1, sizeof(cl_mem), &dev_aux);
677 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 2, sizeof(int), &width);
678 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 3, sizeof(int), &height);
679 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 4, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
680 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 5, sizeof(float) * 4 * (locopt.sizex + 2) * (locopt.sizey + 2), NULL);
681 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 6, sizeof(int), (void *)&myborder);
683 if(err != CL_SUCCESS) goto error;
684 }
686 dev_tmp = NULL;
687
688 cfa = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
689 if(cfa == NULL) goto error;
690 VH_dir = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
691 if(VH_dir == NULL) goto error;
692 PQ_dir = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
693 if(PQ_dir == NULL) goto error;
694 VP_diff = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
695 if(VP_diff == NULL) goto error;
696 HQ_diff = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
697 if(HQ_diff == NULL) goto error;
698 rgb0 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
699 if(rgb0 == NULL) goto error;
700 rgb1 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
701 if(rgb1 == NULL) goto error;
702 rgb2 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
703 if(rgb2 == NULL) goto error;
704
705 {
706 // populate data
707 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
708 const float scaler = 1.0f / fmaxf(piece->pipe->dsc.processed_maximum[0], fmaxf(piece->pipe->dsc.processed_maximum[1], piece->pipe->dsc.processed_maximum[2]));
709 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 0, sizeof(cl_mem), &dev_in);
710 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 1, sizeof(cl_mem), &cfa);
711 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 2, sizeof(cl_mem), &rgb0);
712 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 3, sizeof(cl_mem), &rgb1);
713 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 4, sizeof(cl_mem), &rgb2);
714 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 5, sizeof(int), &width);
715 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 6, sizeof(int), &height);
716 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 7, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
717 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 8, sizeof(float), &scaler);
718 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_populate, sizes);
719 if(err != CL_SUCCESS) goto error;
720 }
721
722 {
723 // Step 1.1: Calculate a squared vertical and horizontal high pass filter on color differences
724 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
725 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 0, sizeof(cl_mem), &cfa);
726 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 1, sizeof(cl_mem), &VP_diff);
727 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 2, sizeof(cl_mem), &HQ_diff);
728 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 3, sizeof(int), &width);
729 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_1, 4, sizeof(int), &height);
730 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_1_1, sizes);
731 if(err != CL_SUCCESS) goto error;
732 }
733
734 {
735 // Step 1.2: Calculate vertical and horizontal local discrimination
736 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
737 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 0, sizeof(cl_mem), &VH_dir);
738 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 1, sizeof(cl_mem), &VP_diff);
739 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 2, sizeof(cl_mem), &HQ_diff);
740 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 3, sizeof(int), &width);
741 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1_2, 4, sizeof(int), &height);
742 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_1_2, sizes);
743 if(err != CL_SUCCESS) goto error;
744 }
745
746 {
747 // Step 2.1: Low pass filter incorporating green, red and blue local samples from the raw data
748 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
749 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 0, sizeof(cl_mem), &PQ_dir); // double-use PQ_dir also for lpf
750 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 1, sizeof(cl_mem), &cfa);
751 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 2, sizeof(int), &width);
752 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 3, sizeof(int), &height);
753 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 4, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
754 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_2_1, sizes);
755 if(err != CL_SUCCESS) goto error;
756 }
757
758 {
759 // Step 3.1: Populate the green channel at blue and red CFA positions
760 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
761 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 0, sizeof(cl_mem), &PQ_dir); // double-use PQ_dir also for lpf
762 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 1, sizeof(cl_mem), &cfa);
763 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 2, sizeof(cl_mem), &rgb1);
764 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 3, sizeof(cl_mem), &VH_dir);
765 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 4, sizeof(int), &width);
766 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 5, sizeof(int), &height);
767 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 6, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
768 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_3_1, sizes);
769 if(err != CL_SUCCESS) goto error;
770 }
771
772 {
773 // Step 4.1: Calculate a squared P/Q diagonals high pass filter on color differences
774 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
775 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 0, sizeof(cl_mem), &cfa);
776 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 1, sizeof(cl_mem), &VP_diff);
777 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 2, sizeof(cl_mem), &HQ_diff);
778 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 3, sizeof(int), &width);
779 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 4, sizeof(int), &height);
780 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 5, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
781 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_4_1, sizes);
782 if(err != CL_SUCCESS) goto error;
783 }
784
785 {
786 // Step 4.2: Calculate P/Q diagonal local discrimination
787 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
788 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 0, sizeof(cl_mem), &PQ_dir);
789 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 1, sizeof(cl_mem), &VP_diff);
790 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 2, sizeof(cl_mem), &HQ_diff);
791 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 3, sizeof(int), &width);
792 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 4, sizeof(int), &height);
793 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 5, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
794 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_4_2, sizes);
795 if(err != CL_SUCCESS) goto error;
796 }
797
798 {
799 // Step 4.3: Populate the red and blue channels at blue and red CFA positions
800 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
801 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 0, sizeof(cl_mem), &PQ_dir);
802 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 1, sizeof(cl_mem), &rgb0);
803 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 2, sizeof(cl_mem), &rgb1);
804 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 3, sizeof(cl_mem), &rgb2);
805 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 4, sizeof(int), &width);
806 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 5, sizeof(int), &height);
807 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 6, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
808 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_5_1, sizes);
809 if(err != CL_SUCCESS) goto error;
810 }
811
812 {
813 // Step 5.2: Populate the red and blue channels at green CFA positions
814 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
815 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 0, sizeof(cl_mem), &VH_dir);
816 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 1, sizeof(cl_mem), &rgb0);
817 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 2, sizeof(cl_mem), &rgb1);
818 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 3, sizeof(cl_mem), &rgb2);
819 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 4, sizeof(int), &width);
820 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 5, sizeof(int), &height);
821 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 6, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
822 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_5_2, sizes);
823 if(err != CL_SUCCESS) goto error;
824 }
825 const float scaler = fmaxf(piece->pipe->dsc.processed_maximum[0], fmaxf(piece->pipe->dsc.processed_maximum[1], piece->pipe->dsc.processed_maximum[2]));
826
827 {
828 // write output
829 const int myborder = 6;
830 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
831 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 0, sizeof(cl_mem), &dev_aux);
832 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 1, sizeof(cl_mem), &rgb0);
833 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 2, sizeof(cl_mem), &rgb1);
834 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 3, sizeof(cl_mem), &rgb2);
835 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 4, sizeof(int), &width);
836 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 5, sizeof(int), &height);
837 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 6, sizeof(float), &scaler);
838 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 7, sizeof(int), (void *)&myborder);
840 if(err != CL_SUCCESS) goto error;
841 }
842
851 dt_opencl_release_mem_object(dev_green_eq);
852 dev_green_eq = cfa = rgb0 = rgb1 = rgb2 = VH_dir = PQ_dir = VP_diff = HQ_diff = NULL;
853
854 dt_dev_write_rawdetail_mask_cl(piece, dev_aux, roi_in, DT_DEV_DETAIL_MASK_DEMOSAIC);
855
856 if(scaled)
857 {
858 // scale aux buffer to output buffer
859 err = dt_iop_clip_and_zoom_roi_cl(devid, dev_out, dev_aux, roi_out, roi_in);
860 if(err != CL_SUCCESS) goto error;
861 }
862 }
863 else
864 {
865 // sample half-size image:
866 const int zero = 0;
867 cl_mem dev_pix = dev_in;
868 const int width = roi_out->width;
869 const int height = roi_out->height;
870
871 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
872 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 0, sizeof(cl_mem), &dev_pix);
873 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 1, sizeof(cl_mem), &dev_out);
874 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 2, sizeof(int), &width);
875 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 3, sizeof(int), &height);
876 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 4, sizeof(int), (void *)&zero);
877 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 5, sizeof(int), (void *)&zero);
878 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 6, sizeof(int), (void *)&roi_in->width);
879 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 7, sizeof(int), (void *)&roi_in->height);
880 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 8, sizeof(float), (void *)&roi_out->scale);
881 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 9, sizeof(uint32_t), (void *)&piece->pipe->dsc.filters);
882 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_zoom_half_size, sizes);
883 if(err != CL_SUCCESS) goto error;
884 }
885
886 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
887 dev_aux = NULL;
888
889 // color smoothing
890 if((data->color_smoothing) && smooth)
891 {
892 if(!color_smoothing_cl(self, piece, dev_out, dev_out, roi_out, data->color_smoothing))
893 goto error;
894 }
895
896 return TRUE;
897
898error:
899 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
900 dt_opencl_release_mem_object(dev_green_eq);
910 dev_aux = dev_green_eq = dev_tmp = cfa = rgb0 = rgb1 = rgb2 = VH_dir = PQ_dir = VP_diff = HQ_diff = NULL;
911 dt_print(DT_DEBUG_OPENCL, "[opencl_demosaic] rcd couldn't enqueue kernel! %d\n", err);
912 return FALSE;
913}
914#endif //HAVE_OPENCL
915
916// revert rcd specific aggressive optimizing
917#ifdef __GNUC__
918 #pragma GCC pop_options
919#endif
920
921#undef RCD_BORDER
922#undef RCD_MARGIN
923#undef RCD_TILEVALID
924#undef w1
925#undef w2
926#undef w3
927#undef w4
928#undef eps
929#undef epssq
930//#undef RCD_TILESIZE
931
932
933// clang-format off
934// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
935// vim: shiftwidth=2 expandtab tabstop=2 cindent
936// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
937// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:191
#define TRUE
Definition ashift_lsd.c:151
#define FALSE
Definition ashift_lsd.c:147
#define m
Definition basecurve.c:231
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
#define INLINE
Definition cacorrect.c:128
static float intp(const float a, const float b, const float c)
Definition cacorrect.c:141
void dt_control_log(const char *msg,...)
Definition control.c:424
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1395
#define DT_ALIGNED_PIXEL
Definition darktable.h:271
@ DT_DEBUG_OPENCL
Definition darktable.h:478
static float * dt_alloc_align_float(size_t pixels)
Definition darktable.h:345
#define dt_free_align(A)
Definition darktable.h:334
static int FC(const int row, const int col, const unsigned int filters)
Definition data/kernels/common.h:43
@ DEMOSAIC_FULL_SCALE
Definition demosaic.c:103
@ DT_IOP_GREEN_EQ_NO
Definition demosaic.c:93
static int demosaic_qual_flags(const dt_dev_pixelpipe_iop_t *const piece, const dt_image_t *const img, const dt_iop_roi_t *const roi_out)
Definition demosaic.c:239
@ DT_DEV_DETAIL_MASK_DEMOSAIC
Definition develop.h:110
static float f(const float t, const float c, const float x)
Definition graduatednd.c:173
static float sqf(const float x)
Definition math.h:215
static int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.h:560
static int dt_opencl_set_kernel_arg(const int dev, const int kernel, const size_t size, const void *arg)
Definition opencl.h:556
static void dt_opencl_release_mem_object(void *mem)
Definition opencl.h:601
static int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.h:564
static INLINE float safe_in(float a, float scale)
Definition rcd.c:85
#define w2
Definition rcd.c:77
#define RCD_TILESIZE
Definition rcd.c:49
static void rcd_ppg_border(float *const out, const float *const in, const int width, const int height, const uint32_t filters, const int margin)
Definition rcd.c:91
static void rcd_demosaic(dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters)
Definition rcd.c:282
#define w1
Definition rcd.c:76
#define w4
Definition rcd.c:79
#define eps
Definition rcd.c:81
#define w3
Definition rcd.c:78
#define epssq
Definition rcd.c:82
#define RCD_BORDER
Definition rcd.c:73
#define RCD_TILEVALID
Definition rcd.c:75
#define RCD_MARGIN
Definition rcd.c:74
Definition pixelpipe_hb.h:46
struct dt_iop_module_t *struct dt_dev_pixelpipe_t * pipe
Definition pixelpipe_hb.h:48
void * data
Definition pixelpipe_hb.h:49
Definition common/image.h:195
Definition demosaic.c:187
Definition demosaic.c:129
int kernel_rcd_step_5_1
Definition demosaic.c:176
int kernel_rcd_write_output
Definition demosaic.c:169
int kernel_zoom_half_size
Definition demosaic.c:140
int kernel_rcd_step_1_1
Definition demosaic.c:170
int kernel_rcd_step_4_2
Definition demosaic.c:175
int kernel_rcd_step_5_2
Definition demosaic.c:177
int kernel_rcd_step_4_1
Definition demosaic.c:174
int kernel_rcd_border_redblue
Definition demosaic.c:178
int kernel_rcd_step_3_1
Definition demosaic.c:173
int kernel_rcd_step_1_2
Definition demosaic.c:171
int kernel_rcd_border_green
Definition demosaic.c:179
int kernel_rcd_step_2_1
Definition demosaic.c:172
int kernel_rcd_populate
Definition demosaic.c:168
int kernel_border_interpolate
Definition demosaic.c:142
Definition imageop.h:182
Definition imageop.h:32
int width
Definition imageop.h:33
int height
Definition imageop.h:33
#define c1
#define MIN(a, b)
Definition thinplate.c:23