Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
rcd.c
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2020-2021 Hanno Schwalm.
4 Copyright (C) 2021 Marco.
5 Copyright (C) 2021 Pascal Obry.
6 Copyright (C) 2021 Ralf Brown.
7 Copyright (C) 2022 Martin Bařinka.
8 Copyright (C) 2023, 2025-2026 Aurélien PIERRE.
9
10 darktable is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 darktable is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with darktable. If not, see <http://www.gnu.org/licenses/>.
22*/
23
24
25/*
26* RATIO CORRECTED DEMOSAICING
27* Luis Sanz Rodríguez (luis.sanz.rodriguez(at)gmail(dot)com)
28*
29* Release 2.3 @ 171125
30*
31* Original code from https://github.com/LuisSR/RCD-Demosaicing
32* Licensed under the GNU GPL version 3
33*/
34
35/*
36* The tiling coding was done by Ingo Weyrich (heckflosse67@gmx.de) from rawtherapee
37*/
38
39/*
40* Luis Sanz Rodríguez significantly optimised the v 2.3 code and simplified the directional
41* coefficients in an exact, shorter and more performant formula.
42* In cooperation with Ingo Weyrich and Luis Sanz Rodríguez this has been tuned for performance.
43* Hanno Schwalm (hanno@schwalm-bremen.de)
44*/
45
46/* Some notes about the algorithm
47* 1. The calculated data at the tiling borders RCD_BORDER must be at least 9 to be stable. Why does 8 **not** work?
48* 2. For the outermost tiles we only have to discard a 6 pixel border region interpolated otherwise.
49* 3. The tilesize has a significant influence on performance, the default is a good guess for modern
50* x86/64 machines, tested on Xeon E-2288G, i5-8250U.
51*/
52
53#ifndef RCD_TILESIZE
54 #define RCD_TILESIZE 112
55#endif
56
57/* We don't want to use the -Ofast option in dt as it effects are not well specified and there have been issues
58 leading to crashes.
59 But we can use the 'fast-math' option in code sections if input data and algorithms are well defined.
60
61 We have defined input data and make sure there are no divide-by-zero or overflows by chosen eps
62 Reordering of instructions might lead to a slight loss of presision whigh is not significant here.
63 Not necessary in this code section
64 threadsafe handling of errno
65 signed zero handling
66 handling of math interrupts
67 handling of rounding
68 handling of overflows
69
70 The 'fp-contract=fast' option enables fused multiply&add if available
71*/
72
73#define RCD_BORDER 9 // avoid tile-overlap errors
74#define RCD_MARGIN 6 // for the outermost tiles we can have a smaller outer border
75#define RCD_TILEVALID (RCD_TILESIZE - 2 * RCD_BORDER)
76#define w1 RCD_TILESIZE
77#define w2 (2 * RCD_TILESIZE)
78#define w3 (3 * RCD_TILESIZE)
79#define w4 (4 * RCD_TILESIZE)
80
81#define eps 1e-5f // Tolerance to avoid dividing by zero
82#define epssq 1e-10f
83
84// We might have negative data in input and also want to normalise
85static INLINE float safe_in(float a, float scale)
86{
87 return fmaxf(0.0f, a) * scale;
88}
89
92static void rcd_ppg_border(float *const out, const float *const in, const int width, const int height, const uint32_t filters, const int margin)
93{
94 const int border = margin + 3;
95 // write approximatad 3-pixel border region to out
96 float sum[8];
97 __OMP_PARALLEL_FOR__(collapse(2))
98 for(int j = 0; j < height; j++)
99 {
100 for(int i = 0; i < width; i++)
101 {
102 if(i == 3 && j >= 3 && j < height - 3) i = width - 3;
103 memset(sum, 0, sizeof(float) * 8);
104 for(int y = j - 1; y != j + 2; y++)
105 {
106 for(int x = i - 1; x != i + 2; x++)
107 {
108 if((y >= 0) && (x >= 0) && (y < height) && (x < width))
109 {
110 const int f = FC(y, x, filters);
111 sum[f] += fmaxf(0.0f, in[(size_t)y * width + x]);
112 sum[f + 4]++;
113 }
114 }
115 }
116 const int f = FC(j, i, filters);
117 for(int c = 0; c < 3; c++)
118 {
119 if(c != f && sum[c + 4] > 0.0f)
120 out[4 * ((size_t)j * width + i) + c] = sum[c] / sum[c + 4];
121 else
122 out[4 * ((size_t)j * width + i) + c] = fmaxf(0.0f, in[(size_t)j * width + i]);
123 }
124 }
125 }
126
127
128 const float *input = in;
130 for(int j = 3; j < height - 3; j++)
131 {
132 float *buf = out + (size_t)4 * width * j + 4 * 3;
133 const float *buf_in = input + (size_t)width * j + 3;
134 for(int i = 3; i < width - 3; i++)
135 {
136 if(i == border && j >= border && j < height - border)
137 {
138 i = width - border;
139 buf = out + (size_t)4 * width * j + 4 * i;
140 buf_in = input + (size_t)width * j + i;
141 }
142 if(i == width) break;
143
144 const int c = FC(j, i, filters);
145 dt_aligned_pixel_t color;
146 const float pc = fmaxf(0.0f, buf_in[0]);
147 if(c == 0 || c == 2)
148 {
149 color[c] = pc;
150 const float pym = fmaxf(0.0f, buf_in[-width * 1]);
151 const float pym2 = fmaxf(0.0f, buf_in[-width * 2]);
152 const float pym3 = fmaxf(0.0f, buf_in[-width * 3]);
153 const float pyM = fmaxf(0.0f, buf_in[+width * 1]);
154 const float pyM2 = fmaxf(0.0f, buf_in[+width * 2]);
155 const float pyM3 = fmaxf(0.0f, buf_in[+width * 3]);
156 const float pxm = fmaxf(0.0f, buf_in[-1]);
157 const float pxm2 = fmaxf(0.0f, buf_in[-2]);
158 const float pxm3 = fmaxf(0.0f, buf_in[-3]);
159 const float pxM = fmaxf(0.0f, buf_in[+1]);
160 const float pxM2 = fmaxf(0.0f, buf_in[+2]);
161 const float pxM3 = fmaxf(0.0f, buf_in[+3]);
162
163 const float guessx = (pxm + pc + pxM) * 2.0f - pxM2 - pxm2;
164 const float diffx = (fabsf(pxm2 - pc) + fabsf(pxM2 - pc) + fabsf(pxm - pxM)) * 3.0f
165 + (fabsf(pxM3 - pxM) + fabsf(pxm3 - pxm)) * 2.0f;
166 const float guessy = (pym + pc + pyM) * 2.0f - pyM2 - pym2;
167 const float diffy = (fabsf(pym2 - pc) + fabsf(pyM2 - pc) + fabsf(pym - pyM)) * 3.0f
168 + (fabsf(pyM3 - pyM) + fabsf(pym3 - pym)) * 2.0f;
169 if(diffx > diffy)
170 {
171 // use guessy
172 const float m = fminf(pym, pyM);
173 const float M = fmaxf(pym, pyM);
174 color[1] = fmaxf(fminf(guessy * .25f, M), m);
175 }
176 else
177 {
178 const float m = fminf(pxm, pxM);
179 const float M = fmaxf(pxm, pxM);
180 color[1] = fmaxf(fminf(guessx * .25f, M), m);
181 }
182 }
183 else
184 color[1] = pc;
185
186 color[3] = 0.0f;
187 memcpy(buf, color, sizeof(float) * 4);
188 buf += 4;
189 buf_in++;
190 }
191 }
192
193
194 // for all pixels: interpolate colors into float array
196 for(int j = 1; j < height - 1; j++)
197 {
198 float *buf = out + (size_t)4 * width * j + 4;
199 for(int i = 1; i < width - 1; i++)
200 {
201 if(i == margin && j >= margin && j < height - margin)
202 {
203 i = width - margin;
204 buf = out + (size_t)4 * (width * j + i);
205 }
206 const int c = FC(j, i, filters);
207 dt_aligned_pixel_t color = { buf[0], buf[1], buf[2], buf[3] };
208 const int linesize = 4 * width;
209 // fill all four pixels with correctly interpolated stuff: r/b for green1/2
210 // b for r and r for b
211 if(__builtin_expect(c & 1, 1)) // c == 1 || c == 3)
212 {
213 // calculate red and blue for green pixels:
214 // need 4-nbhood:
215 const float *nt = buf - linesize;
216 const float *nb = buf + linesize;
217 const float *nl = buf - 4;
218 const float *nr = buf + 4;
219 if(FC(j, i + 1, filters) == 0) // red nb in same row
220 {
221 color[2] = (nt[2] + nb[2] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
222 color[0] = (nl[0] + nr[0] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
223 }
224 else
225 {
226 // blue nb
227 color[0] = (nt[0] + nb[0] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
228 color[2] = (nl[2] + nr[2] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
229 }
230 }
231 else
232 {
233 // get 4-star-nbhood:
234 const float *ntl = buf - 4 - linesize;
235 const float *ntr = buf + 4 - linesize;
236 const float *nbl = buf - 4 + linesize;
237 const float *nbr = buf + 4 + linesize;
238
239 if(c == 0)
240 {
241 // red pixel, fill blue:
242 const float diff1 = fabsf(ntl[2] - nbr[2]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
243 const float guess1 = ntl[2] + nbr[2] + 2.0f * color[1] - ntl[1] - nbr[1];
244 const float diff2 = fabsf(ntr[2] - nbl[2]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
245 const float guess2 = ntr[2] + nbl[2] + 2.0f * color[1] - ntr[1] - nbl[1];
246 if(diff1 > diff2)
247 color[2] = guess2 * .5f;
248 else if(diff1 < diff2)
249 color[2] = guess1 * .5f;
250 else
251 color[2] = (guess1 + guess2) * .25f;
252 }
253 else // c == 2, blue pixel, fill red:
254 {
255 const float diff1 = fabsf(ntl[0] - nbr[0]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
256 const float guess1 = ntl[0] + nbr[0] + 2.0f * color[1] - ntl[1] - nbr[1];
257 const float diff2 = fabsf(ntr[0] - nbl[0]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
258 const float guess2 = ntr[0] + nbl[0] + 2.0f * color[1] - ntr[1] - nbl[1];
259 if(diff1 > diff2)
260 color[0] = guess2 * .5f;
261 else if(diff1 < diff2)
262 color[0] = guess1 * .5f;
263 else
264 color[0] = (guess1 + guess2) * .25f;
265 }
266 }
267 memcpy(buf, color, sizeof(float) * 4);
268 buf += 4;
269 }
270 }
271
272}
273
274static void rcd_demosaic(const dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out,
275 const dt_iop_roi_t *const roi_in, const uint32_t filters)
276{
277 const int width = roi_in->width;
278 const int height = roi_in->height;
279
280 if((width < 16) || (height < 16))
281 {
282 dt_control_log(_("[rcd_demosaic] too small area"));
283 return;
284 }
285
286 rcd_ppg_border(out, in, width, height, filters, RCD_MARGIN);
287
288 const float scaler = fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
289 const float revscaler = 1.0f / scaler;
290
291 const int num_vertical = 1 + (height - 2 * RCD_BORDER -1) / RCD_TILEVALID;
292 const int num_horizontal = 1 + (width - 2 * RCD_BORDER -1) / RCD_TILEVALID;
293
294#ifdef _OPENMP
295 #pragma omp parallel
296#endif
297 {
298 // FIXME: CRITICAL: need to handle the case where we couldn't alloc the memory,
299 // but in a parallel section, that's not going to be trivial.
300 dt_fp_init(DT_FP_MODE_FAST);
301
303 // ensure that border elements which are read but never actually set below are zeroed out
304 memset(VH_Dir, 0, sizeof(*VH_Dir) * RCD_TILESIZE * RCD_TILESIZE);
307 float *P_CDiff_Hpf = dt_pixelpipe_cache_alloc_align_float_cache((size_t) RCD_TILESIZE * RCD_TILESIZE / 2, 0);
308 float *Q_CDiff_Hpf = dt_pixelpipe_cache_alloc_align_float_cache((size_t) RCD_TILESIZE * RCD_TILESIZE / 2, 0);
309
310 float (*rgb)[RCD_TILESIZE * RCD_TILESIZE] =
312
313 // No overlapping use so re-use same buffer
314 float *lpf = PQ_Dir;
315
316 // There has been a discussion about the schedule strategy, at least on the tested machines the
317 // dynamic scheduling seems to be slightly faster.
318#ifdef _OPENMP
319 #pragma omp for schedule(simd:dynamic, 6) collapse(2) nowait
320#endif
321 for(int tile_vertical = 0; tile_vertical < num_vertical; tile_vertical++)
322 {
323 for(int tile_horizontal = 0; tile_horizontal < num_horizontal; tile_horizontal++)
324 {
325 const int rowStart = tile_vertical * RCD_TILEVALID;
326 const int rowEnd = MIN(rowStart + RCD_TILESIZE, height);
327
328 const int colStart = tile_horizontal * RCD_TILEVALID;
329 const int colEnd = MIN(colStart + RCD_TILESIZE, width);
330
331 const int tileRows = MIN(rowEnd - rowStart, RCD_TILESIZE);
332 const int tileCols = MIN(colEnd - colStart, RCD_TILESIZE);
333
334 if (rowStart + RCD_TILESIZE > height || colStart + RCD_TILESIZE > width)
335 {
336 // VH_Dir is only filled for (4,4)..(height-4,width-4), but the refinement code reads (3,3)...(h-3,w-3),
337 // so we need to ensure that the border is zeroed for partial tiles to get consistent results
338 memset(VH_Dir, 0, sizeof(*VH_Dir) * RCD_TILESIZE * RCD_TILESIZE);
339 // TODO: figure out what part of rgb is being accessed without initialization on partial tiles
340 memset(rgb, 0, sizeof(float) * 3 * RCD_TILESIZE * RCD_TILESIZE);
341 }
342 // Step 0: fill data and make sure data are not negative.
343 for(int row = rowStart; row < rowEnd; row++)
344 {
345 const int c0 = FC(row, colStart, filters);
346 const int c1 = FC(row, colStart + 1, filters);
347 for(int col = colStart, indx = (row - rowStart) * RCD_TILESIZE, in_indx = row * width + colStart; col < colEnd; col++, indx++, in_indx++)
348 {
349 cfa[indx] = rgb[c0][indx] = rgb[c1][indx] = safe_in(in[in_indx], revscaler);
350 }
351 }
352
353 // STEP 1: Find vertical and horizontal interpolation directions
354 float bufferV[3][RCD_TILESIZE - 8];
355 // Step 1.1: Calculate the square of the vertical and horizontal color difference high pass filter
356 for(int row = 3; row < MIN(tileRows - 3, 5); row++ )
357 {
358 for(int col = 4, indx = row * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
359 {
360 bufferV[row - 3][col - 4] = sqf((cfa[indx - w3] - cfa[indx - w1] - cfa[indx + w1] + cfa[indx + w3]) - 3.0f * (cfa[indx - w2] + cfa[indx + w2]) + 6.0f * cfa[indx]);
361 }
362 }
363
364 // Step 1.2: Obtain the vertical and horizontal directional discrimination strength
365 float DT_ALIGNED_PIXEL bufferH[RCD_TILESIZE];
366 // We start with V0, V1 and V2 pointing to row -1, row and row +1
367 // After row is processed V0 must point to the old V1, V1 must point to the old V2 and V2 must point to the old V0
368 // because the old V0 is not used anymore and will be filled with row + 1 data in next iteration
369 float* V0 = bufferV[0];
370 float* V1 = bufferV[1];
371 float* V2 = bufferV[2];
372 for(int row = 4; row < tileRows - 4; row++ )
373 {
374 for(int col = 3, indx = row * RCD_TILESIZE + col; col < tileCols - 3; col++, indx++)
375 {
376 bufferH[col - 3] = sqf((cfa[indx - 3] - cfa[indx - 1] - cfa[indx + 1] + cfa[indx + 3]) - 3.0f * (cfa[indx - 2] + cfa[indx + 2]) + 6.0f * cfa[indx]);
377 }
378 for(int col = 4, indx = (row + 1) * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++)
379 {
380 V2[col - 4] = sqf((cfa[indx - w3] - cfa[indx - w1] - cfa[indx + w1] + cfa[indx + w3]) - 3.0f * (cfa[indx - w2] + cfa[indx + w2]) + 6.0f * cfa[indx]);
381 }
382 for(int col = 4, indx = row * RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
383 {
384 const float V_Stat = fmaxf(epssq, V0[col - 4] + V1[col - 4] + V2[col - 4]);
385 const float H_Stat = fmaxf(epssq, bufferH[col - 4] + bufferH[col - 3] + bufferH[col - 2]);
386 VH_Dir[indx] = V_Stat / ( V_Stat + H_Stat );
387 }
388 // rolling the line pointers
389 float* tmp = V0; V0 = V1; V1 = V2; V2 = tmp;
390 }
391
392 // STEP 2: Calculate the low pass filter
393 // Step 2.1: Low pass filter incorporating green, red and blue local samples from the raw data
394 for(int row = 2; row < tileRows - 2; row++)
395 {
396 for(int col = 2 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, lp_indx = indx / 2; col < tileCols - 2; col += 2, indx +=2, lp_indx++)
397 {
398 lpf[lp_indx] = cfa[indx]
399 + 0.5f * (cfa[indx - w1] + cfa[indx + w1] + cfa[indx - 1] + cfa[indx + 1])
400 + 0.25f * (cfa[indx - w1 - 1] + cfa[indx - w1 + 1] + cfa[indx + w1 - 1] + cfa[indx + w1 + 1]);
401 }
402 }
403
404 // STEP 3: Populate the green channel
405 // Step 3.1: Populate the green channel at blue and red CFA positions
406 for(int row = 4; row < tileRows - 4; row++)
407 {
408 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, lpindx = indx / 2; col < tileCols - 4; col += 2, indx += 2, lpindx++)
409 {
410 const float cfai = cfa[indx];
411
412 // Cardinal gradients
413 const float N_Grad = eps + fabs(cfa[indx - w1] - cfa[indx + w1]) + fabs(cfai - cfa[indx - w2]) + fabs(cfa[indx - w1] - cfa[indx - w3]) + fabs(cfa[indx - w2] - cfa[indx - w4]);
414 const float S_Grad = eps + fabs(cfa[indx - w1] - cfa[indx + w1]) + fabs(cfai - cfa[indx + w2]) + fabs(cfa[indx + w1] - cfa[indx + w3]) + fabs(cfa[indx + w2] - cfa[indx + w4]);
415 const float W_Grad = eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx - 2]) + fabs(cfa[indx - 1] - cfa[indx - 3]) + fabs(cfa[indx - 2] - cfa[indx - 4]);
416 const float E_Grad = eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx + 2]) + fabs(cfa[indx + 1] - cfa[indx + 3]) + fabs(cfa[indx + 2] - cfa[indx + 4]);
417
418 // Cardinal pixel estimations
419 const float lpfi = lpf[lpindx];
420 const float N_Est = cfa[indx - w1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx - w1]);
421 const float S_Est = cfa[indx + w1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx + w1]);
422 const float W_Est = cfa[indx - 1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx - 1]);
423 const float E_Est = cfa[indx + 1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx + 1]);
424
425 // Vertical and horizontal estimations
426 const float V_Est = (S_Grad * N_Est + N_Grad * S_Est) / (N_Grad + S_Grad);
427 const float H_Est = (W_Grad * E_Est + E_Grad * W_Est) / (E_Grad + W_Grad);
428
429 // G@B and G@R interpolation
430 // Refined vertical and horizontal local discrimination
431 const float VH_Central_Value = VH_Dir[indx];
432 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx - w1 - 1] + VH_Dir[indx - w1 + 1] + VH_Dir[indx + w1 - 1] + VH_Dir[indx + w1 + 1]);
433 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value)) ? VH_Neighbourhood_Value : VH_Central_Value;
434
435 rgb[1][indx] = intp(VH_Disc, H_Est, V_Est);
436 }
437 }
438
439 // STEP 4: Populate the red and blue channels
440
441 // Step 4.0: Calculate the square of the P/Q diagonals color difference high pass filter
442 for(int row = 3; row < tileRows - 3; row++)
443 {
444 for(int col = 3, indx = row * RCD_TILESIZE + col, indx2 = indx / 2; col < tileCols - 3; col+=2, indx+=2, indx2++)
445 {
446 P_CDiff_Hpf[indx2] = sqf((cfa[indx - w3 - 3] - cfa[indx - w1 - 1] - cfa[indx + w1 + 1] + cfa[indx + w3 + 3]) - 3.0f * (cfa[indx - w2 - 2] + cfa[indx + w2 + 2]) + 6.0f * cfa[indx]);
447 Q_CDiff_Hpf[indx2] = sqf((cfa[indx - w3 + 3] - cfa[indx - w1 + 1] - cfa[indx + w1 - 1] + cfa[indx + w3 - 3]) - 3.0f * (cfa[indx - w2 + 2] + cfa[indx + w2 - 2]) + 6.0f * cfa[indx]);
448 }
449 }
450 // Step 4.1: Obtain the P/Q diagonals directional discrimination strength
451 for(int row = 4; row < tileRows - 4; row++)
452 {
453 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, indx2 = indx / 2, indx3 = (indx - w1 - 1) / 2, indx4 = (indx + w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ )
454 {
455 const float P_Stat = fmaxf(epssq, P_CDiff_Hpf[indx3] + P_CDiff_Hpf[indx2] + P_CDiff_Hpf[indx4 + 1]);
456 const float Q_Stat = fmaxf(epssq, Q_CDiff_Hpf[indx3 + 1] + Q_CDiff_Hpf[indx2] + Q_CDiff_Hpf[indx4]);
457 PQ_Dir[indx2] = P_Stat / (P_Stat + Q_Stat);
458 }
459 }
460
461 // Step 4.2: Populate the red and blue channels at blue and red CFA positions
462 for(int row = 4; row < tileRows - 4; row++)
463 {
464 for(int col = 4 + (FC(row, 0, filters) & 1), indx = row * RCD_TILESIZE + col, c = 2 - FC(row, col, filters), pqindx = indx / 2, pqindx2 = (indx - w1 - 1) / 2, pqindx3 = (indx + w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, pqindx++, pqindx2++, pqindx3++)
465 {
466 // Refined P/Q diagonal local discrimination
467 const float PQ_Central_Value = PQ_Dir[pqindx];
468 const float PQ_Neighbourhood_Value = 0.25f * (PQ_Dir[pqindx2] + PQ_Dir[pqindx2 + 1] + PQ_Dir[pqindx3] + PQ_Dir[pqindx3 + 1]);
469
470 const float PQ_Disc = (fabs(0.5f - PQ_Central_Value) < fabs(0.5f - PQ_Neighbourhood_Value)) ? PQ_Neighbourhood_Value : PQ_Central_Value;
471
472 // Diagonal gradients
473 const float NW_Grad = eps + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx + w1 + 1]) + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx - w3 - 3]) + fabs(rgb[1][indx] - rgb[1][indx - w2 - 2]);
474 const float NE_Grad = eps + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx + w1 - 1]) + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx - w3 + 3]) + fabs(rgb[1][indx] - rgb[1][indx - w2 + 2]);
475 const float SW_Grad = eps + fabs(rgb[c][indx - w1 + 1] - rgb[c][indx + w1 - 1]) + fabs(rgb[c][indx + w1 - 1] - rgb[c][indx + w3 - 3]) + fabs(rgb[1][indx] - rgb[1][indx + w2 - 2]);
476 const float SE_Grad = eps + fabs(rgb[c][indx - w1 - 1] - rgb[c][indx + w1 + 1]) + fabs(rgb[c][indx + w1 + 1] - rgb[c][indx + w3 + 3]) + fabs(rgb[1][indx] - rgb[1][indx + w2 + 2]);
477
478 // Diagonal colour differences
479 const float NW_Est = rgb[c][indx - w1 - 1] - rgb[1][indx - w1 - 1];
480 const float NE_Est = rgb[c][indx - w1 + 1] - rgb[1][indx - w1 + 1];
481 const float SW_Est = rgb[c][indx + w1 - 1] - rgb[1][indx + w1 - 1];
482 const float SE_Est = rgb[c][indx + w1 + 1] - rgb[1][indx + w1 + 1];
483
484 // P/Q estimations
485 const float P_Est = (NW_Grad * SE_Est + SE_Grad * NW_Est) / (NW_Grad + SE_Grad);
486 const float Q_Est = (NE_Grad * SW_Est + SW_Grad * NE_Est) / (NE_Grad + SW_Grad);
487
488 // R@B and B@R interpolation
489 rgb[c][indx] = rgb[1][indx] + intp(PQ_Disc, Q_Est, P_Est);
490 }
491 }
492
493 // Step 4.3: Populate the red and blue channels at green CFA positions
494 for(int row = 4; row < tileRows - 4; row++)
495 {
496 for(int col = 4 + (FC(row, 1, filters) & 1), indx = row * RCD_TILESIZE + col; col < tileCols - 4; col += 2, indx +=2)
497 {
498 // Refined vertical and horizontal local discrimination
499 const float VH_Central_Value = VH_Dir[indx];
500 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx - w1 - 1] + VH_Dir[indx - w1 + 1] + VH_Dir[indx + w1 - 1] + VH_Dir[indx + w1 + 1]);
501 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value) ) ? VH_Neighbourhood_Value : VH_Central_Value;
502 const float rgb1 = rgb[1][indx];
503 const float N1 = eps + fabs(rgb1 - rgb[1][indx - w2]);
504 const float S1 = eps + fabs(rgb1 - rgb[1][indx + w2]);
505 const float W1 = eps + fabs(rgb1 - rgb[1][indx - 2]);
506 const float E1 = eps + fabs(rgb1 - rgb[1][indx + 2]);
507
508 const float rgb1mw1 = rgb[1][indx - w1];
509 const float rgb1pw1 = rgb[1][indx + w1];
510 const float rgb1m1 = rgb[1][indx - 1];
511 const float rgb1p1 = rgb[1][indx + 1];
512
513 for(int c = 0; c <= 2; c += 2)
514 {
515 const float SNabs = fabs(rgb[c][indx - w1] - rgb[c][indx + w1]);
516 const float EWabs = fabs(rgb[c][indx - 1] - rgb[c][indx + 1]);
517
518 // Cardinal gradients
519 const float N_Grad = N1 + SNabs + fabs(rgb[c][indx - w1] - rgb[c][indx - w3]);
520 const float S_Grad = S1 + SNabs + fabs(rgb[c][indx + w1] - rgb[c][indx + w3]);
521 const float W_Grad = W1 + EWabs + fabs(rgb[c][indx - 1] - rgb[c][indx - 3]);
522 const float E_Grad = E1 + EWabs + fabs(rgb[c][indx + 1] - rgb[c][indx + 3]);
523
524 // Cardinal colour differences
525 const float N_Est = rgb[c][indx - w1] - rgb1mw1;
526 const float S_Est = rgb[c][indx + w1] - rgb1pw1;
527 const float W_Est = rgb[c][indx - 1] - rgb1m1;
528 const float E_Est = rgb[c][indx + 1] - rgb1p1;
529
530 // Vertical and horizontal estimations
531 const float V_Est = (N_Grad * S_Est + S_Grad * N_Est) / (N_Grad + S_Grad);
532 const float H_Est = (E_Grad * W_Est + W_Grad * E_Est) / (E_Grad + W_Grad);
533
534 // R@G and B@G interpolation
535 rgb[c][indx] = rgb1 + intp(VH_Disc, H_Est, V_Est);
536 }
537 }
538 }
539
540 // For the outermost tiles in all directions we can use a smaller border margin
541 const int first_vertical = rowStart + ((tile_vertical == 0) ? RCD_MARGIN : RCD_BORDER);
542 const int last_vertical = rowEnd - ((tile_vertical == num_vertical - 1) ? RCD_MARGIN : RCD_BORDER);
543 const int first_horizontal = colStart + ((tile_horizontal == 0) ? RCD_MARGIN : RCD_BORDER);
544 const int last_horizontal = colEnd - ((tile_horizontal == num_horizontal - 1) ? RCD_MARGIN : RCD_BORDER);
545 for(int row = first_vertical; row < last_vertical; row++)
546 {
547 for(int col = first_horizontal, idx = (row - rowStart) * RCD_TILESIZE + col - colStart, o_idx = (row * width + col) * 4; col < last_horizontal; col++, o_idx += 4, idx++)
548 {
549 out[o_idx] = scaler * fmaxf(0.0f, rgb[0][idx]);
550 out[o_idx+1] = scaler * fmaxf(0.0f, rgb[1][idx]);
551 out[o_idx+2] = scaler * fmaxf(0.0f, rgb[2][idx]);
552 out[o_idx+3] = 0.0f;
553 }
554 }
555 }
556 }
563 }
564}
565
566#ifdef HAVE_OPENCL
567
568static int process_rcd_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe,
569 const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
570 const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
571 const gboolean smooth)
572{
575
576 const int devid = pipe->devid;
577
578 cl_mem dev_aux = NULL;
579 cl_mem dev_tmp = NULL;
580 cl_mem dev_green_eq = NULL;
581 cl_mem cfa = NULL;
582 cl_mem rgb0 = NULL;
583 cl_mem rgb1 = NULL;
584 cl_mem rgb2 = NULL;
585 cl_mem VH_dir = NULL;
586 cl_mem PQ_dir = NULL;
587 cl_mem VP_diff = NULL;
588 cl_mem HQ_diff = NULL;
589
590 cl_int err = -999;
591
592 int width = roi_out->width;
593 int height = roi_out->height;
594
595 // green equilibration
596 if(data->green_eq != DT_IOP_GREEN_EQ_NO)
597 {
598 dev_green_eq = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float));
599 if(IS_NULL_PTR(dev_green_eq)) goto error;
600 if(!green_equilibration_cl(self, pipe, piece, dev_in, dev_green_eq, roi_in)) goto error;
601 dev_in = dev_green_eq;
602 }
603
604 // need to reserve scaled auxiliary buffer or use dev_out
605 dev_aux = dev_out;
606
607 dev_tmp = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float) * 4);
608 if(IS_NULL_PTR(dev_tmp)) goto error;
609
610 {
611 const int myborder = 3;
612 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
613 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 0, sizeof(cl_mem), &dev_in);
614 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 1, sizeof(cl_mem), &dev_tmp);
615 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 2, sizeof(int), (void *)&width);
616 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 3, sizeof(int), (void *)&height);
617 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 4, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
618 dt_opencl_set_kernel_arg(devid, gd->kernel_border_interpolate, 5, sizeof(int), (void *)&myborder);
620 if(err != CL_SUCCESS) goto error;
621 }
622
623 {
625 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
626 .cellsize = sizeof(float) * 1, .overhead = 0,
627 .sizex = 64, .sizey = 64 };
628
629 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_rcd_border_green, &locopt)) goto error;
630 const int myborder = 32;
631 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
632 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
633 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 0, sizeof(cl_mem), &dev_in);
634 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 1, sizeof(cl_mem), &dev_tmp);
635 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 2, sizeof(int), &width);
636 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 3, sizeof(int), &height);
637 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 4, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
638 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 5, sizeof(float) * (locopt.sizex + 2*3) * (locopt.sizey + 2*3), NULL);
639 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_green, 6, sizeof(int), (void *)&myborder);
641 if(err != CL_SUCCESS) goto error;
642 }
643
644 {
646 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
647 .cellsize = 4 * sizeof(float), .overhead = 0,
648 .sizex = 64, .sizey = 64 };
649
650 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_rcd_border_redblue, &locopt)) goto error;
651 const int myborder = 16;
652 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
653 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
654 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 0, sizeof(cl_mem), &dev_tmp);
655 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 1, sizeof(cl_mem), &dev_aux);
656 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 2, sizeof(int), &width);
657 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 3, sizeof(int), &height);
658 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 4, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
659 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 5, sizeof(float) * 4 * (locopt.sizex + 2) * (locopt.sizey + 2), NULL);
660 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_border_redblue, 6, sizeof(int), (void *)&myborder);
662 if(err != CL_SUCCESS) goto error;
663 }
665 dev_tmp = NULL;
666
667 cfa = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
668 if(IS_NULL_PTR(cfa)) goto error;
669 VH_dir = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
670 if(IS_NULL_PTR(VH_dir)) goto error;
671 PQ_dir = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
672 if(IS_NULL_PTR(PQ_dir)) goto error;
673 VP_diff = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
674 if(IS_NULL_PTR(VP_diff)) goto error;
675 HQ_diff = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
676 if(IS_NULL_PTR(HQ_diff)) goto error;
677 rgb0 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
678 if(IS_NULL_PTR(rgb0)) goto error;
679 rgb1 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
680 if(IS_NULL_PTR(rgb1)) goto error;
681 rgb2 = dt_opencl_alloc_device_buffer(devid, sizeof(float) * roi_in->width * roi_in->height);
682 if(IS_NULL_PTR(rgb2)) goto error;
683
684 {
685 // populate data
686 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
687 const float scaler = 1.0f / fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
688 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 0, sizeof(cl_mem), &dev_in);
689 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 1, sizeof(cl_mem), &cfa);
690 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 2, sizeof(cl_mem), &rgb0);
691 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 3, sizeof(cl_mem), &rgb1);
692 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 4, sizeof(cl_mem), &rgb2);
693 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 5, sizeof(int), &width);
694 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 6, sizeof(int), &height);
695 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 7, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
696 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_populate, 8, sizeof(float), &scaler);
697 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_populate, sizes);
698 if(err != CL_SUCCESS) goto error;
699 }
700
701 {
702 // Fuse the vertical/horizontal high-pass and discrimination stages so the intermediate
703 // directional filters stay tile-local instead of being written to global memory.
705 = (dt_opencl_local_buffer_t){ .xoffset = 8, .xfactor = 1, .yoffset = 8, .yfactor = 1,
706 .cellsize = sizeof(float), .overhead = 0,
707 .sizex = 64, .sizey = 64 };
708
709 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_rcd_step_1, &locopt)) goto error;
710 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
711 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
712 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1, 0, sizeof(cl_mem), &cfa);
713 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1, 1, sizeof(cl_mem), &VH_dir);
714 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1, 2, sizeof(int), &width);
715 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_1, 3, sizeof(int), &height);
717 sizeof(float) * (locopt.sizex + 8) * (locopt.sizey + 8), NULL);
718 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_rcd_step_1, sizes, local);
719 if(err != CL_SUCCESS) goto error;
720 }
721
722 {
723 // Reuse PQ_dir as the packed low-pass field consumed by step 3, then overwrite the same
724 // half-width buffer later with the diagonal discriminator used by step 5.
725 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
726 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 0, sizeof(cl_mem), &PQ_dir);
727 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 1, sizeof(cl_mem), &cfa);
728 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 2, sizeof(int), &width);
729 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 3, sizeof(int), &height);
730 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_2_1, 4, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
731 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_2_1, sizes);
732 if(err != CL_SUCCESS) goto error;
733 }
734
735 {
736 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
737 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 0, sizeof(cl_mem), &PQ_dir);
738 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 1, sizeof(cl_mem), &cfa);
739 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 2, sizeof(cl_mem), &rgb1);
740 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 3, sizeof(cl_mem), &VH_dir);
741 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 4, sizeof(int), &width);
742 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 5, sizeof(int), &height);
743 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_3_1, 6, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
744 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_3_1, sizes);
745 if(err != CL_SUCCESS) goto error;
746 }
747
748 {
749 size_t sizes[3] = { ROUNDUPDWD(width / 2, devid), ROUNDUPDHT(height, devid), 1 };
750 // Reuse the full-size scratch pair for the diagonal high-pass fields, then collapse
751 // them into PQ_dir before the red/blue reconstruction reads that compact field.
752 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 0, sizeof(cl_mem), &cfa);
753 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 1, sizeof(cl_mem), &VP_diff);
754 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 2, sizeof(cl_mem), &HQ_diff);
755 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 3, sizeof(int), &width);
756 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 4, sizeof(int), &height);
757 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_1, 5, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
758 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_4_1, sizes);
759 if(err != CL_SUCCESS) goto error;
760
761 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 0, sizeof(cl_mem), &PQ_dir);
762 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 1, sizeof(cl_mem), &VP_diff);
763 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 2, sizeof(cl_mem), &HQ_diff);
764 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 3, sizeof(int), &width);
765 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 4, sizeof(int), &height);
766 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_4_2, 5, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
767 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_4_2, sizes);
768 if(err != CL_SUCCESS) goto error;
769
770 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 0, sizeof(cl_mem), &PQ_dir);
771 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 1, sizeof(cl_mem), &rgb0);
772 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 2, sizeof(cl_mem), &rgb1);
773 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 3, sizeof(cl_mem), &rgb2);
774 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 4, sizeof(int), &width);
775 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 5, sizeof(int), &height);
776 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_1, 6, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
777 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_5_1, sizes);
778 if(err != CL_SUCCESS) goto error;
779
780 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 0, sizeof(cl_mem), &VH_dir);
781 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 1, sizeof(cl_mem), &rgb0);
782 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 2, sizeof(cl_mem), &rgb1);
783 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 3, sizeof(cl_mem), &rgb2);
784 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 4, sizeof(int), &width);
785 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 5, sizeof(int), &height);
786 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_step_5_2, 6, sizeof(uint32_t), (void *)&piece->dsc_in.filters);
787 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_rcd_step_5_2, sizes);
788 if(err != CL_SUCCESS) goto error;
789 }
790 const float scaler = fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
791
792 {
793 // write output
794 const int myborder = 6;
795 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
796 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 0, sizeof(cl_mem), &dev_aux);
797 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 1, sizeof(cl_mem), &rgb0);
798 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 2, sizeof(cl_mem), &rgb1);
799 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 3, sizeof(cl_mem), &rgb2);
800 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 4, sizeof(int), &width);
801 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 5, sizeof(int), &height);
802 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 6, sizeof(float), &scaler);
803 dt_opencl_set_kernel_arg(devid, gd->kernel_rcd_write_output, 7, sizeof(int), (void *)&myborder);
805 if(err != CL_SUCCESS) goto error;
806 }
807
816 dt_opencl_release_mem_object(dev_green_eq);
817 dev_green_eq = cfa = rgb0 = rgb1 = rgb2 = VH_dir = PQ_dir = VP_diff = HQ_diff = NULL;
818
819 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
820 dev_aux = NULL;
821
822 // color smoothing
823 if((data->color_smoothing) && smooth)
824 {
825 if(!color_smoothing_cl(self, pipe, piece, dev_out, dev_out, roi_out, data->color_smoothing))
826 goto error;
827 }
828
829 return TRUE;
830
831error:
832 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
833 dt_opencl_release_mem_object(dev_green_eq);
843 dev_aux = dev_green_eq = dev_tmp = cfa = rgb0 = rgb1 = rgb2 = VH_dir = PQ_dir = VP_diff = HQ_diff = NULL;
844 dt_print(DT_DEBUG_OPENCL, "[opencl_demosaic] rcd couldn't enqueue kernel! %d\n", err);
845 return FALSE;
846}
847#endif //HAVE_OPENCL
848
849#undef RCD_BORDER
850#undef RCD_MARGIN
851#undef RCD_TILEVALID
852#undef w1
853#undef w2
854#undef w3
855#undef w4
856#undef eps
857#undef epssq
858//#undef RCD_TILESIZE
859
860
861// clang-format off
862// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
863// vim: shiftwidth=2 expandtab tabstop=2 cindent
864// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
865// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
#define m
Definition basecurve.c:277
static int green_equilibration_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in)
Definition basic.c:400
static int color_smoothing_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_out, const int passes)
Definition basic.c:334
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
#define INLINE
Definition cacorrect.c:167
static float intp(const float a, const float b, const float c)
Definition cacorrect.c:180
const float i
Definition colorspaces_inline_conversions.h:440
static dt_aligned_pixel_t rgb
Definition colorspaces_inline_conversions.h:344
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:102
const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:42
static const int row
Definition colorspaces_inline_conversions.h:35
static const dt_colormatrix_t M
Definition colorspaces_inline_conversions.h:682
void dt_control_log(const char *msg,...)
Definition control.c:530
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1448
#define DT_ALIGNED_PIXEL
Definition darktable.h:389
@ DT_DEBUG_OPENCL
Definition darktable.h:721
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:447
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __DT_CLONE_TARGETS__
Definition darktable.h:367
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
static int FC(const int row, const int col, const unsigned int filters)
Definition data/kernels/common.h:47
@ DT_IOP_GREEN_EQ_NO
Definition demosaic.c:135
@ DT_FP_MODE_FAST
Definition fp_mode.h:39
static const float x
Definition iop_profile.h:235
float dt_aligned_pixel_t[4]
Definition noiseprofile.c:28
int dt_opencl_local_buffer_opt(const int devid, const int kernel, dt_opencl_local_buffer_t *factors)
Definition opencl.c:2970
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:1951
void * dt_opencl_alloc_device_buffer(const int devid, const size_t size)
Definition opencl.c:2359
void * dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
Definition opencl.c:2286
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:1942
int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.c:1957
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2198
#define ROUNDUP(a, n)
Definition opencl.h:78
#define ROUNDUPDHT(a, b)
Definition opencl.h:82
#define ROUNDUPDWD(a, b)
Definition opencl.h:81
static int process_rcd_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out, const gboolean smooth)
Definition rcd.c:568
static __DT_CLONE_TARGETS__ void rcd_ppg_border(float *const out, const float *const in, const int width, const int height, const uint32_t filters, const int margin)
Definition rcd.c:92
static INLINE float safe_in(float a, float scale)
Definition rcd.c:85
#define w2
Definition rcd.c:77
#define RCD_TILESIZE
Definition rcd.c:54
#define w1
Definition rcd.c:76
#define w4
Definition rcd.c:79
#define eps
Definition rcd.c:81
#define w3
Definition rcd.c:78
#define epssq
Definition rcd.c:82
static void rcd_demosaic(const dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters)
Definition rcd.c:274
#define RCD_BORDER
Definition rcd.c:73
#define RCD_TILEVALID
Definition rcd.c:75
#define RCD_MARGIN
Definition rcd.c:74
Definition pixelpipe_hb.h:96
dt_iop_buffer_dsc_t dsc_in
Definition pixelpipe_hb.h:142
struct dt_iop_module_t *void * data
Definition pixelpipe_hb.h:97
Definition pixelpipe_hb.h:218
int devid
Definition pixelpipe_hb.h:308
uint32_t filters
Definition format.h:60
dt_aligned_pixel_t processed_maximum
Definition format.h:85
Definition demosaic.c:228
uint32_t green_eq
Definition demosaic.c:229
uint32_t color_smoothing
Definition demosaic.c:230
Definition demosaic.c:162
int kernel_rcd_step_5_1
Definition demosaic.c:217
int kernel_rcd_write_output
Definition demosaic.c:211
int kernel_rcd_step_4_2
Definition demosaic.c:216
int kernel_rcd_step_5_2
Definition demosaic.c:218
int kernel_rcd_step_4_1
Definition demosaic.c:215
int kernel_rcd_border_redblue
Definition demosaic.c:219
int kernel_rcd_step_3_1
Definition demosaic.c:214
int kernel_rcd_border_green
Definition demosaic.c:220
int kernel_rcd_step_2_1
Definition demosaic.c:213
int kernel_rcd_populate
Definition demosaic.c:210
int kernel_rcd_step_1
Definition demosaic.c:212
int kernel_border_interpolate
Definition demosaic.c:175
Definition imageop.h:246
dt_iop_global_data_t * global_data
Definition imageop.h:316
Region of interest passed through the pixelpipe.
Definition imageop.h:72
int width
Definition imageop.h:73
int height
Definition imageop.h:73
Definition opencl.h:269
const int xoffset
Definition opencl.h:270
int sizey
Definition opencl.h:277
int sizex
Definition opencl.h:276
#define c1
Definition colorspaces_inline_conversions.h:795
#define MIN(a, b)
Definition thinplate.c:32