Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
markesteijn.c
Go to the documentation of this file.
1/*
2 This file is part of the Ansel project.
3 Copyright (C) 2023, 2025-2026 Aurélien PIERRE.
4
5 Ansel is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 Ansel is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with Ansel. If not, see <http://www.gnu.org/licenses/>.
17*/
18
19//
20// x-trans specific demosaicing algorithms
21//
22
23// xtrans_interpolate adapted from dcraw 9.20
24
25#define SQR(x) ((x) * (x))
26// tile size, optimized to keep data in L2 cache
27#define TS 122
28
30static inline const short * hexmap(const int row, const int col, short (*const allhex)[3][8])
31{
32 // Row and column offsets may be negative, but C's modulo function
33 // is not useful here with a negative dividend. To be safe, add a
34 // fairly large multiple of 3. In current code row and col will
35 // never be less than -9 (1-pass) or -14 (3-pass).
36 int irow = row + 600;
37 int icol = col + 600;
38 assert(irow >= 0 && icol >= 0);
39 return allhex[irow % 3][icol % 3];
40}
41
42
43/*
44 Frank Markesteijn's algorithm for Fuji X-Trans sensors
45*/
47static void xtrans_markesteijn_interpolate(float *out, const float *const in,
48 const dt_iop_roi_t *const roi_out,
49 const dt_iop_roi_t *const roi_in,
50 const uint8_t (*const xtrans)[6], const int passes)
51{
52 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
53 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
54 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } },
55 dir[4] = { 1, TS, TS + 1, TS - 1 };
56
57 short allhex[3][3][8];
58 // sgrow/sgcol is the offset in the sensor matrix of the solitary
59 // green pixels (initialized here only to avoid compiler warning)
60 unsigned short sgrow = 0, sgcol = 0;
61
62 const int width = roi_out->width;
63 const int height = roi_out->height;
64 const int ndir = 4 << (passes > 1);
65
66 const size_t buffer_size = (size_t)TS * TS * (ndir * 4 + 3) * sizeof(float);
67 size_t padded_buffer_size;
68 char *const all_buffers = (char *)dt_pixelpipe_cache_alloc_perthread(buffer_size, sizeof(char), &padded_buffer_size);
69 if(IS_NULL_PTR(all_buffers))
70 {
71 printf("[demosaic] not able to allocate Markesteijn buffers\n");
72 return;
73 }
74
75 /* Map a green hexagon around each non-green pixel and vice versa: */
76 for(int row = 0; row < 3; row++)
77 for(int col = 0; col < 3; col++)
78 for(int ng = 0, d = 0; d < 10; d += 2)
79 {
80 const int g = FCxtrans(row, col, NULL, xtrans) == 1;
81 if(FCxtrans(row + orth[d], col + orth[d + 2], NULL, xtrans) == 1)
82 ng = 0;
83 else
84 ng++;
85 // if there are four non-green pixels adjacent in cardinal
86 // directions, this is the solitary green pixel
87 if(ng == 4)
88 {
89 sgrow = row;
90 sgcol = col;
91 }
92 if(ng == g + 1)
93 for(int c = 0; c < 8; c++)
94 {
95 const int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
96 const int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
97 // offset within TSxTS buffer
98 allhex[row][col][c ^ (g * 2 & d)] = h + v * TS;
99 }
100 }
101
102 // extra passes propagates out errors at edges, hence need more padding
103 const int pad_tile = (passes == 1) ? 12 : 17;
105 // step through TSxTS cells of image, each tile overlapping the
106 // prior as interpolation needs a substantial border
107 for(int top = -pad_tile; top < height - pad_tile; top += TS - (pad_tile*2))
108 {
109 char *const buffer = dt_get_perthread(all_buffers, padded_buffer_size);
110 // rgb points to ndir TSxTS tiles of 3 channels (R, G, and B)
111 float(*rgb)[TS][TS][3] = (float(*)[TS][TS][3])buffer;
112 // yuv points to 3 channel (Y, u, and v) TSxTS tiles
113 // note that channels come before tiles to allow for a
114 // vectorization optimization when building drv[] from yuv[]
115 float (*const yuv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
116 // drv points to ndir TSxTS tiles, each a single channel of derivatives
117 float (*const drv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3 + 3) * sizeof(float));
118 // gmin and gmax reuse memory which is used later by yuv buffer;
119 // each points to a TSxTS tile of single channel data
120 float (*const gmin)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
121 float (*const gmax)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3 + 1) * sizeof(float));
122 // homo and homosum reuse memory which is used earlier in the
123 // loop; each points to ndir single-channel TSxTS tiles
124 uint8_t (*const homo)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
125 uint8_t (*const homosum)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float)
126 + TS * TS * ndir * sizeof(uint8_t));
127
128 for(int left = -pad_tile; left < width - pad_tile; left += TS - (pad_tile*2))
129 {
130 int mrow = MIN(top + TS, height + pad_tile);
131 int mcol = MIN(left + TS, width + pad_tile);
132
133 // Copy current tile from in to image buffer. If border goes
134 // beyond edges of image, fill with mirrored/interpolated edges.
135 // The extra border avoids discontinuities at image edges.
136 for(int row = top; row < mrow; row++)
137 for(int col = left; col < mcol; col++)
138 {
139 float(*const pix) = rgb[0][row - top][col - left];
140 if((col >= 0) && (row >= 0) && (col < width) && (row < height))
141 {
142 const int f = FCxtrans(row, col, roi_in, xtrans);
143 for(int c = 0; c < 3; c++) pix[c] = (c == f) ? in[roi_in->width * row + col] : 0.f;
144 }
145 else
146 {
147 // mirror a border pixel if beyond image edge
148 const int c = FCxtrans(row, col, roi_in, xtrans);
149 for(int cc = 0; cc < 3; cc++)
150 {
151 if(cc != c)
152 pix[cc] = 0.0f;
153 else
154 {
155#define TRANSLATE(n, size) ((n >= size) ? (2 * size - n - 2) : abs(n))
156 const int cy = TRANSLATE(row, height), cx = TRANSLATE(col, width);
157 if(c == FCxtrans(cy, cx, roi_in, xtrans))
158 pix[c] = in[roi_in->width * cy + cx];
159 else
160 {
161 // interpolate if mirror pixel is a different color
162 float sum = 0.0f;
163 uint8_t count = 0;
164 for(int y = row - 1; y <= row + 1; y++)
165 for(int x = col - 1; x <= col + 1; x++)
166 {
167 const int yy = TRANSLATE(y, height), xx = TRANSLATE(x, width);
168 const int ff = FCxtrans(yy, xx, roi_in, xtrans);
169 if(ff == c)
170 {
171 sum += in[roi_in->width * yy + xx];
172 count++;
173 }
174 }
175 pix[c] = sum / count;
176 }
177 }
178 }
179 }
180 }
181
182 // duplicate rgb[0] to rgb[1], rgb[2], and rgb[3]
183 for(int c = 1; c <= 3; c++) memcpy(rgb[c], rgb[0], sizeof(*rgb));
184
185 // note that successive calculations are inset within the tile
186 // so as to give enough border data, and there needs to be a 6
187 // pixel border initially to allow allhex to find neighboring
188 // pixels
189
190 /* Set green1 and green3 to the minimum and maximum allowed values: */
191 // Run through each red/blue or blue/red pair, setting their g1
192 // and g3 values to the min/max of green pixels surrounding the
193 // pair. Use a 3 pixel border as gmin/gmax is used by
194 // interpolate green which has a 3 pixel border.
195 const int pad_g1_g3 = 3;
196 for(int row = top + pad_g1_g3; row < mrow - pad_g1_g3; row++)
197 {
198 // setting max to 0.0f signifies that this is a new pair, which
199 // requires a new min/max calculation of its neighboring greens
200 float min = FLT_MAX, max = 0.0f;
201 for(int col = left + pad_g1_g3; col < mcol - pad_g1_g3; col++)
202 {
203 // if in row of horizontal red & blue pairs (or processing
204 // vertical red & blue pairs near image bottom), reset min/max
205 // between each pair
206 if(FCxtrans(row, col, roi_in, xtrans) == 1)
207 {
208 min = FLT_MAX, max = 0.0f;
209 continue;
210 }
211 // if at start of red & blue pair, calculate min/max of green
212 // pixels surrounding it; note that while normally using == to
213 // compare floats is suspect, here the check is if 0.0f has
214 // explicitly been assigned to max (which signifies a new
215 // red/blue pair)
216 if(max == 0.0f)
217 {
218 float (*const pix)[3] = &rgb[0][row - top][col - left];
219 const short *const hex = hexmap(row,col,allhex);
220 for(int c = 0; c < 6; c++)
221 {
222 const float val = pix[hex[c]][1];
223 if(min > val) min = val;
224 if(max < val) max = val;
225 }
226 }
227 gmin[row - top][col - left] = min;
228 gmax[row - top][col - left] = max;
229 // handle vertical red/blue pairs
230 switch((row - sgrow) % 3)
231 {
232 // hop down a row to second pixel in vertical pair
233 case 1:
234 if(row < mrow - 4) row++, col--;
235 break;
236 // then if not done with the row hop up and right to next
237 // vertical red/blue pair, resetting min/max
238 case 2:
239 min = FLT_MAX, max = 0.0f;
240 if((col += 2) < mcol - 4 && row > top + 3) row--;
241 }
242 }
243 }
244
245 /* Interpolate green horizontally, vertically, and along both diagonals: */
246 // need a 3 pixel border here as 3*hex[] can have a 3 unit offset
247 const int pad_g_interp = 3;
248 for(int row = top + pad_g_interp; row < mrow - pad_g_interp; row++)
249 for(int col = left + pad_g_interp; col < mcol - pad_g_interp; col++)
250 {
251 float color[8];
252 const int f = FCxtrans(row, col, roi_in, xtrans);
253 if(f == 1) continue;
254 float (*const pix)[3] = &rgb[0][row - top][col - left];
255 const short *const hex = hexmap(row,col,allhex);
256 // TODO: these constants come from integer math constants in
257 // dcraw -- calculate them instead from interpolation math
258 color[0] = 0.6796875f * (pix[hex[1]][1] + pix[hex[0]][1])
259 - 0.1796875f * (pix[2 * hex[1]][1] + pix[2 * hex[0]][1]);
260 color[1] = 0.87109375f * pix[hex[3]][1] + pix[hex[2]][1] * 0.13f
261 + 0.359375f * (pix[0][f] - pix[-hex[2]][f]);
262 for(int c = 0; c < 2; c++)
263 color[2 + c] = 0.640625f * pix[hex[4 + c]][1] + 0.359375f * pix[-2 * hex[4 + c]][1]
264 + 0.12890625f * (2 * pix[0][f] - pix[3 * hex[4 + c]][f] - pix[-3 * hex[4 + c]][f]);
265 for(int c = 0; c < 4; c++)
266 rgb[c ^ !((row - sgrow) % 3)][row - top][col - left][1]
267 = CLAMPS(color[c], gmin[row - top][col - left], gmax[row - top][col - left]);
268 }
269
270 for(int pass = 0; pass < passes; pass++)
271 {
272 if(pass == 1)
273 {
274 // if on second pass, copy rgb[0] to [3] into rgb[4] to [7],
275 // and process that second set of buffers
276 memcpy(rgb + 4, rgb, sizeof(*rgb) * 4);
277 rgb += 4;
278 }
279
280 /* Recalculate green from interpolated values of closer pixels: */
281 if(pass)
282 {
283 const int pad_g_recalc = 6;
284 for(int row = top + pad_g_recalc; row < mrow - pad_g_recalc; row++)
285 for(int col = left + pad_g_recalc; col < mcol - pad_g_recalc; col++)
286 {
287 const int f = FCxtrans(row, col, roi_in, xtrans);
288 if(f == 1) continue;
289 const short *const hex = hexmap(row,col,allhex);
290 for(int d = 3; d < 6; d++)
291 {
292 float(*rfx)[3] = &rgb[(d - 2) ^ !((row - sgrow) % 3)][row - top][col - left];
293 const float val = rfx[-2 * hex[d]][1]
294 + 2 * rfx[hex[d]][1] - rfx[-2 * hex[d]][f]
295 - 2 * rfx[hex[d]][f] + 3 * rfx[0][f];
296 rfx[0][1] = CLAMPS(val / 3.0f, gmin[row - top][col - left], gmax[row - top][col - left]);
297 }
298 }
299 }
300
301 /* Interpolate red and blue values for solitary green pixels: */
302 const int pad_rb_g = (passes == 1) ? 6 : 5;
303 for(int row = (top - sgrow + pad_rb_g + 2) / 3 * 3 + sgrow; row < mrow - pad_rb_g; row += 3)
304 for(int col = (left - sgcol + pad_rb_g + 2) / 3 * 3 + sgcol; col < mcol - pad_rb_g; col += 3)
305 {
306 float(*rfx)[3] = &rgb[0][row - top][col - left];
307 int h = FCxtrans(row, col + 1, roi_in, xtrans);
308 float diff[6] = { 0.0f };
309 // interplated color: first index is red/blue, second is
310 // pass, is double actual result
311 float color[2][6];
312 // Six passes, alternating hori/vert interp (i),
313 // starting with R or B (h) depending on which is closest.
314 // Passes 0,1 to rgb[0], rgb[1] of hori/vert interp. Pass
315 // 3,5 to rgb[2], rgb[3] of best of interp hori/vert
316 // results. Each pass which outputs moves on to the next
317 // rgb[] for input of interp greens.
318 for(int i = 1, d = 0; d < 6; d++, i ^= TS ^ 1, h ^= 2)
319 {
320 // look 1 and 2 pixels distance from solitary green to
321 // red then blue or blue then red
322 for(int c = 0; c < 2; c++, h ^= 2)
323 {
324 // rate of change in greens between current pixel and
325 // interpolated pixels 1 or 2 distant: a quick
326 // derivative which will be divided by two later to be
327 // rate of luminance change for red/blue between known
328 // red/blue neighbors and the current unknown pixel
329 const float g = 2 * rfx[0][1] - rfx[i << c][1] - rfx[-(i << c)][1];
330 // color is halved before being stored in rgb, hence
331 // this becomes green rate of change plus the average
332 // of the near red or blue pixels on current axis
333 color[h != 0][d] = g + rfx[i << c][h] + rfx[-(i << c)][h];
334 // Note that diff will become the slope for both red
335 // and blue differentials in the current direction.
336 // For 2nd and 3rd hori+vert passes, create a sum of
337 // steepness for both cardinal directions.
338 if(d > 1)
339 diff[d] += SQR(rfx[i << c][1] - rfx[-(i << c)][1] - rfx[i << c][h] + rfx[-(i << c)][h])
340 + SQR(g);
341 }
342 if((d < 2) || (d & 1))
343 { // output for passes 0, 1, 3, 5
344 // for 0, 1 just use hori/vert, for 3, 5 use best of x/y dir
345 const int d_out = d - ((d > 1) && (diff[d-1] < diff[d]));
346 rfx[0][0] = color[0][d_out] / 2.f;
347 rfx[0][2] = color[1][d_out] / 2.f;
348 rfx += TS * TS;
349 }
350 }
351 }
352
353 /* Interpolate red for blue pixels and vice versa: */
354 const int pad_rb_br = (passes == 1) ? 6 : 5;
355 for(int row = top + pad_rb_br; row < mrow - pad_rb_br; row++)
356 for(int col = left + pad_rb_br; col < mcol - pad_rb_br; col++)
357 {
358 const int f = 2 - FCxtrans(row, col, roi_in, xtrans);
359 if(f == 1) continue;
360 float(*rfx)[3] = &rgb[0][row - top][col - left];
361 const int c = (row - sgrow) % 3 ? TS : 1;
362 const int h = 3 * (c ^ TS ^ 1);
363 for(int d = 0; d < 4; d++, rfx += TS * TS)
364 {
365 const int i = d > 1 || ((d ^ c) & 1) ||
366 ((fabsf(rfx[0][1]-rfx[c][1]) + fabsf(rfx[0][1]-rfx[-c][1])) <
367 2.f*(fabsf(rfx[0][1]-rfx[h][1]) + fabsf(rfx[0][1]-rfx[-h][1]))) ? c:h;
368 rfx[0][f] = (rfx[i][f] + rfx[-i][f] + 2.f * rfx[0][1] - rfx[i][1] - rfx[-i][1]) / 2.f;
369 }
370 }
371
372 /* Fill in red and blue for 2x2 blocks of green: */
373 const int pad_g22 = (passes == 1) ? 8 : 4;
374 for(int row = top + pad_g22; row < mrow - pad_g22; row++)
375 {
376 if((row - sgrow) % 3)
377 for(int col = left + pad_g22; col < mcol - pad_g22; col++)
378 if((col - sgcol) % 3)
379 {
380 float(*rfx)[3] = &rgb[0][row - top][col - left];
381 const short *const hex = hexmap(row,col,allhex);
382 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
383 if(hex[d] + hex[d + 1])
384 {
385 const float g = 3.f * rfx[0][1] - 2.f * rfx[hex[d]][1] - rfx[hex[d + 1]][1];
386 for(int c = 0; c < 4; c += 2)
387 rfx[0][c] = (g + 2.f * rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 3.f;
388 }
389 else
390 {
391 const float g = 2.f * rfx[0][1] - rfx[hex[d]][1] - rfx[hex[d + 1]][1];
392 for(int c = 0; c < 4; c += 2)
393 rfx[0][c] = (g + rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 2.f;
394 }
395 }
396 }
397 } // end of multipass loop
398
399 // jump back to the first set of rgb buffers (this is a nop
400 // unless on the second pass)
401 rgb = (float(*)[TS][TS][3])buffer;
402 // from here on out, mainly are working within the current tile
403 // rather than in reference to the image, so don't offset
404 // mrow/mcol by top/left of tile
405 mrow -= top;
406 mcol -= left;
407
408 /* Convert to perceptual colorspace and differentiate in all directions: */
409 // Original dcraw algorithm uses CIELab as perceptual space
410 // (presumably coming from original AHD) and converts taking
411 // camera matrix into account. Now use YPbPr which requires much
412 // less code and is nearly indistinguishable. It assumes the
413 // camera RGB is roughly linear.
414 for(int d = 0; d < ndir; d++)
415 {
416 const int pad_yuv = (passes == 1) ? 8 : 13;
417 for(int row = pad_yuv; row < mrow - pad_yuv; row++)
418 for(int col = pad_yuv; col < mcol - pad_yuv; col++)
419 {
420 const float *rx = rgb[d][row][col];
421 // use ITU-R BT.2020 YPbPr, which is great, but could use
422 // a better/simpler choice? note that imageop.h provides
423 // dt_iop_RGB_to_YCbCr which uses Rec. 601 conversion,
424 // which appears less good with specular highlights
425 const float y = 0.2627f * rx[0] + 0.6780f * rx[1] + 0.0593f * rx[2];
426 yuv[0][row][col] = y;
427 yuv[1][row][col] = (rx[2] - y) * 0.56433f;
428 yuv[2][row][col] = (rx[0] - y) * 0.67815f;
429 }
430 // Note that f can offset by a column (-1 or +1) and by a row
431 // (-TS or TS). The row-wise offsets cause the undefined
432 // behavior sanitizer to warn of an out of bounds index, but
433 // as yfx is multi-dimensional and there is sufficient
434 // padding, that is not actually so.
435 const int f = dir[d & 3];
436 const int pad_drv = (passes == 1) ? 9 : 14;
437 for(int row = pad_drv; row < mrow - pad_drv; row++)
438 for(int col = pad_drv; col < mcol - pad_drv; col++)
439 {
440 const float(*yfx)[TS][TS] = (float(*)[TS][TS]) & yuv[0][row][col];
441 drv[d][row][col] = SQR(2 * yfx[0][0][0] - yfx[0][0][f] - yfx[0][0][-f])
442 + SQR(2 * yfx[1][0][0] - yfx[1][0][f] - yfx[1][0][-f])
443 + SQR(2 * yfx[2][0][0] - yfx[2][0][f] - yfx[2][0][-f]);
444 }
445 }
446
447 /* Build homogeneity maps from the derivatives: */
448 memset_zero(homo, sizeof(uint8_t) * ndir * TS * TS);
449 const int pad_homo = (passes == 1) ? 10 : 15;
450 for(int row = pad_homo; row < mrow - pad_homo; row++)
451 for(int col = pad_homo; col < mcol - pad_homo; col++)
452 {
453 float tr = FLT_MAX;
454 for(int d = 0; d < ndir; d++)
455 if(tr > drv[d][row][col]) tr = drv[d][row][col];
456 tr *= 8;
457 for(int d = 0; d < ndir; d++)
458 for(int v = -1; v <= 1; v++)
459 for(int h = -1; h <= 1; h++)
460 homo[d][row][col] += ((drv[d][row + v][col + h] <= tr) ? 1 : 0);
461 }
462
463 /* Build 5x5 sum of homogeneity maps for each pixel & direction */
464 for(int d = 0; d < ndir; d++)
465 for(int row = pad_tile; row < mrow - pad_tile; row++)
466 {
467 // start before first column where homo[d][row][col+2] != 0,
468 // so can know v5sum and homosum[d][row][col] will be 0
469 int col = pad_tile-5;
470 uint8_t v5sum[5] = { 0 };
471 homosum[d][row][col] = 0;
472 // calculate by rolling through column sums
473 for(col++; col < mcol - pad_tile; col++)
474 {
475 uint8_t colsum = 0;
476 for(int v = -2; v <= 2; v++) colsum += homo[d][row + v][col + 2];
477 homosum[d][row][col] = homosum[d][row][col - 1] - v5sum[col % 5] + colsum;
478 v5sum[col % 5] = colsum;
479 }
480 }
481
482 /* Average the most homogeneous pixels for the final result: */
483 for(int row = pad_tile; row < mrow - pad_tile; row++)
484 for(int col = pad_tile; col < mcol - pad_tile; col++)
485 {
486 uint8_t hm[8] = { 0 };
487 uint8_t maxval = 0;
488 for(int d = 0; d < ndir; d++)
489 {
490 hm[d] = homosum[d][row][col];
491 maxval = (maxval < hm[d] ? hm[d] : maxval);
492 }
493 maxval -= maxval >> 3;
494 for(int d = 0; d < ndir - 4; d++)
495 {
496 if(hm[d] < hm[d + 4])
497 hm[d] = 0;
498 else if(hm[d] > hm[d + 4])
499 hm[d + 4] = 0;
500 }
501 dt_aligned_pixel_t avg = { 0.0f };
502 for(int d = 0; d < ndir; d++)
503 {
504 if(hm[d] >= maxval)
505 {
506 for(int c = 0; c < 3; c++) avg[c] += rgb[d][row][col][c];
507 avg[3]++;
508 }
509 }
510 for(int c = 0; c < 3; c++)
511 out[4 * (width * (row + top) + col + left) + c] = avg[c]/avg[3];
512 }
513 }
514 }
515
516
518}
519
520#undef TS
521
522#define TS 122
524static void xtrans_fdc_interpolate(struct dt_iop_module_t *self, float *out, const float *const in,
525 const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in,
526 const uint8_t (*const xtrans)[6])
527{
528
529 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
530 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
531 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } },
532 dir[4] = { 1, TS, TS + 1, TS - 1 };
533
534 static const float directionality[8] = { 1.0f, 0.0f, 0.5f, 0.5f, 1.0f, 0.0f, 0.5f, 0.5f };
535
536 short allhex[3][3][8];
537 // sgrow/sgcol is the offset in the sensor matrix of the solitary
538 // green pixels (initialized here only to avoid compiler warning)
539 unsigned short sgrow = 0, sgcol = 0;
540
541 const int width = roi_out->width;
542 const int height = roi_out->height;
543 static const int ndir = 4;
544
545 static const float complex Minv[3][8] = {
546 { 1.000000e+00f, 2.500000e-01f - 4.330127e-01f * _Complex_I, -2.500000e-01f - 4.330127e-01f * _Complex_I,
547 -1.000000e+00f, 7.500000e-01f - 1.299038e+00f * _Complex_I, -2.500000e-01f + 4.330127e-01f * _Complex_I,
548 7.500000e-01f + 1.299038e+00f * _Complex_I, 2.500000e-01f + 4.330127e-01f * _Complex_I },
549 { 1.000000e+00f, -2.000000e-01f + 3.464102e-01f * _Complex_I, 2.000000e-01f + 3.464102e-01f * _Complex_I,
550 8.000000e-01f, 0.0f, 2.000000e-01f - 3.464102e-01f * _Complex_I, 0.0f,
551 -2.000000e-01f - 3.464102e-01f * _Complex_I },
552 { 1.000000e+00f, 2.500000e-01f - 4.330127e-01f * _Complex_I, -2.500000e-01f - 4.330127e-01f * _Complex_I,
553 -1.000000e+00f, -7.500000e-01f + 1.299038e+00f * _Complex_I, -2.500000e-01f + 4.330127e-01f * _Complex_I,
554 -7.500000e-01f - 1.299038e+00f * _Complex_I, 2.500000e-01f + 4.330127e-01f * _Complex_I },
555 };
556
557 static const float complex modarr[6][6][8] = {
558 { { 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
559 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
560 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
561 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
562 { -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
563 -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
564 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
565 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
566 { 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
567 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
568 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
569 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
570 { -1.000000e+00f - 3.673940e-16f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
571 -1.000000e+00f - 3.673940e-16f * _Complex_I, -1.000000e+00f - 1.224647e-16f * _Complex_I,
572 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
573 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f + 2.449294e-16f * _Complex_I },
574 { 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
575 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
576 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
577 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
578 { -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
579 -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
580 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
581 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I } },
582 { { 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
583 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
584 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
585 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
586 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
587 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
588 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
589 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
590 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
591 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
592 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
593 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
594 { -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
595 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
596 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
597 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
598 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
599 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
600 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
601 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 2.449294e-16f * _Complex_I },
602 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
603 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
604 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
605 1.000000e+00f - 2.266216e-15f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I } },
606 { { -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
607 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
608 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
609 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
610 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
611 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
612 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
613 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
614 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
615 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
616 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
617 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
618 { 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
619 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
620 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
621 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
622 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
623 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
624 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
625 1.000000e+00f - 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
626 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
627 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
628 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
629 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 1.133108e-15f * _Complex_I } },
630 { { -1.000000e+00f + 1.224647e-16f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
631 -1.000000e+00f - 1.224647e-16f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
632 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
633 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I },
634 { 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
635 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
636 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
637 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
638 { -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
639 -1.000000e+00f - 3.673940e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
640 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
641 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
642 { 1.000000e+00f + 2.449294e-16f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
643 1.000000e+00f + 4.898587e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
644 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
645 1.000000e+00f - 4.898587e-16f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
646 { -1.000000e+00f - 3.673940e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
647 -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
648 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
649 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
650 { 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
651 1.000000e+00f + 7.347881e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
652 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
653 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I } },
654 { { -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
655 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
656 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
657 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
658 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
659 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
660 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
661 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I },
662 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
663 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
664 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
665 1.000000e+00f - 4.898587e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
666 { 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
667 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
668 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
669 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
670 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
671 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
672 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
673 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
674 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
675 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
676 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
677 1.000000e+00f - 7.347881e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I } },
678 { { 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
679 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
680 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
681 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
682 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
683 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
684 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
685 1.000000e+00f - 2.266216e-15f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
686 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
687 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
688 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
689 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 1.133108e-15f * _Complex_I },
690 { -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 7.347881e-16f * _Complex_I,
691 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
692 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
693 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
694 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
695 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
696 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
697 1.000000e+00f - 7.347881e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
698 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
699 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
700 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
701 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I } },
702 };
703
704 static const float complex harr[4][13][13]
705 = { { { 1.326343e-03f - 1.299441e-18f * _Complex_I, 7.091837e-04f - 1.228342e-03f * _Complex_I,
706 -6.278557e-04f - 1.087478e-03f * _Complex_I, -1.157216e-03f + 9.920263e-19f * _Complex_I,
707 -4.887166e-04f + 8.464820e-04f * _Complex_I, 5.758687e-04f + 9.974338e-04f * _Complex_I,
708 1.225183e-03f - 9.002496e-19f * _Complex_I, 5.758687e-04f - 9.974338e-04f * _Complex_I,
709 -4.887166e-04f - 8.464820e-04f * _Complex_I, -1.157216e-03f + 7.085902e-19f * _Complex_I,
710 -6.278557e-04f + 1.087478e-03f * _Complex_I, 7.091837e-04f + 1.228342e-03f * _Complex_I,
711 1.326343e-03f - 6.497206e-19f * _Complex_I },
712 { -1.980815e-03f + 1.698059e-18f * _Complex_I, -1.070384e-03f + 1.853959e-03f * _Complex_I,
713 7.924697e-04f + 1.372598e-03f * _Complex_I, 1.876584e-03f - 1.378892e-18f * _Complex_I,
714 1.225866e-03f - 2.123262e-03f * _Complex_I, -1.569320e-03f - 2.718142e-03f * _Complex_I,
715 -3.273971e-03f + 2.004729e-18f * _Complex_I, -1.569320e-03f + 2.718142e-03f * _Complex_I,
716 1.225866e-03f + 2.123262e-03f * _Complex_I, 1.876584e-03f - 9.192611e-19f * _Complex_I,
717 7.924697e-04f - 1.372598e-03f * _Complex_I, -1.070384e-03f - 1.853959e-03f * _Complex_I,
718 -1.980815e-03f + 7.277398e-19f * _Complex_I },
719 { 1.457023e-03f - 1.070603e-18f * _Complex_I, 8.487143e-04f - 1.470016e-03f * _Complex_I,
720 -6.873776e-04f - 1.190573e-03f * _Complex_I, -2.668335e-03f + 1.633884e-18f * _Complex_I,
721 -2.459813e-03f + 4.260521e-03f * _Complex_I, 3.238772e-03f + 5.609717e-03f * _Complex_I,
722 7.074895e-03f - 3.465699e-18f * _Complex_I, 3.238772e-03f - 5.609717e-03f * _Complex_I,
723 -2.459813e-03f - 4.260521e-03f * _Complex_I, -2.668335e-03f + 9.803302e-19f * _Complex_I,
724 -6.873776e-04f + 1.190573e-03f * _Complex_I, 8.487143e-04f + 1.470016e-03f * _Complex_I,
725 1.457023e-03f - 3.568678e-19f * _Complex_I },
726 { -1.017660e-03f + 6.231370e-19f * _Complex_I, -5.415171e-04f + 9.379351e-04f * _Complex_I,
727 7.255109e-04f + 1.256622e-03f * _Complex_I, 3.699792e-03f - 1.812375e-18f * _Complex_I,
728 4.090356e-03f - 7.084704e-03f * _Complex_I, -6.006283e-03f - 1.040319e-02f * _Complex_I,
729 -1.391431e-02f + 5.112034e-18f * _Complex_I, -6.006283e-03f + 1.040319e-02f * _Complex_I,
730 4.090356e-03f + 7.084704e-03f * _Complex_I, 3.699792e-03f - 9.061876e-19f * _Complex_I,
731 7.255109e-04f - 1.256622e-03f * _Complex_I, -5.415171e-04f - 9.379351e-04f * _Complex_I,
732 -1.017660e-03f + 1.246274e-19f * _Complex_I },
733 { 9.198983e-04f - 4.506202e-19f * _Complex_I, 6.815900e-04f - 1.180548e-03f * _Complex_I,
734 -1.287335e-03f - 2.229729e-03f * _Complex_I, -5.023856e-03f + 1.845735e-18f * _Complex_I,
735 -5.499048e-03f + 9.524630e-03f * _Complex_I, 9.797672e-03f + 1.697006e-02f * _Complex_I,
736 2.504795e-02f - 6.134977e-18f * _Complex_I, 9.797672e-03f - 1.697006e-02f * _Complex_I,
737 -5.499048e-03f - 9.524630e-03f * _Complex_I, -5.023856e-03f + 6.152449e-19f * _Complex_I,
738 -1.287335e-03f + 2.229729e-03f * _Complex_I, 6.815900e-04f + 1.180548e-03f * _Complex_I,
739 9.198983e-04f + 0.000000e+00f * _Complex_I },
740 { -7.972663e-04f + 2.929109e-19f * _Complex_I, -1.145605e-03f + 1.984247e-03f * _Complex_I,
741 1.983334e-03f + 3.435235e-03f * _Complex_I, 6.730096e-03f - 1.648398e-18f * _Complex_I,
742 6.782033e-03f - 1.174683e-02f * _Complex_I, -1.392077e-02f - 2.411147e-02f * _Complex_I,
743 -3.906939e-02f + 4.784620e-18f * _Complex_I, -1.392077e-02f + 2.411147e-02f * _Complex_I,
744 6.782033e-03f + 1.174683e-02f * _Complex_I, 6.730096e-03f + 0.000000e+00f * _Complex_I,
745 1.983334e-03f - 3.435235e-03f * _Complex_I, -1.145605e-03f - 1.984247e-03f * _Complex_I,
746 -7.972663e-04f - 9.763696e-20f * _Complex_I },
747 { 8.625458e-04f - 2.112628e-19f * _Complex_I, 1.431113e-03f - 2.478760e-03f * _Complex_I,
748 -2.310309e-03f - 4.001572e-03f * _Complex_I, -7.706486e-03f + 9.437723e-19f * _Complex_I,
749 -7.220186e-03f + 1.250573e-02f * _Complex_I, 1.587118e-02f + 2.748969e-02f * _Complex_I,
750 4.765675e-02f + 0.000000e+00f * _Complex_I, 1.587118e-02f - 2.748969e-02f * _Complex_I,
751 -7.220186e-03f - 1.250573e-02f * _Complex_I, -7.706486e-03f - 9.437723e-19f * _Complex_I,
752 -2.310309e-03f + 4.001572e-03f * _Complex_I, 1.431113e-03f + 2.478760e-03f * _Complex_I,
753 8.625458e-04f + 2.112628e-19f * _Complex_I },
754 { -7.972663e-04f + 9.763696e-20f * _Complex_I, -1.145605e-03f + 1.984247e-03f * _Complex_I,
755 1.983334e-03f + 3.435235e-03f * _Complex_I, 6.730096e-03f + 0.000000e+00f * _Complex_I,
756 6.782033e-03f - 1.174683e-02f * _Complex_I, -1.392077e-02f - 2.411147e-02f * _Complex_I,
757 -3.906939e-02f - 4.784620e-18f * _Complex_I, -1.392077e-02f + 2.411147e-02f * _Complex_I,
758 6.782033e-03f + 1.174683e-02f * _Complex_I, 6.730096e-03f + 1.648398e-18f * _Complex_I,
759 1.983334e-03f - 3.435235e-03f * _Complex_I, -1.145605e-03f - 1.984247e-03f * _Complex_I,
760 -7.972663e-04f - 2.929109e-19f * _Complex_I },
761 { 9.198983e-04f + 0.000000e+00f * _Complex_I, 6.815900e-04f - 1.180548e-03f * _Complex_I,
762 -1.287335e-03f - 2.229729e-03f * _Complex_I, -5.023856e-03f - 6.152449e-19f * _Complex_I,
763 -5.499048e-03f + 9.524630e-03f * _Complex_I, 9.797672e-03f + 1.697006e-02f * _Complex_I,
764 2.504795e-02f + 6.134977e-18f * _Complex_I, 9.797672e-03f - 1.697006e-02f * _Complex_I,
765 -5.499048e-03f - 9.524630e-03f * _Complex_I, -5.023856e-03f - 1.845735e-18f * _Complex_I,
766 -1.287335e-03f + 2.229729e-03f * _Complex_I, 6.815900e-04f + 1.180548e-03f * _Complex_I,
767 9.198983e-04f + 4.506202e-19f * _Complex_I },
768 { -1.017660e-03f - 1.246274e-19f * _Complex_I, -5.415171e-04f + 9.379351e-04f * _Complex_I,
769 7.255109e-04f + 1.256622e-03f * _Complex_I, 3.699792e-03f + 9.061876e-19f * _Complex_I,
770 4.090356e-03f - 7.084704e-03f * _Complex_I, -6.006283e-03f - 1.040319e-02f * _Complex_I,
771 -1.391431e-02f - 5.112034e-18f * _Complex_I, -6.006283e-03f + 1.040319e-02f * _Complex_I,
772 4.090356e-03f + 7.084704e-03f * _Complex_I, 3.699792e-03f + 1.812375e-18f * _Complex_I,
773 7.255109e-04f - 1.256622e-03f * _Complex_I, -5.415171e-04f - 9.379351e-04f * _Complex_I,
774 -1.017660e-03f - 6.231370e-19f * _Complex_I },
775 { 1.457023e-03f + 3.568678e-19f * _Complex_I, 8.487143e-04f - 1.470016e-03f * _Complex_I,
776 -6.873776e-04f - 1.190573e-03f * _Complex_I, -2.668335e-03f - 9.803302e-19f * _Complex_I,
777 -2.459813e-03f + 4.260521e-03f * _Complex_I, 3.238772e-03f + 5.609717e-03f * _Complex_I,
778 7.074895e-03f + 3.465699e-18f * _Complex_I, 3.238772e-03f - 5.609717e-03f * _Complex_I,
779 -2.459813e-03f - 4.260521e-03f * _Complex_I, -2.668335e-03f - 1.633884e-18f * _Complex_I,
780 -6.873776e-04f + 1.190573e-03f * _Complex_I, 8.487143e-04f + 1.470016e-03f * _Complex_I,
781 1.457023e-03f + 1.070603e-18f * _Complex_I },
782 { -1.980815e-03f - 7.277398e-19f * _Complex_I, -1.070384e-03f + 1.853959e-03f * _Complex_I,
783 7.924697e-04f + 1.372598e-03f * _Complex_I, 1.876584e-03f + 9.192611e-19f * _Complex_I,
784 1.225866e-03f - 2.123262e-03f * _Complex_I, -1.569320e-03f - 2.718142e-03f * _Complex_I,
785 -3.273971e-03f - 2.004729e-18f * _Complex_I, -1.569320e-03f + 2.718142e-03f * _Complex_I,
786 1.225866e-03f + 2.123262e-03f * _Complex_I, 1.876584e-03f + 1.378892e-18f * _Complex_I,
787 7.924697e-04f - 1.372598e-03f * _Complex_I, -1.070384e-03f - 1.853959e-03f * _Complex_I,
788 -1.980815e-03f - 1.698059e-18f * _Complex_I },
789 { 1.326343e-03f + 6.497206e-19f * _Complex_I, 7.091837e-04f - 1.228342e-03f * _Complex_I,
790 -6.278557e-04f - 1.087478e-03f * _Complex_I, -1.157216e-03f - 7.085902e-19f * _Complex_I,
791 -4.887166e-04f + 8.464820e-04f * _Complex_I, 5.758687e-04f + 9.974338e-04f * _Complex_I,
792 1.225183e-03f + 9.002496e-19f * _Complex_I, 5.758687e-04f - 9.974338e-04f * _Complex_I,
793 -4.887166e-04f - 8.464820e-04f * _Complex_I, -1.157216e-03f - 9.920263e-19f * _Complex_I,
794 -6.278557e-04f + 1.087478e-03f * _Complex_I, 7.091837e-04f + 1.228342e-03f * _Complex_I,
795 1.326343e-03f + 1.299441e-18f * _Complex_I } },
796 { { 9.129120e-04f - 8.943958e-19f * _Complex_I, -5.925973e-04f - 1.026409e-03f * _Complex_I,
797 -5.989682e-04f + 1.037443e-03f * _Complex_I, 1.158755e-03f - 8.514393e-19f * _Complex_I,
798 -8.992493e-04f - 1.557545e-03f * _Complex_I, -1.283187e-03f + 2.222546e-03f * _Complex_I,
799 2.730635e-03f - 1.337625e-18f * _Complex_I, -1.283187e-03f - 2.222546e-03f * _Complex_I,
800 -8.992493e-04f + 1.557545e-03f * _Complex_I, 1.158755e-03f - 2.838131e-19f * _Complex_I,
801 -5.989682e-04f - 1.037443e-03f * _Complex_I, -5.925973e-04f + 1.026409e-03f * _Complex_I,
802 9.129120e-04f + 0.000000e+00f * _Complex_I },
803 { -5.588854e-04f - 9.680179e-04f * _Complex_I, -6.474856e-04f + 1.121478e-03f * _Complex_I,
804 1.536588e-03f - 1.129066e-18f * _Complex_I, -9.123802e-04f - 1.580289e-03f * _Complex_I,
805 -1.541434e-03f + 2.669842e-03f * _Complex_I, 4.379825e-03f - 9.925627e-18f * _Complex_I,
806 -2.394173e-03f - 4.146830e-03f * _Complex_I, -2.189912e-03f + 3.793039e-03f * _Complex_I,
807 3.082869e-03f - 3.493222e-18f * _Complex_I, -9.123802e-04f - 1.580289e-03f * _Complex_I,
808 -7.682939e-04f + 1.330724e-03f * _Complex_I, 1.294971e-03f + 0.000000e+00f * _Complex_I,
809 -5.588854e-04f - 9.680179e-04f * _Complex_I },
810 { -5.883876e-04f + 1.019117e-03f * _Complex_I, 1.714796e-03f - 1.260012e-18f * _Complex_I,
811 -1.180365e-03f - 2.044451e-03f * _Complex_I, -1.483082e-03f + 2.568774e-03f * _Complex_I,
812 4.933362e-03f - 2.416651e-18f * _Complex_I, -3.296542e-03f - 5.709779e-03f * _Complex_I,
813 -3.546477e-03f + 6.142678e-03f * _Complex_I, 6.593085e-03f - 1.614840e-18f * _Complex_I,
814 -2.466681e-03f - 4.272417e-03f * _Complex_I, -1.483082e-03f + 2.568774e-03f * _Complex_I,
815 2.360729e-03f + 0.000000e+00f * _Complex_I, -8.573982e-04f - 1.485057e-03f * _Complex_I,
816 -5.883876e-04f + 1.019117e-03f * _Complex_I },
817 { 1.483526e-03f - 1.090077e-18f * _Complex_I, -1.074793e-03f - 1.861596e-03f * _Complex_I,
818 -1.447448e-03f + 2.507053e-03f * _Complex_I, 3.952416e-03f - 1.936126e-18f * _Complex_I,
819 -3.496688e-03f - 6.056441e-03f * _Complex_I, -4.898024e-03f + 8.483627e-03f * _Complex_I,
820 1.070518e-02f - 2.622012e-18f * _Complex_I, -4.898024e-03f - 8.483627e-03f * _Complex_I,
821 -3.496688e-03f + 6.056441e-03f * _Complex_I, 3.952416e-03f + 0.000000e+00f * _Complex_I,
822 -1.447448e-03f - 2.507053e-03f * _Complex_I, -1.074793e-03f + 1.861596e-03f * _Complex_I,
823 1.483526e-03f + 3.633590e-19f * _Complex_I },
824 { -9.966429e-04f - 1.726236e-03f * _Complex_I, -1.478281e-03f + 2.560458e-03f * _Complex_I,
825 4.306274e-03f - 2.109466e-18f * _Complex_I, -3.294955e-03f - 5.707029e-03f * _Complex_I,
826 -5.436890e-03f + 9.416970e-03f * _Complex_I, 1.556418e-02f - 3.812124e-18f * _Complex_I,
827 -8.842875e-03f - 1.531631e-02f * _Complex_I, -7.782088e-03f + 1.347897e-02f * _Complex_I,
828 1.087378e-02f + 0.000000e+00f * _Complex_I, -3.294955e-03f - 5.707029e-03f * _Complex_I,
829 -2.153137e-03f + 3.729342e-03f * _Complex_I, 2.956562e-03f + 3.350104e-18f * _Complex_I,
830 -9.966429e-04f - 1.726236e-03f * _Complex_I },
831 { -1.291288e-03f + 2.236576e-03f * _Complex_I, 3.942788e-03f - 8.935208e-18f * _Complex_I,
832 -2.798347e-03f - 4.846880e-03f * _Complex_I, -4.448869e-03f + 7.705666e-03f * _Complex_I,
833 1.522441e-02f - 3.728906e-18f * _Complex_I, -1.175443e-02f - 2.035927e-02f * _Complex_I,
834 -1.417872e-02f + 2.455826e-02f * _Complex_I, 2.350886e-02f + 0.000000e+00f * _Complex_I,
835 -7.612206e-03f - 1.318473e-02f * _Complex_I, -4.448869e-03f + 7.705666e-03f * _Complex_I,
836 5.596695e-03f + 1.370795e-18f * _Complex_I, -1.971394e-03f - 3.414555e-03f * _Complex_I,
837 -1.291288e-03f + 2.236576e-03f * _Complex_I },
838 { 2.779286e-03f - 1.361458e-18f * _Complex_I, -2.194126e-03f - 3.800338e-03f * _Complex_I,
839 -3.057720e-03f + 5.296126e-03f * _Complex_I, 9.725261e-03f - 2.382002e-18f * _Complex_I,
840 -8.649261e-03f - 1.498096e-02f * _Complex_I, -1.417667e-02f + 2.455472e-02f * _Complex_I,
841 3.552610e-02f + 0.000000e+00f * _Complex_I, -1.417667e-02f - 2.455472e-02f * _Complex_I,
842 -8.649261e-03f + 1.498096e-02f * _Complex_I, 9.725261e-03f + 2.382002e-18f * _Complex_I,
843 -3.057720e-03f - 5.296126e-03f * _Complex_I, -2.194126e-03f + 3.800338e-03f * _Complex_I,
844 2.779286e-03f + 1.361458e-18f * _Complex_I },
845 { -1.291288e-03f - 2.236576e-03f * _Complex_I, -1.971394e-03f + 3.414555e-03f * _Complex_I,
846 5.596695e-03f - 1.370795e-18f * _Complex_I, -4.448869e-03f - 7.705666e-03f * _Complex_I,
847 -7.612206e-03f + 1.318473e-02f * _Complex_I, 2.350886e-02f + 0.000000e+00f * _Complex_I,
848 -1.417872e-02f - 2.455826e-02f * _Complex_I, -1.175443e-02f + 2.035927e-02f * _Complex_I,
849 1.522441e-02f + 3.728906e-18f * _Complex_I, -4.448869e-03f - 7.705666e-03f * _Complex_I,
850 -2.798347e-03f + 4.846880e-03f * _Complex_I, 3.942788e-03f + 8.935208e-18f * _Complex_I,
851 -1.291288e-03f - 2.236576e-03f * _Complex_I },
852 { -9.966429e-04f + 1.726236e-03f * _Complex_I, 2.956562e-03f - 3.350104e-18f * _Complex_I,
853 -2.153137e-03f - 3.729342e-03f * _Complex_I, -3.294955e-03f + 5.707029e-03f * _Complex_I,
854 1.087378e-02f + 0.000000e+00f * _Complex_I, -7.782088e-03f - 1.347897e-02f * _Complex_I,
855 -8.842875e-03f + 1.531631e-02f * _Complex_I, 1.556418e-02f + 3.812124e-18f * _Complex_I,
856 -5.436890e-03f - 9.416970e-03f * _Complex_I, -3.294955e-03f + 5.707029e-03f * _Complex_I,
857 4.306274e-03f + 2.109466e-18f * _Complex_I, -1.478281e-03f - 2.560458e-03f * _Complex_I,
858 -9.966429e-04f + 1.726236e-03f * _Complex_I },
859 { 1.483526e-03f - 3.633590e-19f * _Complex_I, -1.074793e-03f - 1.861596e-03f * _Complex_I,
860 -1.447448e-03f + 2.507053e-03f * _Complex_I, 3.952416e-03f + 0.000000e+00f * _Complex_I,
861 -3.496688e-03f - 6.056441e-03f * _Complex_I, -4.898024e-03f + 8.483627e-03f * _Complex_I,
862 1.070518e-02f + 2.622012e-18f * _Complex_I, -4.898024e-03f - 8.483627e-03f * _Complex_I,
863 -3.496688e-03f + 6.056441e-03f * _Complex_I, 3.952416e-03f + 1.936126e-18f * _Complex_I,
864 -1.447448e-03f - 2.507053e-03f * _Complex_I, -1.074793e-03f + 1.861596e-03f * _Complex_I,
865 1.483526e-03f + 1.090077e-18f * _Complex_I },
866 { -5.883876e-04f - 1.019117e-03f * _Complex_I, -8.573982e-04f + 1.485057e-03f * _Complex_I,
867 2.360729e-03f + 0.000000e+00f * _Complex_I, -1.483082e-03f - 2.568774e-03f * _Complex_I,
868 -2.466681e-03f + 4.272417e-03f * _Complex_I, 6.593085e-03f + 1.614840e-18f * _Complex_I,
869 -3.546477e-03f - 6.142678e-03f * _Complex_I, -3.296542e-03f + 5.709779e-03f * _Complex_I,
870 4.933362e-03f + 2.416651e-18f * _Complex_I, -1.483082e-03f - 2.568774e-03f * _Complex_I,
871 -1.180365e-03f + 2.044451e-03f * _Complex_I, 1.714796e-03f + 1.260012e-18f * _Complex_I,
872 -5.883876e-04f - 1.019117e-03f * _Complex_I },
873 { -5.588854e-04f + 9.680179e-04f * _Complex_I, 1.294971e-03f + 0.000000e+00f * _Complex_I,
874 -7.682939e-04f - 1.330724e-03f * _Complex_I, -9.123802e-04f + 1.580289e-03f * _Complex_I,
875 3.082869e-03f + 3.493222e-18f * _Complex_I, -2.189912e-03f - 3.793039e-03f * _Complex_I,
876 -2.394173e-03f + 4.146830e-03f * _Complex_I, 4.379825e-03f + 9.925627e-18f * _Complex_I,
877 -1.541434e-03f - 2.669842e-03f * _Complex_I, -9.123802e-04f + 1.580289e-03f * _Complex_I,
878 1.536588e-03f + 1.129066e-18f * _Complex_I, -6.474856e-04f - 1.121478e-03f * _Complex_I,
879 -5.588854e-04f + 9.680179e-04f * _Complex_I },
880 { 9.129120e-04f + 0.000000e+00f * _Complex_I, -5.925973e-04f - 1.026409e-03f * _Complex_I,
881 -5.989682e-04f + 1.037443e-03f * _Complex_I, 1.158755e-03f + 2.838131e-19f * _Complex_I,
882 -8.992493e-04f - 1.557545e-03f * _Complex_I, -1.283187e-03f + 2.222546e-03f * _Complex_I,
883 2.730635e-03f + 1.337625e-18f * _Complex_I, -1.283187e-03f - 2.222546e-03f * _Complex_I,
884 -8.992493e-04f + 1.557545e-03f * _Complex_I, 1.158755e-03f + 8.514393e-19f * _Complex_I,
885 -5.989682e-04f - 1.037443e-03f * _Complex_I, -5.925973e-04f + 1.026409e-03f * _Complex_I,
886 9.129120e-04f + 8.943958e-19f * _Complex_I } },
887 { { 8.228091e-04f + 0.000000e+00f * _Complex_I, -5.365069e-04f + 9.292572e-04f * _Complex_I,
888 -6.011501e-04f - 1.041223e-03f * _Complex_I, 1.249890e-03f - 3.061346e-19f * _Complex_I,
889 -7.632708e-04f + 1.322024e-03f * _Complex_I, -9.846035e-04f - 1.705383e-03f * _Complex_I,
890 2.080486e-03f - 1.019144e-18f * _Complex_I, -9.846035e-04f + 1.705383e-03f * _Complex_I,
891 -7.632708e-04f - 1.322024e-03f * _Complex_I, 1.249890e-03f - 9.184039e-19f * _Complex_I,
892 -6.011501e-04f + 1.041223e-03f * _Complex_I, -5.365069e-04f - 9.292572e-04f * _Complex_I,
893 8.228091e-04f - 8.061204e-19f * _Complex_I },
894 { -5.616336e-04f - 9.727779e-04f * _Complex_I, 1.382894e-03f + 0.000000e+00f * _Complex_I,
895 -8.694311e-04f + 1.505899e-03f * _Complex_I, -9.721139e-04f - 1.683751e-03f * _Complex_I,
896 2.446785e-03f - 2.772471e-18f * _Complex_I, -1.605471e-03f + 2.780758e-03f * _Complex_I,
897 -1.832781e-03f - 3.174469e-03f * _Complex_I, 3.210942e-03f - 7.276687e-18f * _Complex_I,
898 -1.223392e-03f + 2.118978e-03f * _Complex_I, -9.721139e-04f - 1.683751e-03f * _Complex_I,
899 1.738862e-03f - 1.277695e-18f * _Complex_I, -6.914471e-04f + 1.197621e-03f * _Complex_I,
900 -5.616336e-04f - 9.727779e-04f * _Complex_I },
901 { -5.723872e-04f + 9.914038e-04f * _Complex_I, -8.302721e-04f - 1.438073e-03f * _Complex_I,
902 2.445280e-03f + 0.000000e+00f * _Complex_I, -1.378399e-03f + 2.387458e-03f * _Complex_I,
903 -1.882898e-03f - 3.261274e-03f * _Complex_I, 4.921549e-03f - 1.205432e-18f * _Complex_I,
904 -2.760152e-03f + 4.780723e-03f * _Complex_I, -2.460774e-03f - 4.262186e-03f * _Complex_I,
905 3.765795e-03f - 1.844708e-18f * _Complex_I, -1.378399e-03f + 2.387458e-03f * _Complex_I,
906 -1.222640e-03f - 2.117675e-03f * _Complex_I, 1.660544e-03f - 1.220148e-18f * _Complex_I,
907 -5.723872e-04f + 9.914038e-04f * _Complex_I },
908 { 1.226482e-03f + 3.004015e-19f * _Complex_I, -9.600816e-04f + 1.662910e-03f * _Complex_I,
909 -1.495900e-03f - 2.590974e-03f * _Complex_I, 3.833507e-03f + 0.000000e+00f * _Complex_I,
910 -3.167257e-03f + 5.485850e-03f * _Complex_I, -4.303595e-03f - 7.454046e-03f * _Complex_I,
911 9.412791e-03f - 2.305469e-18f * _Complex_I, -4.303595e-03f + 7.454046e-03f * _Complex_I,
912 -3.167257e-03f - 5.485850e-03f * _Complex_I, 3.833507e-03f - 1.877877e-18f * _Complex_I,
913 -1.495900e-03f + 2.590974e-03f * _Complex_I, -9.600816e-04f - 1.662910e-03f * _Complex_I,
914 1.226482e-03f - 9.012046e-19f * _Complex_I },
915 { -9.898007e-04f - 1.714385e-03f * _Complex_I, 3.215120e-03f + 3.643077e-18f * _Complex_I,
916 -2.507621e-03f + 4.343327e-03f * _Complex_I, -3.557798e-03f - 6.162286e-03f * _Complex_I,
917 1.105198e-02f + 0.000000e+00f * _Complex_I, -7.691179e-03f + 1.332151e-02f * _Complex_I,
918 -8.705793e-03f - 1.507888e-02f * _Complex_I, 1.538236e-02f - 3.767591e-18f * _Complex_I,
919 -5.525988e-03f + 9.571292e-03f * _Complex_I, -3.557798e-03f - 6.162286e-03f * _Complex_I,
920 5.015242e-03f - 2.456760e-18f * _Complex_I, -1.607560e-03f + 2.784375e-03f * _Complex_I,
921 -9.898007e-04f - 1.714385e-03f * _Complex_I },
922 { -1.414655e-03f + 2.450254e-03f * _Complex_I, -2.341263e-03f - 4.055186e-03f * _Complex_I,
923 6.915775e-03f + 1.693876e-18f * _Complex_I, -5.086403e-03f + 8.809908e-03f * _Complex_I,
924 -8.062191e-03f - 1.396412e-02f * _Complex_I, 2.415333e-02f + 0.000000e+00f * _Complex_I,
925 -1.451128e-02f + 2.513428e-02f * _Complex_I, -1.207667e-02f - 2.091740e-02f * _Complex_I,
926 1.612438e-02f - 3.949335e-18f * _Complex_I, -5.086403e-03f + 8.809908e-03f * _Complex_I,
927 -3.457887e-03f - 5.989237e-03f * _Complex_I, 4.682526e-03f - 1.061161e-17f * _Complex_I,
928 -1.414655e-03f + 2.450254e-03f * _Complex_I },
929 { 3.039574e-03f + 1.488962e-18f * _Complex_I, -2.598226e-03f + 4.500260e-03f * _Complex_I,
930 -3.750909e-03f - 6.496765e-03f * _Complex_I, 1.119776e-02f + 2.742661e-18f * _Complex_I,
931 -9.210579e-03f + 1.595319e-02f * _Complex_I, -1.464762e-02f - 2.537042e-02f * _Complex_I,
932 3.672076e-02f + 0.000000e+00f * _Complex_I, -1.464762e-02f + 2.537042e-02f * _Complex_I,
933 -9.210579e-03f - 1.595319e-02f * _Complex_I, 1.119776e-02f - 2.742661e-18f * _Complex_I,
934 -3.750909e-03f + 6.496765e-03f * _Complex_I, -2.598226e-03f - 4.500260e-03f * _Complex_I,
935 3.039574e-03f - 1.488962e-18f * _Complex_I },
936 { -1.414655e-03f - 2.450254e-03f * _Complex_I, 4.682526e-03f + 1.061161e-17f * _Complex_I,
937 -3.457887e-03f + 5.989237e-03f * _Complex_I, -5.086403e-03f - 8.809908e-03f * _Complex_I,
938 1.612438e-02f + 3.949335e-18f * _Complex_I, -1.207667e-02f + 2.091740e-02f * _Complex_I,
939 -1.451128e-02f - 2.513428e-02f * _Complex_I, 2.415333e-02f + 0.000000e+00f * _Complex_I,
940 -8.062191e-03f + 1.396412e-02f * _Complex_I, -5.086403e-03f - 8.809908e-03f * _Complex_I,
941 6.915775e-03f - 1.693876e-18f * _Complex_I, -2.341263e-03f + 4.055186e-03f * _Complex_I,
942 -1.414655e-03f - 2.450254e-03f * _Complex_I },
943 { -9.898007e-04f + 1.714385e-03f * _Complex_I, -1.607560e-03f - 2.784375e-03f * _Complex_I,
944 5.015242e-03f + 2.456760e-18f * _Complex_I, -3.557798e-03f + 6.162286e-03f * _Complex_I,
945 -5.525988e-03f - 9.571292e-03f * _Complex_I, 1.538236e-02f + 3.767591e-18f * _Complex_I,
946 -8.705793e-03f + 1.507888e-02f * _Complex_I, -7.691179e-03f - 1.332151e-02f * _Complex_I,
947 1.105198e-02f + 0.000000e+00f * _Complex_I, -3.557798e-03f + 6.162286e-03f * _Complex_I,
948 -2.507621e-03f - 4.343327e-03f * _Complex_I, 3.215120e-03f - 3.643077e-18f * _Complex_I,
949 -9.898007e-04f + 1.714385e-03f * _Complex_I },
950 { 1.226482e-03f + 9.012046e-19f * _Complex_I, -9.600816e-04f + 1.662910e-03f * _Complex_I,
951 -1.495900e-03f - 2.590974e-03f * _Complex_I, 3.833507e-03f + 1.877877e-18f * _Complex_I,
952 -3.167257e-03f + 5.485850e-03f * _Complex_I, -4.303595e-03f - 7.454046e-03f * _Complex_I,
953 9.412791e-03f + 2.305469e-18f * _Complex_I, -4.303595e-03f + 7.454046e-03f * _Complex_I,
954 -3.167257e-03f - 5.485850e-03f * _Complex_I, 3.833507e-03f + 0.000000e+00f * _Complex_I,
955 -1.495900e-03f + 2.590974e-03f * _Complex_I, -9.600816e-04f - 1.662910e-03f * _Complex_I,
956 1.226482e-03f - 3.004015e-19f * _Complex_I },
957 { -5.723872e-04f - 9.914038e-04f * _Complex_I, 1.660544e-03f + 1.220148e-18f * _Complex_I,
958 -1.222640e-03f + 2.117675e-03f * _Complex_I, -1.378399e-03f - 2.387458e-03f * _Complex_I,
959 3.765795e-03f + 1.844708e-18f * _Complex_I, -2.460774e-03f + 4.262186e-03f * _Complex_I,
960 -2.760152e-03f - 4.780723e-03f * _Complex_I, 4.921549e-03f + 1.205432e-18f * _Complex_I,
961 -1.882898e-03f + 3.261274e-03f * _Complex_I, -1.378399e-03f - 2.387458e-03f * _Complex_I,
962 2.445280e-03f + 0.000000e+00f * _Complex_I, -8.302721e-04f + 1.438073e-03f * _Complex_I,
963 -5.723872e-04f - 9.914038e-04f * _Complex_I },
964 { -5.616336e-04f + 9.727779e-04f * _Complex_I, -6.914471e-04f - 1.197621e-03f * _Complex_I,
965 1.738862e-03f + 1.277695e-18f * _Complex_I, -9.721139e-04f + 1.683751e-03f * _Complex_I,
966 -1.223392e-03f - 2.118978e-03f * _Complex_I, 3.210942e-03f + 7.276687e-18f * _Complex_I,
967 -1.832781e-03f + 3.174469e-03f * _Complex_I, -1.605471e-03f - 2.780758e-03f * _Complex_I,
968 2.446785e-03f + 2.772471e-18f * _Complex_I, -9.721139e-04f + 1.683751e-03f * _Complex_I,
969 -8.694311e-04f - 1.505899e-03f * _Complex_I, 1.382894e-03f + 0.000000e+00f * _Complex_I,
970 -5.616336e-04f + 9.727779e-04f * _Complex_I },
971 { 8.228091e-04f + 8.061204e-19f * _Complex_I, -5.365069e-04f + 9.292572e-04f * _Complex_I,
972 -6.011501e-04f - 1.041223e-03f * _Complex_I, 1.249890e-03f + 9.184039e-19f * _Complex_I,
973 -7.632708e-04f + 1.322024e-03f * _Complex_I, -9.846035e-04f - 1.705383e-03f * _Complex_I,
974 2.080486e-03f + 1.019144e-18f * _Complex_I, -9.846035e-04f + 1.705383e-03f * _Complex_I,
975 -7.632708e-04f - 1.322024e-03f * _Complex_I, 1.249890e-03f + 3.061346e-19f * _Complex_I,
976 -6.011501e-04f + 1.041223e-03f * _Complex_I, -5.365069e-04f - 9.292572e-04f * _Complex_I,
977 8.228091e-04f + 0.000000e+00f * _Complex_I } },
978 { { 1.221201e-03f + 5.982162e-19f * _Complex_I, -1.773498e-03f - 6.515727e-19f * _Complex_I,
979 1.246697e-03f + 3.053526e-19f * _Complex_I, -8.215306e-04f - 1.006085e-19f * _Complex_I,
980 7.609372e-04f + 0.000000e+00f * _Complex_I, -4.863927e-04f + 5.956592e-20f * _Complex_I,
981 4.882100e-04f - 1.195770e-19f * _Complex_I, -4.863927e-04f + 1.786978e-19f * _Complex_I,
982 7.609372e-04f - 3.727517e-19f * _Complex_I, -8.215306e-04f + 5.030424e-19f * _Complex_I,
983 1.246697e-03f - 9.160579e-19f * _Complex_I, -1.773498e-03f + 1.520336e-18f * _Complex_I,
984 1.221201e-03f - 1.196432e-18f * _Complex_I },
985 { 7.406884e-04f - 1.282910e-03f * _Complex_I, -1.025411e-03f + 1.776065e-03f * _Complex_I,
986 7.186273e-04f - 1.244699e-03f * _Complex_I, -4.025606e-04f + 6.972554e-04f * _Complex_I,
987 5.908383e-04f - 1.023362e-03f * _Complex_I, -1.125190e-03f + 1.948886e-03f * _Complex_I,
988 1.432695e-03f - 2.481501e-03f * _Complex_I, -1.125190e-03f + 1.948886e-03f * _Complex_I,
989 5.908383e-04f - 1.023362e-03f * _Complex_I, -4.025606e-04f + 6.972554e-04f * _Complex_I,
990 7.186273e-04f - 1.244699e-03f * _Complex_I, -1.025411e-03f + 1.776065e-03f * _Complex_I,
991 7.406884e-04f - 1.282910e-03f * _Complex_I },
992 { -7.162255e-04f - 1.240539e-03f * _Complex_I, 8.961176e-04f + 1.552121e-03f * _Complex_I,
993 -6.705589e-04f - 1.161442e-03f * _Complex_I, 6.187140e-04f + 1.071644e-03f * _Complex_I,
994 -1.165433e-03f - 2.018589e-03f * _Complex_I, 1.948120e-03f + 3.374242e-03f * _Complex_I,
995 -2.297663e-03f - 3.979669e-03f * _Complex_I, 1.948120e-03f + 3.374242e-03f * _Complex_I,
996 -1.165433e-03f - 2.018589e-03f * _Complex_I, 6.187140e-04f + 1.071644e-03f * _Complex_I,
997 -6.705589e-04f - 1.161442e-03f * _Complex_I, 8.961176e-04f + 1.552121e-03f * _Complex_I,
998 -7.162255e-04f - 1.240539e-03f * _Complex_I },
999 { -1.280260e-03f - 7.839331e-19f * _Complex_I, 1.987108e-03f + 9.734024e-19f * _Complex_I,
1000 -2.614019e-03f - 9.603749e-19f * _Complex_I, 3.635167e-03f + 8.903590e-19f * _Complex_I,
1001 -4.954867e-03f - 6.067962e-19f * _Complex_I, 6.653220e-03f + 0.000000e+00f * _Complex_I,
1002 -7.600546e-03f + 9.307984e-19f * _Complex_I, 6.653220e-03f - 1.629569e-18f * _Complex_I,
1003 -4.954867e-03f + 1.820389e-18f * _Complex_I, 3.635167e-03f - 1.780718e-18f * _Complex_I,
1004 -2.614019e-03f + 1.600625e-18f * _Complex_I, 1.987108e-03f - 1.460104e-18f * _Complex_I,
1005 -1.280260e-03f + 1.097506e-18f * _Complex_I },
1006 { -5.756945e-04f + 9.971322e-04f * _Complex_I, 1.268614e-03f - 2.197304e-03f * _Complex_I,
1007 -2.421407e-03f + 4.194000e-03f * _Complex_I, 4.045715e-03f - 7.007384e-03f * _Complex_I,
1008 -5.527367e-03f + 9.573681e-03f * _Complex_I, 6.837207e-03f - 1.184239e-02f * _Complex_I,
1009 -7.288212e-03f + 1.262355e-02f * _Complex_I, 6.837207e-03f - 1.184239e-02f * _Complex_I,
1010 -5.527367e-03f + 9.573681e-03f * _Complex_I, 4.045715e-03f - 7.007384e-03f * _Complex_I,
1011 -2.421407e-03f + 4.194000e-03f * _Complex_I, 1.268614e-03f - 2.197304e-03f * _Complex_I,
1012 -5.756945e-04f + 9.971322e-04f * _Complex_I },
1013 { 7.349896e-04f + 1.273039e-03f * _Complex_I, -1.748057e-03f - 3.027723e-03f * _Complex_I,
1014 3.332671e-03f + 5.772355e-03f * _Complex_I, -6.051736e-03f - 1.048191e-02f * _Complex_I,
1015 9.842376e-03f + 1.704749e-02f * _Complex_I, -1.401169e-02f - 2.426897e-02f * _Complex_I,
1016 1.598601e-02f + 2.768858e-02f * _Complex_I, -1.401169e-02f - 2.426897e-02f * _Complex_I,
1017 9.842376e-03f + 1.704749e-02f * _Complex_I, -6.051736e-03f - 1.048191e-02f * _Complex_I,
1018 3.332671e-03f + 5.772355e-03f * _Complex_I, -1.748057e-03f - 3.027723e-03f * _Complex_I,
1019 7.349896e-04f + 1.273039e-03f * _Complex_I },
1020 { 1.400383e-03f + 1.028985e-18f * _Complex_I, -3.545886e-03f - 2.171229e-18f * _Complex_I,
1021 7.289370e-03f + 3.570761e-18f * _Complex_I, -1.418908e-02f - 5.212982e-18f * _Complex_I,
1022 2.520839e-02f + 6.174275e-18f * _Complex_I, -3.934772e-02f - 4.818706e-18f * _Complex_I,
1023 4.797481e-02f + 0.000000e+00f * _Complex_I, -3.934772e-02f + 4.818706e-18f * _Complex_I,
1024 2.520839e-02f - 6.174275e-18f * _Complex_I, -1.418908e-02f + 5.212982e-18f * _Complex_I,
1025 7.289370e-03f - 3.570761e-18f * _Complex_I, -3.545886e-03f + 2.171229e-18f * _Complex_I,
1026 1.400383e-03f - 1.028985e-18f * _Complex_I },
1027 { 7.349896e-04f - 1.273039e-03f * _Complex_I, -1.748057e-03f + 3.027723e-03f * _Complex_I,
1028 3.332671e-03f - 5.772355e-03f * _Complex_I, -6.051736e-03f + 1.048191e-02f * _Complex_I,
1029 9.842376e-03f - 1.704749e-02f * _Complex_I, -1.401169e-02f + 2.426897e-02f * _Complex_I,
1030 1.598601e-02f - 2.768858e-02f * _Complex_I, -1.401169e-02f + 2.426897e-02f * _Complex_I,
1031 9.842376e-03f - 1.704749e-02f * _Complex_I, -6.051736e-03f + 1.048191e-02f * _Complex_I,
1032 3.332671e-03f - 5.772355e-03f * _Complex_I, -1.748057e-03f + 3.027723e-03f * _Complex_I,
1033 7.349896e-04f - 1.273039e-03f * _Complex_I },
1034 { -5.756945e-04f - 9.971322e-04f * _Complex_I, 1.268614e-03f + 2.197304e-03f * _Complex_I,
1035 -2.421407e-03f - 4.194000e-03f * _Complex_I, 4.045715e-03f + 7.007384e-03f * _Complex_I,
1036 -5.527367e-03f - 9.573681e-03f * _Complex_I, 6.837207e-03f + 1.184239e-02f * _Complex_I,
1037 -7.288212e-03f - 1.262355e-02f * _Complex_I, 6.837207e-03f + 1.184239e-02f * _Complex_I,
1038 -5.527367e-03f - 9.573681e-03f * _Complex_I, 4.045715e-03f + 7.007384e-03f * _Complex_I,
1039 -2.421407e-03f - 4.194000e-03f * _Complex_I, 1.268614e-03f + 2.197304e-03f * _Complex_I,
1040 -5.756945e-04f - 9.971322e-04f * _Complex_I },
1041 { -1.280260e-03f - 1.097506e-18f * _Complex_I, 1.987108e-03f + 1.460104e-18f * _Complex_I,
1042 -2.614019e-03f - 1.600625e-18f * _Complex_I, 3.635167e-03f + 1.780718e-18f * _Complex_I,
1043 -4.954867e-03f - 1.820389e-18f * _Complex_I, 6.653220e-03f + 1.629569e-18f * _Complex_I,
1044 -7.600546e-03f - 9.307984e-19f * _Complex_I, 6.653220e-03f + 0.000000e+00f * _Complex_I,
1045 -4.954867e-03f + 6.067962e-19f * _Complex_I, 3.635167e-03f - 8.903590e-19f * _Complex_I,
1046 -2.614019e-03f + 9.603749e-19f * _Complex_I, 1.987108e-03f - 9.734024e-19f * _Complex_I,
1047 -1.280260e-03f + 7.839331e-19f * _Complex_I },
1048 { -7.162255e-04f + 1.240539e-03f * _Complex_I, 8.961176e-04f - 1.552121e-03f * _Complex_I,
1049 -6.705589e-04f + 1.161442e-03f * _Complex_I, 6.187140e-04f - 1.071644e-03f * _Complex_I,
1050 -1.165433e-03f + 2.018589e-03f * _Complex_I, 1.948120e-03f - 3.374242e-03f * _Complex_I,
1051 -2.297663e-03f + 3.979669e-03f * _Complex_I, 1.948120e-03f - 3.374242e-03f * _Complex_I,
1052 -1.165433e-03f + 2.018589e-03f * _Complex_I, 6.187140e-04f - 1.071644e-03f * _Complex_I,
1053 -6.705589e-04f + 1.161442e-03f * _Complex_I, 8.961176e-04f - 1.552121e-03f * _Complex_I,
1054 -7.162255e-04f + 1.240539e-03f * _Complex_I },
1055 { 7.406884e-04f + 1.282910e-03f * _Complex_I, -1.025411e-03f - 1.776065e-03f * _Complex_I,
1056 7.186273e-04f + 1.244699e-03f * _Complex_I, -4.025606e-04f - 6.972554e-04f * _Complex_I,
1057 5.908383e-04f + 1.023362e-03f * _Complex_I, -1.125190e-03f - 1.948886e-03f * _Complex_I,
1058 1.432695e-03f + 2.481501e-03f * _Complex_I, -1.125190e-03f - 1.948886e-03f * _Complex_I,
1059 5.908383e-04f + 1.023362e-03f * _Complex_I, -4.025606e-04f - 6.972554e-04f * _Complex_I,
1060 7.186273e-04f + 1.244699e-03f * _Complex_I, -1.025411e-03f - 1.776065e-03f * _Complex_I,
1061 7.406884e-04f + 1.282910e-03f * _Complex_I },
1062 { 1.221201e-03f + 1.196432e-18f * _Complex_I, -1.773498e-03f - 1.520336e-18f * _Complex_I,
1063 1.246697e-03f + 9.160579e-19f * _Complex_I, -8.215306e-04f - 5.030424e-19f * _Complex_I,
1064 7.609372e-04f + 3.727517e-19f * _Complex_I, -4.863927e-04f - 1.786978e-19f * _Complex_I,
1065 4.882100e-04f + 1.195770e-19f * _Complex_I, -4.863927e-04f - 5.956592e-20f * _Complex_I,
1066 7.609372e-04f + 0.000000e+00f * _Complex_I, -8.215306e-04f + 1.006085e-19f * _Complex_I,
1067 1.246697e-03f - 3.053526e-19f * _Complex_I, -1.773498e-03f + 6.515727e-19f * _Complex_I,
1068 1.221201e-03f - 5.982162e-19f * _Complex_I } } };
1069
1070 const size_t buffer_size = (size_t)TS * TS * (ndir * 4 + 7) * sizeof(float);
1071 size_t padded_buffer_size;
1072 char *const all_buffers = (char *)dt_pixelpipe_cache_alloc_perthread(buffer_size, sizeof(char), &padded_buffer_size);
1073 if(IS_NULL_PTR(all_buffers))
1074 {
1075 fprintf(stderr, "[demosaic] not able to allocate FDC base buffers\n");
1076 return;
1077 }
1078
1079 /* Map a green hexagon around each non-green pixel and vice versa: */
1080 for(int row = 0; row < 3; row++)
1081 for(int col = 0; col < 3; col++)
1082 for(int ng = 0, d = 0; d < 10; d += 2)
1083 {
1084 int g = FCxtrans(row, col, NULL, xtrans) == 1;
1085 if(FCxtrans(row + orth[d], col + orth[d + 2], NULL, xtrans) == 1)
1086 ng = 0;
1087 else
1088 ng++;
1089 // if there are four non-green pixels adjacent in cardinal
1090 // directions, this is the solitary green pixel
1091 if(ng == 4)
1092 {
1093 sgrow = row;
1094 sgcol = col;
1095 }
1096 if(ng == g + 1)
1097 for(int c = 0; c < 8; c++)
1098 {
1099 int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
1100 int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
1101 // offset within TSxTS buffer
1102 allhex[row][col][c ^ (g * 2 & d)] = h + v * TS;
1103 }
1104 }
1105
1106 // extra passes propagates out errors at edges, hence need more padding
1107 const int pad_tile = 13;
1108
1109 // calculate offsets for this roi
1110 int rowoffset = 0;
1111 int coloffset = 0;
1112 for(int row = 0; row < 6; row++)
1113 {
1114 if(!((row - sgrow) % 3))
1115 {
1116 for(int col = 0; col < 6; col++)
1117 {
1118 if(!((col - sgcol) % 3) && (FCxtrans(row, col + 1, roi_in, xtrans) == 0))
1119 {
1120 rowoffset = 37 - row - pad_tile; // 1 plus a generous multiple of 6
1121 coloffset = 37 - col - pad_tile; // to avoid that this value gets negative
1122 break;
1123 }
1124 }
1125 break;
1126 }
1127 }
1128
1129 // depending on the iso, use either a hybrid approach for chroma, or pure fdc
1130 float hybrid_fdc[2] = { 1.0f, 0.0f };
1131 const int xover_iso = dt_conf_get_int("plugins/darkroom/demosaic/fdc_xover_iso");
1132 int iso = self->dev->image_storage.exif_iso;
1133 if(iso > xover_iso)
1134 {
1135 hybrid_fdc[0] = 0.0f;
1136 hybrid_fdc[1] = 1.0f;
1137 }
1139 // step through TSxTS cells of image, each tile overlapping the
1140 // prior as interpolation needs a substantial border
1141 for(int top = -pad_tile; top < height - pad_tile; top += TS - (pad_tile * 2))
1142 {
1143 char *const buffer = dt_get_perthread(all_buffers, padded_buffer_size);
1144 // rgb points to ndir TSxTS tiles of 3 channels (R, G, and B)
1145 float(*rgb)[TS][TS][3] = (float(*)[TS][TS][3])buffer;
1146 // yuv points to 3 channel (Y, u, and v) TSxTS tiles
1147 // note that channels come before tiles to allow for a
1148 // vectorization optimization when building drv[] from yuv[]
1149 float (*const yuv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1150 // drv points to ndir TSxTS tiles, each a single channel of derivatives
1151 float (*const drv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3 + 3) * sizeof(float));
1152 // gmin and gmax reuse memory which is used later by yuv buffer;
1153 // each points to a TSxTS tile of single channel data
1154 float (*const gmin)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1155 float (*const gmax)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3 + 1) * sizeof(float));
1156 // homo and homosum reuse memory which is used earlier in the
1157 // loop; each points to ndir single-channel TSxTS tiles
1158 uint8_t (*const homo)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1159 uint8_t (*const homosum)[TS][TS]
1160 = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float) + TS * TS * ndir * sizeof(uint8_t));
1161 // append all fdc related buffers
1162 float complex *fdc_buf_start = (float complex *)(buffer + TS * TS * (ndir * 4 + 3) * sizeof(float));
1163 const int fdc_buf_size = TS * TS;
1164 float(*const i_src) = (float *)fdc_buf_start;
1165 float complex(*const o_src) = fdc_buf_start + fdc_buf_size;
1166 // by the time the chroma values are calculated, o_src can be overwritten.
1167 float(*const fdc_chroma) = (float *)o_src;
1168
1169 for(int left = -pad_tile; left < width - pad_tile; left += TS - (pad_tile * 2))
1170 {
1171 int mrow = MIN(top + TS, height + pad_tile);
1172 int mcol = MIN(left + TS, width + pad_tile);
1173
1174 // Copy current tile from in to image buffer. If border goes
1175 // beyond edges of image, fill with mirrored/interpolated edges.
1176 // The extra border avoids discontinuities at image edges.
1177 for(int row = top; row < mrow; row++)
1178 for(int col = left; col < mcol; col++)
1179 {
1180 float(*const pix) = rgb[0][row - top][col - left];
1181 if((col >= 0) && (row >= 0) && (col < width) && (row < height))
1182 {
1183 const int f = FCxtrans(row, col, roi_in, xtrans);
1184 for(int c = 0; c < 3; c++) pix[c] = (c == f) ? in[roi_in->width * row + col] : 0.f;
1185 *(i_src + TS * (row - top) + (col - left)) = in[roi_in->width * row + col];
1186 }
1187 else
1188 {
1189 // mirror a border pixel if beyond image edge
1190 const int c = FCxtrans(row, col, roi_in, xtrans);
1191 for(int cc = 0; cc < 3; cc++)
1192 if(cc != c)
1193 pix[cc] = 0.0f;
1194 else
1195 {
1196#define TRANSLATE(n, size) ((n >= size) ? (2 * size - n - 2) : abs(n))
1197 const int cy = TRANSLATE(row, height), cx = TRANSLATE(col, width);
1198 if(c == FCxtrans(cy, cx, roi_in, xtrans))
1199 {
1200 pix[c] = in[roi_in->width * cy + cx];
1201 *(i_src + TS * (row - top) + (col - left)) = in[roi_in->width * cy + cx];
1202 }
1203 else
1204 {
1205 // interpolate if mirror pixel is a different color
1206 float sum = 0.0f;
1207 uint8_t count = 0;
1208 for(int y = row - 1; y <= row + 1; y++)
1209 for(int x = col - 1; x <= col + 1; x++)
1210 {
1211 const int yy = TRANSLATE(y, height), xx = TRANSLATE(x, width);
1212 const int ff = FCxtrans(yy, xx, roi_in, xtrans);
1213 if(ff == c)
1214 {
1215 sum += in[roi_in->width * yy + xx];
1216 count++;
1217 }
1218 }
1219 pix[c] = sum / count;
1220 *(i_src + TS * (row - top) + (col - left)) = pix[c];
1221 }
1222 }
1223 }
1224 }
1225
1226 // duplicate rgb[0] to rgb[1], rgb[2], and rgb[3]
1227 for(int c = 1; c <= 3; c++) memcpy(rgb[c], rgb[0], sizeof(*rgb));
1228
1229 // note that successive calculations are inset within the tile
1230 // so as to give enough border data, and there needs to be a 6
1231 // pixel border initially to allow allhex to find neighboring
1232 // pixels
1233
1234 /* Set green1 and green3 to the minimum and maximum allowed values: */
1235 // Run through each red/blue or blue/red pair, setting their g1
1236 // and g3 values to the min/max of green pixels surrounding the
1237 // pair. Use a 3 pixel border as gmin/gmax is used by
1238 // interpolate green which has a 3 pixel border.
1239 const int pad_g1_g3 = 3;
1240 for(int row = top + pad_g1_g3; row < mrow - pad_g1_g3; row++)
1241 {
1242 // setting max to 0.0f signifies that this is a new pair, which
1243 // requires a new min/max calculation of its neighboring greens
1244 float min = FLT_MAX, max = 0.0f;
1245 for(int col = left + pad_g1_g3; col < mcol - pad_g1_g3; col++)
1246 {
1247 // if in row of horizontal red & blue pairs (or processing
1248 // vertical red & blue pairs near image bottom), reset min/max
1249 // between each pair
1250 if(FCxtrans(row, col, roi_in, xtrans) == 1)
1251 {
1252 min = FLT_MAX, max = 0.0f;
1253 continue;
1254 }
1255 // if at start of red & blue pair, calculate min/max of green
1256 // pixels surrounding it; note that while normally using == to
1257 // compare floats is suspect, here the check is if 0.0f has
1258 // explicitly been assigned to max (which signifies a new
1259 // red/blue pair)
1260 if(max == 0.0f)
1261 {
1262 float (*const pix)[3] = &rgb[0][row - top][col - left];
1263 const short *const hex = hexmap(row, col, allhex);
1264 for(int c = 0; c < 6; c++)
1265 {
1266 const float val = pix[hex[c]][1];
1267 if(min > val) min = val;
1268 if(max < val) max = val;
1269 }
1270 }
1271 gmin[row - top][col - left] = min;
1272 gmax[row - top][col - left] = max;
1273 // handle vertical red/blue pairs
1274 switch((row - sgrow) % 3)
1275 {
1276 // hop down a row to second pixel in vertical pair
1277 case 1:
1278 if(row < mrow - 4) row++, col--;
1279 break;
1280 // then if not done with the row hop up and right to next
1281 // vertical red/blue pair, resetting min/max
1282 case 2:
1283 min = FLT_MAX, max = 0.0f;
1284 if((col += 2) < mcol - 4 && row > top + 3) row--;
1285 }
1286 }
1287 }
1288
1289 /* Interpolate green horizontally, vertically, and along both diagonals: */
1290 // need a 3 pixel border here as 3*hex[] can have a 3 unit offset
1291 const int pad_g_interp = 3;
1292 for(int row = top + pad_g_interp; row < mrow - pad_g_interp; row++)
1293 for(int col = left + pad_g_interp; col < mcol - pad_g_interp; col++)
1294 {
1295 float color[8];
1296 int f = FCxtrans(row, col, roi_in, xtrans);
1297 if(f == 1) continue;
1298 float (*const pix)[3] = &rgb[0][row - top][col - left];
1299 const short *const hex = hexmap(row, col, allhex);
1300 // TODO: these constants come from integer math constants in
1301 // dcraw -- calculate them instead from interpolation math
1302 color[0] = 0.6796875f * (pix[hex[1]][1] + pix[hex[0]][1])
1303 - 0.1796875f * (pix[2 * hex[1]][1] + pix[2 * hex[0]][1]);
1304 color[1] = 0.87109375f * pix[hex[3]][1] + pix[hex[2]][1] * 0.13f
1305 + 0.359375f * (pix[0][f] - pix[-hex[2]][f]);
1306 for(int c = 0; c < 2; c++)
1307 color[2 + c] = 0.640625f * pix[hex[4 + c]][1] + 0.359375f * pix[-2 * hex[4 + c]][1]
1308 + 0.12890625f * (2 * pix[0][f] - pix[3 * hex[4 + c]][f] - pix[-3 * hex[4 + c]][f]);
1309 for(int c = 0; c < 4; c++)
1310 rgb[c ^ !((row - sgrow) % 3)][row - top][col - left][1]
1311 = CLAMPS(color[c], gmin[row - top][col - left], gmax[row - top][col - left]);
1312 }
1313
1314 /* Interpolate red and blue values for solitary green pixels: */
1315 const int pad_rb_g = 6;
1316 for(int row = (top - sgrow + pad_rb_g + 2) / 3 * 3 + sgrow; row < mrow - pad_rb_g; row += 3)
1317 for(int col = (left - sgcol + pad_rb_g + 2) / 3 * 3 + sgcol; col < mcol - pad_rb_g; col += 3)
1318 {
1319 float(*rfx)[3] = &rgb[0][row - top][col - left];
1320 int h = FCxtrans(row, col + 1, roi_in, xtrans);
1321 float diff[6] = { 0.0f };
1322 float color[3][8];
1323 for(int i = 1, d = 0; d < 6; d++, i ^= TS ^ 1, h ^= 2)
1324 {
1325 for(int c = 0; c < 2; c++, h ^= 2)
1326 {
1327 float g = 2 * rfx[0][1] - rfx[i << c][1] - rfx[-(i << c)][1];
1328 color[h][d] = g + rfx[i << c][h] + rfx[-(i << c)][h];
1329 if(d > 1)
1330 diff[d] += SQR(rfx[i << c][1] - rfx[-(i << c)][1] - rfx[i << c][h] + rfx[-(i << c)][h]) + SQR(g);
1331 }
1332 if(d > 1 && (d & 1))
1333 if(diff[d - 1] < diff[d])
1334 for(int c = 0; c < 2; c++) color[c * 2][d] = color[c * 2][d - 1];
1335 if(d < 2 || (d & 1))
1336 {
1337 for(int c = 0; c < 2; c++) rfx[0][c * 2] = color[c * 2][d] / 2.f;
1338 rfx += TS * TS;
1339 }
1340 }
1341 }
1342
1343 /* Interpolate red for blue pixels and vice versa: */
1344 const int pad_rb_br = 6;
1345 for(int row = top + pad_rb_br; row < mrow - pad_rb_br; row++)
1346 for(int col = left + pad_rb_br; col < mcol - pad_rb_br; col++)
1347 {
1348 int f = 2 - FCxtrans(row, col, roi_in, xtrans);
1349 if(f == 1) continue;
1350 float(*rfx)[3] = &rgb[0][row - top][col - left];
1351 int c = (row - sgrow) % 3 ? TS : 1;
1352 int h = 3 * (c ^ TS ^ 1);
1353 for(int d = 0; d < 4; d++, rfx += TS * TS)
1354 {
1355 int i = d > 1 || ((d ^ c) & 1)
1356 || ((fabsf(rfx[0][1] - rfx[c][1]) + fabsf(rfx[0][1] - rfx[-c][1]))
1357 < 2.f * (fabsf(rfx[0][1] - rfx[h][1]) + fabsf(rfx[0][1] - rfx[-h][1]))) ? c : h;
1358 rfx[0][f] = (rfx[i][f] + rfx[-i][f] + 2.f * rfx[0][1] - rfx[i][1] - rfx[-i][1]) / 2.f;
1359 }
1360 }
1361
1362 /* Fill in red and blue for 2x2 blocks of green: */
1363 const int pad_g22 = 8;
1364 for(int row = top + pad_g22; row < mrow - pad_g22; row++)
1365 if((row - sgrow) % 3)
1366 for(int col = left + pad_g22; col < mcol - pad_g22; col++)
1367 if((col - sgcol) % 3)
1368 {
1369 float redblue[3][3];
1370 float(*rfx)[3] = &rgb[0][row - top][col - left];
1371 const short *const hex = hexmap(row, col, allhex);
1372 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
1373 if(hex[d] + hex[d + 1])
1374 {
1375 float g = 3.f * rfx[0][1] - 2.f * rfx[hex[d]][1] - rfx[hex[d + 1]][1];
1376 for(int c = 0; c < 4; c += 2)
1377 {
1378 rfx[0][c] = (g + 2.f * rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 3.f;
1379 redblue[d][c] = rfx[0][c];
1380 }
1381 }
1382 else
1383 {
1384 float g = 2.f * rfx[0][1] - rfx[hex[d]][1] - rfx[hex[d + 1]][1];
1385 for(int c = 0; c < 4; c += 2)
1386 {
1387 rfx[0][c] = (g + rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 2.f;
1388 redblue[d][c] = rfx[0][c];
1389 }
1390 }
1391 // to fill in red and blue also for diagonal directions
1392 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
1393 for(int c = 0; c < 4; c += 2) rfx[0][c] = (redblue[0][c] + redblue[2][c]) * 0.5f;
1394 }
1395
1396 // jump back to the first set of rgb buffers (this is a nop
1397 // unless on the second pass)
1398 rgb = (float(*)[TS][TS][3])buffer;
1399 // from here on out, mainly are working within the current tile
1400 // rather than in reference to the image, so don't offset
1401 // mrow/mcol by top/left of tile
1402 mrow -= top;
1403 mcol -= left;
1404
1405 /* Convert to perceptual colorspace and differentiate in all directions: */
1406 // Original dcraw algorithm uses CIELab as perceptual space
1407 // (presumably coming from original AHD) and converts taking
1408 // camera matrix into account. Now use YPbPr which requires much
1409 // less code and is nearly indistinguishable. It assumes the
1410 // camera RGB is roughly linear.
1411 for(int d = 0; d < ndir; d++)
1412 {
1413 const int pad_yuv = 8;
1414 for(int row = pad_yuv; row < mrow - pad_yuv; row++)
1415 for(int col = pad_yuv; col < mcol - pad_yuv; col++)
1416 {
1417 float *rx = rgb[d][row][col];
1418 // use ITU-R BT.2020 YPbPr, which is great, but could use
1419 // a better/simpler choice? note that imageop.h provides
1420 // dt_iop_RGB_to_YCbCr which uses Rec. 601 conversion,
1421 // which appears less good with specular highlights
1422 float y = 0.2627f * rx[0] + 0.6780f * rx[1] + 0.0593f * rx[2];
1423 yuv[0][row][col] = y;
1424 yuv[1][row][col] = (rx[2] - y) * 0.56433f;
1425 yuv[2][row][col] = (rx[0] - y) * 0.67815f;
1426 }
1427 // Note that f can offset by a column (-1 or +1) and by a row
1428 // (-TS or TS). The row-wise offsets cause the undefined
1429 // behavior sanitizer to warn of an out of bounds index, but
1430 // as yfx is multi-dimensional and there is sufficient
1431 // padding, that is not actually so.
1432 const int f = dir[d & 3];
1433 const int pad_drv = 9;
1434 for(int row = pad_drv; row < mrow - pad_drv; row++)
1435 for(int col = pad_drv; col < mcol - pad_drv; col++)
1436 {
1437 float(*yfx)[TS][TS] = (float(*)[TS][TS]) & yuv[0][row][col];
1438 drv[d][row][col] = SQR(2 * yfx[0][0][0] - yfx[0][0][f] - yfx[0][0][-f])
1439 + SQR(2 * yfx[1][0][0] - yfx[1][0][f] - yfx[1][0][-f])
1440 + SQR(2 * yfx[2][0][0] - yfx[2][0][f] - yfx[2][0][-f]);
1441 }
1442 }
1443
1444 /* Build homogeneity maps from the derivatives: */
1445 memset(homo, 0, sizeof(uint8_t) * ndir * TS * TS);
1446 const int pad_homo = 10;
1447 for(int row = pad_homo; row < mrow - pad_homo; row++)
1448 for(int col = pad_homo; col < mcol - pad_homo; col++)
1449 {
1450 float tr = FLT_MAX;
1451 for(int d = 0; d < ndir; d++)
1452 if(tr > drv[d][row][col]) tr = drv[d][row][col];
1453 tr *= 8;
1454 for(int d = 0; d < ndir; d++)
1455 for(int v = -1; v <= 1; v++)
1456 for(int h = -1; h <= 1; h++) homo[d][row][col] += ((drv[d][row + v][col + h] <= tr) ? 1 : 0);
1457 }
1458
1459 /* Build 5x5 sum of homogeneity maps for each pixel & direction */
1460 for(int d = 0; d < ndir; d++)
1461 for(int row = pad_tile; row < mrow - pad_tile; row++)
1462 {
1463 // start before first column where homo[d][row][col+2] != 0,
1464 // so can know v5sum and homosum[d][row][col] will be 0
1465 int col = pad_tile - 5;
1466 uint8_t v5sum[5] = { 0 };
1467 homosum[d][row][col] = 0;
1468 // calculate by rolling through column sums
1469 for(col++; col < mcol - pad_tile; col++)
1470 {
1471 uint8_t colsum = 0;
1472 for(int v = -2; v <= 2; v++) colsum += homo[d][row + v][col + 2];
1473 homosum[d][row][col] = homosum[d][row][col - 1] - v5sum[col % 5] + colsum;
1474 v5sum[col % 5] = colsum;
1475 }
1476 }
1477
1478 /* Calculate chroma values in fdc: */
1479 const int pad_fdc = 6;
1480 for(int row = pad_fdc; row < mrow - pad_fdc; row++)
1481 for(int col = pad_fdc; col < mcol - pad_fdc; col++)
1482 {
1483 int myrow, mycol;
1484 uint8_t hm[8] = { 0 };
1485 uint8_t maxval = 0;
1486 for(int d = 0; d < ndir; d++)
1487 {
1488 hm[d] = homosum[d][row][col];
1489 maxval = (maxval < hm[d] ? hm[d] : maxval);
1490 }
1491 maxval -= maxval >> 3;
1492 float dircount = 0;
1493 float dirsum = 0.f;
1494 for(int d = 0; d < ndir; d++)
1495 if(hm[d] >= maxval)
1496 {
1497 dircount++;
1498 dirsum += directionality[d];
1499 }
1500 float w = dirsum / (float)dircount;
1501 int fdc_row, fdc_col;
1502 float complex C2m, C5m, C7m, C10m;
1503#define CONV_FILT(VAR, FILT) \
1504 VAR = 0.0f + 0.0f * _Complex_I; \
1505 for(fdc_row = 0, myrow = row - 6; fdc_row < 13; fdc_row++, myrow++) \
1506 for(fdc_col = 0, mycol = col - 6; fdc_col < 13; fdc_col++, mycol++) \
1507 VAR += FILT[12 - fdc_row][12 - fdc_col] * *(i_src + TS * myrow + mycol);
1508 CONV_FILT(C2m, harr[0])
1509 CONV_FILT(C5m, harr[1])
1510 CONV_FILT(C7m, harr[2])
1511 CONV_FILT(C10m, harr[3])
1512#undef CONV_FILT
1513 // build the q vector components
1514 myrow = (row + rowoffset) % 6;
1515 mycol = (col + coloffset) % 6;
1516 float complex modulator[8];
1517 for(int c = 0; c < 8; c++) modulator[c] = modarr[myrow][mycol][c];
1518 float complex qmat[8];
1519 qmat[4] = w * C10m * modulator[0] - (1.0f - w) * C2m * modulator[1];
1520 qmat[6] = conjf(qmat[4]);
1521 qmat[1] = C5m * modulator[6];
1522 qmat[2] = conjf(-0.5f * qmat[1]);
1523 qmat[5] = conjf(qmat[2]);
1524 qmat[3] = C7m * modulator[7];
1525 qmat[7] = conjf(qmat[1]);
1526 // get L
1527 C2m = qmat[4] * (conjf(modulator[0]) - conjf(modulator[1]));
1528 float complex C3m = qmat[6] * (modulator[2] - modulator[3]);
1529 float complex C6m = qmat[2] * (conjf(modulator[4]) + conjf(modulator[5]));
1530 float complex C12m = qmat[5] * (modulator[4] + modulator[5]);
1531 float complex C18m = qmat[7] * modulator[6];
1532 qmat[0] = *(i_src + row * TS + col) - C2m - C3m - C5m - C6m - 2.0f * C7m - C12m - C18m;
1533 // get the rgb components from fdc
1534 dt_aligned_pixel_t rgbpix = { 0.f, 0.f, 0.f };
1535 // multiply with the inverse matrix of M
1536 for(int color = 0; color < 3; color++)
1537 for(int c = 0; c < 8; c++)
1538 {
1539 rgbpix[color] += Minv[color][c] * qmat[c];
1540 }
1541 // now separate luma and chroma for
1542 // frequency domain chroma
1543 // and store it in fdc_chroma
1544 float uv[2];
1545 float y = 0.2627f * rgbpix[0] + 0.6780f * rgbpix[1] + 0.0593f * rgbpix[2];
1546 uv[0] = (rgbpix[2] - y) * 0.56433f;
1547 uv[1] = (rgbpix[0] - y) * 0.67815f;
1548 for(int c = 0; c < 2; c++) *(fdc_chroma + c * TS * TS + row * TS + col) = uv[c];
1549 }
1550
1551 /* Average the most homogeneous pixels for the final result: */
1552 for(int row = pad_tile; row < mrow - pad_tile; row++)
1553 for(int col = pad_tile; col < mcol - pad_tile; col++)
1554 {
1555 uint8_t hm[8] = { 0 };
1556 uint8_t maxval = 0;
1557 for(int d = 0; d < ndir; d++)
1558 {
1559 hm[d] = homosum[d][row][col];
1560 maxval = (maxval < hm[d] ? hm[d] : maxval);
1561 }
1562 maxval -= maxval >> 3;
1563 for(int d = 0; d < ndir - 4; d++)
1564 {
1565 if(hm[d] < hm[d + 4])
1566 hm[d] = 0;
1567 else if(hm[d] > hm[d + 4])
1568 hm[d + 4] = 0;
1569 }
1570
1571 dt_aligned_pixel_t avg = { 0.f };
1572 for(int d = 0; d < ndir; d++)
1573 {
1574 if(hm[d] >= maxval)
1575 {
1576 for(int c = 0; c < 3; c++) avg[c] += rgb[d][row][col][c];
1577 avg[3]++;
1578 }
1579 }
1580 dt_aligned_pixel_t rgbpix;
1581 for(int c = 0; c < 3; c++) rgbpix[c] = avg[c] / avg[3];
1582 // preserve all components of Markesteijn for this pixel
1583 const float y = 0.2627f * rgbpix[0] + 0.6780f * rgbpix[1] + 0.0593f * rgbpix[2];
1584 const float um = (rgbpix[2] - y) * 0.56433f;
1585 const float vm = (rgbpix[0] - y) * 0.67815f;
1586 float uvf[2];
1587 // macros for fast meadian filtering
1588#define PIX_SWAP(a, b) \
1589 { \
1590 tempf = (a); \
1591 (a) = (b); \
1592 (b) = tempf; \
1593 }
1594#define PIX_SORT(a, b) \
1595 { \
1596 if((a) > (b)) PIX_SWAP((a), (b)); \
1597 }
1598 // instead of merely reading the values, perform 5 pixel median filter
1599 // one median filter is required to avoid textile artifacts
1600 for(int chrm = 0; chrm < 2; chrm++)
1601 {
1602 float temp[5];
1603 float tempf;
1604 // load the window into temp
1605 memcpy(&temp[0], fdc_chroma + chrm * TS * TS + (row - 1) * TS + (col), sizeof(float) * 1);
1606 memcpy(&temp[1], fdc_chroma + chrm * TS * TS + (row)*TS + (col - 1), sizeof(float) * 3);
1607 memcpy(&temp[4], fdc_chroma + chrm * TS * TS + (row + 1) * TS + (col), sizeof(float) * 1);
1608 PIX_SORT(temp[0], temp[1]);
1609 PIX_SORT(temp[3], temp[4]);
1610 PIX_SORT(temp[0], temp[3]);
1611 PIX_SORT(temp[1], temp[4]);
1612 PIX_SORT(temp[1], temp[2]);
1613 PIX_SORT(temp[2], temp[3]);
1614 PIX_SORT(temp[1], temp[2]);
1615 uvf[chrm] = temp[2];
1616 }
1617 // use hybrid or pure fdc, depending on what was set above.
1618 // in case of hybrid, use the chroma that has the smallest
1619 // absolute value
1620 float uv[2];
1621 uv[0] = (((ABS(uvf[0]) < ABS(um)) & (ABS(uvf[1]) < (1.02f * ABS(vm)))) ? uvf[0] : um) * hybrid_fdc[0] + uvf[0] * hybrid_fdc[1];
1622 uv[1] = (((ABS(uvf[1]) < ABS(vm)) & (ABS(uvf[0]) < (1.02f * ABS(vm)))) ? uvf[1] : vm) * hybrid_fdc[0] + uvf[1] * hybrid_fdc[1];
1623 // combine the luma from Markesteijn with the chroma from above
1624 rgbpix[0] = y + 1.474600014746f * uv[1];
1625 rgbpix[1] = y - 0.15498578286403f * uv[0] - 0.571353132557189f * uv[1];
1626 rgbpix[2] = y + 1.77201282937288f * uv[0];
1627 for(int c = 0; c < 3; c++) out[4 * (width * (row + top) + col + left) + c] = rgbpix[c];
1628 }
1629 }
1630 }
1631
1632 dt_pixelpipe_cache_free_align(all_buffers);
1633}
1634
1635#ifdef HAVE_OPENCL
1636
1637static int process_markesteijn_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe,
1638 const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
1639 const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
1640 const gboolean smooth)
1641{
1644
1645 const int devid = pipe->devid;
1646 const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])piece->dsc_in.xtrans;
1647
1648 const float processed_maximum[4]
1649 = { piece->dsc_in.processed_maximum[0], piece->dsc_in.processed_maximum[1],
1650 piece->dsc_in.processed_maximum[2], 1.0f };
1651
1652 cl_mem dev_tmp = NULL;
1653 cl_mem dev_tmptmp = NULL;
1654 cl_mem dev_xtrans = NULL;
1655 cl_mem dev_green_eq = NULL;
1656 cl_mem dev_rgbv[8] = { NULL };
1657 cl_mem dev_drv[8] = { NULL };
1658 cl_mem dev_homo[8] = { NULL };
1659 cl_mem dev_homosum[8] = { NULL };
1660 cl_mem dev_gminmax = NULL;
1661 cl_mem dev_allhex = NULL;
1662 cl_mem dev_aux = NULL;
1663 cl_mem dev_edge_in = NULL;
1664 cl_mem dev_edge_out = NULL;
1665 cl_int err = -999;
1666
1667 cl_mem *dev_rgb = dev_rgbv;
1668
1669 dev_xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(piece->dsc_in.xtrans), (void *)piece->dsc_in.xtrans);
1670 if(IS_NULL_PTR(dev_xtrans)) goto error;
1671
1672 int width = roi_in->width;
1673 int height = roi_in->height;
1674 const int passes = ((data->demosaicing_method & ~DEMOSAIC_DUAL) == DT_IOP_DEMOSAIC_MARKESTEIJN_3) ? 3 : 1;
1675 const int ndir = 4 << (passes > 1);
1676 const int pad_tile = (passes == 1) ? 12 : 17;
1677
1678 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
1679 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
1680 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } };
1681
1682 // allhex contains the offset coordinates (x,y) of a green hexagon around each
1683 // non-green pixel and vice versa
1684 char allhex[3][3][8][2];
1685 // sgreen is the offset in the sensor matrix of the solitary
1686 // green pixels (initialized here only to avoid compiler warning)
1687 char sgreen[2] = { 0 };
1688
1689 // Map a green hexagon around each non-green pixel and vice versa:
1690 for(int row = 0; row < 3; row++)
1691 for(int col = 0; col < 3; col++)
1692 for(int ng = 0, d = 0; d < 10; d += 2)
1693 {
1694 const int g = FCxtrans(row, col, NULL, xtrans) == 1;
1695 if(FCxtrans(row + orth[d] + 6, col + orth[d + 2] + 6, NULL, xtrans) == 1)
1696 ng = 0;
1697 else
1698 ng++;
1699 // if there are four non-green pixels adjacent in cardinal
1700 // directions, this is the solitary green pixel
1701 if(ng == 4)
1702 {
1703 sgreen[0] = col;
1704 sgreen[1] = row;
1705 }
1706 if(ng == g + 1)
1707 for(int c = 0; c < 8; c++)
1708 {
1709 const int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
1710 const int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
1711
1712 allhex[row][col][c ^ (g * 2 & d)][0] = h;
1713 allhex[row][col][c ^ (g * 2 & d)][1] = v;
1714 }
1715 }
1716
1717 dev_allhex = dt_opencl_copy_host_to_device_constant(devid, sizeof(allhex), allhex);
1718 if(IS_NULL_PTR(dev_allhex)) goto error;
1719
1720 for(int n = 0; n < ndir; n++)
1721 {
1722 dev_rgbv[n] = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 4 * width * height);
1723 if(dev_rgbv[n] == NULL) goto error;
1724 }
1725
1726 dev_gminmax = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 2 * width * height);
1727 if(IS_NULL_PTR(dev_gminmax)) goto error;
1728
1729 dev_aux = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 4 * width * height);
1730 if(IS_NULL_PTR(dev_aux)) goto error;
1731
1732 dev_tmp = dev_out;
1733
1734 {
1735 // copy from dev_in to first rgb image buffer.
1736 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1737 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 0, sizeof(cl_mem), (void *)&dev_in);
1738 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 1, sizeof(cl_mem), (void *)&dev_rgb[0]);
1739 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 2, sizeof(int), (void *)&width);
1740 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 3, sizeof(int), (void *)&height);
1741 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 4, sizeof(int), (void *)&roi_in->x);
1742 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 5, sizeof(int), (void *)&roi_in->y);
1743 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 6, sizeof(cl_mem), (void *)&dev_xtrans);
1745 if(err != CL_SUCCESS) goto error;
1746 }
1747
1748
1749 // duplicate dev_rgb[0] to dev_rgb[1], dev_rgb[2], and dev_rgb[3]
1750 for(int c = 1; c <= 3; c++)
1751 {
1752 err = dt_opencl_enqueue_copy_buffer_to_buffer(devid, dev_rgb[0], dev_rgb[c], 0, 0,
1753 sizeof(float) * 4 * width * height);
1754 if(err != CL_SUCCESS) goto error;
1755 }
1756
1757 // find minimum and maximum allowed green values of red/blue pixel pairs
1758 const int pad_g1_g3 = 3;
1759 dt_opencl_local_buffer_t locopt_g1_g3
1760 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
1761 .cellsize = 1 * sizeof(float), .overhead = 0,
1762 .sizex = 1 << 8, .sizey = 1 << 8 };
1763
1764 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_green_minmax, &locopt_g1_g3))
1765 goto error;
1766
1767 {
1768 size_t sizes[3] = { ROUNDUP(width, locopt_g1_g3.sizex), ROUNDUP(height, locopt_g1_g3.sizey), 1 };
1769 size_t local[3] = { locopt_g1_g3.sizex, locopt_g1_g3.sizey, 1 };
1770 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1771 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 1, sizeof(cl_mem), (void *)&dev_gminmax);
1772 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 2, sizeof(int), (void *)&width);
1773 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 3, sizeof(int), (void *)&height);
1774 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 4, sizeof(int), (void *)&pad_g1_g3);
1775 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 5, sizeof(int), (void *)&roi_in->x);
1776 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 6, sizeof(int), (void *)&roi_in->y);
1777 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 7, 2 * sizeof(char), (void *)sgreen);
1778 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1779 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 9, sizeof(cl_mem), (void *)&dev_allhex);
1781 sizeof(float) * (locopt_g1_g3.sizex + 2*3) * (locopt_g1_g3.sizey + 2*3), NULL);
1783 if(err != CL_SUCCESS) goto error;
1784 }
1785
1786 // interpolate green horizontally, vertically, and along both diagonals
1787 const int pad_g_interp = 3;
1788 dt_opencl_local_buffer_t locopt_g_interp
1789 = (dt_opencl_local_buffer_t){ .xoffset = 2*6, .xfactor = 1, .yoffset = 2*6, .yfactor = 1,
1790 .cellsize = 4 * sizeof(float), .overhead = 0,
1791 .sizex = 1 << 8, .sizey = 1 << 8 };
1792
1793 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_interpolate_green, &locopt_g_interp))
1794 goto error;
1795
1796 {
1797 size_t sizes[3] = { ROUNDUP(width, locopt_g_interp.sizex), ROUNDUP(height, locopt_g_interp.sizey), 1 };
1798 size_t local[3] = { locopt_g_interp.sizex, locopt_g_interp.sizey, 1 };
1799 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1800 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 1, sizeof(cl_mem), (void *)&dev_rgb[1]);
1801 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 2, sizeof(cl_mem), (void *)&dev_rgb[2]);
1802 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 3, sizeof(cl_mem), (void *)&dev_rgb[3]);
1803 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 4, sizeof(cl_mem), (void *)&dev_gminmax);
1804 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 5, sizeof(int), (void *)&width);
1805 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 6, sizeof(int), (void *)&height);
1806 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 7, sizeof(int), (void *)&pad_g_interp);
1807 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 8, sizeof(int), (void *)&roi_in->x);
1808 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 9, sizeof(int), (void *)&roi_in->y);
1809 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 10, 2 * sizeof(char), (void *)sgreen);
1810 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1811 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 12, sizeof(cl_mem), (void *)&dev_allhex);
1813 sizeof(float) * 4 * (locopt_g_interp.sizex + 2*6) * (locopt_g_interp.sizey + 2*6), NULL);
1815 if(err != CL_SUCCESS) goto error;
1816 }
1817
1818 // multi-pass loop: one pass for Markesteijn-1 and three passes for Markesteijn-3
1819 for(int pass = 0; pass < passes; pass++)
1820 {
1821
1822 // if on second pass, copy rgb[0] to [3] into rgb[4] to [7] ....
1823 if(pass == 1)
1824 {
1825 for(int c = 0; c < 4; c++)
1826 {
1827 err = dt_opencl_enqueue_copy_buffer_to_buffer(devid, dev_rgb[c], dev_rgb[c + 4], 0, 0,
1828 sizeof(float) * 4 * width * height);
1829 if(err != CL_SUCCESS) goto error;
1830 }
1831 // ... and process that second set of buffers
1832 dev_rgb += 4;
1833 }
1834
1835 // second and third pass (only Markesteijn-3)
1836 if(pass)
1837 {
1838 // recalculate green from interpolated values of closer pixels
1839 const int pad_g_recalc = 6;
1840 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1841 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1842 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 1, sizeof(cl_mem), (void *)&dev_rgb[1]);
1843 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 2, sizeof(cl_mem), (void *)&dev_rgb[2]);
1844 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 3, sizeof(cl_mem), (void *)&dev_rgb[3]);
1845 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 4, sizeof(cl_mem), (void *)&dev_gminmax);
1846 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 5, sizeof(int), (void *)&width);
1847 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 6, sizeof(int), (void *)&height);
1848 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 7, sizeof(int), (void *)&pad_g_recalc);
1849 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 8, sizeof(int), (void *)&roi_in->x);
1850 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 9, sizeof(int), (void *)&roi_in->y);
1851 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 10, 2 * sizeof(char), (void *)sgreen);
1852 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1853 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 12, sizeof(cl_mem), (void *)&dev_allhex);
1855 if(err != CL_SUCCESS) goto error;
1856 }
1857
1858 // interpolate red and blue values for solitary green pixels
1859 const int pad_rb_g = (passes == 1) ? 6 : 5;
1860 dt_opencl_local_buffer_t locopt_rb_g
1861 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
1862 .cellsize = 4 * sizeof(float), .overhead = 0,
1863 .sizex = 1 << 8, .sizey = 1 << 8 };
1864
1866 goto error;
1867
1868 cl_mem *dev_trgb = dev_rgb;
1869 for(int d = 0, i = 1, h = 0; d < 6; d++, i ^= 1, h ^= 2)
1870 {
1871 const char dir[2] = { i, i ^ 1 };
1872
1873 // we use dev_aux to transport intermediate results from one loop run to the next
1874 size_t sizes[3] = { ROUNDUP(width, locopt_rb_g.sizex), ROUNDUP(height, locopt_rb_g.sizey), 1 };
1875 size_t local[3] = { locopt_rb_g.sizex, locopt_rb_g.sizey, 1 };
1876 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 0, sizeof(cl_mem), (void *)&dev_trgb[0]);
1877 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 1, sizeof(cl_mem), (void *)&dev_aux);
1878 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 2, sizeof(int), (void *)&width);
1879 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 3, sizeof(int), (void *)&height);
1880 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 4, sizeof(int), (void *)&pad_rb_g);
1881 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 5, sizeof(int), (void *)&roi_in->x);
1882 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 6, sizeof(int), (void *)&roi_in->y);
1883 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 7, sizeof(int), (void *)&d);
1884 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 8, 2 * sizeof(char), (void *)dir);
1885 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 9, sizeof(int), (void *)&h);
1886 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 10, 2 * sizeof(char), (void *)sgreen);
1887 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1889 sizeof(float) * 4 * (locopt_rb_g.sizex + 2*2) * (locopt_rb_g.sizey + 2*2), NULL);
1891 if(err != CL_SUCCESS) goto error;
1892
1893 if((d < 2) || (d & 1)) dev_trgb++;
1894 }
1895
1896 // interpolate red for blue pixels and vice versa
1897 const int pad_rb_br = (passes == 1) ? 6 : 5;
1898 dt_opencl_local_buffer_t locopt_rb_br
1899 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
1900 .cellsize = 4 * sizeof(float), .overhead = 0,
1901 .sizex = 1 << 8, .sizey = 1 << 8 };
1902
1903 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_red_and_blue, &locopt_rb_br))
1904 goto error;
1905
1906 for(int d = 0; d < 4; d++)
1907 {
1908 size_t sizes[3] = { ROUNDUP(width, locopt_rb_br.sizex), ROUNDUP(height, locopt_rb_br.sizey), 1 };
1909 size_t local[3] = { locopt_rb_br.sizex, locopt_rb_br.sizey, 1 };
1910 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 0, sizeof(cl_mem), (void *)&dev_rgb[d]);
1911 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 1, sizeof(int), (void *)&width);
1912 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 2, sizeof(int), (void *)&height);
1913 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 3, sizeof(int), (void *)&pad_rb_br);
1914 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 4, sizeof(int), (void *)&roi_in->x);
1915 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 5, sizeof(int), (void *)&roi_in->y);
1916 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 6, sizeof(int), (void *)&d);
1917 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 7, 2 * sizeof(char), (void *)sgreen);
1918 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1920 sizeof(float) * 4 * (locopt_rb_br.sizex + 2*3) * (locopt_rb_br.sizey + 2*3), NULL);
1922 if(err != CL_SUCCESS) goto error;
1923 }
1924
1925 // interpolate red and blue for 2x2 blocks of green
1926 const int pad_g22 = (passes == 1) ? 8 : 4;
1927 dt_opencl_local_buffer_t locopt_g22
1928 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
1929 .cellsize = 4 * sizeof(float), .overhead = 0,
1930 .sizex = 1 << 8, .sizey = 1 << 8 };
1931
1933 goto error;
1934
1935 for(int d = 0, n = 0; d < ndir; d += 2, n++)
1936 {
1937 size_t sizes[3] = { ROUNDUP(width, locopt_g22.sizex), ROUNDUP(height, locopt_g22.sizey), 1 };
1938 size_t local[3] = { locopt_g22.sizex, locopt_g22.sizey, 1 };
1939 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 0, sizeof(cl_mem), (void *)&dev_rgb[n]);
1940 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 1, sizeof(int), (void *)&width);
1941 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 2, sizeof(int), (void *)&height);
1942 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 3, sizeof(int), (void *)&pad_g22);
1943 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 4, sizeof(int), (void *)&roi_in->x);
1944 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 5, sizeof(int), (void *)&roi_in->y);
1945 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 6, sizeof(int), (void *)&d);
1946 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 7, 2 * sizeof(char), (void *)sgreen);
1947 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1948 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 9, sizeof(cl_mem), (void *)&dev_allhex);
1950 sizeof(float) * 4 * (locopt_g22.sizex + 2*2) * (locopt_g22.sizey + 2*2), NULL);
1952 if(err != CL_SUCCESS) goto error;
1953 }
1954 }
1955 // end of multi pass
1956
1957 // gminmax data no longer needed
1958 dt_opencl_release_mem_object(dev_gminmax);
1959 dev_gminmax = NULL;
1960
1961 // jump back to the first set of rgb buffers (this is a noop for Markesteijn-1)
1962 dev_rgb = dev_rgbv;
1963
1964 // prepare derivatives buffers
1965 for(int n = 0; n < ndir; n++)
1966 {
1967 dev_drv[n] = dt_opencl_alloc_device_buffer(devid, sizeof(float) * width * height);
1968 if(dev_drv[n] == NULL) goto error;
1969 }
1970
1971 // convert to perceptual colorspace and differentiate in all directions
1972 const int pad_yuv = (passes == 1) ? 8 : 13;
1973 dt_opencl_local_buffer_t locopt_diff
1974 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
1975 .cellsize = 4 * sizeof(float), .overhead = 0,
1976 .sizex = 1 << 8, .sizey = 1 << 8 };
1977
1979 goto error;
1980
1981 for(int d = 0; d < ndir; d++)
1982 {
1983 // convert to perceptual YPbPr colorspace
1984 size_t sizes_yuv[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1985 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 0, sizeof(cl_mem), (void *)&dev_rgb[d]);
1986 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 1, sizeof(cl_mem), (void *)&dev_aux);
1987 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 2, sizeof(int), (void *)&width);
1988 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 3, sizeof(int), (void *)&height);
1989 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 4, sizeof(int), (void *)&pad_yuv);
1991 if(err != CL_SUCCESS) goto error;
1992
1993
1994 // differentiate in all directions
1995 size_t sizes_diff[3] = { ROUNDUP(width, locopt_diff.sizex), ROUNDUP(height, locopt_diff.sizey), 1 };
1996 size_t local_diff[3] = { locopt_diff.sizex, locopt_diff.sizey, 1 };
1997 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 0, sizeof(cl_mem), (void *)&dev_aux);
1998 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 1, sizeof(cl_mem), (void *)&dev_drv[d]);
1999 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 2, sizeof(int), (void *)&width);
2000 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 3, sizeof(int), (void *)&height);
2001 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 4, sizeof(int), (void *)&pad_yuv);
2002 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 5, sizeof(int), (void *)&d);
2004 sizeof(float) * 4 * (locopt_diff.sizex + 2*1) * (locopt_diff.sizey + 2*1), NULL);
2005 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_differentiate, sizes_diff, local_diff);
2006 if(err != CL_SUCCESS) goto error;
2007 }
2008
2009 // reserve buffers for homogeneity maps and sum maps
2010 for(int n = 0; n < ndir; n++)
2011 {
2012 dev_homo[n] = dt_opencl_alloc_device_buffer(devid, sizeof(unsigned char) * width * height);
2013 if(dev_homo[n] == NULL) goto error;
2014
2015 dev_homosum[n] = dt_opencl_alloc_device_buffer(devid, sizeof(unsigned char) * width * height);
2016 if(dev_homosum[n] == NULL) goto error;
2017 }
2018
2019 // get thresholds for homogeneity map (store them in dev_aux)
2020 for(int d = 0; d < ndir; d++)
2021 {
2022 const int pad_homo = (passes == 1) ? 10 : 15;
2023 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2024 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 0, sizeof(cl_mem), (void *)&dev_drv[d]);
2025 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 1, sizeof(cl_mem), (void *)&dev_aux);
2026 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 2, sizeof(int), (void *)&width);
2027 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 3, sizeof(int), (void *)&height);
2028 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 4, sizeof(int), (void *)&pad_homo);
2029 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 5, sizeof(int), (void *)&d);
2031 if(err != CL_SUCCESS) goto error;
2032 }
2033
2034 // set homogeneity maps
2035 const int pad_homo = (passes == 1) ? 10 : 15;
2036 dt_opencl_local_buffer_t locopt_homo
2037 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
2038 .cellsize = 1 * sizeof(float), .overhead = 0,
2039 .sizex = 1 << 8, .sizey = 1 << 8 };
2040
2041 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_homo_set, &locopt_homo))
2042 goto error;
2043
2044 for(int d = 0; d < ndir; d++)
2045 {
2046 size_t sizes[3] = { ROUNDUP(width, locopt_homo.sizex),ROUNDUP(height, locopt_homo.sizey), 1 };
2047 size_t local[3] = { locopt_homo.sizex, locopt_homo.sizey, 1 };
2048 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 0, sizeof(cl_mem), (void *)&dev_drv[d]);
2049 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 1, sizeof(cl_mem), (void *)&dev_aux);
2050 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 2, sizeof(cl_mem), (void *)&dev_homo[d]);
2051 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 3, sizeof(int), (void *)&width);
2052 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 4, sizeof(int), (void *)&height);
2053 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 5, sizeof(int), (void *)&pad_homo);
2055 sizeof(float) * (locopt_homo.sizex + 2*1) * (locopt_homo.sizey + 2*1), NULL);
2057 if(err != CL_SUCCESS) goto error;
2058 }
2059
2060 // get rid of dev_drv buffers
2061 for(int n = 0; n < 8; n++)
2062 {
2064 dev_drv[n] = NULL;
2065 }
2066
2067 // build 5x5 sum of homogeneity maps for each pixel and direction
2068 dt_opencl_local_buffer_t locopt_homo_sum
2069 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
2070 .cellsize = 1 * sizeof(float), .overhead = 0,
2071 .sizex = 1 << 8, .sizey = 1 << 8 };
2072
2073 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_homo_sum, &locopt_homo_sum))
2074 goto error;
2075
2076 for(int d = 0; d < ndir; d++)
2077 {
2078 size_t sizes[3] = { ROUNDUP(width, locopt_homo_sum.sizex), ROUNDUP(height, locopt_homo_sum.sizey), 1 };
2079 size_t local[3] = { locopt_homo_sum.sizex, locopt_homo_sum.sizey, 1 };
2080 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 0, sizeof(cl_mem), (void *)&dev_homo[d]);
2081 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 1, sizeof(cl_mem), (void *)&dev_homosum[d]);
2082 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 2, sizeof(int), (void *)&width);
2083 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 3, sizeof(int), (void *)&height);
2084 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 4, sizeof(int), (void *)&pad_tile);
2086 sizeof(char) * (locopt_homo_sum.sizex + 2*2) * (locopt_homo_sum.sizey + 2*2), NULL);
2088 if(err != CL_SUCCESS) goto error;
2089 }
2090
2091 // get maximum of homogeneity maps (store in dev_aux)
2092 for(int d = 0; d < ndir; d++)
2093 {
2094 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2095 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 0, sizeof(cl_mem), (void *)&dev_homosum[d]);
2096 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 1, sizeof(cl_mem), (void *)&dev_aux);
2097 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 2, sizeof(int), (void *)&width);
2098 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 3, sizeof(int), (void *)&height);
2099 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 4, sizeof(int), (void *)&pad_tile);
2100 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 5, sizeof(int), (void *)&d);
2102 if(err != CL_SUCCESS) goto error;
2103 }
2104
2105 {
2106 // adjust maximum value
2107 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2108 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 0, sizeof(cl_mem), (void *)&dev_aux);
2109 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 1, sizeof(int), (void *)&width);
2110 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 2, sizeof(int), (void *)&height);
2111 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 3, sizeof(int), (void *)&pad_tile);
2113 if(err != CL_SUCCESS) goto error;
2114 }
2115
2116 // for Markesteijn-3: use only one of two directions if there is a difference in homogeneity
2117 for(int d = 0; d < ndir - 4; d++)
2118 {
2119 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2120 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 0, sizeof(cl_mem), (void *)&dev_homosum[d]);
2121 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 1, sizeof(cl_mem), (void *)&dev_homosum[d + 4]);
2122 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 2, sizeof(int), (void *)&width);
2123 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 3, sizeof(int), (void *)&height);
2124 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 4, sizeof(int), (void *)&pad_tile);
2126 if(err != CL_SUCCESS) goto error;
2127 }
2128
2129 {
2130 // initialize output buffer to zero
2131 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2132 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 0, sizeof(cl_mem), (void *)&dev_tmp);
2133 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 1, sizeof(int), (void *)&width);
2134 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 2, sizeof(int), (void *)&height);
2135 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 3, sizeof(int), (void *)&pad_tile);
2137 if(err != CL_SUCCESS) goto error;
2138 }
2139
2140 // need to get another temp buffer for the output image (may use the space of dev_drv[] freed earlier)
2141 dev_tmptmp = dt_opencl_alloc_device(devid, (size_t)width, height, sizeof(float) * 4);
2142 if(IS_NULL_PTR(dev_tmptmp)) goto error;
2143
2144 cl_mem dev_t1 = dev_tmp;
2145 cl_mem dev_t2 = dev_tmptmp;
2146
2147 // accumulate all contributions
2148 for(int d = 0; d < ndir; d++)
2149 {
2150 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2151 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 0, sizeof(cl_mem), (void *)&dev_t1);
2152 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 1, sizeof(cl_mem), (void *)&dev_t2);
2153 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 2, sizeof(cl_mem), (void *)&dev_rgbv[d]);
2154 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 3, sizeof(cl_mem), (void *)&dev_homosum[d]);
2155 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 4, sizeof(cl_mem), (void *)&dev_aux);
2156 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 5, sizeof(int), (void *)&width);
2157 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 6, sizeof(int), (void *)&height);
2158 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 7, sizeof(int), (void *)&pad_tile);
2160 if(err != CL_SUCCESS) goto error;
2161
2162 // swap buffers
2163 cl_mem dev_t = dev_t2;
2164 dev_t2 = dev_t1;
2165 dev_t1 = dev_t;
2166 }
2167
2168 // copy output to dev_tmptmp (if not already there)
2169 // note: we need to take swap of buffers into account, so current output lies in dev_t1
2170 if(dev_t1 != dev_tmptmp)
2171 {
2172 size_t origin[] = { 0, 0, 0 };
2173 size_t region[] = { width, height, 1 };
2174 err = dt_opencl_enqueue_copy_image(devid, dev_t1, dev_tmptmp, origin, origin, region);
2175 if(err != CL_SUCCESS) goto error;
2176 }
2177
2178 {
2179 // process the final image
2180 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2181 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 0, sizeof(cl_mem), (void *)&dev_tmptmp);
2182 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 1, sizeof(cl_mem), (void *)&dev_tmp);
2183 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 2, sizeof(int), (void *)&width);
2184 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 3, sizeof(int), (void *)&height);
2185 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 4, sizeof(int), (void *)&pad_tile);
2186 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 5, 4*sizeof(float), (void *)processed_maximum);
2188 if(err != CL_SUCCESS) goto error;
2189 }
2190
2191 // now it's time to get rid of most of the temporary buffers (except of dev_tmp and dev_xtrans)
2192 for(int n = 0; n < 8; n++)
2193 {
2195 dev_rgbv[n] = NULL;
2196 }
2197
2198 for(int n = 0; n < 8; n++)
2199 {
2201 dev_homo[n] = NULL;
2202 }
2203
2204 for(int n = 0; n < 8; n++)
2205 {
2206 dt_opencl_release_mem_object(dev_homosum[n]);
2207 dev_homosum[n] = NULL;
2208 }
2209
2211 dev_aux = NULL;
2212
2213 dt_opencl_release_mem_object(dev_xtrans);
2214 dev_xtrans = NULL;
2215
2216 dt_opencl_release_mem_object(dev_allhex);
2217 dev_allhex = NULL;
2218
2219 dt_opencl_release_mem_object(dev_green_eq);
2220 dev_green_eq = NULL;
2221
2222 dt_opencl_release_mem_object(dev_tmptmp);
2223 dev_tmptmp = NULL;
2224
2225 // take care of image borders. the algorithm above leaves an unprocessed border of pad_tile pixels.
2226 // strategy: take the four edges and process them each with process_vng_cl(). as VNG produces
2227 // an image with a border with only linear interpolation we process edges of pad_tile+3px and
2228 // drop 3px on the inner side if possible
2229
2230 // take care of some degenerate cases (which might happen if we are called in a tiling context)
2231 const int wd = (width > pad_tile+3) ? pad_tile+3 : width;
2232 const int ht = (height > pad_tile+3) ? pad_tile+3 : height;
2233 const int wdc = (wd >= pad_tile+3) ? 3 : 0;
2234 const int htc = (ht >= pad_tile+3) ? 3 : 0;
2235
2236 // the data of all four edges:
2237 // total edge: x-offset, y-offset, width, height,
2238 // after dropping: x-offset adjust, y-offset adjust, width adjust, height adjust
2239 const int edges[4][8] = { { 0, 0, wd, height, 0, 0, -wdc, 0 },
2240 { 0, 0, width, ht, 0, 0, 0, -htc },
2241 { width - wd, 0, wd, height, wdc, 0, -wdc, 0 },
2242 { 0, height - ht, width, ht, 0, htc, 0, -htc } };
2243
2244 for(int n = 0; n < 4; n++)
2245 {
2246 dt_iop_roi_t roi = { roi_in->x + edges[n][0], roi_in->y + edges[n][1], edges[n][2], edges[n][3], 1.0f };
2247
2248 size_t iorigin[] = { edges[n][0], edges[n][1], 0 };
2249 size_t oorigin[] = { 0, 0, 0 };
2250 size_t region[] = { edges[n][2], edges[n][3], 1 };
2251
2252 // reserve input buffer for image edge
2253 dev_edge_in = dt_opencl_alloc_device(devid, edges[n][2], edges[n][3], sizeof(float));
2254 if(IS_NULL_PTR(dev_edge_in)) goto error;
2255
2256 // reserve output buffer for VNG processing of edge
2257 dev_edge_out = dt_opencl_alloc_device(devid, edges[n][2], edges[n][3], sizeof(float) * 4);
2258 if(IS_NULL_PTR(dev_edge_out)) goto error;
2259
2260 // copy edge to input buffer
2261 err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_edge_in, iorigin, oorigin, region);
2262 if(err != CL_SUCCESS) goto error;
2263
2264 // VNG processing
2265 if(!process_vng_cl(self, pipe, piece, dev_edge_in, dev_edge_out, &roi, &roi, smooth, FALSE))
2266 goto error;
2267
2268 // adjust for "good" part, dropping linear border where possible
2269 iorigin[0] += edges[n][4];
2270 iorigin[1] += edges[n][5];
2271 oorigin[0] += edges[n][4];
2272 oorigin[1] += edges[n][5];
2273 region[0] += edges[n][6];
2274 region[1] += edges[n][7];
2275
2276 // copy output
2277 err = dt_opencl_enqueue_copy_image(devid, dev_edge_out, dev_tmp, oorigin, iorigin, region);
2278 if(err != CL_SUCCESS) goto error;
2279
2280 // release intermediate buffers
2281 dt_opencl_release_mem_object(dev_edge_in);
2282 dt_opencl_release_mem_object(dev_edge_out);
2283 dev_edge_in = dev_edge_out = NULL;
2284 }
2285 // free remaining temporary buffers
2286 if(dev_tmp != dev_out) dt_opencl_release_mem_object(dev_tmp);
2287 dev_tmp = NULL;
2288
2289 dt_opencl_release_mem_object(dev_xtrans);
2290 dev_xtrans = NULL;
2291
2292
2293 // color smoothing
2294 if(data->color_smoothing)
2295 {
2296 if(!color_smoothing_cl(self, pipe, piece, dev_out, dev_out, roi_out, data->color_smoothing))
2297 goto error;
2298 }
2299
2300 return TRUE;
2301
2302error:
2303 if(dev_tmp != dev_out) dt_opencl_release_mem_object(dev_tmp);
2304
2305 for(int n = 0; n < 8; n++)
2307 for(int n = 0; n < 8; n++)
2309 for(int n = 0; n < 8; n++)
2311 for(int n = 0; n < 8; n++)
2312 dt_opencl_release_mem_object(dev_homosum[n]);
2313 dt_opencl_release_mem_object(dev_gminmax);
2314 dt_opencl_release_mem_object(dev_tmptmp);
2315 dt_opencl_release_mem_object(dev_xtrans);
2316 dt_opencl_release_mem_object(dev_allhex);
2317 dt_opencl_release_mem_object(dev_green_eq);
2319 dt_opencl_release_mem_object(dev_edge_in);
2320 dt_opencl_release_mem_object(dev_edge_out);
2321 dt_print(DT_DEBUG_OPENCL, "[opencl_demosaic] couldn't enqueue kernel! %d\n", err);
2322 return FALSE;
2323}
2324
2325#endif // OPENCL
2326
2327#undef PIX_SWAP
2328#undef PIX_SORT
2329#undef CCLIP
2330#undef TS
2331
2332// clang-format off
2333// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
2334// vim: shiftwidth=2 expandtab tabstop=2 cindent
2335// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
2336// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
static int color_smoothing_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_out, const int passes)
Definition basic.c:334
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
const float i
Definition colorspaces_inline_conversions.h:440
static dt_aligned_pixel_t rgb
Definition colorspaces_inline_conversions.h:344
const float g
Definition colorspaces_inline_conversions.h:674
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:102
const float d
Definition colorspaces_inline_conversions.h:680
static const float const float const float min
Definition colorspaces_inline_conversions.h:438
const float max
Definition colorspaces_inline_conversions.h:490
const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:42
const float n
Definition colorspaces_inline_conversions.h:678
const float top
Definition colorspaces_inline_conversions.h:443
static const int row
Definition colorspaces_inline_conversions.h:35
int dt_conf_get_int(const char *name)
Definition control/conf.c:241
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1448
static void memset_zero(void *const buffer, size_t size)
Set the memory buffer to zero as a pack of unsigned char.
Definition darktable.h:880
#define dt_pixelpipe_cache_alloc_perthread(n, objsize, padded_size)
Definition darktable.h:1005
@ DT_DEBUG_OPENCL
Definition darktable.h:721
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
#define __DT_CLONE_TARGETS__
Definition darktable.h:367
#define dt_get_perthread(buf, padsize)
Definition darktable.h:1034
#define __OMP_PARALLEL_FOR__(...)
Definition darktable.h:258
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
Definition darktable.h:281
static int FCxtrans(const int row, const int col, global const unsigned char(*const xtrans)[6])
Definition data/kernels/common.h:54
@ DT_IOP_DEMOSAIC_MARKESTEIJN_3
Definition demosaic.c:125
static const float x
Definition iop_profile.h:235
const float v
Definition iop_profile.h:221
#define TRANSLATE(n, size)
#define CONV_FILT(VAR, FILT)
static int process_markesteijn_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out, const gboolean smooth)
Definition markesteijn.c:1637
static const short * hexmap(const int row, const int col, short(*const allhex)[3][8])
Definition markesteijn.c:30
#define SQR(x)
Definition markesteijn.c:25
#define TS
Definition markesteijn.c:27
#define PIX_SORT(a, b)
static __DT_CLONE_TARGETS__ void xtrans_markesteijn_interpolate(float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint8_t(*const xtrans)[6], const int passes)
Definition markesteijn.c:47
static __DT_CLONE_TARGETS__ void xtrans_fdc_interpolate(struct dt_iop_module_t *self, float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint8_t(*const xtrans)[6])
Definition markesteijn.c:524
#define CLAMPS(A, L, H)
Definition math.h:76
float dt_aligned_pixel_t[4]
Definition noiseprofile.c:28
int dt_opencl_local_buffer_opt(const int devid, const int kernel, dt_opencl_local_buffer_t *factors)
Definition opencl.c:2970
int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.c:1951
void * dt_opencl_alloc_device_buffer(const int devid, const size_t size)
Definition opencl.c:2359
void * dt_opencl_alloc_device(const int devid, const int width, const int height, const int bpp)
Definition opencl.c:2286
void * dt_opencl_copy_host_to_device_constant(const int devid, const size_t size, void *host)
Definition opencl.c:2147
int dt_opencl_enqueue_copy_image(const int devid, cl_mem src, cl_mem dst, size_t *orig_src, size_t *orig_dst, size_t *region)
Definition opencl.c:2076
int dt_opencl_set_kernel_arg(const int dev, const int kernel, const int num, const size_t size, const void *arg)
Definition opencl.c:1942
int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.c:1957
int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid, cl_mem src_buffer, cl_mem dst_buffer, size_t srcoffset, size_t dstoffset, size_t size)
Definition opencl.c:2111
void dt_opencl_release_mem_object(cl_mem mem)
Definition opencl.c:2198
#define ROUNDUP(a, n)
Definition opencl.h:78
#define ROUNDUPDHT(a, b)
Definition opencl.h:82
#define ROUNDUPDWD(a, b)
Definition opencl.h:81
#define ABS(n)
Definition polar_decomposition.h:82
Definition pixelpipe_hb.h:96
dt_iop_buffer_dsc_t dsc_in
Definition pixelpipe_hb.h:142
struct dt_iop_module_t *void * data
Definition pixelpipe_hb.h:97
Definition pixelpipe_hb.h:218
int devid
Definition pixelpipe_hb.h:308
dt_image_t image_storage
Definition develop.h:248
float exif_iso
Definition image.h:254
uint8_t xtrans[6][6]
Definition format.h:70
dt_aligned_pixel_t processed_maximum
Definition format.h:85
Definition demosaic.c:228
uint32_t demosaicing_method
Definition demosaic.c:231
uint32_t color_smoothing
Definition demosaic.c:230
Definition demosaic.c:162
int kernel_markesteijn_accu
Definition demosaic.c:208
int kernel_markesteijn_solitary_green
Definition demosaic.c:195
int kernel_markesteijn_homo_quench
Definition demosaic.c:206
int kernel_markesteijn_initial_copy
Definition demosaic.c:192
int kernel_markesteijn_final
Definition demosaic.c:209
int kernel_markesteijn_homo_max_corr
Definition demosaic.c:205
int kernel_markesteijn_interpolate_green
Definition demosaic.c:194
int kernel_markesteijn_convert_yuv
Definition demosaic.c:199
int kernel_markesteijn_homo_threshold
Definition demosaic.c:201
int kernel_markesteijn_homo_sum
Definition demosaic.c:203
int kernel_markesteijn_green_minmax
Definition demosaic.c:193
int kernel_markesteijn_recalculate_green
Definition demosaic.c:196
int kernel_markesteijn_interpolate_twoxtwo
Definition demosaic.c:198
int kernel_markesteijn_homo_set
Definition demosaic.c:202
int kernel_markesteijn_red_and_blue
Definition demosaic.c:197
int kernel_markesteijn_differentiate
Definition demosaic.c:200
int kernel_markesteijn_homo_max
Definition demosaic.c:204
int kernel_markesteijn_zero
Definition demosaic.c:207
Definition imageop.h:246
struct dt_develop_t * dev
Definition imageop.h:298
dt_iop_global_data_t * global_data
Definition imageop.h:316
Region of interest passed through the pixelpipe.
Definition imageop.h:72
int x
Definition imageop.h:73
int width
Definition imageop.h:73
int height
Definition imageop.h:73
int y
Definition imageop.h:73
Definition opencl.h:269
const int xoffset
Definition opencl.h:270
int sizey
Definition opencl.h:277
int sizex
Definition opencl.h:276
#define MIN(a, b)
Definition thinplate.c:32
static int process_vng_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe, const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out, const gboolean smooth, const int only_vng_linear)
Definition vng.c:206