Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
markesteijn.c
Go to the documentation of this file.
1
2//
3// x-trans specific demosaicing algorithms
4//
5
6// xtrans_interpolate adapted from dcraw 9.20
7
8#define SQR(x) ((x) * (x))
9// tile size, optimized to keep data in L2 cache
10#define TS 122
11
13static inline const short * hexmap(const int row, const int col, short (*const allhex)[3][8])
14{
15 // Row and column offsets may be negative, but C's modulo function
16 // is not useful here with a negative dividend. To be safe, add a
17 // fairly large multiple of 3. In current code row and col will
18 // never be less than -9 (1-pass) or -14 (3-pass).
19 int irow = row + 600;
20 int icol = col + 600;
21 assert(irow >= 0 && icol >= 0);
22 return allhex[irow % 3][icol % 3];
23}
24
25
26/*
27 Frank Markesteijn's algorithm for Fuji X-Trans sensors
28 */
29static void xtrans_markesteijn_interpolate(float *out, const float *const in,
30 const dt_iop_roi_t *const roi_out,
31 const dt_iop_roi_t *const roi_in,
32 const uint8_t (*const xtrans)[6], const int passes)
33{
34 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
35 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
36 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } },
37 dir[4] = { 1, TS, TS + 1, TS - 1 };
38
39 short allhex[3][3][8];
40 // sgrow/sgcol is the offset in the sensor matrix of the solitary
41 // green pixels (initialized here only to avoid compiler warning)
42 unsigned short sgrow = 0, sgcol = 0;
43
44 const int width = roi_out->width;
45 const int height = roi_out->height;
46 const int ndir = 4 << (passes > 1);
47
48 const size_t buffer_size = (size_t)TS * TS * (ndir * 4 + 3) * sizeof(float);
49 size_t padded_buffer_size;
50 char *const all_buffers = (char *)dt_alloc_perthread(buffer_size, sizeof(char), &padded_buffer_size);
51 if(!all_buffers)
52 {
53 printf("[demosaic] not able to allocate Markesteijn buffers\n");
54 return;
55 }
56
57 /* Map a green hexagon around each non-green pixel and vice versa: */
58 for(int row = 0; row < 3; row++)
59 for(int col = 0; col < 3; col++)
60 for(int ng = 0, d = 0; d < 10; d += 2)
61 {
62 const int g = FCxtrans(row, col, NULL, xtrans) == 1;
63 if(FCxtrans(row + orth[d], col + orth[d + 2], NULL, xtrans) == 1)
64 ng = 0;
65 else
66 ng++;
67 // if there are four non-green pixels adjacent in cardinal
68 // directions, this is the solitary green pixel
69 if(ng == 4)
70 {
71 sgrow = row;
72 sgcol = col;
73 }
74 if(ng == g + 1)
75 for(int c = 0; c < 8; c++)
76 {
77 const int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
78 const int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
79 // offset within TSxTS buffer
80 allhex[row][col][c ^ (g * 2 & d)] = h + v * TS;
81 }
82 }
83
84 // extra passes propagates out errors at edges, hence need more padding
85 const int pad_tile = (passes == 1) ? 12 : 17;
86#ifdef _OPENMP
87#pragma omp parallel for default(none) \
88 dt_omp_firstprivate(all_buffers, padded_buffer_size, dir, height, in, ndir, pad_tile, passes, roi_in, width, xtrans) \
89 shared(sgrow, sgcol, allhex, out) \
90 schedule(static)
91#endif
92 // step through TSxTS cells of image, each tile overlapping the
93 // prior as interpolation needs a substantial border
94 for(int top = -pad_tile; top < height - pad_tile; top += TS - (pad_tile*2))
95 {
96 char *const buffer = dt_get_perthread(all_buffers, padded_buffer_size);
97 // rgb points to ndir TSxTS tiles of 3 channels (R, G, and B)
98 float(*rgb)[TS][TS][3] = (float(*)[TS][TS][3])buffer;
99 // yuv points to 3 channel (Y, u, and v) TSxTS tiles
100 // note that channels come before tiles to allow for a
101 // vectorization optimization when building drv[] from yuv[]
102 float (*const yuv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
103 // drv points to ndir TSxTS tiles, each a single channel of derivatives
104 float (*const drv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3 + 3) * sizeof(float));
105 // gmin and gmax reuse memory which is used later by yuv buffer;
106 // each points to a TSxTS tile of single channel data
107 float (*const gmin)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
108 float (*const gmax)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3 + 1) * sizeof(float));
109 // homo and homosum reuse memory which is used earlier in the
110 // loop; each points to ndir single-channel TSxTS tiles
111 uint8_t (*const homo)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
112 uint8_t (*const homosum)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float)
113 + TS * TS * ndir * sizeof(uint8_t));
114
115 for(int left = -pad_tile; left < width - pad_tile; left += TS - (pad_tile*2))
116 {
117 int mrow = MIN(top + TS, height + pad_tile);
118 int mcol = MIN(left + TS, width + pad_tile);
119
120 // Copy current tile from in to image buffer. If border goes
121 // beyond edges of image, fill with mirrored/interpolated edges.
122 // The extra border avoids discontinuities at image edges.
123 for(int row = top; row < mrow; row++)
124 for(int col = left; col < mcol; col++)
125 {
126 float(*const pix) = rgb[0][row - top][col - left];
127 if((col >= 0) && (row >= 0) && (col < width) && (row < height))
128 {
129 const int f = FCxtrans(row, col, roi_in, xtrans);
130 for(int c = 0; c < 3; c++) pix[c] = (c == f) ? in[roi_in->width * row + col] : 0.f;
131 }
132 else
133 {
134 // mirror a border pixel if beyond image edge
135 const int c = FCxtrans(row, col, roi_in, xtrans);
136 for(int cc = 0; cc < 3; cc++)
137 {
138 if(cc != c)
139 pix[cc] = 0.0f;
140 else
141 {
142#define TRANSLATE(n, size) ((n >= size) ? (2 * size - n - 2) : abs(n))
143 const int cy = TRANSLATE(row, height), cx = TRANSLATE(col, width);
144 if(c == FCxtrans(cy, cx, roi_in, xtrans))
145 pix[c] = in[roi_in->width * cy + cx];
146 else
147 {
148 // interpolate if mirror pixel is a different color
149 float sum = 0.0f;
150 uint8_t count = 0;
151 for(int y = row - 1; y <= row + 1; y++)
152 for(int x = col - 1; x <= col + 1; x++)
153 {
154 const int yy = TRANSLATE(y, height), xx = TRANSLATE(x, width);
155 const int ff = FCxtrans(yy, xx, roi_in, xtrans);
156 if(ff == c)
157 {
158 sum += in[roi_in->width * yy + xx];
159 count++;
160 }
161 }
162 pix[c] = sum / count;
163 }
164 }
165 }
166 }
167 }
168
169 // duplicate rgb[0] to rgb[1], rgb[2], and rgb[3]
170 for(int c = 1; c <= 3; c++) memcpy(rgb[c], rgb[0], sizeof(*rgb));
171
172 // note that successive calculations are inset within the tile
173 // so as to give enough border data, and there needs to be a 6
174 // pixel border initially to allow allhex to find neighboring
175 // pixels
176
177 /* Set green1 and green3 to the minimum and maximum allowed values: */
178 // Run through each red/blue or blue/red pair, setting their g1
179 // and g3 values to the min/max of green pixels surrounding the
180 // pair. Use a 3 pixel border as gmin/gmax is used by
181 // interpolate green which has a 3 pixel border.
182 const int pad_g1_g3 = 3;
183 for(int row = top + pad_g1_g3; row < mrow - pad_g1_g3; row++)
184 {
185 // setting max to 0.0f signifies that this is a new pair, which
186 // requires a new min/max calculation of its neighboring greens
187 float min = FLT_MAX, max = 0.0f;
188 for(int col = left + pad_g1_g3; col < mcol - pad_g1_g3; col++)
189 {
190 // if in row of horizontal red & blue pairs (or processing
191 // vertical red & blue pairs near image bottom), reset min/max
192 // between each pair
193 if(FCxtrans(row, col, roi_in, xtrans) == 1)
194 {
195 min = FLT_MAX, max = 0.0f;
196 continue;
197 }
198 // if at start of red & blue pair, calculate min/max of green
199 // pixels surrounding it; note that while normally using == to
200 // compare floats is suspect, here the check is if 0.0f has
201 // explicitly been assigned to max (which signifies a new
202 // red/blue pair)
203 if(max == 0.0f)
204 {
205 float (*const pix)[3] = &rgb[0][row - top][col - left];
206 const short *const hex = hexmap(row,col,allhex);
207 for(int c = 0; c < 6; c++)
208 {
209 const float val = pix[hex[c]][1];
210 if(min > val) min = val;
211 if(max < val) max = val;
212 }
213 }
214 gmin[row - top][col - left] = min;
215 gmax[row - top][col - left] = max;
216 // handle vertical red/blue pairs
217 switch((row - sgrow) % 3)
218 {
219 // hop down a row to second pixel in vertical pair
220 case 1:
221 if(row < mrow - 4) row++, col--;
222 break;
223 // then if not done with the row hop up and right to next
224 // vertical red/blue pair, resetting min/max
225 case 2:
226 min = FLT_MAX, max = 0.0f;
227 if((col += 2) < mcol - 4 && row > top + 3) row--;
228 }
229 }
230 }
231
232 /* Interpolate green horizontally, vertically, and along both diagonals: */
233 // need a 3 pixel border here as 3*hex[] can have a 3 unit offset
234 const int pad_g_interp = 3;
235 for(int row = top + pad_g_interp; row < mrow - pad_g_interp; row++)
236 for(int col = left + pad_g_interp; col < mcol - pad_g_interp; col++)
237 {
238 float color[8];
239 const int f = FCxtrans(row, col, roi_in, xtrans);
240 if(f == 1) continue;
241 float (*const pix)[3] = &rgb[0][row - top][col - left];
242 const short *const hex = hexmap(row,col,allhex);
243 // TODO: these constants come from integer math constants in
244 // dcraw -- calculate them instead from interpolation math
245 color[0] = 0.6796875f * (pix[hex[1]][1] + pix[hex[0]][1])
246 - 0.1796875f * (pix[2 * hex[1]][1] + pix[2 * hex[0]][1]);
247 color[1] = 0.87109375f * pix[hex[3]][1] + pix[hex[2]][1] * 0.13f
248 + 0.359375f * (pix[0][f] - pix[-hex[2]][f]);
249 for(int c = 0; c < 2; c++)
250 color[2 + c] = 0.640625f * pix[hex[4 + c]][1] + 0.359375f * pix[-2 * hex[4 + c]][1]
251 + 0.12890625f * (2 * pix[0][f] - pix[3 * hex[4 + c]][f] - pix[-3 * hex[4 + c]][f]);
252 for(int c = 0; c < 4; c++)
253 rgb[c ^ !((row - sgrow) % 3)][row - top][col - left][1]
254 = CLAMPS(color[c], gmin[row - top][col - left], gmax[row - top][col - left]);
255 }
256
257 for(int pass = 0; pass < passes; pass++)
258 {
259 if(pass == 1)
260 {
261 // if on second pass, copy rgb[0] to [3] into rgb[4] to [7],
262 // and process that second set of buffers
263 memcpy(rgb + 4, rgb, sizeof(*rgb) * 4);
264 rgb += 4;
265 }
266
267 /* Recalculate green from interpolated values of closer pixels: */
268 if(pass)
269 {
270 const int pad_g_recalc = 6;
271 for(int row = top + pad_g_recalc; row < mrow - pad_g_recalc; row++)
272 for(int col = left + pad_g_recalc; col < mcol - pad_g_recalc; col++)
273 {
274 const int f = FCxtrans(row, col, roi_in, xtrans);
275 if(f == 1) continue;
276 const short *const hex = hexmap(row,col,allhex);
277 for(int d = 3; d < 6; d++)
278 {
279 float(*rfx)[3] = &rgb[(d - 2) ^ !((row - sgrow) % 3)][row - top][col - left];
280 const float val = rfx[-2 * hex[d]][1]
281 + 2 * rfx[hex[d]][1] - rfx[-2 * hex[d]][f]
282 - 2 * rfx[hex[d]][f] + 3 * rfx[0][f];
283 rfx[0][1] = CLAMPS(val / 3.0f, gmin[row - top][col - left], gmax[row - top][col - left]);
284 }
285 }
286 }
287
288 /* Interpolate red and blue values for solitary green pixels: */
289 const int pad_rb_g = (passes == 1) ? 6 : 5;
290 for(int row = (top - sgrow + pad_rb_g + 2) / 3 * 3 + sgrow; row < mrow - pad_rb_g; row += 3)
291 for(int col = (left - sgcol + pad_rb_g + 2) / 3 * 3 + sgcol; col < mcol - pad_rb_g; col += 3)
292 {
293 float(*rfx)[3] = &rgb[0][row - top][col - left];
294 int h = FCxtrans(row, col + 1, roi_in, xtrans);
295 float diff[6] = { 0.0f };
296 // interplated color: first index is red/blue, second is
297 // pass, is double actual result
298 float color[2][6];
299 // Six passes, alternating hori/vert interp (i),
300 // starting with R or B (h) depending on which is closest.
301 // Passes 0,1 to rgb[0], rgb[1] of hori/vert interp. Pass
302 // 3,5 to rgb[2], rgb[3] of best of interp hori/vert
303 // results. Each pass which outputs moves on to the next
304 // rgb[] for input of interp greens.
305 for(int i = 1, d = 0; d < 6; d++, i ^= TS ^ 1, h ^= 2)
306 {
307 // look 1 and 2 pixels distance from solitary green to
308 // red then blue or blue then red
309 for(int c = 0; c < 2; c++, h ^= 2)
310 {
311 // rate of change in greens between current pixel and
312 // interpolated pixels 1 or 2 distant: a quick
313 // derivative which will be divided by two later to be
314 // rate of luminance change for red/blue between known
315 // red/blue neighbors and the current unknown pixel
316 const float g = 2 * rfx[0][1] - rfx[i << c][1] - rfx[-(i << c)][1];
317 // color is halved before being stored in rgb, hence
318 // this becomes green rate of change plus the average
319 // of the near red or blue pixels on current axis
320 color[h != 0][d] = g + rfx[i << c][h] + rfx[-(i << c)][h];
321 // Note that diff will become the slope for both red
322 // and blue differentials in the current direction.
323 // For 2nd and 3rd hori+vert passes, create a sum of
324 // steepness for both cardinal directions.
325 if(d > 1)
326 diff[d] += SQR(rfx[i << c][1] - rfx[-(i << c)][1] - rfx[i << c][h] + rfx[-(i << c)][h])
327 + SQR(g);
328 }
329 if((d < 2) || (d & 1))
330 { // output for passes 0, 1, 3, 5
331 // for 0, 1 just use hori/vert, for 3, 5 use best of x/y dir
332 const int d_out = d - ((d > 1) && (diff[d-1] < diff[d]));
333 rfx[0][0] = color[0][d_out] / 2.f;
334 rfx[0][2] = color[1][d_out] / 2.f;
335 rfx += TS * TS;
336 }
337 }
338 }
339
340 /* Interpolate red for blue pixels and vice versa: */
341 const int pad_rb_br = (passes == 1) ? 6 : 5;
342 for(int row = top + pad_rb_br; row < mrow - pad_rb_br; row++)
343 for(int col = left + pad_rb_br; col < mcol - pad_rb_br; col++)
344 {
345 const int f = 2 - FCxtrans(row, col, roi_in, xtrans);
346 if(f == 1) continue;
347 float(*rfx)[3] = &rgb[0][row - top][col - left];
348 const int c = (row - sgrow) % 3 ? TS : 1;
349 const int h = 3 * (c ^ TS ^ 1);
350 for(int d = 0; d < 4; d++, rfx += TS * TS)
351 {
352 const int i = d > 1 || ((d ^ c) & 1) ||
353 ((fabsf(rfx[0][1]-rfx[c][1]) + fabsf(rfx[0][1]-rfx[-c][1])) <
354 2.f*(fabsf(rfx[0][1]-rfx[h][1]) + fabsf(rfx[0][1]-rfx[-h][1]))) ? c:h;
355 rfx[0][f] = (rfx[i][f] + rfx[-i][f] + 2.f * rfx[0][1] - rfx[i][1] - rfx[-i][1]) / 2.f;
356 }
357 }
358
359 /* Fill in red and blue for 2x2 blocks of green: */
360 const int pad_g22 = (passes == 1) ? 8 : 4;
361 for(int row = top + pad_g22; row < mrow - pad_g22; row++)
362 {
363 if((row - sgrow) % 3)
364 for(int col = left + pad_g22; col < mcol - pad_g22; col++)
365 if((col - sgcol) % 3)
366 {
367 float(*rfx)[3] = &rgb[0][row - top][col - left];
368 const short *const hex = hexmap(row,col,allhex);
369 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
370 if(hex[d] + hex[d + 1])
371 {
372 const float g = 3.f * rfx[0][1] - 2.f * rfx[hex[d]][1] - rfx[hex[d + 1]][1];
373 for(int c = 0; c < 4; c += 2)
374 rfx[0][c] = (g + 2.f * rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 3.f;
375 }
376 else
377 {
378 const float g = 2.f * rfx[0][1] - rfx[hex[d]][1] - rfx[hex[d + 1]][1];
379 for(int c = 0; c < 4; c += 2)
380 rfx[0][c] = (g + rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 2.f;
381 }
382 }
383 }
384 } // end of multipass loop
385
386 // jump back to the first set of rgb buffers (this is a nop
387 // unless on the second pass)
388 rgb = (float(*)[TS][TS][3])buffer;
389 // from here on out, mainly are working within the current tile
390 // rather than in reference to the image, so don't offset
391 // mrow/mcol by top/left of tile
392 mrow -= top;
393 mcol -= left;
394
395 /* Convert to perceptual colorspace and differentiate in all directions: */
396 // Original dcraw algorithm uses CIELab as perceptual space
397 // (presumably coming from original AHD) and converts taking
398 // camera matrix into account. Now use YPbPr which requires much
399 // less code and is nearly indistinguishable. It assumes the
400 // camera RGB is roughly linear.
401 for(int d = 0; d < ndir; d++)
402 {
403 const int pad_yuv = (passes == 1) ? 8 : 13;
404 for(int row = pad_yuv; row < mrow - pad_yuv; row++)
405 for(int col = pad_yuv; col < mcol - pad_yuv; col++)
406 {
407 const float *rx = rgb[d][row][col];
408 // use ITU-R BT.2020 YPbPr, which is great, but could use
409 // a better/simpler choice? note that imageop.h provides
410 // dt_iop_RGB_to_YCbCr which uses Rec. 601 conversion,
411 // which appears less good with specular highlights
412 const float y = 0.2627f * rx[0] + 0.6780f * rx[1] + 0.0593f * rx[2];
413 yuv[0][row][col] = y;
414 yuv[1][row][col] = (rx[2] - y) * 0.56433f;
415 yuv[2][row][col] = (rx[0] - y) * 0.67815f;
416 }
417 // Note that f can offset by a column (-1 or +1) and by a row
418 // (-TS or TS). The row-wise offsets cause the undefined
419 // behavior sanitizer to warn of an out of bounds index, but
420 // as yfx is multi-dimensional and there is sufficient
421 // padding, that is not actually so.
422 const int f = dir[d & 3];
423 const int pad_drv = (passes == 1) ? 9 : 14;
424 for(int row = pad_drv; row < mrow - pad_drv; row++)
425 for(int col = pad_drv; col < mcol - pad_drv; col++)
426 {
427 const float(*yfx)[TS][TS] = (float(*)[TS][TS]) & yuv[0][row][col];
428 drv[d][row][col] = SQR(2 * yfx[0][0][0] - yfx[0][0][f] - yfx[0][0][-f])
429 + SQR(2 * yfx[1][0][0] - yfx[1][0][f] - yfx[1][0][-f])
430 + SQR(2 * yfx[2][0][0] - yfx[2][0][f] - yfx[2][0][-f]);
431 }
432 }
433
434 /* Build homogeneity maps from the derivatives: */
435 memset_zero(homo, sizeof(uint8_t) * ndir * TS * TS);
436 const int pad_homo = (passes == 1) ? 10 : 15;
437 for(int row = pad_homo; row < mrow - pad_homo; row++)
438 for(int col = pad_homo; col < mcol - pad_homo; col++)
439 {
440 float tr = FLT_MAX;
441 for(int d = 0; d < ndir; d++)
442 if(tr > drv[d][row][col]) tr = drv[d][row][col];
443 tr *= 8;
444 for(int d = 0; d < ndir; d++)
445 for(int v = -1; v <= 1; v++)
446 for(int h = -1; h <= 1; h++)
447 homo[d][row][col] += ((drv[d][row + v][col + h] <= tr) ? 1 : 0);
448 }
449
450 /* Build 5x5 sum of homogeneity maps for each pixel & direction */
451 for(int d = 0; d < ndir; d++)
452 for(int row = pad_tile; row < mrow - pad_tile; row++)
453 {
454 // start before first column where homo[d][row][col+2] != 0,
455 // so can know v5sum and homosum[d][row][col] will be 0
456 int col = pad_tile-5;
457 uint8_t v5sum[5] = { 0 };
458 homosum[d][row][col] = 0;
459 // calculate by rolling through column sums
460 for(col++; col < mcol - pad_tile; col++)
461 {
462 uint8_t colsum = 0;
463 for(int v = -2; v <= 2; v++) colsum += homo[d][row + v][col + 2];
464 homosum[d][row][col] = homosum[d][row][col - 1] - v5sum[col % 5] + colsum;
465 v5sum[col % 5] = colsum;
466 }
467 }
468
469 /* Average the most homogeneous pixels for the final result: */
470 for(int row = pad_tile; row < mrow - pad_tile; row++)
471 for(int col = pad_tile; col < mcol - pad_tile; col++)
472 {
473 uint8_t hm[8] = { 0 };
474 uint8_t maxval = 0;
475 for(int d = 0; d < ndir; d++)
476 {
477 hm[d] = homosum[d][row][col];
478 maxval = (maxval < hm[d] ? hm[d] : maxval);
479 }
480 maxval -= maxval >> 3;
481 for(int d = 0; d < ndir - 4; d++)
482 {
483 if(hm[d] < hm[d + 4])
484 hm[d] = 0;
485 else if(hm[d] > hm[d + 4])
486 hm[d + 4] = 0;
487 }
488 dt_aligned_pixel_t avg = { 0.0f };
489 for(int d = 0; d < ndir; d++)
490 {
491 if(hm[d] >= maxval)
492 {
493 for(int c = 0; c < 3; c++) avg[c] += rgb[d][row][col][c];
494 avg[3]++;
495 }
496 }
497 for(int c = 0; c < 3; c++)
498 out[4 * (width * (row + top) + col + left) + c] = avg[c]/avg[3];
499 }
500 }
501 }
502 dt_free_align(all_buffers);
503}
504
505#undef TS
506
507#define TS 122
508static void xtrans_fdc_interpolate(struct dt_iop_module_t *self, float *out, const float *const in,
509 const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in,
510 const uint8_t (*const xtrans)[6])
511{
512
513 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
514 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
515 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } },
516 dir[4] = { 1, TS, TS + 1, TS - 1 };
517
518 static const float directionality[8] = { 1.0f, 0.0f, 0.5f, 0.5f, 1.0f, 0.0f, 0.5f, 0.5f };
519
520 short allhex[3][3][8];
521 // sgrow/sgcol is the offset in the sensor matrix of the solitary
522 // green pixels (initialized here only to avoid compiler warning)
523 unsigned short sgrow = 0, sgcol = 0;
524
525 const int width = roi_out->width;
526 const int height = roi_out->height;
527 static const int ndir = 4;
528
529 static const float complex Minv[3][8] = {
530 { 1.000000e+00f, 2.500000e-01f - 4.330127e-01f * _Complex_I, -2.500000e-01f - 4.330127e-01f * _Complex_I,
531 -1.000000e+00f, 7.500000e-01f - 1.299038e+00f * _Complex_I, -2.500000e-01f + 4.330127e-01f * _Complex_I,
532 7.500000e-01f + 1.299038e+00f * _Complex_I, 2.500000e-01f + 4.330127e-01f * _Complex_I },
533 { 1.000000e+00f, -2.000000e-01f + 3.464102e-01f * _Complex_I, 2.000000e-01f + 3.464102e-01f * _Complex_I,
534 8.000000e-01f, 0.0f, 2.000000e-01f - 3.464102e-01f * _Complex_I, 0.0f,
535 -2.000000e-01f - 3.464102e-01f * _Complex_I },
536 { 1.000000e+00f, 2.500000e-01f - 4.330127e-01f * _Complex_I, -2.500000e-01f - 4.330127e-01f * _Complex_I,
537 -1.000000e+00f, -7.500000e-01f + 1.299038e+00f * _Complex_I, -2.500000e-01f + 4.330127e-01f * _Complex_I,
538 -7.500000e-01f - 1.299038e+00f * _Complex_I, 2.500000e-01f + 4.330127e-01f * _Complex_I },
539 };
540
541 static const float complex modarr[6][6][8] = {
542 { { 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
543 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
544 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
545 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
546 { -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
547 -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
548 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
549 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
550 { 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
551 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
552 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
553 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
554 { -1.000000e+00f - 3.673940e-16f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
555 -1.000000e+00f - 3.673940e-16f * _Complex_I, -1.000000e+00f - 1.224647e-16f * _Complex_I,
556 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
557 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f + 2.449294e-16f * _Complex_I },
558 { 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
559 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
560 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
561 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
562 { -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
563 -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
564 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
565 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I } },
566 { { 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
567 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
568 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
569 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
570 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
571 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
572 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
573 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
574 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
575 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
576 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
577 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
578 { -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
579 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
580 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
581 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
582 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
583 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
584 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
585 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 2.449294e-16f * _Complex_I },
586 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
587 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
588 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
589 1.000000e+00f - 2.266216e-15f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I } },
590 { { -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
591 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
592 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
593 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
594 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
595 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
596 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
597 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
598 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
599 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
600 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
601 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
602 { 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
603 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
604 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
605 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
606 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
607 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
608 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
609 1.000000e+00f - 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
610 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
611 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
612 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
613 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 1.133108e-15f * _Complex_I } },
614 { { -1.000000e+00f + 1.224647e-16f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
615 -1.000000e+00f - 1.224647e-16f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
616 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
617 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I },
618 { 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
619 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
620 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
621 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
622 { -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
623 -1.000000e+00f - 3.673940e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
624 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
625 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
626 { 1.000000e+00f + 2.449294e-16f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
627 1.000000e+00f + 4.898587e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
628 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
629 1.000000e+00f - 4.898587e-16f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
630 { -1.000000e+00f - 3.673940e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
631 -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
632 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
633 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
634 { 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
635 1.000000e+00f + 7.347881e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
636 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
637 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I } },
638 { { -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
639 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
640 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
641 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
642 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
643 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
644 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
645 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I },
646 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
647 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
648 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
649 1.000000e+00f - 4.898587e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
650 { 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
651 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
652 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
653 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
654 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
655 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
656 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
657 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
658 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
659 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
660 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
661 1.000000e+00f - 7.347881e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I } },
662 { { 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
663 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
664 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
665 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
666 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
667 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
668 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
669 1.000000e+00f - 2.266216e-15f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
670 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
671 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
672 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
673 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 1.133108e-15f * _Complex_I },
674 { -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 7.347881e-16f * _Complex_I,
675 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
676 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
677 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
678 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
679 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
680 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
681 1.000000e+00f - 7.347881e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
682 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
683 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
684 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
685 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I } },
686 };
687
688 static const float complex harr[4][13][13]
689 = { { { 1.326343e-03f - 1.299441e-18f * _Complex_I, 7.091837e-04f - 1.228342e-03f * _Complex_I,
690 -6.278557e-04f - 1.087478e-03f * _Complex_I, -1.157216e-03f + 9.920263e-19f * _Complex_I,
691 -4.887166e-04f + 8.464820e-04f * _Complex_I, 5.758687e-04f + 9.974338e-04f * _Complex_I,
692 1.225183e-03f - 9.002496e-19f * _Complex_I, 5.758687e-04f - 9.974338e-04f * _Complex_I,
693 -4.887166e-04f - 8.464820e-04f * _Complex_I, -1.157216e-03f + 7.085902e-19f * _Complex_I,
694 -6.278557e-04f + 1.087478e-03f * _Complex_I, 7.091837e-04f + 1.228342e-03f * _Complex_I,
695 1.326343e-03f - 6.497206e-19f * _Complex_I },
696 { -1.980815e-03f + 1.698059e-18f * _Complex_I, -1.070384e-03f + 1.853959e-03f * _Complex_I,
697 7.924697e-04f + 1.372598e-03f * _Complex_I, 1.876584e-03f - 1.378892e-18f * _Complex_I,
698 1.225866e-03f - 2.123262e-03f * _Complex_I, -1.569320e-03f - 2.718142e-03f * _Complex_I,
699 -3.273971e-03f + 2.004729e-18f * _Complex_I, -1.569320e-03f + 2.718142e-03f * _Complex_I,
700 1.225866e-03f + 2.123262e-03f * _Complex_I, 1.876584e-03f - 9.192611e-19f * _Complex_I,
701 7.924697e-04f - 1.372598e-03f * _Complex_I, -1.070384e-03f - 1.853959e-03f * _Complex_I,
702 -1.980815e-03f + 7.277398e-19f * _Complex_I },
703 { 1.457023e-03f - 1.070603e-18f * _Complex_I, 8.487143e-04f - 1.470016e-03f * _Complex_I,
704 -6.873776e-04f - 1.190573e-03f * _Complex_I, -2.668335e-03f + 1.633884e-18f * _Complex_I,
705 -2.459813e-03f + 4.260521e-03f * _Complex_I, 3.238772e-03f + 5.609717e-03f * _Complex_I,
706 7.074895e-03f - 3.465699e-18f * _Complex_I, 3.238772e-03f - 5.609717e-03f * _Complex_I,
707 -2.459813e-03f - 4.260521e-03f * _Complex_I, -2.668335e-03f + 9.803302e-19f * _Complex_I,
708 -6.873776e-04f + 1.190573e-03f * _Complex_I, 8.487143e-04f + 1.470016e-03f * _Complex_I,
709 1.457023e-03f - 3.568678e-19f * _Complex_I },
710 { -1.017660e-03f + 6.231370e-19f * _Complex_I, -5.415171e-04f + 9.379351e-04f * _Complex_I,
711 7.255109e-04f + 1.256622e-03f * _Complex_I, 3.699792e-03f - 1.812375e-18f * _Complex_I,
712 4.090356e-03f - 7.084704e-03f * _Complex_I, -6.006283e-03f - 1.040319e-02f * _Complex_I,
713 -1.391431e-02f + 5.112034e-18f * _Complex_I, -6.006283e-03f + 1.040319e-02f * _Complex_I,
714 4.090356e-03f + 7.084704e-03f * _Complex_I, 3.699792e-03f - 9.061876e-19f * _Complex_I,
715 7.255109e-04f - 1.256622e-03f * _Complex_I, -5.415171e-04f - 9.379351e-04f * _Complex_I,
716 -1.017660e-03f + 1.246274e-19f * _Complex_I },
717 { 9.198983e-04f - 4.506202e-19f * _Complex_I, 6.815900e-04f - 1.180548e-03f * _Complex_I,
718 -1.287335e-03f - 2.229729e-03f * _Complex_I, -5.023856e-03f + 1.845735e-18f * _Complex_I,
719 -5.499048e-03f + 9.524630e-03f * _Complex_I, 9.797672e-03f + 1.697006e-02f * _Complex_I,
720 2.504795e-02f - 6.134977e-18f * _Complex_I, 9.797672e-03f - 1.697006e-02f * _Complex_I,
721 -5.499048e-03f - 9.524630e-03f * _Complex_I, -5.023856e-03f + 6.152449e-19f * _Complex_I,
722 -1.287335e-03f + 2.229729e-03f * _Complex_I, 6.815900e-04f + 1.180548e-03f * _Complex_I,
723 9.198983e-04f + 0.000000e+00f * _Complex_I },
724 { -7.972663e-04f + 2.929109e-19f * _Complex_I, -1.145605e-03f + 1.984247e-03f * _Complex_I,
725 1.983334e-03f + 3.435235e-03f * _Complex_I, 6.730096e-03f - 1.648398e-18f * _Complex_I,
726 6.782033e-03f - 1.174683e-02f * _Complex_I, -1.392077e-02f - 2.411147e-02f * _Complex_I,
727 -3.906939e-02f + 4.784620e-18f * _Complex_I, -1.392077e-02f + 2.411147e-02f * _Complex_I,
728 6.782033e-03f + 1.174683e-02f * _Complex_I, 6.730096e-03f + 0.000000e+00f * _Complex_I,
729 1.983334e-03f - 3.435235e-03f * _Complex_I, -1.145605e-03f - 1.984247e-03f * _Complex_I,
730 -7.972663e-04f - 9.763696e-20f * _Complex_I },
731 { 8.625458e-04f - 2.112628e-19f * _Complex_I, 1.431113e-03f - 2.478760e-03f * _Complex_I,
732 -2.310309e-03f - 4.001572e-03f * _Complex_I, -7.706486e-03f + 9.437723e-19f * _Complex_I,
733 -7.220186e-03f + 1.250573e-02f * _Complex_I, 1.587118e-02f + 2.748969e-02f * _Complex_I,
734 4.765675e-02f + 0.000000e+00f * _Complex_I, 1.587118e-02f - 2.748969e-02f * _Complex_I,
735 -7.220186e-03f - 1.250573e-02f * _Complex_I, -7.706486e-03f - 9.437723e-19f * _Complex_I,
736 -2.310309e-03f + 4.001572e-03f * _Complex_I, 1.431113e-03f + 2.478760e-03f * _Complex_I,
737 8.625458e-04f + 2.112628e-19f * _Complex_I },
738 { -7.972663e-04f + 9.763696e-20f * _Complex_I, -1.145605e-03f + 1.984247e-03f * _Complex_I,
739 1.983334e-03f + 3.435235e-03f * _Complex_I, 6.730096e-03f + 0.000000e+00f * _Complex_I,
740 6.782033e-03f - 1.174683e-02f * _Complex_I, -1.392077e-02f - 2.411147e-02f * _Complex_I,
741 -3.906939e-02f - 4.784620e-18f * _Complex_I, -1.392077e-02f + 2.411147e-02f * _Complex_I,
742 6.782033e-03f + 1.174683e-02f * _Complex_I, 6.730096e-03f + 1.648398e-18f * _Complex_I,
743 1.983334e-03f - 3.435235e-03f * _Complex_I, -1.145605e-03f - 1.984247e-03f * _Complex_I,
744 -7.972663e-04f - 2.929109e-19f * _Complex_I },
745 { 9.198983e-04f + 0.000000e+00f * _Complex_I, 6.815900e-04f - 1.180548e-03f * _Complex_I,
746 -1.287335e-03f - 2.229729e-03f * _Complex_I, -5.023856e-03f - 6.152449e-19f * _Complex_I,
747 -5.499048e-03f + 9.524630e-03f * _Complex_I, 9.797672e-03f + 1.697006e-02f * _Complex_I,
748 2.504795e-02f + 6.134977e-18f * _Complex_I, 9.797672e-03f - 1.697006e-02f * _Complex_I,
749 -5.499048e-03f - 9.524630e-03f * _Complex_I, -5.023856e-03f - 1.845735e-18f * _Complex_I,
750 -1.287335e-03f + 2.229729e-03f * _Complex_I, 6.815900e-04f + 1.180548e-03f * _Complex_I,
751 9.198983e-04f + 4.506202e-19f * _Complex_I },
752 { -1.017660e-03f - 1.246274e-19f * _Complex_I, -5.415171e-04f + 9.379351e-04f * _Complex_I,
753 7.255109e-04f + 1.256622e-03f * _Complex_I, 3.699792e-03f + 9.061876e-19f * _Complex_I,
754 4.090356e-03f - 7.084704e-03f * _Complex_I, -6.006283e-03f - 1.040319e-02f * _Complex_I,
755 -1.391431e-02f - 5.112034e-18f * _Complex_I, -6.006283e-03f + 1.040319e-02f * _Complex_I,
756 4.090356e-03f + 7.084704e-03f * _Complex_I, 3.699792e-03f + 1.812375e-18f * _Complex_I,
757 7.255109e-04f - 1.256622e-03f * _Complex_I, -5.415171e-04f - 9.379351e-04f * _Complex_I,
758 -1.017660e-03f - 6.231370e-19f * _Complex_I },
759 { 1.457023e-03f + 3.568678e-19f * _Complex_I, 8.487143e-04f - 1.470016e-03f * _Complex_I,
760 -6.873776e-04f - 1.190573e-03f * _Complex_I, -2.668335e-03f - 9.803302e-19f * _Complex_I,
761 -2.459813e-03f + 4.260521e-03f * _Complex_I, 3.238772e-03f + 5.609717e-03f * _Complex_I,
762 7.074895e-03f + 3.465699e-18f * _Complex_I, 3.238772e-03f - 5.609717e-03f * _Complex_I,
763 -2.459813e-03f - 4.260521e-03f * _Complex_I, -2.668335e-03f - 1.633884e-18f * _Complex_I,
764 -6.873776e-04f + 1.190573e-03f * _Complex_I, 8.487143e-04f + 1.470016e-03f * _Complex_I,
765 1.457023e-03f + 1.070603e-18f * _Complex_I },
766 { -1.980815e-03f - 7.277398e-19f * _Complex_I, -1.070384e-03f + 1.853959e-03f * _Complex_I,
767 7.924697e-04f + 1.372598e-03f * _Complex_I, 1.876584e-03f + 9.192611e-19f * _Complex_I,
768 1.225866e-03f - 2.123262e-03f * _Complex_I, -1.569320e-03f - 2.718142e-03f * _Complex_I,
769 -3.273971e-03f - 2.004729e-18f * _Complex_I, -1.569320e-03f + 2.718142e-03f * _Complex_I,
770 1.225866e-03f + 2.123262e-03f * _Complex_I, 1.876584e-03f + 1.378892e-18f * _Complex_I,
771 7.924697e-04f - 1.372598e-03f * _Complex_I, -1.070384e-03f - 1.853959e-03f * _Complex_I,
772 -1.980815e-03f - 1.698059e-18f * _Complex_I },
773 { 1.326343e-03f + 6.497206e-19f * _Complex_I, 7.091837e-04f - 1.228342e-03f * _Complex_I,
774 -6.278557e-04f - 1.087478e-03f * _Complex_I, -1.157216e-03f - 7.085902e-19f * _Complex_I,
775 -4.887166e-04f + 8.464820e-04f * _Complex_I, 5.758687e-04f + 9.974338e-04f * _Complex_I,
776 1.225183e-03f + 9.002496e-19f * _Complex_I, 5.758687e-04f - 9.974338e-04f * _Complex_I,
777 -4.887166e-04f - 8.464820e-04f * _Complex_I, -1.157216e-03f - 9.920263e-19f * _Complex_I,
778 -6.278557e-04f + 1.087478e-03f * _Complex_I, 7.091837e-04f + 1.228342e-03f * _Complex_I,
779 1.326343e-03f + 1.299441e-18f * _Complex_I } },
780 { { 9.129120e-04f - 8.943958e-19f * _Complex_I, -5.925973e-04f - 1.026409e-03f * _Complex_I,
781 -5.989682e-04f + 1.037443e-03f * _Complex_I, 1.158755e-03f - 8.514393e-19f * _Complex_I,
782 -8.992493e-04f - 1.557545e-03f * _Complex_I, -1.283187e-03f + 2.222546e-03f * _Complex_I,
783 2.730635e-03f - 1.337625e-18f * _Complex_I, -1.283187e-03f - 2.222546e-03f * _Complex_I,
784 -8.992493e-04f + 1.557545e-03f * _Complex_I, 1.158755e-03f - 2.838131e-19f * _Complex_I,
785 -5.989682e-04f - 1.037443e-03f * _Complex_I, -5.925973e-04f + 1.026409e-03f * _Complex_I,
786 9.129120e-04f + 0.000000e+00f * _Complex_I },
787 { -5.588854e-04f - 9.680179e-04f * _Complex_I, -6.474856e-04f + 1.121478e-03f * _Complex_I,
788 1.536588e-03f - 1.129066e-18f * _Complex_I, -9.123802e-04f - 1.580289e-03f * _Complex_I,
789 -1.541434e-03f + 2.669842e-03f * _Complex_I, 4.379825e-03f - 9.925627e-18f * _Complex_I,
790 -2.394173e-03f - 4.146830e-03f * _Complex_I, -2.189912e-03f + 3.793039e-03f * _Complex_I,
791 3.082869e-03f - 3.493222e-18f * _Complex_I, -9.123802e-04f - 1.580289e-03f * _Complex_I,
792 -7.682939e-04f + 1.330724e-03f * _Complex_I, 1.294971e-03f + 0.000000e+00f * _Complex_I,
793 -5.588854e-04f - 9.680179e-04f * _Complex_I },
794 { -5.883876e-04f + 1.019117e-03f * _Complex_I, 1.714796e-03f - 1.260012e-18f * _Complex_I,
795 -1.180365e-03f - 2.044451e-03f * _Complex_I, -1.483082e-03f + 2.568774e-03f * _Complex_I,
796 4.933362e-03f - 2.416651e-18f * _Complex_I, -3.296542e-03f - 5.709779e-03f * _Complex_I,
797 -3.546477e-03f + 6.142678e-03f * _Complex_I, 6.593085e-03f - 1.614840e-18f * _Complex_I,
798 -2.466681e-03f - 4.272417e-03f * _Complex_I, -1.483082e-03f + 2.568774e-03f * _Complex_I,
799 2.360729e-03f + 0.000000e+00f * _Complex_I, -8.573982e-04f - 1.485057e-03f * _Complex_I,
800 -5.883876e-04f + 1.019117e-03f * _Complex_I },
801 { 1.483526e-03f - 1.090077e-18f * _Complex_I, -1.074793e-03f - 1.861596e-03f * _Complex_I,
802 -1.447448e-03f + 2.507053e-03f * _Complex_I, 3.952416e-03f - 1.936126e-18f * _Complex_I,
803 -3.496688e-03f - 6.056441e-03f * _Complex_I, -4.898024e-03f + 8.483627e-03f * _Complex_I,
804 1.070518e-02f - 2.622012e-18f * _Complex_I, -4.898024e-03f - 8.483627e-03f * _Complex_I,
805 -3.496688e-03f + 6.056441e-03f * _Complex_I, 3.952416e-03f + 0.000000e+00f * _Complex_I,
806 -1.447448e-03f - 2.507053e-03f * _Complex_I, -1.074793e-03f + 1.861596e-03f * _Complex_I,
807 1.483526e-03f + 3.633590e-19f * _Complex_I },
808 { -9.966429e-04f - 1.726236e-03f * _Complex_I, -1.478281e-03f + 2.560458e-03f * _Complex_I,
809 4.306274e-03f - 2.109466e-18f * _Complex_I, -3.294955e-03f - 5.707029e-03f * _Complex_I,
810 -5.436890e-03f + 9.416970e-03f * _Complex_I, 1.556418e-02f - 3.812124e-18f * _Complex_I,
811 -8.842875e-03f - 1.531631e-02f * _Complex_I, -7.782088e-03f + 1.347897e-02f * _Complex_I,
812 1.087378e-02f + 0.000000e+00f * _Complex_I, -3.294955e-03f - 5.707029e-03f * _Complex_I,
813 -2.153137e-03f + 3.729342e-03f * _Complex_I, 2.956562e-03f + 3.350104e-18f * _Complex_I,
814 -9.966429e-04f - 1.726236e-03f * _Complex_I },
815 { -1.291288e-03f + 2.236576e-03f * _Complex_I, 3.942788e-03f - 8.935208e-18f * _Complex_I,
816 -2.798347e-03f - 4.846880e-03f * _Complex_I, -4.448869e-03f + 7.705666e-03f * _Complex_I,
817 1.522441e-02f - 3.728906e-18f * _Complex_I, -1.175443e-02f - 2.035927e-02f * _Complex_I,
818 -1.417872e-02f + 2.455826e-02f * _Complex_I, 2.350886e-02f + 0.000000e+00f * _Complex_I,
819 -7.612206e-03f - 1.318473e-02f * _Complex_I, -4.448869e-03f + 7.705666e-03f * _Complex_I,
820 5.596695e-03f + 1.370795e-18f * _Complex_I, -1.971394e-03f - 3.414555e-03f * _Complex_I,
821 -1.291288e-03f + 2.236576e-03f * _Complex_I },
822 { 2.779286e-03f - 1.361458e-18f * _Complex_I, -2.194126e-03f - 3.800338e-03f * _Complex_I,
823 -3.057720e-03f + 5.296126e-03f * _Complex_I, 9.725261e-03f - 2.382002e-18f * _Complex_I,
824 -8.649261e-03f - 1.498096e-02f * _Complex_I, -1.417667e-02f + 2.455472e-02f * _Complex_I,
825 3.552610e-02f + 0.000000e+00f * _Complex_I, -1.417667e-02f - 2.455472e-02f * _Complex_I,
826 -8.649261e-03f + 1.498096e-02f * _Complex_I, 9.725261e-03f + 2.382002e-18f * _Complex_I,
827 -3.057720e-03f - 5.296126e-03f * _Complex_I, -2.194126e-03f + 3.800338e-03f * _Complex_I,
828 2.779286e-03f + 1.361458e-18f * _Complex_I },
829 { -1.291288e-03f - 2.236576e-03f * _Complex_I, -1.971394e-03f + 3.414555e-03f * _Complex_I,
830 5.596695e-03f - 1.370795e-18f * _Complex_I, -4.448869e-03f - 7.705666e-03f * _Complex_I,
831 -7.612206e-03f + 1.318473e-02f * _Complex_I, 2.350886e-02f + 0.000000e+00f * _Complex_I,
832 -1.417872e-02f - 2.455826e-02f * _Complex_I, -1.175443e-02f + 2.035927e-02f * _Complex_I,
833 1.522441e-02f + 3.728906e-18f * _Complex_I, -4.448869e-03f - 7.705666e-03f * _Complex_I,
834 -2.798347e-03f + 4.846880e-03f * _Complex_I, 3.942788e-03f + 8.935208e-18f * _Complex_I,
835 -1.291288e-03f - 2.236576e-03f * _Complex_I },
836 { -9.966429e-04f + 1.726236e-03f * _Complex_I, 2.956562e-03f - 3.350104e-18f * _Complex_I,
837 -2.153137e-03f - 3.729342e-03f * _Complex_I, -3.294955e-03f + 5.707029e-03f * _Complex_I,
838 1.087378e-02f + 0.000000e+00f * _Complex_I, -7.782088e-03f - 1.347897e-02f * _Complex_I,
839 -8.842875e-03f + 1.531631e-02f * _Complex_I, 1.556418e-02f + 3.812124e-18f * _Complex_I,
840 -5.436890e-03f - 9.416970e-03f * _Complex_I, -3.294955e-03f + 5.707029e-03f * _Complex_I,
841 4.306274e-03f + 2.109466e-18f * _Complex_I, -1.478281e-03f - 2.560458e-03f * _Complex_I,
842 -9.966429e-04f + 1.726236e-03f * _Complex_I },
843 { 1.483526e-03f - 3.633590e-19f * _Complex_I, -1.074793e-03f - 1.861596e-03f * _Complex_I,
844 -1.447448e-03f + 2.507053e-03f * _Complex_I, 3.952416e-03f + 0.000000e+00f * _Complex_I,
845 -3.496688e-03f - 6.056441e-03f * _Complex_I, -4.898024e-03f + 8.483627e-03f * _Complex_I,
846 1.070518e-02f + 2.622012e-18f * _Complex_I, -4.898024e-03f - 8.483627e-03f * _Complex_I,
847 -3.496688e-03f + 6.056441e-03f * _Complex_I, 3.952416e-03f + 1.936126e-18f * _Complex_I,
848 -1.447448e-03f - 2.507053e-03f * _Complex_I, -1.074793e-03f + 1.861596e-03f * _Complex_I,
849 1.483526e-03f + 1.090077e-18f * _Complex_I },
850 { -5.883876e-04f - 1.019117e-03f * _Complex_I, -8.573982e-04f + 1.485057e-03f * _Complex_I,
851 2.360729e-03f + 0.000000e+00f * _Complex_I, -1.483082e-03f - 2.568774e-03f * _Complex_I,
852 -2.466681e-03f + 4.272417e-03f * _Complex_I, 6.593085e-03f + 1.614840e-18f * _Complex_I,
853 -3.546477e-03f - 6.142678e-03f * _Complex_I, -3.296542e-03f + 5.709779e-03f * _Complex_I,
854 4.933362e-03f + 2.416651e-18f * _Complex_I, -1.483082e-03f - 2.568774e-03f * _Complex_I,
855 -1.180365e-03f + 2.044451e-03f * _Complex_I, 1.714796e-03f + 1.260012e-18f * _Complex_I,
856 -5.883876e-04f - 1.019117e-03f * _Complex_I },
857 { -5.588854e-04f + 9.680179e-04f * _Complex_I, 1.294971e-03f + 0.000000e+00f * _Complex_I,
858 -7.682939e-04f - 1.330724e-03f * _Complex_I, -9.123802e-04f + 1.580289e-03f * _Complex_I,
859 3.082869e-03f + 3.493222e-18f * _Complex_I, -2.189912e-03f - 3.793039e-03f * _Complex_I,
860 -2.394173e-03f + 4.146830e-03f * _Complex_I, 4.379825e-03f + 9.925627e-18f * _Complex_I,
861 -1.541434e-03f - 2.669842e-03f * _Complex_I, -9.123802e-04f + 1.580289e-03f * _Complex_I,
862 1.536588e-03f + 1.129066e-18f * _Complex_I, -6.474856e-04f - 1.121478e-03f * _Complex_I,
863 -5.588854e-04f + 9.680179e-04f * _Complex_I },
864 { 9.129120e-04f + 0.000000e+00f * _Complex_I, -5.925973e-04f - 1.026409e-03f * _Complex_I,
865 -5.989682e-04f + 1.037443e-03f * _Complex_I, 1.158755e-03f + 2.838131e-19f * _Complex_I,
866 -8.992493e-04f - 1.557545e-03f * _Complex_I, -1.283187e-03f + 2.222546e-03f * _Complex_I,
867 2.730635e-03f + 1.337625e-18f * _Complex_I, -1.283187e-03f - 2.222546e-03f * _Complex_I,
868 -8.992493e-04f + 1.557545e-03f * _Complex_I, 1.158755e-03f + 8.514393e-19f * _Complex_I,
869 -5.989682e-04f - 1.037443e-03f * _Complex_I, -5.925973e-04f + 1.026409e-03f * _Complex_I,
870 9.129120e-04f + 8.943958e-19f * _Complex_I } },
871 { { 8.228091e-04f + 0.000000e+00f * _Complex_I, -5.365069e-04f + 9.292572e-04f * _Complex_I,
872 -6.011501e-04f - 1.041223e-03f * _Complex_I, 1.249890e-03f - 3.061346e-19f * _Complex_I,
873 -7.632708e-04f + 1.322024e-03f * _Complex_I, -9.846035e-04f - 1.705383e-03f * _Complex_I,
874 2.080486e-03f - 1.019144e-18f * _Complex_I, -9.846035e-04f + 1.705383e-03f * _Complex_I,
875 -7.632708e-04f - 1.322024e-03f * _Complex_I, 1.249890e-03f - 9.184039e-19f * _Complex_I,
876 -6.011501e-04f + 1.041223e-03f * _Complex_I, -5.365069e-04f - 9.292572e-04f * _Complex_I,
877 8.228091e-04f - 8.061204e-19f * _Complex_I },
878 { -5.616336e-04f - 9.727779e-04f * _Complex_I, 1.382894e-03f + 0.000000e+00f * _Complex_I,
879 -8.694311e-04f + 1.505899e-03f * _Complex_I, -9.721139e-04f - 1.683751e-03f * _Complex_I,
880 2.446785e-03f - 2.772471e-18f * _Complex_I, -1.605471e-03f + 2.780758e-03f * _Complex_I,
881 -1.832781e-03f - 3.174469e-03f * _Complex_I, 3.210942e-03f - 7.276687e-18f * _Complex_I,
882 -1.223392e-03f + 2.118978e-03f * _Complex_I, -9.721139e-04f - 1.683751e-03f * _Complex_I,
883 1.738862e-03f - 1.277695e-18f * _Complex_I, -6.914471e-04f + 1.197621e-03f * _Complex_I,
884 -5.616336e-04f - 9.727779e-04f * _Complex_I },
885 { -5.723872e-04f + 9.914038e-04f * _Complex_I, -8.302721e-04f - 1.438073e-03f * _Complex_I,
886 2.445280e-03f + 0.000000e+00f * _Complex_I, -1.378399e-03f + 2.387458e-03f * _Complex_I,
887 -1.882898e-03f - 3.261274e-03f * _Complex_I, 4.921549e-03f - 1.205432e-18f * _Complex_I,
888 -2.760152e-03f + 4.780723e-03f * _Complex_I, -2.460774e-03f - 4.262186e-03f * _Complex_I,
889 3.765795e-03f - 1.844708e-18f * _Complex_I, -1.378399e-03f + 2.387458e-03f * _Complex_I,
890 -1.222640e-03f - 2.117675e-03f * _Complex_I, 1.660544e-03f - 1.220148e-18f * _Complex_I,
891 -5.723872e-04f + 9.914038e-04f * _Complex_I },
892 { 1.226482e-03f + 3.004015e-19f * _Complex_I, -9.600816e-04f + 1.662910e-03f * _Complex_I,
893 -1.495900e-03f - 2.590974e-03f * _Complex_I, 3.833507e-03f + 0.000000e+00f * _Complex_I,
894 -3.167257e-03f + 5.485850e-03f * _Complex_I, -4.303595e-03f - 7.454046e-03f * _Complex_I,
895 9.412791e-03f - 2.305469e-18f * _Complex_I, -4.303595e-03f + 7.454046e-03f * _Complex_I,
896 -3.167257e-03f - 5.485850e-03f * _Complex_I, 3.833507e-03f - 1.877877e-18f * _Complex_I,
897 -1.495900e-03f + 2.590974e-03f * _Complex_I, -9.600816e-04f - 1.662910e-03f * _Complex_I,
898 1.226482e-03f - 9.012046e-19f * _Complex_I },
899 { -9.898007e-04f - 1.714385e-03f * _Complex_I, 3.215120e-03f + 3.643077e-18f * _Complex_I,
900 -2.507621e-03f + 4.343327e-03f * _Complex_I, -3.557798e-03f - 6.162286e-03f * _Complex_I,
901 1.105198e-02f + 0.000000e+00f * _Complex_I, -7.691179e-03f + 1.332151e-02f * _Complex_I,
902 -8.705793e-03f - 1.507888e-02f * _Complex_I, 1.538236e-02f - 3.767591e-18f * _Complex_I,
903 -5.525988e-03f + 9.571292e-03f * _Complex_I, -3.557798e-03f - 6.162286e-03f * _Complex_I,
904 5.015242e-03f - 2.456760e-18f * _Complex_I, -1.607560e-03f + 2.784375e-03f * _Complex_I,
905 -9.898007e-04f - 1.714385e-03f * _Complex_I },
906 { -1.414655e-03f + 2.450254e-03f * _Complex_I, -2.341263e-03f - 4.055186e-03f * _Complex_I,
907 6.915775e-03f + 1.693876e-18f * _Complex_I, -5.086403e-03f + 8.809908e-03f * _Complex_I,
908 -8.062191e-03f - 1.396412e-02f * _Complex_I, 2.415333e-02f + 0.000000e+00f * _Complex_I,
909 -1.451128e-02f + 2.513428e-02f * _Complex_I, -1.207667e-02f - 2.091740e-02f * _Complex_I,
910 1.612438e-02f - 3.949335e-18f * _Complex_I, -5.086403e-03f + 8.809908e-03f * _Complex_I,
911 -3.457887e-03f - 5.989237e-03f * _Complex_I, 4.682526e-03f - 1.061161e-17f * _Complex_I,
912 -1.414655e-03f + 2.450254e-03f * _Complex_I },
913 { 3.039574e-03f + 1.488962e-18f * _Complex_I, -2.598226e-03f + 4.500260e-03f * _Complex_I,
914 -3.750909e-03f - 6.496765e-03f * _Complex_I, 1.119776e-02f + 2.742661e-18f * _Complex_I,
915 -9.210579e-03f + 1.595319e-02f * _Complex_I, -1.464762e-02f - 2.537042e-02f * _Complex_I,
916 3.672076e-02f + 0.000000e+00f * _Complex_I, -1.464762e-02f + 2.537042e-02f * _Complex_I,
917 -9.210579e-03f - 1.595319e-02f * _Complex_I, 1.119776e-02f - 2.742661e-18f * _Complex_I,
918 -3.750909e-03f + 6.496765e-03f * _Complex_I, -2.598226e-03f - 4.500260e-03f * _Complex_I,
919 3.039574e-03f - 1.488962e-18f * _Complex_I },
920 { -1.414655e-03f - 2.450254e-03f * _Complex_I, 4.682526e-03f + 1.061161e-17f * _Complex_I,
921 -3.457887e-03f + 5.989237e-03f * _Complex_I, -5.086403e-03f - 8.809908e-03f * _Complex_I,
922 1.612438e-02f + 3.949335e-18f * _Complex_I, -1.207667e-02f + 2.091740e-02f * _Complex_I,
923 -1.451128e-02f - 2.513428e-02f * _Complex_I, 2.415333e-02f + 0.000000e+00f * _Complex_I,
924 -8.062191e-03f + 1.396412e-02f * _Complex_I, -5.086403e-03f - 8.809908e-03f * _Complex_I,
925 6.915775e-03f - 1.693876e-18f * _Complex_I, -2.341263e-03f + 4.055186e-03f * _Complex_I,
926 -1.414655e-03f - 2.450254e-03f * _Complex_I },
927 { -9.898007e-04f + 1.714385e-03f * _Complex_I, -1.607560e-03f - 2.784375e-03f * _Complex_I,
928 5.015242e-03f + 2.456760e-18f * _Complex_I, -3.557798e-03f + 6.162286e-03f * _Complex_I,
929 -5.525988e-03f - 9.571292e-03f * _Complex_I, 1.538236e-02f + 3.767591e-18f * _Complex_I,
930 -8.705793e-03f + 1.507888e-02f * _Complex_I, -7.691179e-03f - 1.332151e-02f * _Complex_I,
931 1.105198e-02f + 0.000000e+00f * _Complex_I, -3.557798e-03f + 6.162286e-03f * _Complex_I,
932 -2.507621e-03f - 4.343327e-03f * _Complex_I, 3.215120e-03f - 3.643077e-18f * _Complex_I,
933 -9.898007e-04f + 1.714385e-03f * _Complex_I },
934 { 1.226482e-03f + 9.012046e-19f * _Complex_I, -9.600816e-04f + 1.662910e-03f * _Complex_I,
935 -1.495900e-03f - 2.590974e-03f * _Complex_I, 3.833507e-03f + 1.877877e-18f * _Complex_I,
936 -3.167257e-03f + 5.485850e-03f * _Complex_I, -4.303595e-03f - 7.454046e-03f * _Complex_I,
937 9.412791e-03f + 2.305469e-18f * _Complex_I, -4.303595e-03f + 7.454046e-03f * _Complex_I,
938 -3.167257e-03f - 5.485850e-03f * _Complex_I, 3.833507e-03f + 0.000000e+00f * _Complex_I,
939 -1.495900e-03f + 2.590974e-03f * _Complex_I, -9.600816e-04f - 1.662910e-03f * _Complex_I,
940 1.226482e-03f - 3.004015e-19f * _Complex_I },
941 { -5.723872e-04f - 9.914038e-04f * _Complex_I, 1.660544e-03f + 1.220148e-18f * _Complex_I,
942 -1.222640e-03f + 2.117675e-03f * _Complex_I, -1.378399e-03f - 2.387458e-03f * _Complex_I,
943 3.765795e-03f + 1.844708e-18f * _Complex_I, -2.460774e-03f + 4.262186e-03f * _Complex_I,
944 -2.760152e-03f - 4.780723e-03f * _Complex_I, 4.921549e-03f + 1.205432e-18f * _Complex_I,
945 -1.882898e-03f + 3.261274e-03f * _Complex_I, -1.378399e-03f - 2.387458e-03f * _Complex_I,
946 2.445280e-03f + 0.000000e+00f * _Complex_I, -8.302721e-04f + 1.438073e-03f * _Complex_I,
947 -5.723872e-04f - 9.914038e-04f * _Complex_I },
948 { -5.616336e-04f + 9.727779e-04f * _Complex_I, -6.914471e-04f - 1.197621e-03f * _Complex_I,
949 1.738862e-03f + 1.277695e-18f * _Complex_I, -9.721139e-04f + 1.683751e-03f * _Complex_I,
950 -1.223392e-03f - 2.118978e-03f * _Complex_I, 3.210942e-03f + 7.276687e-18f * _Complex_I,
951 -1.832781e-03f + 3.174469e-03f * _Complex_I, -1.605471e-03f - 2.780758e-03f * _Complex_I,
952 2.446785e-03f + 2.772471e-18f * _Complex_I, -9.721139e-04f + 1.683751e-03f * _Complex_I,
953 -8.694311e-04f - 1.505899e-03f * _Complex_I, 1.382894e-03f + 0.000000e+00f * _Complex_I,
954 -5.616336e-04f + 9.727779e-04f * _Complex_I },
955 { 8.228091e-04f + 8.061204e-19f * _Complex_I, -5.365069e-04f + 9.292572e-04f * _Complex_I,
956 -6.011501e-04f - 1.041223e-03f * _Complex_I, 1.249890e-03f + 9.184039e-19f * _Complex_I,
957 -7.632708e-04f + 1.322024e-03f * _Complex_I, -9.846035e-04f - 1.705383e-03f * _Complex_I,
958 2.080486e-03f + 1.019144e-18f * _Complex_I, -9.846035e-04f + 1.705383e-03f * _Complex_I,
959 -7.632708e-04f - 1.322024e-03f * _Complex_I, 1.249890e-03f + 3.061346e-19f * _Complex_I,
960 -6.011501e-04f + 1.041223e-03f * _Complex_I, -5.365069e-04f - 9.292572e-04f * _Complex_I,
961 8.228091e-04f + 0.000000e+00f * _Complex_I } },
962 { { 1.221201e-03f + 5.982162e-19f * _Complex_I, -1.773498e-03f - 6.515727e-19f * _Complex_I,
963 1.246697e-03f + 3.053526e-19f * _Complex_I, -8.215306e-04f - 1.006085e-19f * _Complex_I,
964 7.609372e-04f + 0.000000e+00f * _Complex_I, -4.863927e-04f + 5.956592e-20f * _Complex_I,
965 4.882100e-04f - 1.195770e-19f * _Complex_I, -4.863927e-04f + 1.786978e-19f * _Complex_I,
966 7.609372e-04f - 3.727517e-19f * _Complex_I, -8.215306e-04f + 5.030424e-19f * _Complex_I,
967 1.246697e-03f - 9.160579e-19f * _Complex_I, -1.773498e-03f + 1.520336e-18f * _Complex_I,
968 1.221201e-03f - 1.196432e-18f * _Complex_I },
969 { 7.406884e-04f - 1.282910e-03f * _Complex_I, -1.025411e-03f + 1.776065e-03f * _Complex_I,
970 7.186273e-04f - 1.244699e-03f * _Complex_I, -4.025606e-04f + 6.972554e-04f * _Complex_I,
971 5.908383e-04f - 1.023362e-03f * _Complex_I, -1.125190e-03f + 1.948886e-03f * _Complex_I,
972 1.432695e-03f - 2.481501e-03f * _Complex_I, -1.125190e-03f + 1.948886e-03f * _Complex_I,
973 5.908383e-04f - 1.023362e-03f * _Complex_I, -4.025606e-04f + 6.972554e-04f * _Complex_I,
974 7.186273e-04f - 1.244699e-03f * _Complex_I, -1.025411e-03f + 1.776065e-03f * _Complex_I,
975 7.406884e-04f - 1.282910e-03f * _Complex_I },
976 { -7.162255e-04f - 1.240539e-03f * _Complex_I, 8.961176e-04f + 1.552121e-03f * _Complex_I,
977 -6.705589e-04f - 1.161442e-03f * _Complex_I, 6.187140e-04f + 1.071644e-03f * _Complex_I,
978 -1.165433e-03f - 2.018589e-03f * _Complex_I, 1.948120e-03f + 3.374242e-03f * _Complex_I,
979 -2.297663e-03f - 3.979669e-03f * _Complex_I, 1.948120e-03f + 3.374242e-03f * _Complex_I,
980 -1.165433e-03f - 2.018589e-03f * _Complex_I, 6.187140e-04f + 1.071644e-03f * _Complex_I,
981 -6.705589e-04f - 1.161442e-03f * _Complex_I, 8.961176e-04f + 1.552121e-03f * _Complex_I,
982 -7.162255e-04f - 1.240539e-03f * _Complex_I },
983 { -1.280260e-03f - 7.839331e-19f * _Complex_I, 1.987108e-03f + 9.734024e-19f * _Complex_I,
984 -2.614019e-03f - 9.603749e-19f * _Complex_I, 3.635167e-03f + 8.903590e-19f * _Complex_I,
985 -4.954867e-03f - 6.067962e-19f * _Complex_I, 6.653220e-03f + 0.000000e+00f * _Complex_I,
986 -7.600546e-03f + 9.307984e-19f * _Complex_I, 6.653220e-03f - 1.629569e-18f * _Complex_I,
987 -4.954867e-03f + 1.820389e-18f * _Complex_I, 3.635167e-03f - 1.780718e-18f * _Complex_I,
988 -2.614019e-03f + 1.600625e-18f * _Complex_I, 1.987108e-03f - 1.460104e-18f * _Complex_I,
989 -1.280260e-03f + 1.097506e-18f * _Complex_I },
990 { -5.756945e-04f + 9.971322e-04f * _Complex_I, 1.268614e-03f - 2.197304e-03f * _Complex_I,
991 -2.421407e-03f + 4.194000e-03f * _Complex_I, 4.045715e-03f - 7.007384e-03f * _Complex_I,
992 -5.527367e-03f + 9.573681e-03f * _Complex_I, 6.837207e-03f - 1.184239e-02f * _Complex_I,
993 -7.288212e-03f + 1.262355e-02f * _Complex_I, 6.837207e-03f - 1.184239e-02f * _Complex_I,
994 -5.527367e-03f + 9.573681e-03f * _Complex_I, 4.045715e-03f - 7.007384e-03f * _Complex_I,
995 -2.421407e-03f + 4.194000e-03f * _Complex_I, 1.268614e-03f - 2.197304e-03f * _Complex_I,
996 -5.756945e-04f + 9.971322e-04f * _Complex_I },
997 { 7.349896e-04f + 1.273039e-03f * _Complex_I, -1.748057e-03f - 3.027723e-03f * _Complex_I,
998 3.332671e-03f + 5.772355e-03f * _Complex_I, -6.051736e-03f - 1.048191e-02f * _Complex_I,
999 9.842376e-03f + 1.704749e-02f * _Complex_I, -1.401169e-02f - 2.426897e-02f * _Complex_I,
1000 1.598601e-02f + 2.768858e-02f * _Complex_I, -1.401169e-02f - 2.426897e-02f * _Complex_I,
1001 9.842376e-03f + 1.704749e-02f * _Complex_I, -6.051736e-03f - 1.048191e-02f * _Complex_I,
1002 3.332671e-03f + 5.772355e-03f * _Complex_I, -1.748057e-03f - 3.027723e-03f * _Complex_I,
1003 7.349896e-04f + 1.273039e-03f * _Complex_I },
1004 { 1.400383e-03f + 1.028985e-18f * _Complex_I, -3.545886e-03f - 2.171229e-18f * _Complex_I,
1005 7.289370e-03f + 3.570761e-18f * _Complex_I, -1.418908e-02f - 5.212982e-18f * _Complex_I,
1006 2.520839e-02f + 6.174275e-18f * _Complex_I, -3.934772e-02f - 4.818706e-18f * _Complex_I,
1007 4.797481e-02f + 0.000000e+00f * _Complex_I, -3.934772e-02f + 4.818706e-18f * _Complex_I,
1008 2.520839e-02f - 6.174275e-18f * _Complex_I, -1.418908e-02f + 5.212982e-18f * _Complex_I,
1009 7.289370e-03f - 3.570761e-18f * _Complex_I, -3.545886e-03f + 2.171229e-18f * _Complex_I,
1010 1.400383e-03f - 1.028985e-18f * _Complex_I },
1011 { 7.349896e-04f - 1.273039e-03f * _Complex_I, -1.748057e-03f + 3.027723e-03f * _Complex_I,
1012 3.332671e-03f - 5.772355e-03f * _Complex_I, -6.051736e-03f + 1.048191e-02f * _Complex_I,
1013 9.842376e-03f - 1.704749e-02f * _Complex_I, -1.401169e-02f + 2.426897e-02f * _Complex_I,
1014 1.598601e-02f - 2.768858e-02f * _Complex_I, -1.401169e-02f + 2.426897e-02f * _Complex_I,
1015 9.842376e-03f - 1.704749e-02f * _Complex_I, -6.051736e-03f + 1.048191e-02f * _Complex_I,
1016 3.332671e-03f - 5.772355e-03f * _Complex_I, -1.748057e-03f + 3.027723e-03f * _Complex_I,
1017 7.349896e-04f - 1.273039e-03f * _Complex_I },
1018 { -5.756945e-04f - 9.971322e-04f * _Complex_I, 1.268614e-03f + 2.197304e-03f * _Complex_I,
1019 -2.421407e-03f - 4.194000e-03f * _Complex_I, 4.045715e-03f + 7.007384e-03f * _Complex_I,
1020 -5.527367e-03f - 9.573681e-03f * _Complex_I, 6.837207e-03f + 1.184239e-02f * _Complex_I,
1021 -7.288212e-03f - 1.262355e-02f * _Complex_I, 6.837207e-03f + 1.184239e-02f * _Complex_I,
1022 -5.527367e-03f - 9.573681e-03f * _Complex_I, 4.045715e-03f + 7.007384e-03f * _Complex_I,
1023 -2.421407e-03f - 4.194000e-03f * _Complex_I, 1.268614e-03f + 2.197304e-03f * _Complex_I,
1024 -5.756945e-04f - 9.971322e-04f * _Complex_I },
1025 { -1.280260e-03f - 1.097506e-18f * _Complex_I, 1.987108e-03f + 1.460104e-18f * _Complex_I,
1026 -2.614019e-03f - 1.600625e-18f * _Complex_I, 3.635167e-03f + 1.780718e-18f * _Complex_I,
1027 -4.954867e-03f - 1.820389e-18f * _Complex_I, 6.653220e-03f + 1.629569e-18f * _Complex_I,
1028 -7.600546e-03f - 9.307984e-19f * _Complex_I, 6.653220e-03f + 0.000000e+00f * _Complex_I,
1029 -4.954867e-03f + 6.067962e-19f * _Complex_I, 3.635167e-03f - 8.903590e-19f * _Complex_I,
1030 -2.614019e-03f + 9.603749e-19f * _Complex_I, 1.987108e-03f - 9.734024e-19f * _Complex_I,
1031 -1.280260e-03f + 7.839331e-19f * _Complex_I },
1032 { -7.162255e-04f + 1.240539e-03f * _Complex_I, 8.961176e-04f - 1.552121e-03f * _Complex_I,
1033 -6.705589e-04f + 1.161442e-03f * _Complex_I, 6.187140e-04f - 1.071644e-03f * _Complex_I,
1034 -1.165433e-03f + 2.018589e-03f * _Complex_I, 1.948120e-03f - 3.374242e-03f * _Complex_I,
1035 -2.297663e-03f + 3.979669e-03f * _Complex_I, 1.948120e-03f - 3.374242e-03f * _Complex_I,
1036 -1.165433e-03f + 2.018589e-03f * _Complex_I, 6.187140e-04f - 1.071644e-03f * _Complex_I,
1037 -6.705589e-04f + 1.161442e-03f * _Complex_I, 8.961176e-04f - 1.552121e-03f * _Complex_I,
1038 -7.162255e-04f + 1.240539e-03f * _Complex_I },
1039 { 7.406884e-04f + 1.282910e-03f * _Complex_I, -1.025411e-03f - 1.776065e-03f * _Complex_I,
1040 7.186273e-04f + 1.244699e-03f * _Complex_I, -4.025606e-04f - 6.972554e-04f * _Complex_I,
1041 5.908383e-04f + 1.023362e-03f * _Complex_I, -1.125190e-03f - 1.948886e-03f * _Complex_I,
1042 1.432695e-03f + 2.481501e-03f * _Complex_I, -1.125190e-03f - 1.948886e-03f * _Complex_I,
1043 5.908383e-04f + 1.023362e-03f * _Complex_I, -4.025606e-04f - 6.972554e-04f * _Complex_I,
1044 7.186273e-04f + 1.244699e-03f * _Complex_I, -1.025411e-03f - 1.776065e-03f * _Complex_I,
1045 7.406884e-04f + 1.282910e-03f * _Complex_I },
1046 { 1.221201e-03f + 1.196432e-18f * _Complex_I, -1.773498e-03f - 1.520336e-18f * _Complex_I,
1047 1.246697e-03f + 9.160579e-19f * _Complex_I, -8.215306e-04f - 5.030424e-19f * _Complex_I,
1048 7.609372e-04f + 3.727517e-19f * _Complex_I, -4.863927e-04f - 1.786978e-19f * _Complex_I,
1049 4.882100e-04f + 1.195770e-19f * _Complex_I, -4.863927e-04f - 5.956592e-20f * _Complex_I,
1050 7.609372e-04f + 0.000000e+00f * _Complex_I, -8.215306e-04f + 1.006085e-19f * _Complex_I,
1051 1.246697e-03f - 3.053526e-19f * _Complex_I, -1.773498e-03f + 6.515727e-19f * _Complex_I,
1052 1.221201e-03f - 5.982162e-19f * _Complex_I } } };
1053
1054 const size_t buffer_size = (size_t)TS * TS * (ndir * 4 + 7) * sizeof(float);
1055 size_t padded_buffer_size;
1056 char *const all_buffers = (char *)dt_alloc_perthread(buffer_size, sizeof(char), &padded_buffer_size);
1057 if(!all_buffers)
1058 {
1059 fprintf(stderr, "[demosaic] not able to allocate FDC base buffers\n");
1060 return;
1061 }
1062
1063 /* Map a green hexagon around each non-green pixel and vice versa: */
1064 for(int row = 0; row < 3; row++)
1065 for(int col = 0; col < 3; col++)
1066 for(int ng = 0, d = 0; d < 10; d += 2)
1067 {
1068 int g = FCxtrans(row, col, NULL, xtrans) == 1;
1069 if(FCxtrans(row + orth[d], col + orth[d + 2], NULL, xtrans) == 1)
1070 ng = 0;
1071 else
1072 ng++;
1073 // if there are four non-green pixels adjacent in cardinal
1074 // directions, this is the solitary green pixel
1075 if(ng == 4)
1076 {
1077 sgrow = row;
1078 sgcol = col;
1079 }
1080 if(ng == g + 1)
1081 for(int c = 0; c < 8; c++)
1082 {
1083 int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
1084 int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
1085 // offset within TSxTS buffer
1086 allhex[row][col][c ^ (g * 2 & d)] = h + v * TS;
1087 }
1088 }
1089
1090 // extra passes propagates out errors at edges, hence need more padding
1091 const int pad_tile = 13;
1092
1093 // calculate offsets for this roi
1094 int rowoffset = 0;
1095 int coloffset = 0;
1096 for(int row = 0; row < 6; row++)
1097 {
1098 if(!((row - sgrow) % 3))
1099 {
1100 for(int col = 0; col < 6; col++)
1101 {
1102 if(!((col - sgcol) % 3) && (FCxtrans(row, col + 1, roi_in, xtrans) == 0))
1103 {
1104 rowoffset = 37 - row - pad_tile; // 1 plus a generous multiple of 6
1105 coloffset = 37 - col - pad_tile; // to avoid that this value gets negative
1106 break;
1107 }
1108 }
1109 break;
1110 }
1111 }
1112
1113 // depending on the iso, use either a hybrid approach for chroma, or pure fdc
1114 float hybrid_fdc[2] = { 1.0f, 0.0f };
1115 const int xover_iso = dt_conf_get_int("plugins/darkroom/demosaic/fdc_xover_iso");
1116 int iso = self->dev->image_storage.exif_iso;
1117 if(iso > xover_iso)
1118 {
1119 hybrid_fdc[0] = 0.0f;
1120 hybrid_fdc[1] = 1.0f;
1121 }
1122
1123#ifdef _OPENMP
1124#pragma omp parallel for default(none) \
1125 dt_omp_firstprivate(ndir, all_buffers, dir, directionality, harr, height, in, Minv, modarr, roi_in, width, \
1126 xtrans, pad_tile, padded_buffer_size) \
1127 shared(sgrow, sgcol, allhex, out, rowoffset, coloffset, hybrid_fdc) schedule(static)
1128#endif
1129 // step through TSxTS cells of image, each tile overlapping the
1130 // prior as interpolation needs a substantial border
1131 for(int top = -pad_tile; top < height - pad_tile; top += TS - (pad_tile * 2))
1132 {
1133 char *const buffer = dt_get_perthread(all_buffers, padded_buffer_size);
1134 // rgb points to ndir TSxTS tiles of 3 channels (R, G, and B)
1135 float(*rgb)[TS][TS][3] = (float(*)[TS][TS][3])buffer;
1136 // yuv points to 3 channel (Y, u, and v) TSxTS tiles
1137 // note that channels come before tiles to allow for a
1138 // vectorization optimization when building drv[] from yuv[]
1139 float (*const yuv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1140 // drv points to ndir TSxTS tiles, each a single channel of derivatives
1141 float (*const drv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3 + 3) * sizeof(float));
1142 // gmin and gmax reuse memory which is used later by yuv buffer;
1143 // each points to a TSxTS tile of single channel data
1144 float (*const gmin)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1145 float (*const gmax)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3 + 1) * sizeof(float));
1146 // homo and homosum reuse memory which is used earlier in the
1147 // loop; each points to ndir single-channel TSxTS tiles
1148 uint8_t (*const homo)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1149 uint8_t (*const homosum)[TS][TS]
1150 = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float) + TS * TS * ndir * sizeof(uint8_t));
1151 // append all fdc related buffers
1152 float complex *fdc_buf_start = (float complex *)(buffer + TS * TS * (ndir * 4 + 3) * sizeof(float));
1153 const int fdc_buf_size = TS * TS;
1154 float(*const i_src) = (float *)fdc_buf_start;
1155 float complex(*const o_src) = fdc_buf_start + fdc_buf_size;
1156 // by the time the chroma values are calculated, o_src can be overwritten.
1157 float(*const fdc_chroma) = (float *)o_src;
1158
1159 for(int left = -pad_tile; left < width - pad_tile; left += TS - (pad_tile * 2))
1160 {
1161 int mrow = MIN(top + TS, height + pad_tile);
1162 int mcol = MIN(left + TS, width + pad_tile);
1163
1164 // Copy current tile from in to image buffer. If border goes
1165 // beyond edges of image, fill with mirrored/interpolated edges.
1166 // The extra border avoids discontinuities at image edges.
1167 for(int row = top; row < mrow; row++)
1168 for(int col = left; col < mcol; col++)
1169 {
1170 float(*const pix) = rgb[0][row - top][col - left];
1171 if((col >= 0) && (row >= 0) && (col < width) && (row < height))
1172 {
1173 const int f = FCxtrans(row, col, roi_in, xtrans);
1174 for(int c = 0; c < 3; c++) pix[c] = (c == f) ? in[roi_in->width * row + col] : 0.f;
1175 *(i_src + TS * (row - top) + (col - left)) = in[roi_in->width * row + col];
1176 }
1177 else
1178 {
1179 // mirror a border pixel if beyond image edge
1180 const int c = FCxtrans(row, col, roi_in, xtrans);
1181 for(int cc = 0; cc < 3; cc++)
1182 if(cc != c)
1183 pix[cc] = 0.0f;
1184 else
1185 {
1186#define TRANSLATE(n, size) ((n >= size) ? (2 * size - n - 2) : abs(n))
1187 const int cy = TRANSLATE(row, height), cx = TRANSLATE(col, width);
1188 if(c == FCxtrans(cy, cx, roi_in, xtrans))
1189 {
1190 pix[c] = in[roi_in->width * cy + cx];
1191 *(i_src + TS * (row - top) + (col - left)) = in[roi_in->width * cy + cx];
1192 }
1193 else
1194 {
1195 // interpolate if mirror pixel is a different color
1196 float sum = 0.0f;
1197 uint8_t count = 0;
1198 for(int y = row - 1; y <= row + 1; y++)
1199 for(int x = col - 1; x <= col + 1; x++)
1200 {
1201 const int yy = TRANSLATE(y, height), xx = TRANSLATE(x, width);
1202 const int ff = FCxtrans(yy, xx, roi_in, xtrans);
1203 if(ff == c)
1204 {
1205 sum += in[roi_in->width * yy + xx];
1206 count++;
1207 }
1208 }
1209 pix[c] = sum / count;
1210 *(i_src + TS * (row - top) + (col - left)) = pix[c];
1211 }
1212 }
1213 }
1214 }
1215
1216 // duplicate rgb[0] to rgb[1], rgb[2], and rgb[3]
1217 for(int c = 1; c <= 3; c++) memcpy(rgb[c], rgb[0], sizeof(*rgb));
1218
1219 // note that successive calculations are inset within the tile
1220 // so as to give enough border data, and there needs to be a 6
1221 // pixel border initially to allow allhex to find neighboring
1222 // pixels
1223
1224 /* Set green1 and green3 to the minimum and maximum allowed values: */
1225 // Run through each red/blue or blue/red pair, setting their g1
1226 // and g3 values to the min/max of green pixels surrounding the
1227 // pair. Use a 3 pixel border as gmin/gmax is used by
1228 // interpolate green which has a 3 pixel border.
1229 const int pad_g1_g3 = 3;
1230 for(int row = top + pad_g1_g3; row < mrow - pad_g1_g3; row++)
1231 {
1232 // setting max to 0.0f signifies that this is a new pair, which
1233 // requires a new min/max calculation of its neighboring greens
1234 float min = FLT_MAX, max = 0.0f;
1235 for(int col = left + pad_g1_g3; col < mcol - pad_g1_g3; col++)
1236 {
1237 // if in row of horizontal red & blue pairs (or processing
1238 // vertical red & blue pairs near image bottom), reset min/max
1239 // between each pair
1240 if(FCxtrans(row, col, roi_in, xtrans) == 1)
1241 {
1242 min = FLT_MAX, max = 0.0f;
1243 continue;
1244 }
1245 // if at start of red & blue pair, calculate min/max of green
1246 // pixels surrounding it; note that while normally using == to
1247 // compare floats is suspect, here the check is if 0.0f has
1248 // explicitly been assigned to max (which signifies a new
1249 // red/blue pair)
1250 if(max == 0.0f)
1251 {
1252 float (*const pix)[3] = &rgb[0][row - top][col - left];
1253 const short *const hex = hexmap(row, col, allhex);
1254 for(int c = 0; c < 6; c++)
1255 {
1256 const float val = pix[hex[c]][1];
1257 if(min > val) min = val;
1258 if(max < val) max = val;
1259 }
1260 }
1261 gmin[row - top][col - left] = min;
1262 gmax[row - top][col - left] = max;
1263 // handle vertical red/blue pairs
1264 switch((row - sgrow) % 3)
1265 {
1266 // hop down a row to second pixel in vertical pair
1267 case 1:
1268 if(row < mrow - 4) row++, col--;
1269 break;
1270 // then if not done with the row hop up and right to next
1271 // vertical red/blue pair, resetting min/max
1272 case 2:
1273 min = FLT_MAX, max = 0.0f;
1274 if((col += 2) < mcol - 4 && row > top + 3) row--;
1275 }
1276 }
1277 }
1278
1279 /* Interpolate green horizontally, vertically, and along both diagonals: */
1280 // need a 3 pixel border here as 3*hex[] can have a 3 unit offset
1281 const int pad_g_interp = 3;
1282 for(int row = top + pad_g_interp; row < mrow - pad_g_interp; row++)
1283 for(int col = left + pad_g_interp; col < mcol - pad_g_interp; col++)
1284 {
1285 float color[8];
1286 int f = FCxtrans(row, col, roi_in, xtrans);
1287 if(f == 1) continue;
1288 float (*const pix)[3] = &rgb[0][row - top][col - left];
1289 const short *const hex = hexmap(row, col, allhex);
1290 // TODO: these constants come from integer math constants in
1291 // dcraw -- calculate them instead from interpolation math
1292 color[0] = 0.6796875f * (pix[hex[1]][1] + pix[hex[0]][1])
1293 - 0.1796875f * (pix[2 * hex[1]][1] + pix[2 * hex[0]][1]);
1294 color[1] = 0.87109375f * pix[hex[3]][1] + pix[hex[2]][1] * 0.13f
1295 + 0.359375f * (pix[0][f] - pix[-hex[2]][f]);
1296 for(int c = 0; c < 2; c++)
1297 color[2 + c] = 0.640625f * pix[hex[4 + c]][1] + 0.359375f * pix[-2 * hex[4 + c]][1]
1298 + 0.12890625f * (2 * pix[0][f] - pix[3 * hex[4 + c]][f] - pix[-3 * hex[4 + c]][f]);
1299 for(int c = 0; c < 4; c++)
1300 rgb[c ^ !((row - sgrow) % 3)][row - top][col - left][1]
1301 = CLAMPS(color[c], gmin[row - top][col - left], gmax[row - top][col - left]);
1302 }
1303
1304 /* Interpolate red and blue values for solitary green pixels: */
1305 const int pad_rb_g = 6;
1306 for(int row = (top - sgrow + pad_rb_g + 2) / 3 * 3 + sgrow; row < mrow - pad_rb_g; row += 3)
1307 for(int col = (left - sgcol + pad_rb_g + 2) / 3 * 3 + sgcol; col < mcol - pad_rb_g; col += 3)
1308 {
1309 float(*rfx)[3] = &rgb[0][row - top][col - left];
1310 int h = FCxtrans(row, col + 1, roi_in, xtrans);
1311 float diff[6] = { 0.0f };
1312 float color[3][8];
1313 for(int i = 1, d = 0; d < 6; d++, i ^= TS ^ 1, h ^= 2)
1314 {
1315 for(int c = 0; c < 2; c++, h ^= 2)
1316 {
1317 float g = 2 * rfx[0][1] - rfx[i << c][1] - rfx[-(i << c)][1];
1318 color[h][d] = g + rfx[i << c][h] + rfx[-(i << c)][h];
1319 if(d > 1)
1320 diff[d] += SQR(rfx[i << c][1] - rfx[-(i << c)][1] - rfx[i << c][h] + rfx[-(i << c)][h]) + SQR(g);
1321 }
1322 if(d > 1 && (d & 1))
1323 if(diff[d - 1] < diff[d])
1324 for(int c = 0; c < 2; c++) color[c * 2][d] = color[c * 2][d - 1];
1325 if(d < 2 || (d & 1))
1326 {
1327 for(int c = 0; c < 2; c++) rfx[0][c * 2] = color[c * 2][d] / 2.f;
1328 rfx += TS * TS;
1329 }
1330 }
1331 }
1332
1333 /* Interpolate red for blue pixels and vice versa: */
1334 const int pad_rb_br = 6;
1335 for(int row = top + pad_rb_br; row < mrow - pad_rb_br; row++)
1336 for(int col = left + pad_rb_br; col < mcol - pad_rb_br; col++)
1337 {
1338 int f = 2 - FCxtrans(row, col, roi_in, xtrans);
1339 if(f == 1) continue;
1340 float(*rfx)[3] = &rgb[0][row - top][col - left];
1341 int c = (row - sgrow) % 3 ? TS : 1;
1342 int h = 3 * (c ^ TS ^ 1);
1343 for(int d = 0; d < 4; d++, rfx += TS * TS)
1344 {
1345 int i = d > 1 || ((d ^ c) & 1)
1346 || ((fabsf(rfx[0][1] - rfx[c][1]) + fabsf(rfx[0][1] - rfx[-c][1]))
1347 < 2.f * (fabsf(rfx[0][1] - rfx[h][1]) + fabsf(rfx[0][1] - rfx[-h][1]))) ? c : h;
1348 rfx[0][f] = (rfx[i][f] + rfx[-i][f] + 2.f * rfx[0][1] - rfx[i][1] - rfx[-i][1]) / 2.f;
1349 }
1350 }
1351
1352 /* Fill in red and blue for 2x2 blocks of green: */
1353 const int pad_g22 = 8;
1354 for(int row = top + pad_g22; row < mrow - pad_g22; row++)
1355 if((row - sgrow) % 3)
1356 for(int col = left + pad_g22; col < mcol - pad_g22; col++)
1357 if((col - sgcol) % 3)
1358 {
1359 float redblue[3][3];
1360 float(*rfx)[3] = &rgb[0][row - top][col - left];
1361 const short *const hex = hexmap(row, col, allhex);
1362 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
1363 if(hex[d] + hex[d + 1])
1364 {
1365 float g = 3.f * rfx[0][1] - 2.f * rfx[hex[d]][1] - rfx[hex[d + 1]][1];
1366 for(int c = 0; c < 4; c += 2)
1367 {
1368 rfx[0][c] = (g + 2.f * rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 3.f;
1369 redblue[d][c] = rfx[0][c];
1370 }
1371 }
1372 else
1373 {
1374 float g = 2.f * rfx[0][1] - rfx[hex[d]][1] - rfx[hex[d + 1]][1];
1375 for(int c = 0; c < 4; c += 2)
1376 {
1377 rfx[0][c] = (g + rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 2.f;
1378 redblue[d][c] = rfx[0][c];
1379 }
1380 }
1381 // to fill in red and blue also for diagonal directions
1382 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
1383 for(int c = 0; c < 4; c += 2) rfx[0][c] = (redblue[0][c] + redblue[2][c]) * 0.5f;
1384 }
1385
1386 // jump back to the first set of rgb buffers (this is a nop
1387 // unless on the second pass)
1388 rgb = (float(*)[TS][TS][3])buffer;
1389 // from here on out, mainly are working within the current tile
1390 // rather than in reference to the image, so don't offset
1391 // mrow/mcol by top/left of tile
1392 mrow -= top;
1393 mcol -= left;
1394
1395 /* Convert to perceptual colorspace and differentiate in all directions: */
1396 // Original dcraw algorithm uses CIELab as perceptual space
1397 // (presumably coming from original AHD) and converts taking
1398 // camera matrix into account. Now use YPbPr which requires much
1399 // less code and is nearly indistinguishable. It assumes the
1400 // camera RGB is roughly linear.
1401 for(int d = 0; d < ndir; d++)
1402 {
1403 const int pad_yuv = 8;
1404 for(int row = pad_yuv; row < mrow - pad_yuv; row++)
1405 for(int col = pad_yuv; col < mcol - pad_yuv; col++)
1406 {
1407 float *rx = rgb[d][row][col];
1408 // use ITU-R BT.2020 YPbPr, which is great, but could use
1409 // a better/simpler choice? note that imageop.h provides
1410 // dt_iop_RGB_to_YCbCr which uses Rec. 601 conversion,
1411 // which appears less good with specular highlights
1412 float y = 0.2627f * rx[0] + 0.6780f * rx[1] + 0.0593f * rx[2];
1413 yuv[0][row][col] = y;
1414 yuv[1][row][col] = (rx[2] - y) * 0.56433f;
1415 yuv[2][row][col] = (rx[0] - y) * 0.67815f;
1416 }
1417 // Note that f can offset by a column (-1 or +1) and by a row
1418 // (-TS or TS). The row-wise offsets cause the undefined
1419 // behavior sanitizer to warn of an out of bounds index, but
1420 // as yfx is multi-dimensional and there is sufficient
1421 // padding, that is not actually so.
1422 const int f = dir[d & 3];
1423 const int pad_drv = 9;
1424 for(int row = pad_drv; row < mrow - pad_drv; row++)
1425 for(int col = pad_drv; col < mcol - pad_drv; col++)
1426 {
1427 float(*yfx)[TS][TS] = (float(*)[TS][TS]) & yuv[0][row][col];
1428 drv[d][row][col] = SQR(2 * yfx[0][0][0] - yfx[0][0][f] - yfx[0][0][-f])
1429 + SQR(2 * yfx[1][0][0] - yfx[1][0][f] - yfx[1][0][-f])
1430 + SQR(2 * yfx[2][0][0] - yfx[2][0][f] - yfx[2][0][-f]);
1431 }
1432 }
1433
1434 /* Build homogeneity maps from the derivatives: */
1435 memset(homo, 0, sizeof(uint8_t) * ndir * TS * TS);
1436 const int pad_homo = 10;
1437 for(int row = pad_homo; row < mrow - pad_homo; row++)
1438 for(int col = pad_homo; col < mcol - pad_homo; col++)
1439 {
1440 float tr = FLT_MAX;
1441 for(int d = 0; d < ndir; d++)
1442 if(tr > drv[d][row][col]) tr = drv[d][row][col];
1443 tr *= 8;
1444 for(int d = 0; d < ndir; d++)
1445 for(int v = -1; v <= 1; v++)
1446 for(int h = -1; h <= 1; h++) homo[d][row][col] += ((drv[d][row + v][col + h] <= tr) ? 1 : 0);
1447 }
1448
1449 /* Build 5x5 sum of homogeneity maps for each pixel & direction */
1450 for(int d = 0; d < ndir; d++)
1451 for(int row = pad_tile; row < mrow - pad_tile; row++)
1452 {
1453 // start before first column where homo[d][row][col+2] != 0,
1454 // so can know v5sum and homosum[d][row][col] will be 0
1455 int col = pad_tile - 5;
1456 uint8_t v5sum[5] = { 0 };
1457 homosum[d][row][col] = 0;
1458 // calculate by rolling through column sums
1459 for(col++; col < mcol - pad_tile; col++)
1460 {
1461 uint8_t colsum = 0;
1462 for(int v = -2; v <= 2; v++) colsum += homo[d][row + v][col + 2];
1463 homosum[d][row][col] = homosum[d][row][col - 1] - v5sum[col % 5] + colsum;
1464 v5sum[col % 5] = colsum;
1465 }
1466 }
1467
1468 /* Calculate chroma values in fdc: */
1469 const int pad_fdc = 6;
1470 for(int row = pad_fdc; row < mrow - pad_fdc; row++)
1471 for(int col = pad_fdc; col < mcol - pad_fdc; col++)
1472 {
1473 int myrow, mycol;
1474 uint8_t hm[8] = { 0 };
1475 uint8_t maxval = 0;
1476 for(int d = 0; d < ndir; d++)
1477 {
1478 hm[d] = homosum[d][row][col];
1479 maxval = (maxval < hm[d] ? hm[d] : maxval);
1480 }
1481 maxval -= maxval >> 3;
1482 float dircount = 0;
1483 float dirsum = 0.f;
1484 for(int d = 0; d < ndir; d++)
1485 if(hm[d] >= maxval)
1486 {
1487 dircount++;
1488 dirsum += directionality[d];
1489 }
1490 float w = dirsum / (float)dircount;
1491 int fdc_row, fdc_col;
1492 float complex C2m, C5m, C7m, C10m;
1493#define CONV_FILT(VAR, FILT) \
1494 VAR = 0.0f + 0.0f * _Complex_I; \
1495 for(fdc_row = 0, myrow = row - 6; fdc_row < 13; fdc_row++, myrow++) \
1496 for(fdc_col = 0, mycol = col - 6; fdc_col < 13; fdc_col++, mycol++) \
1497 VAR += FILT[12 - fdc_row][12 - fdc_col] * *(i_src + TS * myrow + mycol);
1498 CONV_FILT(C2m, harr[0])
1499 CONV_FILT(C5m, harr[1])
1500 CONV_FILT(C7m, harr[2])
1501 CONV_FILT(C10m, harr[3])
1502#undef CONV_FILT
1503 // build the q vector components
1504 myrow = (row + rowoffset) % 6;
1505 mycol = (col + coloffset) % 6;
1506 float complex modulator[8];
1507 for(int c = 0; c < 8; c++) modulator[c] = modarr[myrow][mycol][c];
1508 float complex qmat[8];
1509 qmat[4] = w * C10m * modulator[0] - (1.0f - w) * C2m * modulator[1];
1510 qmat[6] = conjf(qmat[4]);
1511 qmat[1] = C5m * modulator[6];
1512 qmat[2] = conjf(-0.5f * qmat[1]);
1513 qmat[5] = conjf(qmat[2]);
1514 qmat[3] = C7m * modulator[7];
1515 qmat[7] = conjf(qmat[1]);
1516 // get L
1517 C2m = qmat[4] * (conjf(modulator[0]) - conjf(modulator[1]));
1518 float complex C3m = qmat[6] * (modulator[2] - modulator[3]);
1519 float complex C6m = qmat[2] * (conjf(modulator[4]) + conjf(modulator[5]));
1520 float complex C12m = qmat[5] * (modulator[4] + modulator[5]);
1521 float complex C18m = qmat[7] * modulator[6];
1522 qmat[0] = *(i_src + row * TS + col) - C2m - C3m - C5m - C6m - 2.0f * C7m - C12m - C18m;
1523 // get the rgb components from fdc
1524 dt_aligned_pixel_t rgbpix = { 0.f, 0.f, 0.f };
1525 // multiply with the inverse matrix of M
1526 for(int color = 0; color < 3; color++)
1527 for(int c = 0; c < 8; c++)
1528 {
1529 rgbpix[color] += Minv[color][c] * qmat[c];
1530 }
1531 // now separate luma and chroma for
1532 // frequency domain chroma
1533 // and store it in fdc_chroma
1534 float uv[2];
1535 float y = 0.2627f * rgbpix[0] + 0.6780f * rgbpix[1] + 0.0593f * rgbpix[2];
1536 uv[0] = (rgbpix[2] - y) * 0.56433f;
1537 uv[1] = (rgbpix[0] - y) * 0.67815f;
1538 for(int c = 0; c < 2; c++) *(fdc_chroma + c * TS * TS + row * TS + col) = uv[c];
1539 }
1540
1541 /* Average the most homogeneous pixels for the final result: */
1542 for(int row = pad_tile; row < mrow - pad_tile; row++)
1543 for(int col = pad_tile; col < mcol - pad_tile; col++)
1544 {
1545 uint8_t hm[8] = { 0 };
1546 uint8_t maxval = 0;
1547 for(int d = 0; d < ndir; d++)
1548 {
1549 hm[d] = homosum[d][row][col];
1550 maxval = (maxval < hm[d] ? hm[d] : maxval);
1551 }
1552 maxval -= maxval >> 3;
1553 for(int d = 0; d < ndir - 4; d++)
1554 {
1555 if(hm[d] < hm[d + 4])
1556 hm[d] = 0;
1557 else if(hm[d] > hm[d + 4])
1558 hm[d + 4] = 0;
1559 }
1560
1561 dt_aligned_pixel_t avg = { 0.f };
1562 for(int d = 0; d < ndir; d++)
1563 {
1564 if(hm[d] >= maxval)
1565 {
1566 for(int c = 0; c < 3; c++) avg[c] += rgb[d][row][col][c];
1567 avg[3]++;
1568 }
1569 }
1570 dt_aligned_pixel_t rgbpix;
1571 for(int c = 0; c < 3; c++) rgbpix[c] = avg[c] / avg[3];
1572 // preserve all components of Markesteijn for this pixel
1573 const float y = 0.2627f * rgbpix[0] + 0.6780f * rgbpix[1] + 0.0593f * rgbpix[2];
1574 const float um = (rgbpix[2] - y) * 0.56433f;
1575 const float vm = (rgbpix[0] - y) * 0.67815f;
1576 float uvf[2];
1577 // macros for fast meadian filtering
1578#define PIX_SWAP(a, b) \
1579 { \
1580 tempf = (a); \
1581 (a) = (b); \
1582 (b) = tempf; \
1583 }
1584#define PIX_SORT(a, b) \
1585 { \
1586 if((a) > (b)) PIX_SWAP((a), (b)); \
1587 }
1588 // instead of merely reading the values, perform 5 pixel median filter
1589 // one median filter is required to avoid textile artifacts
1590 for(int chrm = 0; chrm < 2; chrm++)
1591 {
1592 float temp[5];
1593 float tempf;
1594 // load the window into temp
1595 memcpy(&temp[0], fdc_chroma + chrm * TS * TS + (row - 1) * TS + (col), sizeof(float) * 1);
1596 memcpy(&temp[1], fdc_chroma + chrm * TS * TS + (row)*TS + (col - 1), sizeof(float) * 3);
1597 memcpy(&temp[4], fdc_chroma + chrm * TS * TS + (row + 1) * TS + (col), sizeof(float) * 1);
1598 PIX_SORT(temp[0], temp[1]);
1599 PIX_SORT(temp[3], temp[4]);
1600 PIX_SORT(temp[0], temp[3]);
1601 PIX_SORT(temp[1], temp[4]);
1602 PIX_SORT(temp[1], temp[2]);
1603 PIX_SORT(temp[2], temp[3]);
1604 PIX_SORT(temp[1], temp[2]);
1605 uvf[chrm] = temp[2];
1606 }
1607 // use hybrid or pure fdc, depending on what was set above.
1608 // in case of hybrid, use the chroma that has the smallest
1609 // absolute value
1610 float uv[2];
1611 uv[0] = (((ABS(uvf[0]) < ABS(um)) & (ABS(uvf[1]) < (1.02f * ABS(vm)))) ? uvf[0] : um) * hybrid_fdc[0] + uvf[0] * hybrid_fdc[1];
1612 uv[1] = (((ABS(uvf[1]) < ABS(vm)) & (ABS(uvf[0]) < (1.02f * ABS(vm)))) ? uvf[1] : vm) * hybrid_fdc[0] + uvf[1] * hybrid_fdc[1];
1613 // combine the luma from Markesteijn with the chroma from above
1614 rgbpix[0] = y + 1.474600014746f * uv[1];
1615 rgbpix[1] = y - 0.15498578286403f * uv[0] - 0.571353132557189f * uv[1];
1616 rgbpix[2] = y + 1.77201282937288f * uv[0];
1617 for(int c = 0; c < 3; c++) out[4 * (width * (row + top) + col + left) + c] = rgbpix[c];
1618 }
1619 }
1620 }
1621 dt_free_align(all_buffers);
1622}
1623
1624#ifdef HAVE_OPENCL
1625
1626static int process_markesteijn_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in,
1627 cl_mem dev_out, const dt_iop_roi_t *const roi_in,
1628 const dt_iop_roi_t *const roi_out, const gboolean smooth)
1629{
1632
1633 const int devid = piece->pipe->devid;
1634 const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])piece->pipe->dsc.xtrans;
1635
1636 const float processed_maximum[4]
1637 = { piece->pipe->dsc.processed_maximum[0], piece->pipe->dsc.processed_maximum[1],
1638 piece->pipe->dsc.processed_maximum[2], 1.0f };
1639
1640 const int qual_flags = demosaic_qual_flags(piece, &self->dev->image_storage, roi_out);
1641
1642 cl_mem dev_tmp = NULL;
1643 cl_mem dev_tmptmp = NULL;
1644 cl_mem dev_xtrans = NULL;
1645 cl_mem dev_green_eq = NULL;
1646 cl_mem dev_rgbv[8] = { NULL };
1647 cl_mem dev_drv[8] = { NULL };
1648 cl_mem dev_homo[8] = { NULL };
1649 cl_mem dev_homosum[8] = { NULL };
1650 cl_mem dev_gminmax = NULL;
1651 cl_mem dev_allhex = NULL;
1652 cl_mem dev_aux = NULL;
1653 cl_mem dev_edge_in = NULL;
1654 cl_mem dev_edge_out = NULL;
1655 cl_int err = -999;
1656
1657 cl_mem *dev_rgb = dev_rgbv;
1658
1659 dev_xtrans
1660 = dt_opencl_copy_host_to_device_constant(devid, sizeof(piece->pipe->dsc.xtrans), piece->pipe->dsc.xtrans);
1661 if(dev_xtrans == NULL) goto error;
1662
1663 if(qual_flags & DEMOSAIC_FULL_SCALE)
1664 {
1665 // Full demosaic and then scaling if needed
1666 const int scaled = (roi_out->width != roi_in->width || roi_out->height != roi_in->height);
1667
1668 int width = roi_in->width;
1669 int height = roi_in->height;
1670 const int passes = ((data->demosaicing_method & ~DEMOSAIC_DUAL) == DT_IOP_DEMOSAIC_MARKESTEIJN_3) ? 3 : 1;
1671 const int ndir = 4 << (passes > 1);
1672 const int pad_tile = (passes == 1) ? 12 : 17;
1673
1674 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
1675 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
1676 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } };
1677
1678 // allhex contains the offset coordinates (x,y) of a green hexagon around each
1679 // non-green pixel and vice versa
1680 char allhex[3][3][8][2];
1681 // sgreen is the offset in the sensor matrix of the solitary
1682 // green pixels (initialized here only to avoid compiler warning)
1683 char sgreen[2] = { 0 };
1684
1685 // Map a green hexagon around each non-green pixel and vice versa:
1686 for(int row = 0; row < 3; row++)
1687 for(int col = 0; col < 3; col++)
1688 for(int ng = 0, d = 0; d < 10; d += 2)
1689 {
1690 const int g = FCxtrans(row, col, NULL, xtrans) == 1;
1691 if(FCxtrans(row + orth[d] + 6, col + orth[d + 2] + 6, NULL, xtrans) == 1)
1692 ng = 0;
1693 else
1694 ng++;
1695 // if there are four non-green pixels adjacent in cardinal
1696 // directions, this is the solitary green pixel
1697 if(ng == 4)
1698 {
1699 sgreen[0] = col;
1700 sgreen[1] = row;
1701 }
1702 if(ng == g + 1)
1703 for(int c = 0; c < 8; c++)
1704 {
1705 const int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
1706 const int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
1707
1708 allhex[row][col][c ^ (g * 2 & d)][0] = h;
1709 allhex[row][col][c ^ (g * 2 & d)][1] = v;
1710 }
1711 }
1712
1713 dev_allhex = dt_opencl_copy_host_to_device_constant(devid, sizeof(allhex), allhex);
1714 if(dev_allhex == NULL) goto error;
1715
1716 for(int n = 0; n < ndir; n++)
1717 {
1718 dev_rgbv[n] = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 4 * width * height);
1719 if(dev_rgbv[n] == NULL) goto error;
1720 }
1721
1722 dev_gminmax = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 2 * width * height);
1723 if(dev_gminmax == NULL) goto error;
1724
1725 dev_aux = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 4 * width * height);
1726 if(dev_aux == NULL) goto error;
1727
1728 if(scaled)
1729 {
1730 // need to scale to right res
1731 dev_tmp = dt_opencl_alloc_device(devid, width, height, sizeof(float) * 4);
1732 if(dev_tmp == NULL) goto error;
1733 }
1734 else
1735 {
1736 // scaling factor 1.0 --> we can directly process into the output buffer
1737 dev_tmp = dev_out;
1738 }
1739
1740 {
1741 // copy from dev_in to first rgb image buffer.
1742 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1743 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 0, sizeof(cl_mem), (void *)&dev_in);
1744 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 1, sizeof(cl_mem), (void *)&dev_rgb[0]);
1745 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 2, sizeof(int), (void *)&width);
1746 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 3, sizeof(int), (void *)&height);
1747 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 4, sizeof(int), (void *)&roi_in->x);
1748 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 5, sizeof(int), (void *)&roi_in->y);
1749 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 6, sizeof(cl_mem), (void *)&dev_xtrans);
1750 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_initial_copy, sizes);
1751 if(err != CL_SUCCESS) goto error;
1752 }
1753
1754
1755 // duplicate dev_rgb[0] to dev_rgb[1], dev_rgb[2], and dev_rgb[3]
1756 for(int c = 1; c <= 3; c++)
1757 {
1758 err = dt_opencl_enqueue_copy_buffer_to_buffer(devid, dev_rgb[0], dev_rgb[c], 0, 0,
1759 sizeof(float) * 4 * width * height);
1760 if(err != CL_SUCCESS) goto error;
1761 }
1762
1763 // find minimum and maximum allowed green values of red/blue pixel pairs
1764 const int pad_g1_g3 = 3;
1765 dt_opencl_local_buffer_t locopt_g1_g3
1766 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
1767 .cellsize = 1 * sizeof(float), .overhead = 0,
1768 .sizex = 1 << 8, .sizey = 1 << 8 };
1769
1770 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_green_minmax, &locopt_g1_g3))
1771 goto error;
1772
1773 {
1774 size_t sizes[3] = { ROUNDUP(width, locopt_g1_g3.sizex), ROUNDUP(height, locopt_g1_g3.sizey), 1 };
1775 size_t local[3] = { locopt_g1_g3.sizex, locopt_g1_g3.sizey, 1 };
1776 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1777 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 1, sizeof(cl_mem), (void *)&dev_gminmax);
1778 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 2, sizeof(int), (void *)&width);
1779 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 3, sizeof(int), (void *)&height);
1780 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 4, sizeof(int), (void *)&pad_g1_g3);
1781 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 5, sizeof(int), (void *)&roi_in->x);
1782 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 6, sizeof(int), (void *)&roi_in->y);
1783 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 7, 2 * sizeof(char), (void *)sgreen);
1784 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1785 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 9, sizeof(cl_mem), (void *)&dev_allhex);
1786 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 10,
1787 sizeof(float) * (locopt_g1_g3.sizex + 2*3) * (locopt_g1_g3.sizey + 2*3), NULL);
1788 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_green_minmax, sizes, local);
1789 if(err != CL_SUCCESS) goto error;
1790 }
1791
1792 // interpolate green horizontally, vertically, and along both diagonals
1793 const int pad_g_interp = 3;
1794 dt_opencl_local_buffer_t locopt_g_interp
1795 = (dt_opencl_local_buffer_t){ .xoffset = 2*6, .xfactor = 1, .yoffset = 2*6, .yfactor = 1,
1796 .cellsize = 4 * sizeof(float), .overhead = 0,
1797 .sizex = 1 << 8, .sizey = 1 << 8 };
1798
1799 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_interpolate_green, &locopt_g_interp))
1800 goto error;
1801
1802 {
1803 size_t sizes[3] = { ROUNDUP(width, locopt_g_interp.sizex), ROUNDUP(height, locopt_g_interp.sizey), 1 };
1804 size_t local[3] = { locopt_g_interp.sizex, locopt_g_interp.sizey, 1 };
1805 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1806 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 1, sizeof(cl_mem), (void *)&dev_rgb[1]);
1807 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 2, sizeof(cl_mem), (void *)&dev_rgb[2]);
1808 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 3, sizeof(cl_mem), (void *)&dev_rgb[3]);
1809 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 4, sizeof(cl_mem), (void *)&dev_gminmax);
1810 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 5, sizeof(int), (void *)&width);
1811 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 6, sizeof(int), (void *)&height);
1812 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 7, sizeof(int), (void *)&pad_g_interp);
1813 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 8, sizeof(int), (void *)&roi_in->x);
1814 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 9, sizeof(int), (void *)&roi_in->y);
1815 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 10, 2 * sizeof(char), (void *)sgreen);
1816 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1817 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 12, sizeof(cl_mem), (void *)&dev_allhex);
1818 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 13,
1819 sizeof(float) * 4 * (locopt_g_interp.sizex + 2*6) * (locopt_g_interp.sizey + 2*6), NULL);
1820 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_interpolate_green, sizes, local);
1821 if(err != CL_SUCCESS) goto error;
1822 }
1823
1824 // multi-pass loop: one pass for Markesteijn-1 and three passes for Markesteijn-3
1825 for(int pass = 0; pass < passes; pass++)
1826 {
1827
1828 // if on second pass, copy rgb[0] to [3] into rgb[4] to [7] ....
1829 if(pass == 1)
1830 {
1831 for(int c = 0; c < 4; c++)
1832 {
1833 err = dt_opencl_enqueue_copy_buffer_to_buffer(devid, dev_rgb[c], dev_rgb[c + 4], 0, 0,
1834 sizeof(float) * 4 * width * height);
1835 if(err != CL_SUCCESS) goto error;
1836 }
1837 // ... and process that second set of buffers
1838 dev_rgb += 4;
1839 }
1840
1841 // second and third pass (only Markesteijn-3)
1842 if(pass)
1843 {
1844 // recalculate green from interpolated values of closer pixels
1845 const int pad_g_recalc = 6;
1846 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1847 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1848 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 1, sizeof(cl_mem), (void *)&dev_rgb[1]);
1849 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 2, sizeof(cl_mem), (void *)&dev_rgb[2]);
1850 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 3, sizeof(cl_mem), (void *)&dev_rgb[3]);
1851 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 4, sizeof(cl_mem), (void *)&dev_gminmax);
1852 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 5, sizeof(int), (void *)&width);
1853 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 6, sizeof(int), (void *)&height);
1854 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 7, sizeof(int), (void *)&pad_g_recalc);
1855 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 8, sizeof(int), (void *)&roi_in->x);
1856 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 9, sizeof(int), (void *)&roi_in->y);
1857 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 10, 2 * sizeof(char), (void *)sgreen);
1858 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1859 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 12, sizeof(cl_mem), (void *)&dev_allhex);
1860 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_recalculate_green, sizes);
1861 if(err != CL_SUCCESS) goto error;
1862 }
1863
1864 // interpolate red and blue values for solitary green pixels
1865 const int pad_rb_g = (passes == 1) ? 6 : 5;
1866 dt_opencl_local_buffer_t locopt_rb_g
1867 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
1868 .cellsize = 4 * sizeof(float), .overhead = 0,
1869 .sizex = 1 << 8, .sizey = 1 << 8 };
1870
1871 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_solitary_green, &locopt_rb_g))
1872 goto error;
1873
1874 cl_mem *dev_trgb = dev_rgb;
1875 for(int d = 0, i = 1, h = 0; d < 6; d++, i ^= 1, h ^= 2)
1876 {
1877 const char dir[2] = { i, i ^ 1 };
1878
1879 // we use dev_aux to transport intermediate results from one loop run to the next
1880 size_t sizes[3] = { ROUNDUP(width, locopt_rb_g.sizex), ROUNDUP(height, locopt_rb_g.sizey), 1 };
1881 size_t local[3] = { locopt_rb_g.sizex, locopt_rb_g.sizey, 1 };
1882 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 0, sizeof(cl_mem), (void *)&dev_trgb[0]);
1883 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 1, sizeof(cl_mem), (void *)&dev_aux);
1884 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 2, sizeof(int), (void *)&width);
1885 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 3, sizeof(int), (void *)&height);
1886 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 4, sizeof(int), (void *)&pad_rb_g);
1887 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 5, sizeof(int), (void *)&roi_in->x);
1888 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 6, sizeof(int), (void *)&roi_in->y);
1889 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 7, sizeof(int), (void *)&d);
1890 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 8, 2 * sizeof(char), (void *)dir);
1891 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 9, sizeof(int), (void *)&h);
1892 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 10, 2 * sizeof(char), (void *)sgreen);
1893 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1894 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 12,
1895 sizeof(float) * 4 * (locopt_rb_g.sizex + 2*2) * (locopt_rb_g.sizey + 2*2), NULL);
1896 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_solitary_green, sizes, local);
1897 if(err != CL_SUCCESS) goto error;
1898
1899 if((d < 2) || (d & 1)) dev_trgb++;
1900 }
1901
1902 // interpolate red for blue pixels and vice versa
1903 const int pad_rb_br = (passes == 1) ? 6 : 5;
1904 dt_opencl_local_buffer_t locopt_rb_br
1905 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
1906 .cellsize = 4 * sizeof(float), .overhead = 0,
1907 .sizex = 1 << 8, .sizey = 1 << 8 };
1908
1909 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_red_and_blue, &locopt_rb_br))
1910 goto error;
1911
1912 for(int d = 0; d < 4; d++)
1913 {
1914 size_t sizes[3] = { ROUNDUP(width, locopt_rb_br.sizex), ROUNDUP(height, locopt_rb_br.sizey), 1 };
1915 size_t local[3] = { locopt_rb_br.sizex, locopt_rb_br.sizey, 1 };
1916 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 0, sizeof(cl_mem), (void *)&dev_rgb[d]);
1917 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 1, sizeof(int), (void *)&width);
1918 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 2, sizeof(int), (void *)&height);
1919 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 3, sizeof(int), (void *)&pad_rb_br);
1920 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 4, sizeof(int), (void *)&roi_in->x);
1921 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 5, sizeof(int), (void *)&roi_in->y);
1922 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 6, sizeof(int), (void *)&d);
1923 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 7, 2 * sizeof(char), (void *)sgreen);
1924 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1925 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 9,
1926 sizeof(float) * 4 * (locopt_rb_br.sizex + 2*3) * (locopt_rb_br.sizey + 2*3), NULL);
1927 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_red_and_blue, sizes, local);
1928 if(err != CL_SUCCESS) goto error;
1929 }
1930
1931 // interpolate red and blue for 2x2 blocks of green
1932 const int pad_g22 = (passes == 1) ? 8 : 4;
1933 dt_opencl_local_buffer_t locopt_g22
1934 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
1935 .cellsize = 4 * sizeof(float), .overhead = 0,
1936 .sizex = 1 << 8, .sizey = 1 << 8 };
1937
1938 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_interpolate_twoxtwo, &locopt_g22))
1939 goto error;
1940
1941 for(int d = 0, n = 0; d < ndir; d += 2, n++)
1942 {
1943 size_t sizes[3] = { ROUNDUP(width, locopt_g22.sizex), ROUNDUP(height, locopt_g22.sizey), 1 };
1944 size_t local[3] = { locopt_g22.sizex, locopt_g22.sizey, 1 };
1945 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 0, sizeof(cl_mem), (void *)&dev_rgb[n]);
1946 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 1, sizeof(int), (void *)&width);
1947 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 2, sizeof(int), (void *)&height);
1948 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 3, sizeof(int), (void *)&pad_g22);
1949 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 4, sizeof(int), (void *)&roi_in->x);
1950 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 5, sizeof(int), (void *)&roi_in->y);
1951 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 6, sizeof(int), (void *)&d);
1952 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 7, 2 * sizeof(char), (void *)sgreen);
1953 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1954 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 9, sizeof(cl_mem), (void *)&dev_allhex);
1955 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 10,
1956 sizeof(float) * 4 * (locopt_g22.sizex + 2*2) * (locopt_g22.sizey + 2*2), NULL);
1957 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_interpolate_twoxtwo, sizes, local);
1958 if(err != CL_SUCCESS) goto error;
1959 }
1960 }
1961 // end of multi pass
1962
1963 // gminmax data no longer needed
1964 dt_opencl_release_mem_object(dev_gminmax);
1965 dev_gminmax = NULL;
1966
1967 // jump back to the first set of rgb buffers (this is a noop for Markesteijn-1)
1968 dev_rgb = dev_rgbv;
1969
1970 // prepare derivatives buffers
1971 for(int n = 0; n < ndir; n++)
1972 {
1973 dev_drv[n] = dt_opencl_alloc_device_buffer(devid, sizeof(float) * width * height);
1974 if(dev_drv[n] == NULL) goto error;
1975 }
1976
1977 // convert to perceptual colorspace and differentiate in all directions
1978 const int pad_yuv = (passes == 1) ? 8 : 13;
1979 dt_opencl_local_buffer_t locopt_diff
1980 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
1981 .cellsize = 4 * sizeof(float), .overhead = 0,
1982 .sizex = 1 << 8, .sizey = 1 << 8 };
1983
1984 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_differentiate, &locopt_diff))
1985 goto error;
1986
1987 for(int d = 0; d < ndir; d++)
1988 {
1989 // convert to perceptual YPbPr colorspace
1990 size_t sizes_yuv[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1991 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 0, sizeof(cl_mem), (void *)&dev_rgb[d]);
1992 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 1, sizeof(cl_mem), (void *)&dev_aux);
1993 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 2, sizeof(int), (void *)&width);
1994 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 3, sizeof(int), (void *)&height);
1995 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 4, sizeof(int), (void *)&pad_yuv);
1996 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_convert_yuv, sizes_yuv);
1997 if(err != CL_SUCCESS) goto error;
1998
1999
2000 // differentiate in all directions
2001 size_t sizes_diff[3] = { ROUNDUP(width, locopt_diff.sizex), ROUNDUP(height, locopt_diff.sizey), 1 };
2002 size_t local_diff[3] = { locopt_diff.sizex, locopt_diff.sizey, 1 };
2003 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 0, sizeof(cl_mem), (void *)&dev_aux);
2004 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 1, sizeof(cl_mem), (void *)&dev_drv[d]);
2005 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 2, sizeof(int), (void *)&width);
2006 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 3, sizeof(int), (void *)&height);
2007 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 4, sizeof(int), (void *)&pad_yuv);
2008 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 5, sizeof(int), (void *)&d);
2009 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 6,
2010 sizeof(float) * 4 * (locopt_diff.sizex + 2*1) * (locopt_diff.sizey + 2*1), NULL);
2011 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_differentiate, sizes_diff, local_diff);
2012 if(err != CL_SUCCESS) goto error;
2013 }
2014
2015 // reserve buffers for homogeneity maps and sum maps
2016 for(int n = 0; n < ndir; n++)
2017 {
2018 dev_homo[n] = dt_opencl_alloc_device_buffer(devid, sizeof(unsigned char) * width * height);
2019 if(dev_homo[n] == NULL) goto error;
2020
2021 dev_homosum[n] = dt_opencl_alloc_device_buffer(devid, sizeof(unsigned char) * width * height);
2022 if(dev_homosum[n] == NULL) goto error;
2023 }
2024
2025 // get thresholds for homogeneity map (store them in dev_aux)
2026 for(int d = 0; d < ndir; d++)
2027 {
2028 const int pad_homo = (passes == 1) ? 10 : 15;
2029 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2030 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 0, sizeof(cl_mem), (void *)&dev_drv[d]);
2031 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 1, sizeof(cl_mem), (void *)&dev_aux);
2032 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 2, sizeof(int), (void *)&width);
2033 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 3, sizeof(int), (void *)&height);
2034 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 4, sizeof(int), (void *)&pad_homo);
2035 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 5, sizeof(int), (void *)&d);
2036 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_homo_threshold, sizes);
2037 if(err != CL_SUCCESS) goto error;
2038 }
2039
2040 // set homogeneity maps
2041 const int pad_homo = (passes == 1) ? 10 : 15;
2042 dt_opencl_local_buffer_t locopt_homo
2043 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
2044 .cellsize = 1 * sizeof(float), .overhead = 0,
2045 .sizex = 1 << 8, .sizey = 1 << 8 };
2046
2047 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_homo_set, &locopt_homo))
2048 goto error;
2049
2050 for(int d = 0; d < ndir; d++)
2051 {
2052 size_t sizes[3] = { ROUNDUP(width, locopt_homo.sizex),ROUNDUP(height, locopt_homo.sizey), 1 };
2053 size_t local[3] = { locopt_homo.sizex, locopt_homo.sizey, 1 };
2054 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 0, sizeof(cl_mem), (void *)&dev_drv[d]);
2055 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 1, sizeof(cl_mem), (void *)&dev_aux);
2056 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 2, sizeof(cl_mem), (void *)&dev_homo[d]);
2057 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 3, sizeof(int), (void *)&width);
2058 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 4, sizeof(int), (void *)&height);
2059 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 5, sizeof(int), (void *)&pad_homo);
2060 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 6,
2061 sizeof(float) * (locopt_homo.sizex + 2*1) * (locopt_homo.sizey + 2*1), NULL);
2062 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_homo_set, sizes, local);
2063 if(err != CL_SUCCESS) goto error;
2064 }
2065
2066 // get rid of dev_drv buffers
2067 for(int n = 0; n < 8; n++)
2068 {
2069 dt_opencl_release_mem_object(dev_drv[n]);
2070 dev_drv[n] = NULL;
2071 }
2072
2073 // build 5x5 sum of homogeneity maps for each pixel and direction
2074 dt_opencl_local_buffer_t locopt_homo_sum
2075 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
2076 .cellsize = 1 * sizeof(float), .overhead = 0,
2077 .sizex = 1 << 8, .sizey = 1 << 8 };
2078
2079 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_homo_sum, &locopt_homo_sum))
2080 goto error;
2081
2082 for(int d = 0; d < ndir; d++)
2083 {
2084 size_t sizes[3] = { ROUNDUP(width, locopt_homo_sum.sizex), ROUNDUP(height, locopt_homo_sum.sizey), 1 };
2085 size_t local[3] = { locopt_homo_sum.sizex, locopt_homo_sum.sizey, 1 };
2086 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 0, sizeof(cl_mem), (void *)&dev_homo[d]);
2087 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 1, sizeof(cl_mem), (void *)&dev_homosum[d]);
2088 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 2, sizeof(int), (void *)&width);
2089 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 3, sizeof(int), (void *)&height);
2090 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 4, sizeof(int), (void *)&pad_tile);
2091 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 5,
2092 sizeof(char) * (locopt_homo_sum.sizex + 2*2) * (locopt_homo_sum.sizey + 2*2), NULL);
2093 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_homo_sum, sizes, local);
2094 if(err != CL_SUCCESS) goto error;
2095 }
2096
2097 // get maximum of homogeneity maps (store in dev_aux)
2098 for(int d = 0; d < ndir; d++)
2099 {
2100 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2101 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 0, sizeof(cl_mem), (void *)&dev_homosum[d]);
2102 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 1, sizeof(cl_mem), (void *)&dev_aux);
2103 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 2, sizeof(int), (void *)&width);
2104 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 3, sizeof(int), (void *)&height);
2105 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 4, sizeof(int), (void *)&pad_tile);
2106 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 5, sizeof(int), (void *)&d);
2107 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_homo_max, sizes);
2108 if(err != CL_SUCCESS) goto error;
2109 }
2110
2111 {
2112 // adjust maximum value
2113 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2114 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 0, sizeof(cl_mem), (void *)&dev_aux);
2115 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 1, sizeof(int), (void *)&width);
2116 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 2, sizeof(int), (void *)&height);
2117 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 3, sizeof(int), (void *)&pad_tile);
2118 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_homo_max_corr, sizes);
2119 if(err != CL_SUCCESS) goto error;
2120 }
2121
2122 // for Markesteijn-3: use only one of two directions if there is a difference in homogeneity
2123 for(int d = 0; d < ndir - 4; d++)
2124 {
2125 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2126 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 0, sizeof(cl_mem), (void *)&dev_homosum[d]);
2127 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 1, sizeof(cl_mem), (void *)&dev_homosum[d + 4]);
2128 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 2, sizeof(int), (void *)&width);
2129 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 3, sizeof(int), (void *)&height);
2130 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 4, sizeof(int), (void *)&pad_tile);
2131 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_homo_quench, sizes);
2132 if(err != CL_SUCCESS) goto error;
2133 }
2134
2135 {
2136 // initialize output buffer to zero
2137 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2138 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 0, sizeof(cl_mem), (void *)&dev_tmp);
2139 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 1, sizeof(int), (void *)&width);
2140 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 2, sizeof(int), (void *)&height);
2141 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 3, sizeof(int), (void *)&pad_tile);
2142 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_zero, sizes);
2143 if(err != CL_SUCCESS) goto error;
2144 }
2145
2146 // need to get another temp buffer for the output image (may use the space of dev_drv[] freed earlier)
2147 dev_tmptmp = dt_opencl_alloc_device(devid, (size_t)width, height, sizeof(float) * 4);
2148 if(dev_tmptmp == NULL) goto error;
2149
2150 cl_mem dev_t1 = dev_tmp;
2151 cl_mem dev_t2 = dev_tmptmp;
2152
2153 // accumulate all contributions
2154 for(int d = 0; d < ndir; d++)
2155 {
2156 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2157 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 0, sizeof(cl_mem), (void *)&dev_t1);
2158 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 1, sizeof(cl_mem), (void *)&dev_t2);
2159 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 2, sizeof(cl_mem), (void *)&dev_rgbv[d]);
2160 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 3, sizeof(cl_mem), (void *)&dev_homosum[d]);
2161 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 4, sizeof(cl_mem), (void *)&dev_aux);
2162 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 5, sizeof(int), (void *)&width);
2163 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 6, sizeof(int), (void *)&height);
2164 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 7, sizeof(int), (void *)&pad_tile);
2165 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_accu, sizes);
2166 if(err != CL_SUCCESS) goto error;
2167
2168 // swap buffers
2169 cl_mem dev_t = dev_t2;
2170 dev_t2 = dev_t1;
2171 dev_t1 = dev_t;
2172 }
2173
2174 // copy output to dev_tmptmp (if not already there)
2175 // note: we need to take swap of buffers into account, so current output lies in dev_t1
2176 if(dev_t1 != dev_tmptmp)
2177 {
2178 size_t origin[] = { 0, 0, 0 };
2179 size_t region[] = { width, height, 1 };
2180 err = dt_opencl_enqueue_copy_image(devid, dev_t1, dev_tmptmp, origin, origin, region);
2181 if(err != CL_SUCCESS) goto error;
2182 }
2183
2184 {
2185 // process the final image
2186 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2187 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 0, sizeof(cl_mem), (void *)&dev_tmptmp);
2188 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 1, sizeof(cl_mem), (void *)&dev_tmp);
2189 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 2, sizeof(int), (void *)&width);
2190 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 3, sizeof(int), (void *)&height);
2191 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 4, sizeof(int), (void *)&pad_tile);
2192 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 5, 4*sizeof(float), (void *)processed_maximum);
2193 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_final, sizes);
2194 if(err != CL_SUCCESS) goto error;
2195 }
2196
2197 // now it's time to get rid of most of the temporary buffers (except of dev_tmp and dev_xtrans)
2198 for(int n = 0; n < 8; n++)
2199 {
2200 dt_opencl_release_mem_object(dev_rgbv[n]);
2201 dev_rgbv[n] = NULL;
2202 }
2203
2204 for(int n = 0; n < 8; n++)
2205 {
2206 dt_opencl_release_mem_object(dev_homo[n]);
2207 dev_homo[n] = NULL;
2208 }
2209
2210 for(int n = 0; n < 8; n++)
2211 {
2212 dt_opencl_release_mem_object(dev_homosum[n]);
2213 dev_homosum[n] = NULL;
2214 }
2215
2217 dev_aux = NULL;
2218
2219 dt_opencl_release_mem_object(dev_xtrans);
2220 dev_xtrans = NULL;
2221
2222 dt_opencl_release_mem_object(dev_allhex);
2223 dev_allhex = NULL;
2224
2225 dt_opencl_release_mem_object(dev_green_eq);
2226 dev_green_eq = NULL;
2227
2228 dt_opencl_release_mem_object(dev_tmptmp);
2229 dev_tmptmp = NULL;
2230
2231 // take care of image borders. the algorithm above leaves an unprocessed border of pad_tile pixels.
2232 // strategy: take the four edges and process them each with process_vng_cl(). as VNG produces
2233 // an image with a border with only linear interpolation we process edges of pad_tile+3px and
2234 // drop 3px on the inner side if possible
2235
2236 // take care of some degenerate cases (which might happen if we are called in a tiling context)
2237 const int wd = (width > pad_tile+3) ? pad_tile+3 : width;
2238 const int ht = (height > pad_tile+3) ? pad_tile+3 : height;
2239 const int wdc = (wd >= pad_tile+3) ? 3 : 0;
2240 const int htc = (ht >= pad_tile+3) ? 3 : 0;
2241
2242 // the data of all four edges:
2243 // total edge: x-offset, y-offset, width, height,
2244 // after dropping: x-offset adjust, y-offset adjust, width adjust, height adjust
2245 const int edges[4][8] = { { 0, 0, wd, height, 0, 0, -wdc, 0 },
2246 { 0, 0, width, ht, 0, 0, 0, -htc },
2247 { width - wd, 0, wd, height, wdc, 0, -wdc, 0 },
2248 { 0, height - ht, width, ht, 0, htc, 0, -htc } };
2249
2250 for(int n = 0; n < 4; n++)
2251 {
2252 dt_iop_roi_t roi = { roi_in->x + edges[n][0], roi_in->y + edges[n][1], edges[n][2], edges[n][3], 1.0f };
2253
2254 size_t iorigin[] = { edges[n][0], edges[n][1], 0 };
2255 size_t oorigin[] = { 0, 0, 0 };
2256 size_t region[] = { edges[n][2], edges[n][3], 1 };
2257
2258 // reserve input buffer for image edge
2259 dev_edge_in = dt_opencl_alloc_device(devid, edges[n][2], edges[n][3], sizeof(float));
2260 if(dev_edge_in == NULL) goto error;
2261
2262 // reserve output buffer for VNG processing of edge
2263 dev_edge_out = dt_opencl_alloc_device(devid, edges[n][2], edges[n][3], sizeof(float) * 4);
2264 if(dev_edge_out == NULL) goto error;
2265
2266 // copy edge to input buffer
2267 err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_edge_in, iorigin, oorigin, region);
2268 if(err != CL_SUCCESS) goto error;
2269
2270 // VNG processing
2271 if(!process_vng_cl(self, piece, dev_edge_in, dev_edge_out, &roi, &roi, smooth, qual_flags & DEMOSAIC_ONLY_VNG_LINEAR))
2272 goto error;
2273
2274 // adjust for "good" part, dropping linear border where possible
2275 iorigin[0] += edges[n][4];
2276 iorigin[1] += edges[n][5];
2277 oorigin[0] += edges[n][4];
2278 oorigin[1] += edges[n][5];
2279 region[0] += edges[n][6];
2280 region[1] += edges[n][7];
2281
2282 // copy output
2283 err = dt_opencl_enqueue_copy_image(devid, dev_edge_out, dev_tmp, oorigin, iorigin, region);
2284 if(err != CL_SUCCESS) goto error;
2285
2286 // release intermediate buffers
2287 dt_opencl_release_mem_object(dev_edge_in);
2288 dt_opencl_release_mem_object(dev_edge_out);
2289 dev_edge_in = dev_edge_out = NULL;
2290 }
2291 dt_dev_write_rawdetail_mask_cl(piece, dev_tmp, roi_in, DT_DEV_DETAIL_MASK_DEMOSAIC);
2292
2293 if(scaled)
2294 {
2295 // scale temp buffer to output buffer
2296 err = dt_iop_clip_and_zoom_roi_cl(devid, dev_out, dev_tmp, roi_out, roi_in);
2297 if(err != CL_SUCCESS) goto error;
2298 }
2299 }
2300 else
2301 {
2302 // sample third-size image
2303 const int width = roi_out->width;
2304 const int height = roi_out->height;
2305
2306 size_t sizes[2] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid) };
2307 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 0, sizeof(cl_mem), &dev_in);
2308 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 1, sizeof(cl_mem), &dev_out);
2309 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 2, sizeof(int), &width);
2310 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 3, sizeof(int), &height);
2311 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 4, sizeof(int), (void *)&roi_in->x);
2312 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 5, sizeof(int), (void *)&roi_in->y);
2313 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 6, sizeof(int), (void *)&roi_in->width);
2314 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 7, sizeof(int), (void *)&roi_in->height);
2315 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 8, sizeof(float), (void *)&roi_out->scale);
2316 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 9, sizeof(cl_mem), (void *)&dev_xtrans);
2317 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_zoom_third_size, sizes);
2318 if(err != CL_SUCCESS) goto error;
2319 }
2320
2321 // free remaining temporary buffers
2322 if(dev_tmp != dev_out) dt_opencl_release_mem_object(dev_tmp);
2323 dev_tmp = NULL;
2324
2325 dt_opencl_release_mem_object(dev_xtrans);
2326 dev_xtrans = NULL;
2327
2328
2329 // color smoothing
2330 if(data->color_smoothing)
2331 {
2332 if(!color_smoothing_cl(self, piece, dev_out, dev_out, roi_out, data->color_smoothing))
2333 goto error;
2334 }
2335
2336 return TRUE;
2337
2338error:
2339 if(dev_tmp != dev_out) dt_opencl_release_mem_object(dev_tmp);
2340
2341 for(int n = 0; n < 8; n++)
2342 dt_opencl_release_mem_object(dev_rgbv[n]);
2343 for(int n = 0; n < 8; n++)
2344 dt_opencl_release_mem_object(dev_drv[n]);
2345 for(int n = 0; n < 8; n++)
2346 dt_opencl_release_mem_object(dev_homo[n]);
2347 for(int n = 0; n < 8; n++)
2348 dt_opencl_release_mem_object(dev_homosum[n]);
2349 dt_opencl_release_mem_object(dev_gminmax);
2350 dt_opencl_release_mem_object(dev_tmptmp);
2351 dt_opencl_release_mem_object(dev_xtrans);
2352 dt_opencl_release_mem_object(dev_allhex);
2353 dt_opencl_release_mem_object(dev_green_eq);
2355 dt_opencl_release_mem_object(dev_edge_in);
2356 dt_opencl_release_mem_object(dev_edge_out);
2357 dt_print(DT_DEBUG_OPENCL, "[opencl_demosaic] couldn't enqueue kernel! %d\n", err);
2358 return FALSE;
2359}
2360
2361#endif // OPENCL
2362
2363#undef PIX_SWAP
2364#undef PIX_SORT
2365#undef CCLIP
2366#undef TS
2367
2368// clang-format off
2369// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
2370// vim: shiftwidth=2 expandtab tabstop=2 cindent
2371// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
2372// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:191
#define TRUE
Definition ashift_lsd.c:151
#define FALSE
Definition ashift_lsd.c:147
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
int dt_conf_get_int(const char *name)
Definition conf.c:194
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1395
static void memset_zero(void *const buffer, size_t size)
Set the memory buffer to zero as a pack of unsigned char.
Definition darktable.h:653
@ DT_DEBUG_OPENCL
Definition darktable.h:478
static void * dt_alloc_perthread(const size_t n, const size_t objsize, size_t *padded_size)
Definition darktable.h:766
#define dt_get_perthread(buf, padsize)
Definition darktable.h:797
#define dt_free_align(A)
Definition darktable.h:334
static int FCxtrans(const int row, const int col, global const unsigned char(*const xtrans)[6])
Definition data/kernels/common.h:50
@ DT_IOP_DEMOSAIC_MARKESTEIJN_3
Definition demosaic.c:84
@ DEMOSAIC_FULL_SCALE
Definition demosaic.c:103
@ DEMOSAIC_ONLY_VNG_LINEAR
Definition demosaic.c:104
static int demosaic_qual_flags(const dt_dev_pixelpipe_iop_t *const piece, const dt_image_t *const img, const dt_iop_roi_t *const roi_out)
Definition demosaic.c:239
@ DT_DEV_DETAIL_MASK_DEMOSAIC
Definition develop.h:110
static float f(const float t, const float c, const float x)
Definition graduatednd.c:173
#define TRANSLATE(n, size)
#define CONV_FILT(VAR, FILT)
static void xtrans_fdc_interpolate(struct dt_iop_module_t *self, float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint8_t(*const xtrans)[6])
Definition markesteijn.c:508
static void xtrans_markesteijn_interpolate(float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint8_t(*const xtrans)[6], const int passes)
Definition markesteijn.c:29
static const short * hexmap(const int row, const int col, short(*const allhex)[3][8])
Definition markesteijn.c:13
#define SQR(x)
Definition markesteijn.c:8
#define TS
Definition markesteijn.c:10
#define PIX_SORT(a, b)
#define CLAMPS(A, L, H)
Definition math.h:68
c
Definition derive_filmic_v6_gamut_mapping.py:11
g
Definition derive_filmic_v6_gamut_mapping.py:18
static int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.h:560
static int dt_opencl_set_kernel_arg(const int dev, const int kernel, const size_t size, const void *arg)
Definition opencl.h:556
static void dt_opencl_release_mem_object(void *mem)
Definition opencl.h:601
static int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.h:564
Definition pixelpipe_hb.h:46
void * data
Definition pixelpipe_hb.h:49
dt_image_t image_storage
Definition develop.h:164
float exif_iso
Definition common/image.h:202
Definition demosaic.c:187
Definition demosaic.c:129
Definition imageop.h:182
struct dt_develop_t * dev
Definition imageop.h:227
dt_iop_global_data_t * global_data
Definition imageop.h:245
Definition imageop.h:32
int x
Definition imageop.h:33
int width
Definition imageop.h:33
int height
Definition imageop.h:33
#define MIN(a, b)
Definition thinplate.c:23