Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
markesteijn.c
Go to the documentation of this file.
1/*
2 This file is part of the Ansel project.
3 Copyright (C) 2023, 2025-2026 Aurélien PIERRE.
4
5 Ansel is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 Ansel is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with Ansel. If not, see <http://www.gnu.org/licenses/>.
17*/
18
19//
20// x-trans specific demosaicing algorithms
21//
22
23// xtrans_interpolate adapted from dcraw 9.20
24
25#define SQR(x) ((x) * (x))
26// tile size, optimized to keep data in L2 cache
27#define TS 122
28
30static inline const short * hexmap(const int row, const int col, short (*const allhex)[3][8])
31{
32 // Row and column offsets may be negative, but C's modulo function
33 // is not useful here with a negative dividend. To be safe, add a
34 // fairly large multiple of 3. In current code row and col will
35 // never be less than -9 (1-pass) or -14 (3-pass).
36 int irow = row + 600;
37 int icol = col + 600;
38 assert(irow >= 0 && icol >= 0);
39 return allhex[irow % 3][icol % 3];
40}
41
42
43/*
44 Frank Markesteijn's algorithm for Fuji X-Trans sensors
45 */
46static void xtrans_markesteijn_interpolate(float *out, const float *const in,
47 const dt_iop_roi_t *const roi_out,
48 const dt_iop_roi_t *const roi_in,
49 const uint8_t (*const xtrans)[6], const int passes)
50{
51 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
52 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
53 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } },
54 dir[4] = { 1, TS, TS + 1, TS - 1 };
55
56 short allhex[3][3][8];
57 // sgrow/sgcol is the offset in the sensor matrix of the solitary
58 // green pixels (initialized here only to avoid compiler warning)
59 unsigned short sgrow = 0, sgcol = 0;
60
61 const int width = roi_out->width;
62 const int height = roi_out->height;
63 const int ndir = 4 << (passes > 1);
64
65 const size_t buffer_size = (size_t)TS * TS * (ndir * 4 + 3) * sizeof(float);
66 size_t padded_buffer_size;
67 char *const all_buffers = (char *)dt_pixelpipe_cache_alloc_perthread(buffer_size, sizeof(char), &padded_buffer_size);
68 if(!all_buffers)
69 {
70 printf("[demosaic] not able to allocate Markesteijn buffers\n");
71 return;
72 }
73
74 /* Map a green hexagon around each non-green pixel and vice versa: */
75 for(int row = 0; row < 3; row++)
76 for(int col = 0; col < 3; col++)
77 for(int ng = 0, d = 0; d < 10; d += 2)
78 {
79 const int g = FCxtrans(row, col, NULL, xtrans) == 1;
80 if(FCxtrans(row + orth[d], col + orth[d + 2], NULL, xtrans) == 1)
81 ng = 0;
82 else
83 ng++;
84 // if there are four non-green pixels adjacent in cardinal
85 // directions, this is the solitary green pixel
86 if(ng == 4)
87 {
88 sgrow = row;
89 sgcol = col;
90 }
91 if(ng == g + 1)
92 for(int c = 0; c < 8; c++)
93 {
94 const int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
95 const int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
96 // offset within TSxTS buffer
97 allhex[row][col][c ^ (g * 2 & d)] = h + v * TS;
98 }
99 }
100
101 // extra passes propagates out errors at edges, hence need more padding
102 const int pad_tile = (passes == 1) ? 12 : 17;
103#ifdef _OPENMP
104#pragma omp parallel for default(none) \
105 dt_omp_firstprivate(all_buffers, padded_buffer_size, dir, height, in, ndir, pad_tile, passes, roi_in, width, xtrans) \
106 shared(sgrow, sgcol, allhex, out) \
107 schedule(static)
108#endif
109 // step through TSxTS cells of image, each tile overlapping the
110 // prior as interpolation needs a substantial border
111 for(int top = -pad_tile; top < height - pad_tile; top += TS - (pad_tile*2))
112 {
113 char *const buffer = dt_get_perthread(all_buffers, padded_buffer_size);
114 // rgb points to ndir TSxTS tiles of 3 channels (R, G, and B)
115 float(*rgb)[TS][TS][3] = (float(*)[TS][TS][3])buffer;
116 // yuv points to 3 channel (Y, u, and v) TSxTS tiles
117 // note that channels come before tiles to allow for a
118 // vectorization optimization when building drv[] from yuv[]
119 float (*const yuv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
120 // drv points to ndir TSxTS tiles, each a single channel of derivatives
121 float (*const drv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3 + 3) * sizeof(float));
122 // gmin and gmax reuse memory which is used later by yuv buffer;
123 // each points to a TSxTS tile of single channel data
124 float (*const gmin)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
125 float (*const gmax)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3 + 1) * sizeof(float));
126 // homo and homosum reuse memory which is used earlier in the
127 // loop; each points to ndir single-channel TSxTS tiles
128 uint8_t (*const homo)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
129 uint8_t (*const homosum)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float)
130 + TS * TS * ndir * sizeof(uint8_t));
131
132 for(int left = -pad_tile; left < width - pad_tile; left += TS - (pad_tile*2))
133 {
134 int mrow = MIN(top + TS, height + pad_tile);
135 int mcol = MIN(left + TS, width + pad_tile);
136
137 // Copy current tile from in to image buffer. If border goes
138 // beyond edges of image, fill with mirrored/interpolated edges.
139 // The extra border avoids discontinuities at image edges.
140 for(int row = top; row < mrow; row++)
141 for(int col = left; col < mcol; col++)
142 {
143 float(*const pix) = rgb[0][row - top][col - left];
144 if((col >= 0) && (row >= 0) && (col < width) && (row < height))
145 {
146 const int f = FCxtrans(row, col, roi_in, xtrans);
147 for(int c = 0; c < 3; c++) pix[c] = (c == f) ? in[roi_in->width * row + col] : 0.f;
148 }
149 else
150 {
151 // mirror a border pixel if beyond image edge
152 const int c = FCxtrans(row, col, roi_in, xtrans);
153 for(int cc = 0; cc < 3; cc++)
154 {
155 if(cc != c)
156 pix[cc] = 0.0f;
157 else
158 {
159#define TRANSLATE(n, size) ((n >= size) ? (2 * size - n - 2) : abs(n))
160 const int cy = TRANSLATE(row, height), cx = TRANSLATE(col, width);
161 if(c == FCxtrans(cy, cx, roi_in, xtrans))
162 pix[c] = in[roi_in->width * cy + cx];
163 else
164 {
165 // interpolate if mirror pixel is a different color
166 float sum = 0.0f;
167 uint8_t count = 0;
168 for(int y = row - 1; y <= row + 1; y++)
169 for(int x = col - 1; x <= col + 1; x++)
170 {
171 const int yy = TRANSLATE(y, height), xx = TRANSLATE(x, width);
172 const int ff = FCxtrans(yy, xx, roi_in, xtrans);
173 if(ff == c)
174 {
175 sum += in[roi_in->width * yy + xx];
176 count++;
177 }
178 }
179 pix[c] = sum / count;
180 }
181 }
182 }
183 }
184 }
185
186 // duplicate rgb[0] to rgb[1], rgb[2], and rgb[3]
187 for(int c = 1; c <= 3; c++) memcpy(rgb[c], rgb[0], sizeof(*rgb));
188
189 // note that successive calculations are inset within the tile
190 // so as to give enough border data, and there needs to be a 6
191 // pixel border initially to allow allhex to find neighboring
192 // pixels
193
194 /* Set green1 and green3 to the minimum and maximum allowed values: */
195 // Run through each red/blue or blue/red pair, setting their g1
196 // and g3 values to the min/max of green pixels surrounding the
197 // pair. Use a 3 pixel border as gmin/gmax is used by
198 // interpolate green which has a 3 pixel border.
199 const int pad_g1_g3 = 3;
200 for(int row = top + pad_g1_g3; row < mrow - pad_g1_g3; row++)
201 {
202 // setting max to 0.0f signifies that this is a new pair, which
203 // requires a new min/max calculation of its neighboring greens
204 float min = FLT_MAX, max = 0.0f;
205 for(int col = left + pad_g1_g3; col < mcol - pad_g1_g3; col++)
206 {
207 // if in row of horizontal red & blue pairs (or processing
208 // vertical red & blue pairs near image bottom), reset min/max
209 // between each pair
210 if(FCxtrans(row, col, roi_in, xtrans) == 1)
211 {
212 min = FLT_MAX, max = 0.0f;
213 continue;
214 }
215 // if at start of red & blue pair, calculate min/max of green
216 // pixels surrounding it; note that while normally using == to
217 // compare floats is suspect, here the check is if 0.0f has
218 // explicitly been assigned to max (which signifies a new
219 // red/blue pair)
220 if(max == 0.0f)
221 {
222 float (*const pix)[3] = &rgb[0][row - top][col - left];
223 const short *const hex = hexmap(row,col,allhex);
224 for(int c = 0; c < 6; c++)
225 {
226 const float val = pix[hex[c]][1];
227 if(min > val) min = val;
228 if(max < val) max = val;
229 }
230 }
231 gmin[row - top][col - left] = min;
232 gmax[row - top][col - left] = max;
233 // handle vertical red/blue pairs
234 switch((row - sgrow) % 3)
235 {
236 // hop down a row to second pixel in vertical pair
237 case 1:
238 if(row < mrow - 4) row++, col--;
239 break;
240 // then if not done with the row hop up and right to next
241 // vertical red/blue pair, resetting min/max
242 case 2:
243 min = FLT_MAX, max = 0.0f;
244 if((col += 2) < mcol - 4 && row > top + 3) row--;
245 }
246 }
247 }
248
249 /* Interpolate green horizontally, vertically, and along both diagonals: */
250 // need a 3 pixel border here as 3*hex[] can have a 3 unit offset
251 const int pad_g_interp = 3;
252 for(int row = top + pad_g_interp; row < mrow - pad_g_interp; row++)
253 for(int col = left + pad_g_interp; col < mcol - pad_g_interp; col++)
254 {
255 float color[8];
256 const int f = FCxtrans(row, col, roi_in, xtrans);
257 if(f == 1) continue;
258 float (*const pix)[3] = &rgb[0][row - top][col - left];
259 const short *const hex = hexmap(row,col,allhex);
260 // TODO: these constants come from integer math constants in
261 // dcraw -- calculate them instead from interpolation math
262 color[0] = 0.6796875f * (pix[hex[1]][1] + pix[hex[0]][1])
263 - 0.1796875f * (pix[2 * hex[1]][1] + pix[2 * hex[0]][1]);
264 color[1] = 0.87109375f * pix[hex[3]][1] + pix[hex[2]][1] * 0.13f
265 + 0.359375f * (pix[0][f] - pix[-hex[2]][f]);
266 for(int c = 0; c < 2; c++)
267 color[2 + c] = 0.640625f * pix[hex[4 + c]][1] + 0.359375f * pix[-2 * hex[4 + c]][1]
268 + 0.12890625f * (2 * pix[0][f] - pix[3 * hex[4 + c]][f] - pix[-3 * hex[4 + c]][f]);
269 for(int c = 0; c < 4; c++)
270 rgb[c ^ !((row - sgrow) % 3)][row - top][col - left][1]
271 = CLAMPS(color[c], gmin[row - top][col - left], gmax[row - top][col - left]);
272 }
273
274 for(int pass = 0; pass < passes; pass++)
275 {
276 if(pass == 1)
277 {
278 // if on second pass, copy rgb[0] to [3] into rgb[4] to [7],
279 // and process that second set of buffers
280 memcpy(rgb + 4, rgb, sizeof(*rgb) * 4);
281 rgb += 4;
282 }
283
284 /* Recalculate green from interpolated values of closer pixels: */
285 if(pass)
286 {
287 const int pad_g_recalc = 6;
288 for(int row = top + pad_g_recalc; row < mrow - pad_g_recalc; row++)
289 for(int col = left + pad_g_recalc; col < mcol - pad_g_recalc; col++)
290 {
291 const int f = FCxtrans(row, col, roi_in, xtrans);
292 if(f == 1) continue;
293 const short *const hex = hexmap(row,col,allhex);
294 for(int d = 3; d < 6; d++)
295 {
296 float(*rfx)[3] = &rgb[(d - 2) ^ !((row - sgrow) % 3)][row - top][col - left];
297 const float val = rfx[-2 * hex[d]][1]
298 + 2 * rfx[hex[d]][1] - rfx[-2 * hex[d]][f]
299 - 2 * rfx[hex[d]][f] + 3 * rfx[0][f];
300 rfx[0][1] = CLAMPS(val / 3.0f, gmin[row - top][col - left], gmax[row - top][col - left]);
301 }
302 }
303 }
304
305 /* Interpolate red and blue values for solitary green pixels: */
306 const int pad_rb_g = (passes == 1) ? 6 : 5;
307 for(int row = (top - sgrow + pad_rb_g + 2) / 3 * 3 + sgrow; row < mrow - pad_rb_g; row += 3)
308 for(int col = (left - sgcol + pad_rb_g + 2) / 3 * 3 + sgcol; col < mcol - pad_rb_g; col += 3)
309 {
310 float(*rfx)[3] = &rgb[0][row - top][col - left];
311 int h = FCxtrans(row, col + 1, roi_in, xtrans);
312 float diff[6] = { 0.0f };
313 // interplated color: first index is red/blue, second is
314 // pass, is double actual result
315 float color[2][6];
316 // Six passes, alternating hori/vert interp (i),
317 // starting with R or B (h) depending on which is closest.
318 // Passes 0,1 to rgb[0], rgb[1] of hori/vert interp. Pass
319 // 3,5 to rgb[2], rgb[3] of best of interp hori/vert
320 // results. Each pass which outputs moves on to the next
321 // rgb[] for input of interp greens.
322 for(int i = 1, d = 0; d < 6; d++, i ^= TS ^ 1, h ^= 2)
323 {
324 // look 1 and 2 pixels distance from solitary green to
325 // red then blue or blue then red
326 for(int c = 0; c < 2; c++, h ^= 2)
327 {
328 // rate of change in greens between current pixel and
329 // interpolated pixels 1 or 2 distant: a quick
330 // derivative which will be divided by two later to be
331 // rate of luminance change for red/blue between known
332 // red/blue neighbors and the current unknown pixel
333 const float g = 2 * rfx[0][1] - rfx[i << c][1] - rfx[-(i << c)][1];
334 // color is halved before being stored in rgb, hence
335 // this becomes green rate of change plus the average
336 // of the near red or blue pixels on current axis
337 color[h != 0][d] = g + rfx[i << c][h] + rfx[-(i << c)][h];
338 // Note that diff will become the slope for both red
339 // and blue differentials in the current direction.
340 // For 2nd and 3rd hori+vert passes, create a sum of
341 // steepness for both cardinal directions.
342 if(d > 1)
343 diff[d] += SQR(rfx[i << c][1] - rfx[-(i << c)][1] - rfx[i << c][h] + rfx[-(i << c)][h])
344 + SQR(g);
345 }
346 if((d < 2) || (d & 1))
347 { // output for passes 0, 1, 3, 5
348 // for 0, 1 just use hori/vert, for 3, 5 use best of x/y dir
349 const int d_out = d - ((d > 1) && (diff[d-1] < diff[d]));
350 rfx[0][0] = color[0][d_out] / 2.f;
351 rfx[0][2] = color[1][d_out] / 2.f;
352 rfx += TS * TS;
353 }
354 }
355 }
356
357 /* Interpolate red for blue pixels and vice versa: */
358 const int pad_rb_br = (passes == 1) ? 6 : 5;
359 for(int row = top + pad_rb_br; row < mrow - pad_rb_br; row++)
360 for(int col = left + pad_rb_br; col < mcol - pad_rb_br; col++)
361 {
362 const int f = 2 - FCxtrans(row, col, roi_in, xtrans);
363 if(f == 1) continue;
364 float(*rfx)[3] = &rgb[0][row - top][col - left];
365 const int c = (row - sgrow) % 3 ? TS : 1;
366 const int h = 3 * (c ^ TS ^ 1);
367 for(int d = 0; d < 4; d++, rfx += TS * TS)
368 {
369 const int i = d > 1 || ((d ^ c) & 1) ||
370 ((fabsf(rfx[0][1]-rfx[c][1]) + fabsf(rfx[0][1]-rfx[-c][1])) <
371 2.f*(fabsf(rfx[0][1]-rfx[h][1]) + fabsf(rfx[0][1]-rfx[-h][1]))) ? c:h;
372 rfx[0][f] = (rfx[i][f] + rfx[-i][f] + 2.f * rfx[0][1] - rfx[i][1] - rfx[-i][1]) / 2.f;
373 }
374 }
375
376 /* Fill in red and blue for 2x2 blocks of green: */
377 const int pad_g22 = (passes == 1) ? 8 : 4;
378 for(int row = top + pad_g22; row < mrow - pad_g22; row++)
379 {
380 if((row - sgrow) % 3)
381 for(int col = left + pad_g22; col < mcol - pad_g22; col++)
382 if((col - sgcol) % 3)
383 {
384 float(*rfx)[3] = &rgb[0][row - top][col - left];
385 const short *const hex = hexmap(row,col,allhex);
386 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
387 if(hex[d] + hex[d + 1])
388 {
389 const float g = 3.f * rfx[0][1] - 2.f * rfx[hex[d]][1] - rfx[hex[d + 1]][1];
390 for(int c = 0; c < 4; c += 2)
391 rfx[0][c] = (g + 2.f * rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 3.f;
392 }
393 else
394 {
395 const float g = 2.f * rfx[0][1] - rfx[hex[d]][1] - rfx[hex[d + 1]][1];
396 for(int c = 0; c < 4; c += 2)
397 rfx[0][c] = (g + rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 2.f;
398 }
399 }
400 }
401 } // end of multipass loop
402
403 // jump back to the first set of rgb buffers (this is a nop
404 // unless on the second pass)
405 rgb = (float(*)[TS][TS][3])buffer;
406 // from here on out, mainly are working within the current tile
407 // rather than in reference to the image, so don't offset
408 // mrow/mcol by top/left of tile
409 mrow -= top;
410 mcol -= left;
411
412 /* Convert to perceptual colorspace and differentiate in all directions: */
413 // Original dcraw algorithm uses CIELab as perceptual space
414 // (presumably coming from original AHD) and converts taking
415 // camera matrix into account. Now use YPbPr which requires much
416 // less code and is nearly indistinguishable. It assumes the
417 // camera RGB is roughly linear.
418 for(int d = 0; d < ndir; d++)
419 {
420 const int pad_yuv = (passes == 1) ? 8 : 13;
421 for(int row = pad_yuv; row < mrow - pad_yuv; row++)
422 for(int col = pad_yuv; col < mcol - pad_yuv; col++)
423 {
424 const float *rx = rgb[d][row][col];
425 // use ITU-R BT.2020 YPbPr, which is great, but could use
426 // a better/simpler choice? note that imageop.h provides
427 // dt_iop_RGB_to_YCbCr which uses Rec. 601 conversion,
428 // which appears less good with specular highlights
429 const float y = 0.2627f * rx[0] + 0.6780f * rx[1] + 0.0593f * rx[2];
430 yuv[0][row][col] = y;
431 yuv[1][row][col] = (rx[2] - y) * 0.56433f;
432 yuv[2][row][col] = (rx[0] - y) * 0.67815f;
433 }
434 // Note that f can offset by a column (-1 or +1) and by a row
435 // (-TS or TS). The row-wise offsets cause the undefined
436 // behavior sanitizer to warn of an out of bounds index, but
437 // as yfx is multi-dimensional and there is sufficient
438 // padding, that is not actually so.
439 const int f = dir[d & 3];
440 const int pad_drv = (passes == 1) ? 9 : 14;
441 for(int row = pad_drv; row < mrow - pad_drv; row++)
442 for(int col = pad_drv; col < mcol - pad_drv; col++)
443 {
444 const float(*yfx)[TS][TS] = (float(*)[TS][TS]) & yuv[0][row][col];
445 drv[d][row][col] = SQR(2 * yfx[0][0][0] - yfx[0][0][f] - yfx[0][0][-f])
446 + SQR(2 * yfx[1][0][0] - yfx[1][0][f] - yfx[1][0][-f])
447 + SQR(2 * yfx[2][0][0] - yfx[2][0][f] - yfx[2][0][-f]);
448 }
449 }
450
451 /* Build homogeneity maps from the derivatives: */
452 memset_zero(homo, sizeof(uint8_t) * ndir * TS * TS);
453 const int pad_homo = (passes == 1) ? 10 : 15;
454 for(int row = pad_homo; row < mrow - pad_homo; row++)
455 for(int col = pad_homo; col < mcol - pad_homo; col++)
456 {
457 float tr = FLT_MAX;
458 for(int d = 0; d < ndir; d++)
459 if(tr > drv[d][row][col]) tr = drv[d][row][col];
460 tr *= 8;
461 for(int d = 0; d < ndir; d++)
462 for(int v = -1; v <= 1; v++)
463 for(int h = -1; h <= 1; h++)
464 homo[d][row][col] += ((drv[d][row + v][col + h] <= tr) ? 1 : 0);
465 }
466
467 /* Build 5x5 sum of homogeneity maps for each pixel & direction */
468 for(int d = 0; d < ndir; d++)
469 for(int row = pad_tile; row < mrow - pad_tile; row++)
470 {
471 // start before first column where homo[d][row][col+2] != 0,
472 // so can know v5sum and homosum[d][row][col] will be 0
473 int col = pad_tile-5;
474 uint8_t v5sum[5] = { 0 };
475 homosum[d][row][col] = 0;
476 // calculate by rolling through column sums
477 for(col++; col < mcol - pad_tile; col++)
478 {
479 uint8_t colsum = 0;
480 for(int v = -2; v <= 2; v++) colsum += homo[d][row + v][col + 2];
481 homosum[d][row][col] = homosum[d][row][col - 1] - v5sum[col % 5] + colsum;
482 v5sum[col % 5] = colsum;
483 }
484 }
485
486 /* Average the most homogeneous pixels for the final result: */
487 for(int row = pad_tile; row < mrow - pad_tile; row++)
488 for(int col = pad_tile; col < mcol - pad_tile; col++)
489 {
490 uint8_t hm[8] = { 0 };
491 uint8_t maxval = 0;
492 for(int d = 0; d < ndir; d++)
493 {
494 hm[d] = homosum[d][row][col];
495 maxval = (maxval < hm[d] ? hm[d] : maxval);
496 }
497 maxval -= maxval >> 3;
498 for(int d = 0; d < ndir - 4; d++)
499 {
500 if(hm[d] < hm[d + 4])
501 hm[d] = 0;
502 else if(hm[d] > hm[d + 4])
503 hm[d + 4] = 0;
504 }
505 dt_aligned_pixel_t avg = { 0.0f };
506 for(int d = 0; d < ndir; d++)
507 {
508 if(hm[d] >= maxval)
509 {
510 for(int c = 0; c < 3; c++) avg[c] += rgb[d][row][col][c];
511 avg[3]++;
512 }
513 }
514 for(int c = 0; c < 3; c++)
515 out[4 * (width * (row + top) + col + left) + c] = avg[c]/avg[3];
516 }
517 }
518 }
520}
521
522#undef TS
523
524#define TS 122
525static void xtrans_fdc_interpolate(struct dt_iop_module_t *self, float *out, const float *const in,
526 const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in,
527 const uint8_t (*const xtrans)[6])
528{
529
530 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
531 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
532 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } },
533 dir[4] = { 1, TS, TS + 1, TS - 1 };
534
535 static const float directionality[8] = { 1.0f, 0.0f, 0.5f, 0.5f, 1.0f, 0.0f, 0.5f, 0.5f };
536
537 short allhex[3][3][8];
538 // sgrow/sgcol is the offset in the sensor matrix of the solitary
539 // green pixels (initialized here only to avoid compiler warning)
540 unsigned short sgrow = 0, sgcol = 0;
541
542 const int width = roi_out->width;
543 const int height = roi_out->height;
544 static const int ndir = 4;
545
546 static const float complex Minv[3][8] = {
547 { 1.000000e+00f, 2.500000e-01f - 4.330127e-01f * _Complex_I, -2.500000e-01f - 4.330127e-01f * _Complex_I,
548 -1.000000e+00f, 7.500000e-01f - 1.299038e+00f * _Complex_I, -2.500000e-01f + 4.330127e-01f * _Complex_I,
549 7.500000e-01f + 1.299038e+00f * _Complex_I, 2.500000e-01f + 4.330127e-01f * _Complex_I },
550 { 1.000000e+00f, -2.000000e-01f + 3.464102e-01f * _Complex_I, 2.000000e-01f + 3.464102e-01f * _Complex_I,
551 8.000000e-01f, 0.0f, 2.000000e-01f - 3.464102e-01f * _Complex_I, 0.0f,
552 -2.000000e-01f - 3.464102e-01f * _Complex_I },
553 { 1.000000e+00f, 2.500000e-01f - 4.330127e-01f * _Complex_I, -2.500000e-01f - 4.330127e-01f * _Complex_I,
554 -1.000000e+00f, -7.500000e-01f + 1.299038e+00f * _Complex_I, -2.500000e-01f + 4.330127e-01f * _Complex_I,
555 -7.500000e-01f - 1.299038e+00f * _Complex_I, 2.500000e-01f + 4.330127e-01f * _Complex_I },
556 };
557
558 static const float complex modarr[6][6][8] = {
559 { { 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
560 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
561 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
562 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
563 { -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
564 -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
565 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
566 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
567 { 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
568 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
569 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
570 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
571 { -1.000000e+00f - 3.673940e-16f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
572 -1.000000e+00f - 3.673940e-16f * _Complex_I, -1.000000e+00f - 1.224647e-16f * _Complex_I,
573 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
574 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f + 2.449294e-16f * _Complex_I },
575 { 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
576 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
577 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
578 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
579 { -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
580 -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
581 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
582 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I } },
583 { { 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
584 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
585 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
586 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
587 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
588 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
589 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
590 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
591 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
592 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
593 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
594 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
595 { -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
596 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I,
597 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
598 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
599 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
600 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
601 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
602 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 2.449294e-16f * _Complex_I },
603 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
604 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
605 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
606 1.000000e+00f - 2.266216e-15f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I } },
607 { { -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
608 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
609 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
610 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
611 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
612 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
613 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
614 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
615 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
616 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
617 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
618 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
619 { 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
620 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 1.224647e-16f * _Complex_I,
621 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
622 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
623 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
624 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
625 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
626 1.000000e+00f - 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
627 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
628 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
629 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
630 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 1.133108e-15f * _Complex_I } },
631 { { -1.000000e+00f + 1.224647e-16f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
632 -1.000000e+00f - 1.224647e-16f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
633 1.000000e+00f + 0.000000e+00f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
634 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I },
635 { 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
636 1.000000e+00f + 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
637 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
638 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
639 { -1.000000e+00f - 1.224647e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
640 -1.000000e+00f - 3.673940e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
641 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
642 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
643 { 1.000000e+00f + 2.449294e-16f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
644 1.000000e+00f + 4.898587e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
645 1.000000e+00f - 2.449294e-16f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
646 1.000000e+00f - 4.898587e-16f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
647 { -1.000000e+00f - 3.673940e-16f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
648 -1.000000e+00f - 6.123234e-16f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
649 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
650 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
651 { 1.000000e+00f + 4.898587e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
652 1.000000e+00f + 7.347881e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
653 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I,
654 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I } },
655 { { -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
656 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
657 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
658 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
659 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
660 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
661 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
662 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 2.449294e-16f * _Complex_I },
663 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
664 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
665 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
666 1.000000e+00f - 4.898587e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
667 { 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
668 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 3.673940e-16f * _Complex_I,
669 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
670 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
671 { -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
672 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
673 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
674 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I },
675 { 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
676 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
677 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
678 1.000000e+00f - 7.347881e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I } },
679 { { 5.000000e-01f - 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
680 5.000000e-01f + 8.660254e-01f * _Complex_I, -1.000000e+00f + 6.123234e-16f * _Complex_I,
681 1.000000e+00f + 0.000000e+00f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
682 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
683 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
684 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
685 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
686 1.000000e+00f - 2.266216e-15f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
687 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
688 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
689 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
690 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 1.133108e-15f * _Complex_I },
691 { -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f - 7.347881e-16f * _Complex_I,
692 -5.000000e-01f - 8.660254e-01f * _Complex_I, 1.000000e+00f - 4.898587e-16f * _Complex_I,
693 1.000000e+00f - 2.449294e-16f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
694 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I },
695 { 5.000000e-01f - 8.660254e-01f * _Complex_I, 5.000000e-01f + 8.660254e-01f * _Complex_I,
696 5.000000e-01f + 8.660254e-01f * _Complex_I, 5.000000e-01f - 8.660254e-01f * _Complex_I,
697 -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
698 1.000000e+00f - 7.347881e-16f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I },
699 { -5.000000e-01f + 8.660254e-01f * _Complex_I, -5.000000e-01f + 8.660254e-01f * _Complex_I,
700 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
701 -5.000000e-01f - 8.660254e-01f * _Complex_I, -5.000000e-01f - 8.660254e-01f * _Complex_I,
702 -5.000000e-01f + 8.660254e-01f * _Complex_I, 1.000000e+00f + 0.000000e+00f * _Complex_I } },
703 };
704
705 static const float complex harr[4][13][13]
706 = { { { 1.326343e-03f - 1.299441e-18f * _Complex_I, 7.091837e-04f - 1.228342e-03f * _Complex_I,
707 -6.278557e-04f - 1.087478e-03f * _Complex_I, -1.157216e-03f + 9.920263e-19f * _Complex_I,
708 -4.887166e-04f + 8.464820e-04f * _Complex_I, 5.758687e-04f + 9.974338e-04f * _Complex_I,
709 1.225183e-03f - 9.002496e-19f * _Complex_I, 5.758687e-04f - 9.974338e-04f * _Complex_I,
710 -4.887166e-04f - 8.464820e-04f * _Complex_I, -1.157216e-03f + 7.085902e-19f * _Complex_I,
711 -6.278557e-04f + 1.087478e-03f * _Complex_I, 7.091837e-04f + 1.228342e-03f * _Complex_I,
712 1.326343e-03f - 6.497206e-19f * _Complex_I },
713 { -1.980815e-03f + 1.698059e-18f * _Complex_I, -1.070384e-03f + 1.853959e-03f * _Complex_I,
714 7.924697e-04f + 1.372598e-03f * _Complex_I, 1.876584e-03f - 1.378892e-18f * _Complex_I,
715 1.225866e-03f - 2.123262e-03f * _Complex_I, -1.569320e-03f - 2.718142e-03f * _Complex_I,
716 -3.273971e-03f + 2.004729e-18f * _Complex_I, -1.569320e-03f + 2.718142e-03f * _Complex_I,
717 1.225866e-03f + 2.123262e-03f * _Complex_I, 1.876584e-03f - 9.192611e-19f * _Complex_I,
718 7.924697e-04f - 1.372598e-03f * _Complex_I, -1.070384e-03f - 1.853959e-03f * _Complex_I,
719 -1.980815e-03f + 7.277398e-19f * _Complex_I },
720 { 1.457023e-03f - 1.070603e-18f * _Complex_I, 8.487143e-04f - 1.470016e-03f * _Complex_I,
721 -6.873776e-04f - 1.190573e-03f * _Complex_I, -2.668335e-03f + 1.633884e-18f * _Complex_I,
722 -2.459813e-03f + 4.260521e-03f * _Complex_I, 3.238772e-03f + 5.609717e-03f * _Complex_I,
723 7.074895e-03f - 3.465699e-18f * _Complex_I, 3.238772e-03f - 5.609717e-03f * _Complex_I,
724 -2.459813e-03f - 4.260521e-03f * _Complex_I, -2.668335e-03f + 9.803302e-19f * _Complex_I,
725 -6.873776e-04f + 1.190573e-03f * _Complex_I, 8.487143e-04f + 1.470016e-03f * _Complex_I,
726 1.457023e-03f - 3.568678e-19f * _Complex_I },
727 { -1.017660e-03f + 6.231370e-19f * _Complex_I, -5.415171e-04f + 9.379351e-04f * _Complex_I,
728 7.255109e-04f + 1.256622e-03f * _Complex_I, 3.699792e-03f - 1.812375e-18f * _Complex_I,
729 4.090356e-03f - 7.084704e-03f * _Complex_I, -6.006283e-03f - 1.040319e-02f * _Complex_I,
730 -1.391431e-02f + 5.112034e-18f * _Complex_I, -6.006283e-03f + 1.040319e-02f * _Complex_I,
731 4.090356e-03f + 7.084704e-03f * _Complex_I, 3.699792e-03f - 9.061876e-19f * _Complex_I,
732 7.255109e-04f - 1.256622e-03f * _Complex_I, -5.415171e-04f - 9.379351e-04f * _Complex_I,
733 -1.017660e-03f + 1.246274e-19f * _Complex_I },
734 { 9.198983e-04f - 4.506202e-19f * _Complex_I, 6.815900e-04f - 1.180548e-03f * _Complex_I,
735 -1.287335e-03f - 2.229729e-03f * _Complex_I, -5.023856e-03f + 1.845735e-18f * _Complex_I,
736 -5.499048e-03f + 9.524630e-03f * _Complex_I, 9.797672e-03f + 1.697006e-02f * _Complex_I,
737 2.504795e-02f - 6.134977e-18f * _Complex_I, 9.797672e-03f - 1.697006e-02f * _Complex_I,
738 -5.499048e-03f - 9.524630e-03f * _Complex_I, -5.023856e-03f + 6.152449e-19f * _Complex_I,
739 -1.287335e-03f + 2.229729e-03f * _Complex_I, 6.815900e-04f + 1.180548e-03f * _Complex_I,
740 9.198983e-04f + 0.000000e+00f * _Complex_I },
741 { -7.972663e-04f + 2.929109e-19f * _Complex_I, -1.145605e-03f + 1.984247e-03f * _Complex_I,
742 1.983334e-03f + 3.435235e-03f * _Complex_I, 6.730096e-03f - 1.648398e-18f * _Complex_I,
743 6.782033e-03f - 1.174683e-02f * _Complex_I, -1.392077e-02f - 2.411147e-02f * _Complex_I,
744 -3.906939e-02f + 4.784620e-18f * _Complex_I, -1.392077e-02f + 2.411147e-02f * _Complex_I,
745 6.782033e-03f + 1.174683e-02f * _Complex_I, 6.730096e-03f + 0.000000e+00f * _Complex_I,
746 1.983334e-03f - 3.435235e-03f * _Complex_I, -1.145605e-03f - 1.984247e-03f * _Complex_I,
747 -7.972663e-04f - 9.763696e-20f * _Complex_I },
748 { 8.625458e-04f - 2.112628e-19f * _Complex_I, 1.431113e-03f - 2.478760e-03f * _Complex_I,
749 -2.310309e-03f - 4.001572e-03f * _Complex_I, -7.706486e-03f + 9.437723e-19f * _Complex_I,
750 -7.220186e-03f + 1.250573e-02f * _Complex_I, 1.587118e-02f + 2.748969e-02f * _Complex_I,
751 4.765675e-02f + 0.000000e+00f * _Complex_I, 1.587118e-02f - 2.748969e-02f * _Complex_I,
752 -7.220186e-03f - 1.250573e-02f * _Complex_I, -7.706486e-03f - 9.437723e-19f * _Complex_I,
753 -2.310309e-03f + 4.001572e-03f * _Complex_I, 1.431113e-03f + 2.478760e-03f * _Complex_I,
754 8.625458e-04f + 2.112628e-19f * _Complex_I },
755 { -7.972663e-04f + 9.763696e-20f * _Complex_I, -1.145605e-03f + 1.984247e-03f * _Complex_I,
756 1.983334e-03f + 3.435235e-03f * _Complex_I, 6.730096e-03f + 0.000000e+00f * _Complex_I,
757 6.782033e-03f - 1.174683e-02f * _Complex_I, -1.392077e-02f - 2.411147e-02f * _Complex_I,
758 -3.906939e-02f - 4.784620e-18f * _Complex_I, -1.392077e-02f + 2.411147e-02f * _Complex_I,
759 6.782033e-03f + 1.174683e-02f * _Complex_I, 6.730096e-03f + 1.648398e-18f * _Complex_I,
760 1.983334e-03f - 3.435235e-03f * _Complex_I, -1.145605e-03f - 1.984247e-03f * _Complex_I,
761 -7.972663e-04f - 2.929109e-19f * _Complex_I },
762 { 9.198983e-04f + 0.000000e+00f * _Complex_I, 6.815900e-04f - 1.180548e-03f * _Complex_I,
763 -1.287335e-03f - 2.229729e-03f * _Complex_I, -5.023856e-03f - 6.152449e-19f * _Complex_I,
764 -5.499048e-03f + 9.524630e-03f * _Complex_I, 9.797672e-03f + 1.697006e-02f * _Complex_I,
765 2.504795e-02f + 6.134977e-18f * _Complex_I, 9.797672e-03f - 1.697006e-02f * _Complex_I,
766 -5.499048e-03f - 9.524630e-03f * _Complex_I, -5.023856e-03f - 1.845735e-18f * _Complex_I,
767 -1.287335e-03f + 2.229729e-03f * _Complex_I, 6.815900e-04f + 1.180548e-03f * _Complex_I,
768 9.198983e-04f + 4.506202e-19f * _Complex_I },
769 { -1.017660e-03f - 1.246274e-19f * _Complex_I, -5.415171e-04f + 9.379351e-04f * _Complex_I,
770 7.255109e-04f + 1.256622e-03f * _Complex_I, 3.699792e-03f + 9.061876e-19f * _Complex_I,
771 4.090356e-03f - 7.084704e-03f * _Complex_I, -6.006283e-03f - 1.040319e-02f * _Complex_I,
772 -1.391431e-02f - 5.112034e-18f * _Complex_I, -6.006283e-03f + 1.040319e-02f * _Complex_I,
773 4.090356e-03f + 7.084704e-03f * _Complex_I, 3.699792e-03f + 1.812375e-18f * _Complex_I,
774 7.255109e-04f - 1.256622e-03f * _Complex_I, -5.415171e-04f - 9.379351e-04f * _Complex_I,
775 -1.017660e-03f - 6.231370e-19f * _Complex_I },
776 { 1.457023e-03f + 3.568678e-19f * _Complex_I, 8.487143e-04f - 1.470016e-03f * _Complex_I,
777 -6.873776e-04f - 1.190573e-03f * _Complex_I, -2.668335e-03f - 9.803302e-19f * _Complex_I,
778 -2.459813e-03f + 4.260521e-03f * _Complex_I, 3.238772e-03f + 5.609717e-03f * _Complex_I,
779 7.074895e-03f + 3.465699e-18f * _Complex_I, 3.238772e-03f - 5.609717e-03f * _Complex_I,
780 -2.459813e-03f - 4.260521e-03f * _Complex_I, -2.668335e-03f - 1.633884e-18f * _Complex_I,
781 -6.873776e-04f + 1.190573e-03f * _Complex_I, 8.487143e-04f + 1.470016e-03f * _Complex_I,
782 1.457023e-03f + 1.070603e-18f * _Complex_I },
783 { -1.980815e-03f - 7.277398e-19f * _Complex_I, -1.070384e-03f + 1.853959e-03f * _Complex_I,
784 7.924697e-04f + 1.372598e-03f * _Complex_I, 1.876584e-03f + 9.192611e-19f * _Complex_I,
785 1.225866e-03f - 2.123262e-03f * _Complex_I, -1.569320e-03f - 2.718142e-03f * _Complex_I,
786 -3.273971e-03f - 2.004729e-18f * _Complex_I, -1.569320e-03f + 2.718142e-03f * _Complex_I,
787 1.225866e-03f + 2.123262e-03f * _Complex_I, 1.876584e-03f + 1.378892e-18f * _Complex_I,
788 7.924697e-04f - 1.372598e-03f * _Complex_I, -1.070384e-03f - 1.853959e-03f * _Complex_I,
789 -1.980815e-03f - 1.698059e-18f * _Complex_I },
790 { 1.326343e-03f + 6.497206e-19f * _Complex_I, 7.091837e-04f - 1.228342e-03f * _Complex_I,
791 -6.278557e-04f - 1.087478e-03f * _Complex_I, -1.157216e-03f - 7.085902e-19f * _Complex_I,
792 -4.887166e-04f + 8.464820e-04f * _Complex_I, 5.758687e-04f + 9.974338e-04f * _Complex_I,
793 1.225183e-03f + 9.002496e-19f * _Complex_I, 5.758687e-04f - 9.974338e-04f * _Complex_I,
794 -4.887166e-04f - 8.464820e-04f * _Complex_I, -1.157216e-03f - 9.920263e-19f * _Complex_I,
795 -6.278557e-04f + 1.087478e-03f * _Complex_I, 7.091837e-04f + 1.228342e-03f * _Complex_I,
796 1.326343e-03f + 1.299441e-18f * _Complex_I } },
797 { { 9.129120e-04f - 8.943958e-19f * _Complex_I, -5.925973e-04f - 1.026409e-03f * _Complex_I,
798 -5.989682e-04f + 1.037443e-03f * _Complex_I, 1.158755e-03f - 8.514393e-19f * _Complex_I,
799 -8.992493e-04f - 1.557545e-03f * _Complex_I, -1.283187e-03f + 2.222546e-03f * _Complex_I,
800 2.730635e-03f - 1.337625e-18f * _Complex_I, -1.283187e-03f - 2.222546e-03f * _Complex_I,
801 -8.992493e-04f + 1.557545e-03f * _Complex_I, 1.158755e-03f - 2.838131e-19f * _Complex_I,
802 -5.989682e-04f - 1.037443e-03f * _Complex_I, -5.925973e-04f + 1.026409e-03f * _Complex_I,
803 9.129120e-04f + 0.000000e+00f * _Complex_I },
804 { -5.588854e-04f - 9.680179e-04f * _Complex_I, -6.474856e-04f + 1.121478e-03f * _Complex_I,
805 1.536588e-03f - 1.129066e-18f * _Complex_I, -9.123802e-04f - 1.580289e-03f * _Complex_I,
806 -1.541434e-03f + 2.669842e-03f * _Complex_I, 4.379825e-03f - 9.925627e-18f * _Complex_I,
807 -2.394173e-03f - 4.146830e-03f * _Complex_I, -2.189912e-03f + 3.793039e-03f * _Complex_I,
808 3.082869e-03f - 3.493222e-18f * _Complex_I, -9.123802e-04f - 1.580289e-03f * _Complex_I,
809 -7.682939e-04f + 1.330724e-03f * _Complex_I, 1.294971e-03f + 0.000000e+00f * _Complex_I,
810 -5.588854e-04f - 9.680179e-04f * _Complex_I },
811 { -5.883876e-04f + 1.019117e-03f * _Complex_I, 1.714796e-03f - 1.260012e-18f * _Complex_I,
812 -1.180365e-03f - 2.044451e-03f * _Complex_I, -1.483082e-03f + 2.568774e-03f * _Complex_I,
813 4.933362e-03f - 2.416651e-18f * _Complex_I, -3.296542e-03f - 5.709779e-03f * _Complex_I,
814 -3.546477e-03f + 6.142678e-03f * _Complex_I, 6.593085e-03f - 1.614840e-18f * _Complex_I,
815 -2.466681e-03f - 4.272417e-03f * _Complex_I, -1.483082e-03f + 2.568774e-03f * _Complex_I,
816 2.360729e-03f + 0.000000e+00f * _Complex_I, -8.573982e-04f - 1.485057e-03f * _Complex_I,
817 -5.883876e-04f + 1.019117e-03f * _Complex_I },
818 { 1.483526e-03f - 1.090077e-18f * _Complex_I, -1.074793e-03f - 1.861596e-03f * _Complex_I,
819 -1.447448e-03f + 2.507053e-03f * _Complex_I, 3.952416e-03f - 1.936126e-18f * _Complex_I,
820 -3.496688e-03f - 6.056441e-03f * _Complex_I, -4.898024e-03f + 8.483627e-03f * _Complex_I,
821 1.070518e-02f - 2.622012e-18f * _Complex_I, -4.898024e-03f - 8.483627e-03f * _Complex_I,
822 -3.496688e-03f + 6.056441e-03f * _Complex_I, 3.952416e-03f + 0.000000e+00f * _Complex_I,
823 -1.447448e-03f - 2.507053e-03f * _Complex_I, -1.074793e-03f + 1.861596e-03f * _Complex_I,
824 1.483526e-03f + 3.633590e-19f * _Complex_I },
825 { -9.966429e-04f - 1.726236e-03f * _Complex_I, -1.478281e-03f + 2.560458e-03f * _Complex_I,
826 4.306274e-03f - 2.109466e-18f * _Complex_I, -3.294955e-03f - 5.707029e-03f * _Complex_I,
827 -5.436890e-03f + 9.416970e-03f * _Complex_I, 1.556418e-02f - 3.812124e-18f * _Complex_I,
828 -8.842875e-03f - 1.531631e-02f * _Complex_I, -7.782088e-03f + 1.347897e-02f * _Complex_I,
829 1.087378e-02f + 0.000000e+00f * _Complex_I, -3.294955e-03f - 5.707029e-03f * _Complex_I,
830 -2.153137e-03f + 3.729342e-03f * _Complex_I, 2.956562e-03f + 3.350104e-18f * _Complex_I,
831 -9.966429e-04f - 1.726236e-03f * _Complex_I },
832 { -1.291288e-03f + 2.236576e-03f * _Complex_I, 3.942788e-03f - 8.935208e-18f * _Complex_I,
833 -2.798347e-03f - 4.846880e-03f * _Complex_I, -4.448869e-03f + 7.705666e-03f * _Complex_I,
834 1.522441e-02f - 3.728906e-18f * _Complex_I, -1.175443e-02f - 2.035927e-02f * _Complex_I,
835 -1.417872e-02f + 2.455826e-02f * _Complex_I, 2.350886e-02f + 0.000000e+00f * _Complex_I,
836 -7.612206e-03f - 1.318473e-02f * _Complex_I, -4.448869e-03f + 7.705666e-03f * _Complex_I,
837 5.596695e-03f + 1.370795e-18f * _Complex_I, -1.971394e-03f - 3.414555e-03f * _Complex_I,
838 -1.291288e-03f + 2.236576e-03f * _Complex_I },
839 { 2.779286e-03f - 1.361458e-18f * _Complex_I, -2.194126e-03f - 3.800338e-03f * _Complex_I,
840 -3.057720e-03f + 5.296126e-03f * _Complex_I, 9.725261e-03f - 2.382002e-18f * _Complex_I,
841 -8.649261e-03f - 1.498096e-02f * _Complex_I, -1.417667e-02f + 2.455472e-02f * _Complex_I,
842 3.552610e-02f + 0.000000e+00f * _Complex_I, -1.417667e-02f - 2.455472e-02f * _Complex_I,
843 -8.649261e-03f + 1.498096e-02f * _Complex_I, 9.725261e-03f + 2.382002e-18f * _Complex_I,
844 -3.057720e-03f - 5.296126e-03f * _Complex_I, -2.194126e-03f + 3.800338e-03f * _Complex_I,
845 2.779286e-03f + 1.361458e-18f * _Complex_I },
846 { -1.291288e-03f - 2.236576e-03f * _Complex_I, -1.971394e-03f + 3.414555e-03f * _Complex_I,
847 5.596695e-03f - 1.370795e-18f * _Complex_I, -4.448869e-03f - 7.705666e-03f * _Complex_I,
848 -7.612206e-03f + 1.318473e-02f * _Complex_I, 2.350886e-02f + 0.000000e+00f * _Complex_I,
849 -1.417872e-02f - 2.455826e-02f * _Complex_I, -1.175443e-02f + 2.035927e-02f * _Complex_I,
850 1.522441e-02f + 3.728906e-18f * _Complex_I, -4.448869e-03f - 7.705666e-03f * _Complex_I,
851 -2.798347e-03f + 4.846880e-03f * _Complex_I, 3.942788e-03f + 8.935208e-18f * _Complex_I,
852 -1.291288e-03f - 2.236576e-03f * _Complex_I },
853 { -9.966429e-04f + 1.726236e-03f * _Complex_I, 2.956562e-03f - 3.350104e-18f * _Complex_I,
854 -2.153137e-03f - 3.729342e-03f * _Complex_I, -3.294955e-03f + 5.707029e-03f * _Complex_I,
855 1.087378e-02f + 0.000000e+00f * _Complex_I, -7.782088e-03f - 1.347897e-02f * _Complex_I,
856 -8.842875e-03f + 1.531631e-02f * _Complex_I, 1.556418e-02f + 3.812124e-18f * _Complex_I,
857 -5.436890e-03f - 9.416970e-03f * _Complex_I, -3.294955e-03f + 5.707029e-03f * _Complex_I,
858 4.306274e-03f + 2.109466e-18f * _Complex_I, -1.478281e-03f - 2.560458e-03f * _Complex_I,
859 -9.966429e-04f + 1.726236e-03f * _Complex_I },
860 { 1.483526e-03f - 3.633590e-19f * _Complex_I, -1.074793e-03f - 1.861596e-03f * _Complex_I,
861 -1.447448e-03f + 2.507053e-03f * _Complex_I, 3.952416e-03f + 0.000000e+00f * _Complex_I,
862 -3.496688e-03f - 6.056441e-03f * _Complex_I, -4.898024e-03f + 8.483627e-03f * _Complex_I,
863 1.070518e-02f + 2.622012e-18f * _Complex_I, -4.898024e-03f - 8.483627e-03f * _Complex_I,
864 -3.496688e-03f + 6.056441e-03f * _Complex_I, 3.952416e-03f + 1.936126e-18f * _Complex_I,
865 -1.447448e-03f - 2.507053e-03f * _Complex_I, -1.074793e-03f + 1.861596e-03f * _Complex_I,
866 1.483526e-03f + 1.090077e-18f * _Complex_I },
867 { -5.883876e-04f - 1.019117e-03f * _Complex_I, -8.573982e-04f + 1.485057e-03f * _Complex_I,
868 2.360729e-03f + 0.000000e+00f * _Complex_I, -1.483082e-03f - 2.568774e-03f * _Complex_I,
869 -2.466681e-03f + 4.272417e-03f * _Complex_I, 6.593085e-03f + 1.614840e-18f * _Complex_I,
870 -3.546477e-03f - 6.142678e-03f * _Complex_I, -3.296542e-03f + 5.709779e-03f * _Complex_I,
871 4.933362e-03f + 2.416651e-18f * _Complex_I, -1.483082e-03f - 2.568774e-03f * _Complex_I,
872 -1.180365e-03f + 2.044451e-03f * _Complex_I, 1.714796e-03f + 1.260012e-18f * _Complex_I,
873 -5.883876e-04f - 1.019117e-03f * _Complex_I },
874 { -5.588854e-04f + 9.680179e-04f * _Complex_I, 1.294971e-03f + 0.000000e+00f * _Complex_I,
875 -7.682939e-04f - 1.330724e-03f * _Complex_I, -9.123802e-04f + 1.580289e-03f * _Complex_I,
876 3.082869e-03f + 3.493222e-18f * _Complex_I, -2.189912e-03f - 3.793039e-03f * _Complex_I,
877 -2.394173e-03f + 4.146830e-03f * _Complex_I, 4.379825e-03f + 9.925627e-18f * _Complex_I,
878 -1.541434e-03f - 2.669842e-03f * _Complex_I, -9.123802e-04f + 1.580289e-03f * _Complex_I,
879 1.536588e-03f + 1.129066e-18f * _Complex_I, -6.474856e-04f - 1.121478e-03f * _Complex_I,
880 -5.588854e-04f + 9.680179e-04f * _Complex_I },
881 { 9.129120e-04f + 0.000000e+00f * _Complex_I, -5.925973e-04f - 1.026409e-03f * _Complex_I,
882 -5.989682e-04f + 1.037443e-03f * _Complex_I, 1.158755e-03f + 2.838131e-19f * _Complex_I,
883 -8.992493e-04f - 1.557545e-03f * _Complex_I, -1.283187e-03f + 2.222546e-03f * _Complex_I,
884 2.730635e-03f + 1.337625e-18f * _Complex_I, -1.283187e-03f - 2.222546e-03f * _Complex_I,
885 -8.992493e-04f + 1.557545e-03f * _Complex_I, 1.158755e-03f + 8.514393e-19f * _Complex_I,
886 -5.989682e-04f - 1.037443e-03f * _Complex_I, -5.925973e-04f + 1.026409e-03f * _Complex_I,
887 9.129120e-04f + 8.943958e-19f * _Complex_I } },
888 { { 8.228091e-04f + 0.000000e+00f * _Complex_I, -5.365069e-04f + 9.292572e-04f * _Complex_I,
889 -6.011501e-04f - 1.041223e-03f * _Complex_I, 1.249890e-03f - 3.061346e-19f * _Complex_I,
890 -7.632708e-04f + 1.322024e-03f * _Complex_I, -9.846035e-04f - 1.705383e-03f * _Complex_I,
891 2.080486e-03f - 1.019144e-18f * _Complex_I, -9.846035e-04f + 1.705383e-03f * _Complex_I,
892 -7.632708e-04f - 1.322024e-03f * _Complex_I, 1.249890e-03f - 9.184039e-19f * _Complex_I,
893 -6.011501e-04f + 1.041223e-03f * _Complex_I, -5.365069e-04f - 9.292572e-04f * _Complex_I,
894 8.228091e-04f - 8.061204e-19f * _Complex_I },
895 { -5.616336e-04f - 9.727779e-04f * _Complex_I, 1.382894e-03f + 0.000000e+00f * _Complex_I,
896 -8.694311e-04f + 1.505899e-03f * _Complex_I, -9.721139e-04f - 1.683751e-03f * _Complex_I,
897 2.446785e-03f - 2.772471e-18f * _Complex_I, -1.605471e-03f + 2.780758e-03f * _Complex_I,
898 -1.832781e-03f - 3.174469e-03f * _Complex_I, 3.210942e-03f - 7.276687e-18f * _Complex_I,
899 -1.223392e-03f + 2.118978e-03f * _Complex_I, -9.721139e-04f - 1.683751e-03f * _Complex_I,
900 1.738862e-03f - 1.277695e-18f * _Complex_I, -6.914471e-04f + 1.197621e-03f * _Complex_I,
901 -5.616336e-04f - 9.727779e-04f * _Complex_I },
902 { -5.723872e-04f + 9.914038e-04f * _Complex_I, -8.302721e-04f - 1.438073e-03f * _Complex_I,
903 2.445280e-03f + 0.000000e+00f * _Complex_I, -1.378399e-03f + 2.387458e-03f * _Complex_I,
904 -1.882898e-03f - 3.261274e-03f * _Complex_I, 4.921549e-03f - 1.205432e-18f * _Complex_I,
905 -2.760152e-03f + 4.780723e-03f * _Complex_I, -2.460774e-03f - 4.262186e-03f * _Complex_I,
906 3.765795e-03f - 1.844708e-18f * _Complex_I, -1.378399e-03f + 2.387458e-03f * _Complex_I,
907 -1.222640e-03f - 2.117675e-03f * _Complex_I, 1.660544e-03f - 1.220148e-18f * _Complex_I,
908 -5.723872e-04f + 9.914038e-04f * _Complex_I },
909 { 1.226482e-03f + 3.004015e-19f * _Complex_I, -9.600816e-04f + 1.662910e-03f * _Complex_I,
910 -1.495900e-03f - 2.590974e-03f * _Complex_I, 3.833507e-03f + 0.000000e+00f * _Complex_I,
911 -3.167257e-03f + 5.485850e-03f * _Complex_I, -4.303595e-03f - 7.454046e-03f * _Complex_I,
912 9.412791e-03f - 2.305469e-18f * _Complex_I, -4.303595e-03f + 7.454046e-03f * _Complex_I,
913 -3.167257e-03f - 5.485850e-03f * _Complex_I, 3.833507e-03f - 1.877877e-18f * _Complex_I,
914 -1.495900e-03f + 2.590974e-03f * _Complex_I, -9.600816e-04f - 1.662910e-03f * _Complex_I,
915 1.226482e-03f - 9.012046e-19f * _Complex_I },
916 { -9.898007e-04f - 1.714385e-03f * _Complex_I, 3.215120e-03f + 3.643077e-18f * _Complex_I,
917 -2.507621e-03f + 4.343327e-03f * _Complex_I, -3.557798e-03f - 6.162286e-03f * _Complex_I,
918 1.105198e-02f + 0.000000e+00f * _Complex_I, -7.691179e-03f + 1.332151e-02f * _Complex_I,
919 -8.705793e-03f - 1.507888e-02f * _Complex_I, 1.538236e-02f - 3.767591e-18f * _Complex_I,
920 -5.525988e-03f + 9.571292e-03f * _Complex_I, -3.557798e-03f - 6.162286e-03f * _Complex_I,
921 5.015242e-03f - 2.456760e-18f * _Complex_I, -1.607560e-03f + 2.784375e-03f * _Complex_I,
922 -9.898007e-04f - 1.714385e-03f * _Complex_I },
923 { -1.414655e-03f + 2.450254e-03f * _Complex_I, -2.341263e-03f - 4.055186e-03f * _Complex_I,
924 6.915775e-03f + 1.693876e-18f * _Complex_I, -5.086403e-03f + 8.809908e-03f * _Complex_I,
925 -8.062191e-03f - 1.396412e-02f * _Complex_I, 2.415333e-02f + 0.000000e+00f * _Complex_I,
926 -1.451128e-02f + 2.513428e-02f * _Complex_I, -1.207667e-02f - 2.091740e-02f * _Complex_I,
927 1.612438e-02f - 3.949335e-18f * _Complex_I, -5.086403e-03f + 8.809908e-03f * _Complex_I,
928 -3.457887e-03f - 5.989237e-03f * _Complex_I, 4.682526e-03f - 1.061161e-17f * _Complex_I,
929 -1.414655e-03f + 2.450254e-03f * _Complex_I },
930 { 3.039574e-03f + 1.488962e-18f * _Complex_I, -2.598226e-03f + 4.500260e-03f * _Complex_I,
931 -3.750909e-03f - 6.496765e-03f * _Complex_I, 1.119776e-02f + 2.742661e-18f * _Complex_I,
932 -9.210579e-03f + 1.595319e-02f * _Complex_I, -1.464762e-02f - 2.537042e-02f * _Complex_I,
933 3.672076e-02f + 0.000000e+00f * _Complex_I, -1.464762e-02f + 2.537042e-02f * _Complex_I,
934 -9.210579e-03f - 1.595319e-02f * _Complex_I, 1.119776e-02f - 2.742661e-18f * _Complex_I,
935 -3.750909e-03f + 6.496765e-03f * _Complex_I, -2.598226e-03f - 4.500260e-03f * _Complex_I,
936 3.039574e-03f - 1.488962e-18f * _Complex_I },
937 { -1.414655e-03f - 2.450254e-03f * _Complex_I, 4.682526e-03f + 1.061161e-17f * _Complex_I,
938 -3.457887e-03f + 5.989237e-03f * _Complex_I, -5.086403e-03f - 8.809908e-03f * _Complex_I,
939 1.612438e-02f + 3.949335e-18f * _Complex_I, -1.207667e-02f + 2.091740e-02f * _Complex_I,
940 -1.451128e-02f - 2.513428e-02f * _Complex_I, 2.415333e-02f + 0.000000e+00f * _Complex_I,
941 -8.062191e-03f + 1.396412e-02f * _Complex_I, -5.086403e-03f - 8.809908e-03f * _Complex_I,
942 6.915775e-03f - 1.693876e-18f * _Complex_I, -2.341263e-03f + 4.055186e-03f * _Complex_I,
943 -1.414655e-03f - 2.450254e-03f * _Complex_I },
944 { -9.898007e-04f + 1.714385e-03f * _Complex_I, -1.607560e-03f - 2.784375e-03f * _Complex_I,
945 5.015242e-03f + 2.456760e-18f * _Complex_I, -3.557798e-03f + 6.162286e-03f * _Complex_I,
946 -5.525988e-03f - 9.571292e-03f * _Complex_I, 1.538236e-02f + 3.767591e-18f * _Complex_I,
947 -8.705793e-03f + 1.507888e-02f * _Complex_I, -7.691179e-03f - 1.332151e-02f * _Complex_I,
948 1.105198e-02f + 0.000000e+00f * _Complex_I, -3.557798e-03f + 6.162286e-03f * _Complex_I,
949 -2.507621e-03f - 4.343327e-03f * _Complex_I, 3.215120e-03f - 3.643077e-18f * _Complex_I,
950 -9.898007e-04f + 1.714385e-03f * _Complex_I },
951 { 1.226482e-03f + 9.012046e-19f * _Complex_I, -9.600816e-04f + 1.662910e-03f * _Complex_I,
952 -1.495900e-03f - 2.590974e-03f * _Complex_I, 3.833507e-03f + 1.877877e-18f * _Complex_I,
953 -3.167257e-03f + 5.485850e-03f * _Complex_I, -4.303595e-03f - 7.454046e-03f * _Complex_I,
954 9.412791e-03f + 2.305469e-18f * _Complex_I, -4.303595e-03f + 7.454046e-03f * _Complex_I,
955 -3.167257e-03f - 5.485850e-03f * _Complex_I, 3.833507e-03f + 0.000000e+00f * _Complex_I,
956 -1.495900e-03f + 2.590974e-03f * _Complex_I, -9.600816e-04f - 1.662910e-03f * _Complex_I,
957 1.226482e-03f - 3.004015e-19f * _Complex_I },
958 { -5.723872e-04f - 9.914038e-04f * _Complex_I, 1.660544e-03f + 1.220148e-18f * _Complex_I,
959 -1.222640e-03f + 2.117675e-03f * _Complex_I, -1.378399e-03f - 2.387458e-03f * _Complex_I,
960 3.765795e-03f + 1.844708e-18f * _Complex_I, -2.460774e-03f + 4.262186e-03f * _Complex_I,
961 -2.760152e-03f - 4.780723e-03f * _Complex_I, 4.921549e-03f + 1.205432e-18f * _Complex_I,
962 -1.882898e-03f + 3.261274e-03f * _Complex_I, -1.378399e-03f - 2.387458e-03f * _Complex_I,
963 2.445280e-03f + 0.000000e+00f * _Complex_I, -8.302721e-04f + 1.438073e-03f * _Complex_I,
964 -5.723872e-04f - 9.914038e-04f * _Complex_I },
965 { -5.616336e-04f + 9.727779e-04f * _Complex_I, -6.914471e-04f - 1.197621e-03f * _Complex_I,
966 1.738862e-03f + 1.277695e-18f * _Complex_I, -9.721139e-04f + 1.683751e-03f * _Complex_I,
967 -1.223392e-03f - 2.118978e-03f * _Complex_I, 3.210942e-03f + 7.276687e-18f * _Complex_I,
968 -1.832781e-03f + 3.174469e-03f * _Complex_I, -1.605471e-03f - 2.780758e-03f * _Complex_I,
969 2.446785e-03f + 2.772471e-18f * _Complex_I, -9.721139e-04f + 1.683751e-03f * _Complex_I,
970 -8.694311e-04f - 1.505899e-03f * _Complex_I, 1.382894e-03f + 0.000000e+00f * _Complex_I,
971 -5.616336e-04f + 9.727779e-04f * _Complex_I },
972 { 8.228091e-04f + 8.061204e-19f * _Complex_I, -5.365069e-04f + 9.292572e-04f * _Complex_I,
973 -6.011501e-04f - 1.041223e-03f * _Complex_I, 1.249890e-03f + 9.184039e-19f * _Complex_I,
974 -7.632708e-04f + 1.322024e-03f * _Complex_I, -9.846035e-04f - 1.705383e-03f * _Complex_I,
975 2.080486e-03f + 1.019144e-18f * _Complex_I, -9.846035e-04f + 1.705383e-03f * _Complex_I,
976 -7.632708e-04f - 1.322024e-03f * _Complex_I, 1.249890e-03f + 3.061346e-19f * _Complex_I,
977 -6.011501e-04f + 1.041223e-03f * _Complex_I, -5.365069e-04f - 9.292572e-04f * _Complex_I,
978 8.228091e-04f + 0.000000e+00f * _Complex_I } },
979 { { 1.221201e-03f + 5.982162e-19f * _Complex_I, -1.773498e-03f - 6.515727e-19f * _Complex_I,
980 1.246697e-03f + 3.053526e-19f * _Complex_I, -8.215306e-04f - 1.006085e-19f * _Complex_I,
981 7.609372e-04f + 0.000000e+00f * _Complex_I, -4.863927e-04f + 5.956592e-20f * _Complex_I,
982 4.882100e-04f - 1.195770e-19f * _Complex_I, -4.863927e-04f + 1.786978e-19f * _Complex_I,
983 7.609372e-04f - 3.727517e-19f * _Complex_I, -8.215306e-04f + 5.030424e-19f * _Complex_I,
984 1.246697e-03f - 9.160579e-19f * _Complex_I, -1.773498e-03f + 1.520336e-18f * _Complex_I,
985 1.221201e-03f - 1.196432e-18f * _Complex_I },
986 { 7.406884e-04f - 1.282910e-03f * _Complex_I, -1.025411e-03f + 1.776065e-03f * _Complex_I,
987 7.186273e-04f - 1.244699e-03f * _Complex_I, -4.025606e-04f + 6.972554e-04f * _Complex_I,
988 5.908383e-04f - 1.023362e-03f * _Complex_I, -1.125190e-03f + 1.948886e-03f * _Complex_I,
989 1.432695e-03f - 2.481501e-03f * _Complex_I, -1.125190e-03f + 1.948886e-03f * _Complex_I,
990 5.908383e-04f - 1.023362e-03f * _Complex_I, -4.025606e-04f + 6.972554e-04f * _Complex_I,
991 7.186273e-04f - 1.244699e-03f * _Complex_I, -1.025411e-03f + 1.776065e-03f * _Complex_I,
992 7.406884e-04f - 1.282910e-03f * _Complex_I },
993 { -7.162255e-04f - 1.240539e-03f * _Complex_I, 8.961176e-04f + 1.552121e-03f * _Complex_I,
994 -6.705589e-04f - 1.161442e-03f * _Complex_I, 6.187140e-04f + 1.071644e-03f * _Complex_I,
995 -1.165433e-03f - 2.018589e-03f * _Complex_I, 1.948120e-03f + 3.374242e-03f * _Complex_I,
996 -2.297663e-03f - 3.979669e-03f * _Complex_I, 1.948120e-03f + 3.374242e-03f * _Complex_I,
997 -1.165433e-03f - 2.018589e-03f * _Complex_I, 6.187140e-04f + 1.071644e-03f * _Complex_I,
998 -6.705589e-04f - 1.161442e-03f * _Complex_I, 8.961176e-04f + 1.552121e-03f * _Complex_I,
999 -7.162255e-04f - 1.240539e-03f * _Complex_I },
1000 { -1.280260e-03f - 7.839331e-19f * _Complex_I, 1.987108e-03f + 9.734024e-19f * _Complex_I,
1001 -2.614019e-03f - 9.603749e-19f * _Complex_I, 3.635167e-03f + 8.903590e-19f * _Complex_I,
1002 -4.954867e-03f - 6.067962e-19f * _Complex_I, 6.653220e-03f + 0.000000e+00f * _Complex_I,
1003 -7.600546e-03f + 9.307984e-19f * _Complex_I, 6.653220e-03f - 1.629569e-18f * _Complex_I,
1004 -4.954867e-03f + 1.820389e-18f * _Complex_I, 3.635167e-03f - 1.780718e-18f * _Complex_I,
1005 -2.614019e-03f + 1.600625e-18f * _Complex_I, 1.987108e-03f - 1.460104e-18f * _Complex_I,
1006 -1.280260e-03f + 1.097506e-18f * _Complex_I },
1007 { -5.756945e-04f + 9.971322e-04f * _Complex_I, 1.268614e-03f - 2.197304e-03f * _Complex_I,
1008 -2.421407e-03f + 4.194000e-03f * _Complex_I, 4.045715e-03f - 7.007384e-03f * _Complex_I,
1009 -5.527367e-03f + 9.573681e-03f * _Complex_I, 6.837207e-03f - 1.184239e-02f * _Complex_I,
1010 -7.288212e-03f + 1.262355e-02f * _Complex_I, 6.837207e-03f - 1.184239e-02f * _Complex_I,
1011 -5.527367e-03f + 9.573681e-03f * _Complex_I, 4.045715e-03f - 7.007384e-03f * _Complex_I,
1012 -2.421407e-03f + 4.194000e-03f * _Complex_I, 1.268614e-03f - 2.197304e-03f * _Complex_I,
1013 -5.756945e-04f + 9.971322e-04f * _Complex_I },
1014 { 7.349896e-04f + 1.273039e-03f * _Complex_I, -1.748057e-03f - 3.027723e-03f * _Complex_I,
1015 3.332671e-03f + 5.772355e-03f * _Complex_I, -6.051736e-03f - 1.048191e-02f * _Complex_I,
1016 9.842376e-03f + 1.704749e-02f * _Complex_I, -1.401169e-02f - 2.426897e-02f * _Complex_I,
1017 1.598601e-02f + 2.768858e-02f * _Complex_I, -1.401169e-02f - 2.426897e-02f * _Complex_I,
1018 9.842376e-03f + 1.704749e-02f * _Complex_I, -6.051736e-03f - 1.048191e-02f * _Complex_I,
1019 3.332671e-03f + 5.772355e-03f * _Complex_I, -1.748057e-03f - 3.027723e-03f * _Complex_I,
1020 7.349896e-04f + 1.273039e-03f * _Complex_I },
1021 { 1.400383e-03f + 1.028985e-18f * _Complex_I, -3.545886e-03f - 2.171229e-18f * _Complex_I,
1022 7.289370e-03f + 3.570761e-18f * _Complex_I, -1.418908e-02f - 5.212982e-18f * _Complex_I,
1023 2.520839e-02f + 6.174275e-18f * _Complex_I, -3.934772e-02f - 4.818706e-18f * _Complex_I,
1024 4.797481e-02f + 0.000000e+00f * _Complex_I, -3.934772e-02f + 4.818706e-18f * _Complex_I,
1025 2.520839e-02f - 6.174275e-18f * _Complex_I, -1.418908e-02f + 5.212982e-18f * _Complex_I,
1026 7.289370e-03f - 3.570761e-18f * _Complex_I, -3.545886e-03f + 2.171229e-18f * _Complex_I,
1027 1.400383e-03f - 1.028985e-18f * _Complex_I },
1028 { 7.349896e-04f - 1.273039e-03f * _Complex_I, -1.748057e-03f + 3.027723e-03f * _Complex_I,
1029 3.332671e-03f - 5.772355e-03f * _Complex_I, -6.051736e-03f + 1.048191e-02f * _Complex_I,
1030 9.842376e-03f - 1.704749e-02f * _Complex_I, -1.401169e-02f + 2.426897e-02f * _Complex_I,
1031 1.598601e-02f - 2.768858e-02f * _Complex_I, -1.401169e-02f + 2.426897e-02f * _Complex_I,
1032 9.842376e-03f - 1.704749e-02f * _Complex_I, -6.051736e-03f + 1.048191e-02f * _Complex_I,
1033 3.332671e-03f - 5.772355e-03f * _Complex_I, -1.748057e-03f + 3.027723e-03f * _Complex_I,
1034 7.349896e-04f - 1.273039e-03f * _Complex_I },
1035 { -5.756945e-04f - 9.971322e-04f * _Complex_I, 1.268614e-03f + 2.197304e-03f * _Complex_I,
1036 -2.421407e-03f - 4.194000e-03f * _Complex_I, 4.045715e-03f + 7.007384e-03f * _Complex_I,
1037 -5.527367e-03f - 9.573681e-03f * _Complex_I, 6.837207e-03f + 1.184239e-02f * _Complex_I,
1038 -7.288212e-03f - 1.262355e-02f * _Complex_I, 6.837207e-03f + 1.184239e-02f * _Complex_I,
1039 -5.527367e-03f - 9.573681e-03f * _Complex_I, 4.045715e-03f + 7.007384e-03f * _Complex_I,
1040 -2.421407e-03f - 4.194000e-03f * _Complex_I, 1.268614e-03f + 2.197304e-03f * _Complex_I,
1041 -5.756945e-04f - 9.971322e-04f * _Complex_I },
1042 { -1.280260e-03f - 1.097506e-18f * _Complex_I, 1.987108e-03f + 1.460104e-18f * _Complex_I,
1043 -2.614019e-03f - 1.600625e-18f * _Complex_I, 3.635167e-03f + 1.780718e-18f * _Complex_I,
1044 -4.954867e-03f - 1.820389e-18f * _Complex_I, 6.653220e-03f + 1.629569e-18f * _Complex_I,
1045 -7.600546e-03f - 9.307984e-19f * _Complex_I, 6.653220e-03f + 0.000000e+00f * _Complex_I,
1046 -4.954867e-03f + 6.067962e-19f * _Complex_I, 3.635167e-03f - 8.903590e-19f * _Complex_I,
1047 -2.614019e-03f + 9.603749e-19f * _Complex_I, 1.987108e-03f - 9.734024e-19f * _Complex_I,
1048 -1.280260e-03f + 7.839331e-19f * _Complex_I },
1049 { -7.162255e-04f + 1.240539e-03f * _Complex_I, 8.961176e-04f - 1.552121e-03f * _Complex_I,
1050 -6.705589e-04f + 1.161442e-03f * _Complex_I, 6.187140e-04f - 1.071644e-03f * _Complex_I,
1051 -1.165433e-03f + 2.018589e-03f * _Complex_I, 1.948120e-03f - 3.374242e-03f * _Complex_I,
1052 -2.297663e-03f + 3.979669e-03f * _Complex_I, 1.948120e-03f - 3.374242e-03f * _Complex_I,
1053 -1.165433e-03f + 2.018589e-03f * _Complex_I, 6.187140e-04f - 1.071644e-03f * _Complex_I,
1054 -6.705589e-04f + 1.161442e-03f * _Complex_I, 8.961176e-04f - 1.552121e-03f * _Complex_I,
1055 -7.162255e-04f + 1.240539e-03f * _Complex_I },
1056 { 7.406884e-04f + 1.282910e-03f * _Complex_I, -1.025411e-03f - 1.776065e-03f * _Complex_I,
1057 7.186273e-04f + 1.244699e-03f * _Complex_I, -4.025606e-04f - 6.972554e-04f * _Complex_I,
1058 5.908383e-04f + 1.023362e-03f * _Complex_I, -1.125190e-03f - 1.948886e-03f * _Complex_I,
1059 1.432695e-03f + 2.481501e-03f * _Complex_I, -1.125190e-03f - 1.948886e-03f * _Complex_I,
1060 5.908383e-04f + 1.023362e-03f * _Complex_I, -4.025606e-04f - 6.972554e-04f * _Complex_I,
1061 7.186273e-04f + 1.244699e-03f * _Complex_I, -1.025411e-03f - 1.776065e-03f * _Complex_I,
1062 7.406884e-04f + 1.282910e-03f * _Complex_I },
1063 { 1.221201e-03f + 1.196432e-18f * _Complex_I, -1.773498e-03f - 1.520336e-18f * _Complex_I,
1064 1.246697e-03f + 9.160579e-19f * _Complex_I, -8.215306e-04f - 5.030424e-19f * _Complex_I,
1065 7.609372e-04f + 3.727517e-19f * _Complex_I, -4.863927e-04f - 1.786978e-19f * _Complex_I,
1066 4.882100e-04f + 1.195770e-19f * _Complex_I, -4.863927e-04f - 5.956592e-20f * _Complex_I,
1067 7.609372e-04f + 0.000000e+00f * _Complex_I, -8.215306e-04f + 1.006085e-19f * _Complex_I,
1068 1.246697e-03f - 3.053526e-19f * _Complex_I, -1.773498e-03f + 6.515727e-19f * _Complex_I,
1069 1.221201e-03f - 5.982162e-19f * _Complex_I } } };
1070
1071 const size_t buffer_size = (size_t)TS * TS * (ndir * 4 + 7) * sizeof(float);
1072 size_t padded_buffer_size;
1073 char *const all_buffers = (char *)dt_pixelpipe_cache_alloc_perthread(buffer_size, sizeof(char), &padded_buffer_size);
1074 if(!all_buffers)
1075 {
1076 fprintf(stderr, "[demosaic] not able to allocate FDC base buffers\n");
1077 return;
1078 }
1079
1080 /* Map a green hexagon around each non-green pixel and vice versa: */
1081 for(int row = 0; row < 3; row++)
1082 for(int col = 0; col < 3; col++)
1083 for(int ng = 0, d = 0; d < 10; d += 2)
1084 {
1085 int g = FCxtrans(row, col, NULL, xtrans) == 1;
1086 if(FCxtrans(row + orth[d], col + orth[d + 2], NULL, xtrans) == 1)
1087 ng = 0;
1088 else
1089 ng++;
1090 // if there are four non-green pixels adjacent in cardinal
1091 // directions, this is the solitary green pixel
1092 if(ng == 4)
1093 {
1094 sgrow = row;
1095 sgcol = col;
1096 }
1097 if(ng == g + 1)
1098 for(int c = 0; c < 8; c++)
1099 {
1100 int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
1101 int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
1102 // offset within TSxTS buffer
1103 allhex[row][col][c ^ (g * 2 & d)] = h + v * TS;
1104 }
1105 }
1106
1107 // extra passes propagates out errors at edges, hence need more padding
1108 const int pad_tile = 13;
1109
1110 // calculate offsets for this roi
1111 int rowoffset = 0;
1112 int coloffset = 0;
1113 for(int row = 0; row < 6; row++)
1114 {
1115 if(!((row - sgrow) % 3))
1116 {
1117 for(int col = 0; col < 6; col++)
1118 {
1119 if(!((col - sgcol) % 3) && (FCxtrans(row, col + 1, roi_in, xtrans) == 0))
1120 {
1121 rowoffset = 37 - row - pad_tile; // 1 plus a generous multiple of 6
1122 coloffset = 37 - col - pad_tile; // to avoid that this value gets negative
1123 break;
1124 }
1125 }
1126 break;
1127 }
1128 }
1129
1130 // depending on the iso, use either a hybrid approach for chroma, or pure fdc
1131 float hybrid_fdc[2] = { 1.0f, 0.0f };
1132 const int xover_iso = dt_conf_get_int("plugins/darkroom/demosaic/fdc_xover_iso");
1133 int iso = self->dev->image_storage.exif_iso;
1134 if(iso > xover_iso)
1135 {
1136 hybrid_fdc[0] = 0.0f;
1137 hybrid_fdc[1] = 1.0f;
1138 }
1139
1140#ifdef _OPENMP
1141#pragma omp parallel for default(none) \
1142 dt_omp_firstprivate(ndir, all_buffers, dir, directionality, harr, height, in, Minv, modarr, roi_in, width, \
1143 xtrans, pad_tile, padded_buffer_size) \
1144 shared(sgrow, sgcol, allhex, out, rowoffset, coloffset, hybrid_fdc) schedule(static)
1145#endif
1146 // step through TSxTS cells of image, each tile overlapping the
1147 // prior as interpolation needs a substantial border
1148 for(int top = -pad_tile; top < height - pad_tile; top += TS - (pad_tile * 2))
1149 {
1150 char *const buffer = dt_get_perthread(all_buffers, padded_buffer_size);
1151 // rgb points to ndir TSxTS tiles of 3 channels (R, G, and B)
1152 float(*rgb)[TS][TS][3] = (float(*)[TS][TS][3])buffer;
1153 // yuv points to 3 channel (Y, u, and v) TSxTS tiles
1154 // note that channels come before tiles to allow for a
1155 // vectorization optimization when building drv[] from yuv[]
1156 float (*const yuv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1157 // drv points to ndir TSxTS tiles, each a single channel of derivatives
1158 float (*const drv)[TS][TS] = (float(*)[TS][TS])(buffer + TS * TS * (ndir * 3 + 3) * sizeof(float));
1159 // gmin and gmax reuse memory which is used later by yuv buffer;
1160 // each points to a TSxTS tile of single channel data
1161 float (*const gmin)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1162 float (*const gmax)[TS] = (float(*)[TS])(buffer + TS * TS * (ndir * 3 + 1) * sizeof(float));
1163 // homo and homosum reuse memory which is used earlier in the
1164 // loop; each points to ndir single-channel TSxTS tiles
1165 uint8_t (*const homo)[TS][TS] = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float));
1166 uint8_t (*const homosum)[TS][TS]
1167 = (uint8_t(*)[TS][TS])(buffer + TS * TS * (ndir * 3) * sizeof(float) + TS * TS * ndir * sizeof(uint8_t));
1168 // append all fdc related buffers
1169 float complex *fdc_buf_start = (float complex *)(buffer + TS * TS * (ndir * 4 + 3) * sizeof(float));
1170 const int fdc_buf_size = TS * TS;
1171 float(*const i_src) = (float *)fdc_buf_start;
1172 float complex(*const o_src) = fdc_buf_start + fdc_buf_size;
1173 // by the time the chroma values are calculated, o_src can be overwritten.
1174 float(*const fdc_chroma) = (float *)o_src;
1175
1176 for(int left = -pad_tile; left < width - pad_tile; left += TS - (pad_tile * 2))
1177 {
1178 int mrow = MIN(top + TS, height + pad_tile);
1179 int mcol = MIN(left + TS, width + pad_tile);
1180
1181 // Copy current tile from in to image buffer. If border goes
1182 // beyond edges of image, fill with mirrored/interpolated edges.
1183 // The extra border avoids discontinuities at image edges.
1184 for(int row = top; row < mrow; row++)
1185 for(int col = left; col < mcol; col++)
1186 {
1187 float(*const pix) = rgb[0][row - top][col - left];
1188 if((col >= 0) && (row >= 0) && (col < width) && (row < height))
1189 {
1190 const int f = FCxtrans(row, col, roi_in, xtrans);
1191 for(int c = 0; c < 3; c++) pix[c] = (c == f) ? in[roi_in->width * row + col] : 0.f;
1192 *(i_src + TS * (row - top) + (col - left)) = in[roi_in->width * row + col];
1193 }
1194 else
1195 {
1196 // mirror a border pixel if beyond image edge
1197 const int c = FCxtrans(row, col, roi_in, xtrans);
1198 for(int cc = 0; cc < 3; cc++)
1199 if(cc != c)
1200 pix[cc] = 0.0f;
1201 else
1202 {
1203#define TRANSLATE(n, size) ((n >= size) ? (2 * size - n - 2) : abs(n))
1204 const int cy = TRANSLATE(row, height), cx = TRANSLATE(col, width);
1205 if(c == FCxtrans(cy, cx, roi_in, xtrans))
1206 {
1207 pix[c] = in[roi_in->width * cy + cx];
1208 *(i_src + TS * (row - top) + (col - left)) = in[roi_in->width * cy + cx];
1209 }
1210 else
1211 {
1212 // interpolate if mirror pixel is a different color
1213 float sum = 0.0f;
1214 uint8_t count = 0;
1215 for(int y = row - 1; y <= row + 1; y++)
1216 for(int x = col - 1; x <= col + 1; x++)
1217 {
1218 const int yy = TRANSLATE(y, height), xx = TRANSLATE(x, width);
1219 const int ff = FCxtrans(yy, xx, roi_in, xtrans);
1220 if(ff == c)
1221 {
1222 sum += in[roi_in->width * yy + xx];
1223 count++;
1224 }
1225 }
1226 pix[c] = sum / count;
1227 *(i_src + TS * (row - top) + (col - left)) = pix[c];
1228 }
1229 }
1230 }
1231 }
1232
1233 // duplicate rgb[0] to rgb[1], rgb[2], and rgb[3]
1234 for(int c = 1; c <= 3; c++) memcpy(rgb[c], rgb[0], sizeof(*rgb));
1235
1236 // note that successive calculations are inset within the tile
1237 // so as to give enough border data, and there needs to be a 6
1238 // pixel border initially to allow allhex to find neighboring
1239 // pixels
1240
1241 /* Set green1 and green3 to the minimum and maximum allowed values: */
1242 // Run through each red/blue or blue/red pair, setting their g1
1243 // and g3 values to the min/max of green pixels surrounding the
1244 // pair. Use a 3 pixel border as gmin/gmax is used by
1245 // interpolate green which has a 3 pixel border.
1246 const int pad_g1_g3 = 3;
1247 for(int row = top + pad_g1_g3; row < mrow - pad_g1_g3; row++)
1248 {
1249 // setting max to 0.0f signifies that this is a new pair, which
1250 // requires a new min/max calculation of its neighboring greens
1251 float min = FLT_MAX, max = 0.0f;
1252 for(int col = left + pad_g1_g3; col < mcol - pad_g1_g3; col++)
1253 {
1254 // if in row of horizontal red & blue pairs (or processing
1255 // vertical red & blue pairs near image bottom), reset min/max
1256 // between each pair
1257 if(FCxtrans(row, col, roi_in, xtrans) == 1)
1258 {
1259 min = FLT_MAX, max = 0.0f;
1260 continue;
1261 }
1262 // if at start of red & blue pair, calculate min/max of green
1263 // pixels surrounding it; note that while normally using == to
1264 // compare floats is suspect, here the check is if 0.0f has
1265 // explicitly been assigned to max (which signifies a new
1266 // red/blue pair)
1267 if(max == 0.0f)
1268 {
1269 float (*const pix)[3] = &rgb[0][row - top][col - left];
1270 const short *const hex = hexmap(row, col, allhex);
1271 for(int c = 0; c < 6; c++)
1272 {
1273 const float val = pix[hex[c]][1];
1274 if(min > val) min = val;
1275 if(max < val) max = val;
1276 }
1277 }
1278 gmin[row - top][col - left] = min;
1279 gmax[row - top][col - left] = max;
1280 // handle vertical red/blue pairs
1281 switch((row - sgrow) % 3)
1282 {
1283 // hop down a row to second pixel in vertical pair
1284 case 1:
1285 if(row < mrow - 4) row++, col--;
1286 break;
1287 // then if not done with the row hop up and right to next
1288 // vertical red/blue pair, resetting min/max
1289 case 2:
1290 min = FLT_MAX, max = 0.0f;
1291 if((col += 2) < mcol - 4 && row > top + 3) row--;
1292 }
1293 }
1294 }
1295
1296 /* Interpolate green horizontally, vertically, and along both diagonals: */
1297 // need a 3 pixel border here as 3*hex[] can have a 3 unit offset
1298 const int pad_g_interp = 3;
1299 for(int row = top + pad_g_interp; row < mrow - pad_g_interp; row++)
1300 for(int col = left + pad_g_interp; col < mcol - pad_g_interp; col++)
1301 {
1302 float color[8];
1303 int f = FCxtrans(row, col, roi_in, xtrans);
1304 if(f == 1) continue;
1305 float (*const pix)[3] = &rgb[0][row - top][col - left];
1306 const short *const hex = hexmap(row, col, allhex);
1307 // TODO: these constants come from integer math constants in
1308 // dcraw -- calculate them instead from interpolation math
1309 color[0] = 0.6796875f * (pix[hex[1]][1] + pix[hex[0]][1])
1310 - 0.1796875f * (pix[2 * hex[1]][1] + pix[2 * hex[0]][1]);
1311 color[1] = 0.87109375f * pix[hex[3]][1] + pix[hex[2]][1] * 0.13f
1312 + 0.359375f * (pix[0][f] - pix[-hex[2]][f]);
1313 for(int c = 0; c < 2; c++)
1314 color[2 + c] = 0.640625f * pix[hex[4 + c]][1] + 0.359375f * pix[-2 * hex[4 + c]][1]
1315 + 0.12890625f * (2 * pix[0][f] - pix[3 * hex[4 + c]][f] - pix[-3 * hex[4 + c]][f]);
1316 for(int c = 0; c < 4; c++)
1317 rgb[c ^ !((row - sgrow) % 3)][row - top][col - left][1]
1318 = CLAMPS(color[c], gmin[row - top][col - left], gmax[row - top][col - left]);
1319 }
1320
1321 /* Interpolate red and blue values for solitary green pixels: */
1322 const int pad_rb_g = 6;
1323 for(int row = (top - sgrow + pad_rb_g + 2) / 3 * 3 + sgrow; row < mrow - pad_rb_g; row += 3)
1324 for(int col = (left - sgcol + pad_rb_g + 2) / 3 * 3 + sgcol; col < mcol - pad_rb_g; col += 3)
1325 {
1326 float(*rfx)[3] = &rgb[0][row - top][col - left];
1327 int h = FCxtrans(row, col + 1, roi_in, xtrans);
1328 float diff[6] = { 0.0f };
1329 float color[3][8];
1330 for(int i = 1, d = 0; d < 6; d++, i ^= TS ^ 1, h ^= 2)
1331 {
1332 for(int c = 0; c < 2; c++, h ^= 2)
1333 {
1334 float g = 2 * rfx[0][1] - rfx[i << c][1] - rfx[-(i << c)][1];
1335 color[h][d] = g + rfx[i << c][h] + rfx[-(i << c)][h];
1336 if(d > 1)
1337 diff[d] += SQR(rfx[i << c][1] - rfx[-(i << c)][1] - rfx[i << c][h] + rfx[-(i << c)][h]) + SQR(g);
1338 }
1339 if(d > 1 && (d & 1))
1340 if(diff[d - 1] < diff[d])
1341 for(int c = 0; c < 2; c++) color[c * 2][d] = color[c * 2][d - 1];
1342 if(d < 2 || (d & 1))
1343 {
1344 for(int c = 0; c < 2; c++) rfx[0][c * 2] = color[c * 2][d] / 2.f;
1345 rfx += TS * TS;
1346 }
1347 }
1348 }
1349
1350 /* Interpolate red for blue pixels and vice versa: */
1351 const int pad_rb_br = 6;
1352 for(int row = top + pad_rb_br; row < mrow - pad_rb_br; row++)
1353 for(int col = left + pad_rb_br; col < mcol - pad_rb_br; col++)
1354 {
1355 int f = 2 - FCxtrans(row, col, roi_in, xtrans);
1356 if(f == 1) continue;
1357 float(*rfx)[3] = &rgb[0][row - top][col - left];
1358 int c = (row - sgrow) % 3 ? TS : 1;
1359 int h = 3 * (c ^ TS ^ 1);
1360 for(int d = 0; d < 4; d++, rfx += TS * TS)
1361 {
1362 int i = d > 1 || ((d ^ c) & 1)
1363 || ((fabsf(rfx[0][1] - rfx[c][1]) + fabsf(rfx[0][1] - rfx[-c][1]))
1364 < 2.f * (fabsf(rfx[0][1] - rfx[h][1]) + fabsf(rfx[0][1] - rfx[-h][1]))) ? c : h;
1365 rfx[0][f] = (rfx[i][f] + rfx[-i][f] + 2.f * rfx[0][1] - rfx[i][1] - rfx[-i][1]) / 2.f;
1366 }
1367 }
1368
1369 /* Fill in red and blue for 2x2 blocks of green: */
1370 const int pad_g22 = 8;
1371 for(int row = top + pad_g22; row < mrow - pad_g22; row++)
1372 if((row - sgrow) % 3)
1373 for(int col = left + pad_g22; col < mcol - pad_g22; col++)
1374 if((col - sgcol) % 3)
1375 {
1376 float redblue[3][3];
1377 float(*rfx)[3] = &rgb[0][row - top][col - left];
1378 const short *const hex = hexmap(row, col, allhex);
1379 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
1380 if(hex[d] + hex[d + 1])
1381 {
1382 float g = 3.f * rfx[0][1] - 2.f * rfx[hex[d]][1] - rfx[hex[d + 1]][1];
1383 for(int c = 0; c < 4; c += 2)
1384 {
1385 rfx[0][c] = (g + 2.f * rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 3.f;
1386 redblue[d][c] = rfx[0][c];
1387 }
1388 }
1389 else
1390 {
1391 float g = 2.f * rfx[0][1] - rfx[hex[d]][1] - rfx[hex[d + 1]][1];
1392 for(int c = 0; c < 4; c += 2)
1393 {
1394 rfx[0][c] = (g + rfx[hex[d]][c] + rfx[hex[d + 1]][c]) / 2.f;
1395 redblue[d][c] = rfx[0][c];
1396 }
1397 }
1398 // to fill in red and blue also for diagonal directions
1399 for(int d = 0; d < ndir; d += 2, rfx += TS * TS)
1400 for(int c = 0; c < 4; c += 2) rfx[0][c] = (redblue[0][c] + redblue[2][c]) * 0.5f;
1401 }
1402
1403 // jump back to the first set of rgb buffers (this is a nop
1404 // unless on the second pass)
1405 rgb = (float(*)[TS][TS][3])buffer;
1406 // from here on out, mainly are working within the current tile
1407 // rather than in reference to the image, so don't offset
1408 // mrow/mcol by top/left of tile
1409 mrow -= top;
1410 mcol -= left;
1411
1412 /* Convert to perceptual colorspace and differentiate in all directions: */
1413 // Original dcraw algorithm uses CIELab as perceptual space
1414 // (presumably coming from original AHD) and converts taking
1415 // camera matrix into account. Now use YPbPr which requires much
1416 // less code and is nearly indistinguishable. It assumes the
1417 // camera RGB is roughly linear.
1418 for(int d = 0; d < ndir; d++)
1419 {
1420 const int pad_yuv = 8;
1421 for(int row = pad_yuv; row < mrow - pad_yuv; row++)
1422 for(int col = pad_yuv; col < mcol - pad_yuv; col++)
1423 {
1424 float *rx = rgb[d][row][col];
1425 // use ITU-R BT.2020 YPbPr, which is great, but could use
1426 // a better/simpler choice? note that imageop.h provides
1427 // dt_iop_RGB_to_YCbCr which uses Rec. 601 conversion,
1428 // which appears less good with specular highlights
1429 float y = 0.2627f * rx[0] + 0.6780f * rx[1] + 0.0593f * rx[2];
1430 yuv[0][row][col] = y;
1431 yuv[1][row][col] = (rx[2] - y) * 0.56433f;
1432 yuv[2][row][col] = (rx[0] - y) * 0.67815f;
1433 }
1434 // Note that f can offset by a column (-1 or +1) and by a row
1435 // (-TS or TS). The row-wise offsets cause the undefined
1436 // behavior sanitizer to warn of an out of bounds index, but
1437 // as yfx is multi-dimensional and there is sufficient
1438 // padding, that is not actually so.
1439 const int f = dir[d & 3];
1440 const int pad_drv = 9;
1441 for(int row = pad_drv; row < mrow - pad_drv; row++)
1442 for(int col = pad_drv; col < mcol - pad_drv; col++)
1443 {
1444 float(*yfx)[TS][TS] = (float(*)[TS][TS]) & yuv[0][row][col];
1445 drv[d][row][col] = SQR(2 * yfx[0][0][0] - yfx[0][0][f] - yfx[0][0][-f])
1446 + SQR(2 * yfx[1][0][0] - yfx[1][0][f] - yfx[1][0][-f])
1447 + SQR(2 * yfx[2][0][0] - yfx[2][0][f] - yfx[2][0][-f]);
1448 }
1449 }
1450
1451 /* Build homogeneity maps from the derivatives: */
1452 memset(homo, 0, sizeof(uint8_t) * ndir * TS * TS);
1453 const int pad_homo = 10;
1454 for(int row = pad_homo; row < mrow - pad_homo; row++)
1455 for(int col = pad_homo; col < mcol - pad_homo; col++)
1456 {
1457 float tr = FLT_MAX;
1458 for(int d = 0; d < ndir; d++)
1459 if(tr > drv[d][row][col]) tr = drv[d][row][col];
1460 tr *= 8;
1461 for(int d = 0; d < ndir; d++)
1462 for(int v = -1; v <= 1; v++)
1463 for(int h = -1; h <= 1; h++) homo[d][row][col] += ((drv[d][row + v][col + h] <= tr) ? 1 : 0);
1464 }
1465
1466 /* Build 5x5 sum of homogeneity maps for each pixel & direction */
1467 for(int d = 0; d < ndir; d++)
1468 for(int row = pad_tile; row < mrow - pad_tile; row++)
1469 {
1470 // start before first column where homo[d][row][col+2] != 0,
1471 // so can know v5sum and homosum[d][row][col] will be 0
1472 int col = pad_tile - 5;
1473 uint8_t v5sum[5] = { 0 };
1474 homosum[d][row][col] = 0;
1475 // calculate by rolling through column sums
1476 for(col++; col < mcol - pad_tile; col++)
1477 {
1478 uint8_t colsum = 0;
1479 for(int v = -2; v <= 2; v++) colsum += homo[d][row + v][col + 2];
1480 homosum[d][row][col] = homosum[d][row][col - 1] - v5sum[col % 5] + colsum;
1481 v5sum[col % 5] = colsum;
1482 }
1483 }
1484
1485 /* Calculate chroma values in fdc: */
1486 const int pad_fdc = 6;
1487 for(int row = pad_fdc; row < mrow - pad_fdc; row++)
1488 for(int col = pad_fdc; col < mcol - pad_fdc; col++)
1489 {
1490 int myrow, mycol;
1491 uint8_t hm[8] = { 0 };
1492 uint8_t maxval = 0;
1493 for(int d = 0; d < ndir; d++)
1494 {
1495 hm[d] = homosum[d][row][col];
1496 maxval = (maxval < hm[d] ? hm[d] : maxval);
1497 }
1498 maxval -= maxval >> 3;
1499 float dircount = 0;
1500 float dirsum = 0.f;
1501 for(int d = 0; d < ndir; d++)
1502 if(hm[d] >= maxval)
1503 {
1504 dircount++;
1505 dirsum += directionality[d];
1506 }
1507 float w = dirsum / (float)dircount;
1508 int fdc_row, fdc_col;
1509 float complex C2m, C5m, C7m, C10m;
1510#define CONV_FILT(VAR, FILT) \
1511 VAR = 0.0f + 0.0f * _Complex_I; \
1512 for(fdc_row = 0, myrow = row - 6; fdc_row < 13; fdc_row++, myrow++) \
1513 for(fdc_col = 0, mycol = col - 6; fdc_col < 13; fdc_col++, mycol++) \
1514 VAR += FILT[12 - fdc_row][12 - fdc_col] * *(i_src + TS * myrow + mycol);
1515 CONV_FILT(C2m, harr[0])
1516 CONV_FILT(C5m, harr[1])
1517 CONV_FILT(C7m, harr[2])
1518 CONV_FILT(C10m, harr[3])
1519#undef CONV_FILT
1520 // build the q vector components
1521 myrow = (row + rowoffset) % 6;
1522 mycol = (col + coloffset) % 6;
1523 float complex modulator[8];
1524 for(int c = 0; c < 8; c++) modulator[c] = modarr[myrow][mycol][c];
1525 float complex qmat[8];
1526 qmat[4] = w * C10m * modulator[0] - (1.0f - w) * C2m * modulator[1];
1527 qmat[6] = conjf(qmat[4]);
1528 qmat[1] = C5m * modulator[6];
1529 qmat[2] = conjf(-0.5f * qmat[1]);
1530 qmat[5] = conjf(qmat[2]);
1531 qmat[3] = C7m * modulator[7];
1532 qmat[7] = conjf(qmat[1]);
1533 // get L
1534 C2m = qmat[4] * (conjf(modulator[0]) - conjf(modulator[1]));
1535 float complex C3m = qmat[6] * (modulator[2] - modulator[3]);
1536 float complex C6m = qmat[2] * (conjf(modulator[4]) + conjf(modulator[5]));
1537 float complex C12m = qmat[5] * (modulator[4] + modulator[5]);
1538 float complex C18m = qmat[7] * modulator[6];
1539 qmat[0] = *(i_src + row * TS + col) - C2m - C3m - C5m - C6m - 2.0f * C7m - C12m - C18m;
1540 // get the rgb components from fdc
1541 dt_aligned_pixel_t rgbpix = { 0.f, 0.f, 0.f };
1542 // multiply with the inverse matrix of M
1543 for(int color = 0; color < 3; color++)
1544 for(int c = 0; c < 8; c++)
1545 {
1546 rgbpix[color] += Minv[color][c] * qmat[c];
1547 }
1548 // now separate luma and chroma for
1549 // frequency domain chroma
1550 // and store it in fdc_chroma
1551 float uv[2];
1552 float y = 0.2627f * rgbpix[0] + 0.6780f * rgbpix[1] + 0.0593f * rgbpix[2];
1553 uv[0] = (rgbpix[2] - y) * 0.56433f;
1554 uv[1] = (rgbpix[0] - y) * 0.67815f;
1555 for(int c = 0; c < 2; c++) *(fdc_chroma + c * TS * TS + row * TS + col) = uv[c];
1556 }
1557
1558 /* Average the most homogeneous pixels for the final result: */
1559 for(int row = pad_tile; row < mrow - pad_tile; row++)
1560 for(int col = pad_tile; col < mcol - pad_tile; col++)
1561 {
1562 uint8_t hm[8] = { 0 };
1563 uint8_t maxval = 0;
1564 for(int d = 0; d < ndir; d++)
1565 {
1566 hm[d] = homosum[d][row][col];
1567 maxval = (maxval < hm[d] ? hm[d] : maxval);
1568 }
1569 maxval -= maxval >> 3;
1570 for(int d = 0; d < ndir - 4; d++)
1571 {
1572 if(hm[d] < hm[d + 4])
1573 hm[d] = 0;
1574 else if(hm[d] > hm[d + 4])
1575 hm[d + 4] = 0;
1576 }
1577
1578 dt_aligned_pixel_t avg = { 0.f };
1579 for(int d = 0; d < ndir; d++)
1580 {
1581 if(hm[d] >= maxval)
1582 {
1583 for(int c = 0; c < 3; c++) avg[c] += rgb[d][row][col][c];
1584 avg[3]++;
1585 }
1586 }
1587 dt_aligned_pixel_t rgbpix;
1588 for(int c = 0; c < 3; c++) rgbpix[c] = avg[c] / avg[3];
1589 // preserve all components of Markesteijn for this pixel
1590 const float y = 0.2627f * rgbpix[0] + 0.6780f * rgbpix[1] + 0.0593f * rgbpix[2];
1591 const float um = (rgbpix[2] - y) * 0.56433f;
1592 const float vm = (rgbpix[0] - y) * 0.67815f;
1593 float uvf[2];
1594 // macros for fast meadian filtering
1595#define PIX_SWAP(a, b) \
1596 { \
1597 tempf = (a); \
1598 (a) = (b); \
1599 (b) = tempf; \
1600 }
1601#define PIX_SORT(a, b) \
1602 { \
1603 if((a) > (b)) PIX_SWAP((a), (b)); \
1604 }
1605 // instead of merely reading the values, perform 5 pixel median filter
1606 // one median filter is required to avoid textile artifacts
1607 for(int chrm = 0; chrm < 2; chrm++)
1608 {
1609 float temp[5];
1610 float tempf;
1611 // load the window into temp
1612 memcpy(&temp[0], fdc_chroma + chrm * TS * TS + (row - 1) * TS + (col), sizeof(float) * 1);
1613 memcpy(&temp[1], fdc_chroma + chrm * TS * TS + (row)*TS + (col - 1), sizeof(float) * 3);
1614 memcpy(&temp[4], fdc_chroma + chrm * TS * TS + (row + 1) * TS + (col), sizeof(float) * 1);
1615 PIX_SORT(temp[0], temp[1]);
1616 PIX_SORT(temp[3], temp[4]);
1617 PIX_SORT(temp[0], temp[3]);
1618 PIX_SORT(temp[1], temp[4]);
1619 PIX_SORT(temp[1], temp[2]);
1620 PIX_SORT(temp[2], temp[3]);
1621 PIX_SORT(temp[1], temp[2]);
1622 uvf[chrm] = temp[2];
1623 }
1624 // use hybrid or pure fdc, depending on what was set above.
1625 // in case of hybrid, use the chroma that has the smallest
1626 // absolute value
1627 float uv[2];
1628 uv[0] = (((ABS(uvf[0]) < ABS(um)) & (ABS(uvf[1]) < (1.02f * ABS(vm)))) ? uvf[0] : um) * hybrid_fdc[0] + uvf[0] * hybrid_fdc[1];
1629 uv[1] = (((ABS(uvf[1]) < ABS(vm)) & (ABS(uvf[0]) < (1.02f * ABS(vm)))) ? uvf[1] : vm) * hybrid_fdc[0] + uvf[1] * hybrid_fdc[1];
1630 // combine the luma from Markesteijn with the chroma from above
1631 rgbpix[0] = y + 1.474600014746f * uv[1];
1632 rgbpix[1] = y - 0.15498578286403f * uv[0] - 0.571353132557189f * uv[1];
1633 rgbpix[2] = y + 1.77201282937288f * uv[0];
1634 for(int c = 0; c < 3; c++) out[4 * (width * (row + top) + col + left) + c] = rgbpix[c];
1635 }
1636 }
1637 }
1638 dt_pixelpipe_cache_free_align(all_buffers);
1639}
1640
1641#ifdef HAVE_OPENCL
1642
1643static int process_markesteijn_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe,
1644 const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
1645 const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
1646 const gboolean smooth)
1647{
1650
1651 const int devid = pipe->devid;
1652 const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])piece->dsc_in.xtrans;
1653
1654 const float processed_maximum[4]
1655 = { piece->dsc_in.processed_maximum[0], piece->dsc_in.processed_maximum[1],
1656 piece->dsc_in.processed_maximum[2], 1.0f };
1657
1658 cl_mem dev_tmp = NULL;
1659 cl_mem dev_tmptmp = NULL;
1660 cl_mem dev_xtrans = NULL;
1661 cl_mem dev_green_eq = NULL;
1662 cl_mem dev_rgbv[8] = { NULL };
1663 cl_mem dev_drv[8] = { NULL };
1664 cl_mem dev_homo[8] = { NULL };
1665 cl_mem dev_homosum[8] = { NULL };
1666 cl_mem dev_gminmax = NULL;
1667 cl_mem dev_allhex = NULL;
1668 cl_mem dev_aux = NULL;
1669 cl_mem dev_edge_in = NULL;
1670 cl_mem dev_edge_out = NULL;
1671 cl_int err = -999;
1672
1673 cl_mem *dev_rgb = dev_rgbv;
1674
1675 dev_xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(piece->dsc_in.xtrans), (void *)piece->dsc_in.xtrans);
1676 if(dev_xtrans == NULL) goto error;
1677
1678 int width = roi_in->width;
1679 int height = roi_in->height;
1680 const int passes = ((data->demosaicing_method & ~DEMOSAIC_DUAL) == DT_IOP_DEMOSAIC_MARKESTEIJN_3) ? 3 : 1;
1681 const int ndir = 4 << (passes > 1);
1682 const int pad_tile = (passes == 1) ? 12 : 17;
1683
1684 static const short orth[12] = { 1, 0, 0, 1, -1, 0, 0, -1, 1, 0, 0, 1 },
1685 patt[2][16] = { { 0, 1, 0, -1, 2, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, 0 },
1686 { 0, 1, 0, -2, 1, 0, -2, 0, 1, 1, -2, -2, 1, -1, -1, 1 } };
1687
1688 // allhex contains the offset coordinates (x,y) of a green hexagon around each
1689 // non-green pixel and vice versa
1690 char allhex[3][3][8][2];
1691 // sgreen is the offset in the sensor matrix of the solitary
1692 // green pixels (initialized here only to avoid compiler warning)
1693 char sgreen[2] = { 0 };
1694
1695 // Map a green hexagon around each non-green pixel and vice versa:
1696 for(int row = 0; row < 3; row++)
1697 for(int col = 0; col < 3; col++)
1698 for(int ng = 0, d = 0; d < 10; d += 2)
1699 {
1700 const int g = FCxtrans(row, col, NULL, xtrans) == 1;
1701 if(FCxtrans(row + orth[d] + 6, col + orth[d + 2] + 6, NULL, xtrans) == 1)
1702 ng = 0;
1703 else
1704 ng++;
1705 // if there are four non-green pixels adjacent in cardinal
1706 // directions, this is the solitary green pixel
1707 if(ng == 4)
1708 {
1709 sgreen[0] = col;
1710 sgreen[1] = row;
1711 }
1712 if(ng == g + 1)
1713 for(int c = 0; c < 8; c++)
1714 {
1715 const int v = orth[d] * patt[g][c * 2] + orth[d + 1] * patt[g][c * 2 + 1];
1716 const int h = orth[d + 2] * patt[g][c * 2] + orth[d + 3] * patt[g][c * 2 + 1];
1717
1718 allhex[row][col][c ^ (g * 2 & d)][0] = h;
1719 allhex[row][col][c ^ (g * 2 & d)][1] = v;
1720 }
1721 }
1722
1723 dev_allhex = dt_opencl_copy_host_to_device_constant(devid, sizeof(allhex), allhex);
1724 if(dev_allhex == NULL) goto error;
1725
1726 for(int n = 0; n < ndir; n++)
1727 {
1728 dev_rgbv[n] = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 4 * width * height);
1729 if(dev_rgbv[n] == NULL) goto error;
1730 }
1731
1732 dev_gminmax = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 2 * width * height);
1733 if(dev_gminmax == NULL) goto error;
1734
1735 dev_aux = dt_opencl_alloc_device_buffer(devid, sizeof(float) * 4 * width * height);
1736 if(dev_aux == NULL) goto error;
1737
1738 dev_tmp = dev_out;
1739
1740 {
1741 // copy from dev_in to first rgb image buffer.
1742 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1743 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 0, sizeof(cl_mem), (void *)&dev_in);
1744 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 1, sizeof(cl_mem), (void *)&dev_rgb[0]);
1745 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 2, sizeof(int), (void *)&width);
1746 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 3, sizeof(int), (void *)&height);
1747 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 4, sizeof(int), (void *)&roi_in->x);
1748 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 5, sizeof(int), (void *)&roi_in->y);
1749 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_initial_copy, 6, sizeof(cl_mem), (void *)&dev_xtrans);
1750 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_initial_copy, sizes);
1751 if(err != CL_SUCCESS) goto error;
1752 }
1753
1754
1755 // duplicate dev_rgb[0] to dev_rgb[1], dev_rgb[2], and dev_rgb[3]
1756 for(int c = 1; c <= 3; c++)
1757 {
1758 err = dt_opencl_enqueue_copy_buffer_to_buffer(devid, dev_rgb[0], dev_rgb[c], 0, 0,
1759 sizeof(float) * 4 * width * height);
1760 if(err != CL_SUCCESS) goto error;
1761 }
1762
1763 // find minimum and maximum allowed green values of red/blue pixel pairs
1764 const int pad_g1_g3 = 3;
1765 dt_opencl_local_buffer_t locopt_g1_g3
1766 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
1767 .cellsize = 1 * sizeof(float), .overhead = 0,
1768 .sizex = 1 << 8, .sizey = 1 << 8 };
1769
1770 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_green_minmax, &locopt_g1_g3))
1771 goto error;
1772
1773 {
1774 size_t sizes[3] = { ROUNDUP(width, locopt_g1_g3.sizex), ROUNDUP(height, locopt_g1_g3.sizey), 1 };
1775 size_t local[3] = { locopt_g1_g3.sizex, locopt_g1_g3.sizey, 1 };
1776 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1777 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 1, sizeof(cl_mem), (void *)&dev_gminmax);
1778 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 2, sizeof(int), (void *)&width);
1779 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 3, sizeof(int), (void *)&height);
1780 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 4, sizeof(int), (void *)&pad_g1_g3);
1781 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 5, sizeof(int), (void *)&roi_in->x);
1782 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 6, sizeof(int), (void *)&roi_in->y);
1783 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 7, 2 * sizeof(char), (void *)sgreen);
1784 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1785 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 9, sizeof(cl_mem), (void *)&dev_allhex);
1786 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_green_minmax, 10,
1787 sizeof(float) * (locopt_g1_g3.sizex + 2*3) * (locopt_g1_g3.sizey + 2*3), NULL);
1788 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_green_minmax, sizes, local);
1789 if(err != CL_SUCCESS) goto error;
1790 }
1791
1792 // interpolate green horizontally, vertically, and along both diagonals
1793 const int pad_g_interp = 3;
1794 dt_opencl_local_buffer_t locopt_g_interp
1795 = (dt_opencl_local_buffer_t){ .xoffset = 2*6, .xfactor = 1, .yoffset = 2*6, .yfactor = 1,
1796 .cellsize = 4 * sizeof(float), .overhead = 0,
1797 .sizex = 1 << 8, .sizey = 1 << 8 };
1798
1799 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_interpolate_green, &locopt_g_interp))
1800 goto error;
1801
1802 {
1803 size_t sizes[3] = { ROUNDUP(width, locopt_g_interp.sizex), ROUNDUP(height, locopt_g_interp.sizey), 1 };
1804 size_t local[3] = { locopt_g_interp.sizex, locopt_g_interp.sizey, 1 };
1805 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1806 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 1, sizeof(cl_mem), (void *)&dev_rgb[1]);
1807 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 2, sizeof(cl_mem), (void *)&dev_rgb[2]);
1808 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 3, sizeof(cl_mem), (void *)&dev_rgb[3]);
1809 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 4, sizeof(cl_mem), (void *)&dev_gminmax);
1810 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 5, sizeof(int), (void *)&width);
1811 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 6, sizeof(int), (void *)&height);
1812 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 7, sizeof(int), (void *)&pad_g_interp);
1813 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 8, sizeof(int), (void *)&roi_in->x);
1814 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 9, sizeof(int), (void *)&roi_in->y);
1815 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 10, 2 * sizeof(char), (void *)sgreen);
1816 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1817 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 12, sizeof(cl_mem), (void *)&dev_allhex);
1818 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_green, 13,
1819 sizeof(float) * 4 * (locopt_g_interp.sizex + 2*6) * (locopt_g_interp.sizey + 2*6), NULL);
1820 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_interpolate_green, sizes, local);
1821 if(err != CL_SUCCESS) goto error;
1822 }
1823
1824 // multi-pass loop: one pass for Markesteijn-1 and three passes for Markesteijn-3
1825 for(int pass = 0; pass < passes; pass++)
1826 {
1827
1828 // if on second pass, copy rgb[0] to [3] into rgb[4] to [7] ....
1829 if(pass == 1)
1830 {
1831 for(int c = 0; c < 4; c++)
1832 {
1833 err = dt_opencl_enqueue_copy_buffer_to_buffer(devid, dev_rgb[c], dev_rgb[c + 4], 0, 0,
1834 sizeof(float) * 4 * width * height);
1835 if(err != CL_SUCCESS) goto error;
1836 }
1837 // ... and process that second set of buffers
1838 dev_rgb += 4;
1839 }
1840
1841 // second and third pass (only Markesteijn-3)
1842 if(pass)
1843 {
1844 // recalculate green from interpolated values of closer pixels
1845 const int pad_g_recalc = 6;
1846 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1847 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 0, sizeof(cl_mem), (void *)&dev_rgb[0]);
1848 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 1, sizeof(cl_mem), (void *)&dev_rgb[1]);
1849 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 2, sizeof(cl_mem), (void *)&dev_rgb[2]);
1850 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 3, sizeof(cl_mem), (void *)&dev_rgb[3]);
1851 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 4, sizeof(cl_mem), (void *)&dev_gminmax);
1852 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 5, sizeof(int), (void *)&width);
1853 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 6, sizeof(int), (void *)&height);
1854 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 7, sizeof(int), (void *)&pad_g_recalc);
1855 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 8, sizeof(int), (void *)&roi_in->x);
1856 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 9, sizeof(int), (void *)&roi_in->y);
1857 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 10, 2 * sizeof(char), (void *)sgreen);
1858 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1859 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_recalculate_green, 12, sizeof(cl_mem), (void *)&dev_allhex);
1860 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_recalculate_green, sizes);
1861 if(err != CL_SUCCESS) goto error;
1862 }
1863
1864 // interpolate red and blue values for solitary green pixels
1865 const int pad_rb_g = (passes == 1) ? 6 : 5;
1866 dt_opencl_local_buffer_t locopt_rb_g
1867 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
1868 .cellsize = 4 * sizeof(float), .overhead = 0,
1869 .sizex = 1 << 8, .sizey = 1 << 8 };
1870
1871 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_solitary_green, &locopt_rb_g))
1872 goto error;
1873
1874 cl_mem *dev_trgb = dev_rgb;
1875 for(int d = 0, i = 1, h = 0; d < 6; d++, i ^= 1, h ^= 2)
1876 {
1877 const char dir[2] = { i, i ^ 1 };
1878
1879 // we use dev_aux to transport intermediate results from one loop run to the next
1880 size_t sizes[3] = { ROUNDUP(width, locopt_rb_g.sizex), ROUNDUP(height, locopt_rb_g.sizey), 1 };
1881 size_t local[3] = { locopt_rb_g.sizex, locopt_rb_g.sizey, 1 };
1882 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 0, sizeof(cl_mem), (void *)&dev_trgb[0]);
1883 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 1, sizeof(cl_mem), (void *)&dev_aux);
1884 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 2, sizeof(int), (void *)&width);
1885 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 3, sizeof(int), (void *)&height);
1886 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 4, sizeof(int), (void *)&pad_rb_g);
1887 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 5, sizeof(int), (void *)&roi_in->x);
1888 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 6, sizeof(int), (void *)&roi_in->y);
1889 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 7, sizeof(int), (void *)&d);
1890 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 8, 2 * sizeof(char), (void *)dir);
1891 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 9, sizeof(int), (void *)&h);
1892 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 10, 2 * sizeof(char), (void *)sgreen);
1893 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 11, sizeof(cl_mem), (void *)&dev_xtrans);
1894 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_solitary_green, 12,
1895 sizeof(float) * 4 * (locopt_rb_g.sizex + 2*2) * (locopt_rb_g.sizey + 2*2), NULL);
1896 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_solitary_green, sizes, local);
1897 if(err != CL_SUCCESS) goto error;
1898
1899 if((d < 2) || (d & 1)) dev_trgb++;
1900 }
1901
1902 // interpolate red for blue pixels and vice versa
1903 const int pad_rb_br = (passes == 1) ? 6 : 5;
1904 dt_opencl_local_buffer_t locopt_rb_br
1905 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
1906 .cellsize = 4 * sizeof(float), .overhead = 0,
1907 .sizex = 1 << 8, .sizey = 1 << 8 };
1908
1909 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_red_and_blue, &locopt_rb_br))
1910 goto error;
1911
1912 for(int d = 0; d < 4; d++)
1913 {
1914 size_t sizes[3] = { ROUNDUP(width, locopt_rb_br.sizex), ROUNDUP(height, locopt_rb_br.sizey), 1 };
1915 size_t local[3] = { locopt_rb_br.sizex, locopt_rb_br.sizey, 1 };
1916 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 0, sizeof(cl_mem), (void *)&dev_rgb[d]);
1917 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 1, sizeof(int), (void *)&width);
1918 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 2, sizeof(int), (void *)&height);
1919 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 3, sizeof(int), (void *)&pad_rb_br);
1920 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 4, sizeof(int), (void *)&roi_in->x);
1921 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 5, sizeof(int), (void *)&roi_in->y);
1922 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 6, sizeof(int), (void *)&d);
1923 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 7, 2 * sizeof(char), (void *)sgreen);
1924 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1925 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_red_and_blue, 9,
1926 sizeof(float) * 4 * (locopt_rb_br.sizex + 2*3) * (locopt_rb_br.sizey + 2*3), NULL);
1927 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_red_and_blue, sizes, local);
1928 if(err != CL_SUCCESS) goto error;
1929 }
1930
1931 // interpolate red and blue for 2x2 blocks of green
1932 const int pad_g22 = (passes == 1) ? 8 : 4;
1933 dt_opencl_local_buffer_t locopt_g22
1934 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
1935 .cellsize = 4 * sizeof(float), .overhead = 0,
1936 .sizex = 1 << 8, .sizey = 1 << 8 };
1937
1938 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_interpolate_twoxtwo, &locopt_g22))
1939 goto error;
1940
1941 for(int d = 0, n = 0; d < ndir; d += 2, n++)
1942 {
1943 size_t sizes[3] = { ROUNDUP(width, locopt_g22.sizex), ROUNDUP(height, locopt_g22.sizey), 1 };
1944 size_t local[3] = { locopt_g22.sizex, locopt_g22.sizey, 1 };
1945 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 0, sizeof(cl_mem), (void *)&dev_rgb[n]);
1946 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 1, sizeof(int), (void *)&width);
1947 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 2, sizeof(int), (void *)&height);
1948 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 3, sizeof(int), (void *)&pad_g22);
1949 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 4, sizeof(int), (void *)&roi_in->x);
1950 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 5, sizeof(int), (void *)&roi_in->y);
1951 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 6, sizeof(int), (void *)&d);
1952 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 7, 2 * sizeof(char), (void *)sgreen);
1953 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 8, sizeof(cl_mem), (void *)&dev_xtrans);
1954 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 9, sizeof(cl_mem), (void *)&dev_allhex);
1955 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_interpolate_twoxtwo, 10,
1956 sizeof(float) * 4 * (locopt_g22.sizex + 2*2) * (locopt_g22.sizey + 2*2), NULL);
1957 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_interpolate_twoxtwo, sizes, local);
1958 if(err != CL_SUCCESS) goto error;
1959 }
1960 }
1961 // end of multi pass
1962
1963 // gminmax data no longer needed
1964 dt_opencl_release_mem_object(dev_gminmax);
1965 dev_gminmax = NULL;
1966
1967 // jump back to the first set of rgb buffers (this is a noop for Markesteijn-1)
1968 dev_rgb = dev_rgbv;
1969
1970 // prepare derivatives buffers
1971 for(int n = 0; n < ndir; n++)
1972 {
1973 dev_drv[n] = dt_opencl_alloc_device_buffer(devid, sizeof(float) * width * height);
1974 if(dev_drv[n] == NULL) goto error;
1975 }
1976
1977 // convert to perceptual colorspace and differentiate in all directions
1978 const int pad_yuv = (passes == 1) ? 8 : 13;
1979 dt_opencl_local_buffer_t locopt_diff
1980 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
1981 .cellsize = 4 * sizeof(float), .overhead = 0,
1982 .sizex = 1 << 8, .sizey = 1 << 8 };
1983
1984 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_differentiate, &locopt_diff))
1985 goto error;
1986
1987 for(int d = 0; d < ndir; d++)
1988 {
1989 // convert to perceptual YPbPr colorspace
1990 size_t sizes_yuv[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
1991 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 0, sizeof(cl_mem), (void *)&dev_rgb[d]);
1992 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 1, sizeof(cl_mem), (void *)&dev_aux);
1993 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 2, sizeof(int), (void *)&width);
1994 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 3, sizeof(int), (void *)&height);
1995 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_convert_yuv, 4, sizeof(int), (void *)&pad_yuv);
1996 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_convert_yuv, sizes_yuv);
1997 if(err != CL_SUCCESS) goto error;
1998
1999
2000 // differentiate in all directions
2001 size_t sizes_diff[3] = { ROUNDUP(width, locopt_diff.sizex), ROUNDUP(height, locopt_diff.sizey), 1 };
2002 size_t local_diff[3] = { locopt_diff.sizex, locopt_diff.sizey, 1 };
2003 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 0, sizeof(cl_mem), (void *)&dev_aux);
2004 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 1, sizeof(cl_mem), (void *)&dev_drv[d]);
2005 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 2, sizeof(int), (void *)&width);
2006 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 3, sizeof(int), (void *)&height);
2007 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 4, sizeof(int), (void *)&pad_yuv);
2008 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 5, sizeof(int), (void *)&d);
2009 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_differentiate, 6,
2010 sizeof(float) * 4 * (locopt_diff.sizex + 2*1) * (locopt_diff.sizey + 2*1), NULL);
2011 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_differentiate, sizes_diff, local_diff);
2012 if(err != CL_SUCCESS) goto error;
2013 }
2014
2015 // reserve buffers for homogeneity maps and sum maps
2016 for(int n = 0; n < ndir; n++)
2017 {
2018 dev_homo[n] = dt_opencl_alloc_device_buffer(devid, sizeof(unsigned char) * width * height);
2019 if(dev_homo[n] == NULL) goto error;
2020
2021 dev_homosum[n] = dt_opencl_alloc_device_buffer(devid, sizeof(unsigned char) * width * height);
2022 if(dev_homosum[n] == NULL) goto error;
2023 }
2024
2025 // get thresholds for homogeneity map (store them in dev_aux)
2026 for(int d = 0; d < ndir; d++)
2027 {
2028 const int pad_homo = (passes == 1) ? 10 : 15;
2029 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2030 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 0, sizeof(cl_mem), (void *)&dev_drv[d]);
2031 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 1, sizeof(cl_mem), (void *)&dev_aux);
2032 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 2, sizeof(int), (void *)&width);
2033 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 3, sizeof(int), (void *)&height);
2034 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 4, sizeof(int), (void *)&pad_homo);
2035 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_threshold, 5, sizeof(int), (void *)&d);
2036 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_homo_threshold, sizes);
2037 if(err != CL_SUCCESS) goto error;
2038 }
2039
2040 // set homogeneity maps
2041 const int pad_homo = (passes == 1) ? 10 : 15;
2042 dt_opencl_local_buffer_t locopt_homo
2043 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
2044 .cellsize = 1 * sizeof(float), .overhead = 0,
2045 .sizex = 1 << 8, .sizey = 1 << 8 };
2046
2047 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_homo_set, &locopt_homo))
2048 goto error;
2049
2050 for(int d = 0; d < ndir; d++)
2051 {
2052 size_t sizes[3] = { ROUNDUP(width, locopt_homo.sizex),ROUNDUP(height, locopt_homo.sizey), 1 };
2053 size_t local[3] = { locopt_homo.sizex, locopt_homo.sizey, 1 };
2054 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 0, sizeof(cl_mem), (void *)&dev_drv[d]);
2055 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 1, sizeof(cl_mem), (void *)&dev_aux);
2056 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 2, sizeof(cl_mem), (void *)&dev_homo[d]);
2057 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 3, sizeof(int), (void *)&width);
2058 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 4, sizeof(int), (void *)&height);
2059 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 5, sizeof(int), (void *)&pad_homo);
2060 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_set, 6,
2061 sizeof(float) * (locopt_homo.sizex + 2*1) * (locopt_homo.sizey + 2*1), NULL);
2062 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_homo_set, sizes, local);
2063 if(err != CL_SUCCESS) goto error;
2064 }
2065
2066 // get rid of dev_drv buffers
2067 for(int n = 0; n < 8; n++)
2068 {
2070 dev_drv[n] = NULL;
2071 }
2072
2073 // build 5x5 sum of homogeneity maps for each pixel and direction
2074 dt_opencl_local_buffer_t locopt_homo_sum
2075 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
2076 .cellsize = 1 * sizeof(float), .overhead = 0,
2077 .sizex = 1 << 8, .sizey = 1 << 8 };
2078
2079 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_markesteijn_homo_sum, &locopt_homo_sum))
2080 goto error;
2081
2082 for(int d = 0; d < ndir; d++)
2083 {
2084 size_t sizes[3] = { ROUNDUP(width, locopt_homo_sum.sizex), ROUNDUP(height, locopt_homo_sum.sizey), 1 };
2085 size_t local[3] = { locopt_homo_sum.sizex, locopt_homo_sum.sizey, 1 };
2086 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 0, sizeof(cl_mem), (void *)&dev_homo[d]);
2087 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 1, sizeof(cl_mem), (void *)&dev_homosum[d]);
2088 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 2, sizeof(int), (void *)&width);
2089 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 3, sizeof(int), (void *)&height);
2090 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 4, sizeof(int), (void *)&pad_tile);
2091 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_sum, 5,
2092 sizeof(char) * (locopt_homo_sum.sizex + 2*2) * (locopt_homo_sum.sizey + 2*2), NULL);
2093 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_markesteijn_homo_sum, sizes, local);
2094 if(err != CL_SUCCESS) goto error;
2095 }
2096
2097 // get maximum of homogeneity maps (store in dev_aux)
2098 for(int d = 0; d < ndir; d++)
2099 {
2100 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2101 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 0, sizeof(cl_mem), (void *)&dev_homosum[d]);
2102 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 1, sizeof(cl_mem), (void *)&dev_aux);
2103 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 2, sizeof(int), (void *)&width);
2104 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 3, sizeof(int), (void *)&height);
2105 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 4, sizeof(int), (void *)&pad_tile);
2106 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max, 5, sizeof(int), (void *)&d);
2107 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_homo_max, sizes);
2108 if(err != CL_SUCCESS) goto error;
2109 }
2110
2111 {
2112 // adjust maximum value
2113 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2114 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 0, sizeof(cl_mem), (void *)&dev_aux);
2115 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 1, sizeof(int), (void *)&width);
2116 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 2, sizeof(int), (void *)&height);
2117 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_max_corr, 3, sizeof(int), (void *)&pad_tile);
2118 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_homo_max_corr, sizes);
2119 if(err != CL_SUCCESS) goto error;
2120 }
2121
2122 // for Markesteijn-3: use only one of two directions if there is a difference in homogeneity
2123 for(int d = 0; d < ndir - 4; d++)
2124 {
2125 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2126 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 0, sizeof(cl_mem), (void *)&dev_homosum[d]);
2127 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 1, sizeof(cl_mem), (void *)&dev_homosum[d + 4]);
2128 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 2, sizeof(int), (void *)&width);
2129 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 3, sizeof(int), (void *)&height);
2130 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_homo_quench, 4, sizeof(int), (void *)&pad_tile);
2131 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_homo_quench, sizes);
2132 if(err != CL_SUCCESS) goto error;
2133 }
2134
2135 {
2136 // initialize output buffer to zero
2137 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2138 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 0, sizeof(cl_mem), (void *)&dev_tmp);
2139 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 1, sizeof(int), (void *)&width);
2140 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 2, sizeof(int), (void *)&height);
2141 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_zero, 3, sizeof(int), (void *)&pad_tile);
2142 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_zero, sizes);
2143 if(err != CL_SUCCESS) goto error;
2144 }
2145
2146 // need to get another temp buffer for the output image (may use the space of dev_drv[] freed earlier)
2147 dev_tmptmp = dt_opencl_alloc_device(devid, (size_t)width, height, sizeof(float) * 4);
2148 if(dev_tmptmp == NULL) goto error;
2149
2150 cl_mem dev_t1 = dev_tmp;
2151 cl_mem dev_t2 = dev_tmptmp;
2152
2153 // accumulate all contributions
2154 for(int d = 0; d < ndir; d++)
2155 {
2156 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2157 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 0, sizeof(cl_mem), (void *)&dev_t1);
2158 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 1, sizeof(cl_mem), (void *)&dev_t2);
2159 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 2, sizeof(cl_mem), (void *)&dev_rgbv[d]);
2160 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 3, sizeof(cl_mem), (void *)&dev_homosum[d]);
2161 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 4, sizeof(cl_mem), (void *)&dev_aux);
2162 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 5, sizeof(int), (void *)&width);
2163 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 6, sizeof(int), (void *)&height);
2164 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_accu, 7, sizeof(int), (void *)&pad_tile);
2165 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_accu, sizes);
2166 if(err != CL_SUCCESS) goto error;
2167
2168 // swap buffers
2169 cl_mem dev_t = dev_t2;
2170 dev_t2 = dev_t1;
2171 dev_t1 = dev_t;
2172 }
2173
2174 // copy output to dev_tmptmp (if not already there)
2175 // note: we need to take swap of buffers into account, so current output lies in dev_t1
2176 if(dev_t1 != dev_tmptmp)
2177 {
2178 size_t origin[] = { 0, 0, 0 };
2179 size_t region[] = { width, height, 1 };
2180 err = dt_opencl_enqueue_copy_image(devid, dev_t1, dev_tmptmp, origin, origin, region);
2181 if(err != CL_SUCCESS) goto error;
2182 }
2183
2184 {
2185 // process the final image
2186 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
2187 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 0, sizeof(cl_mem), (void *)&dev_tmptmp);
2188 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 1, sizeof(cl_mem), (void *)&dev_tmp);
2189 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 2, sizeof(int), (void *)&width);
2190 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 3, sizeof(int), (void *)&height);
2191 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 4, sizeof(int), (void *)&pad_tile);
2192 dt_opencl_set_kernel_arg(devid, gd->kernel_markesteijn_final, 5, 4*sizeof(float), (void *)processed_maximum);
2193 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_markesteijn_final, sizes);
2194 if(err != CL_SUCCESS) goto error;
2195 }
2196
2197 // now it's time to get rid of most of the temporary buffers (except of dev_tmp and dev_xtrans)
2198 for(int n = 0; n < 8; n++)
2199 {
2201 dev_rgbv[n] = NULL;
2202 }
2203
2204 for(int n = 0; n < 8; n++)
2205 {
2207 dev_homo[n] = NULL;
2208 }
2209
2210 for(int n = 0; n < 8; n++)
2211 {
2212 dt_opencl_release_mem_object(dev_homosum[n]);
2213 dev_homosum[n] = NULL;
2214 }
2215
2217 dev_aux = NULL;
2218
2219 dt_opencl_release_mem_object(dev_xtrans);
2220 dev_xtrans = NULL;
2221
2222 dt_opencl_release_mem_object(dev_allhex);
2223 dev_allhex = NULL;
2224
2225 dt_opencl_release_mem_object(dev_green_eq);
2226 dev_green_eq = NULL;
2227
2228 dt_opencl_release_mem_object(dev_tmptmp);
2229 dev_tmptmp = NULL;
2230
2231 // take care of image borders. the algorithm above leaves an unprocessed border of pad_tile pixels.
2232 // strategy: take the four edges and process them each with process_vng_cl(). as VNG produces
2233 // an image with a border with only linear interpolation we process edges of pad_tile+3px and
2234 // drop 3px on the inner side if possible
2235
2236 // take care of some degenerate cases (which might happen if we are called in a tiling context)
2237 const int wd = (width > pad_tile+3) ? pad_tile+3 : width;
2238 const int ht = (height > pad_tile+3) ? pad_tile+3 : height;
2239 const int wdc = (wd >= pad_tile+3) ? 3 : 0;
2240 const int htc = (ht >= pad_tile+3) ? 3 : 0;
2241
2242 // the data of all four edges:
2243 // total edge: x-offset, y-offset, width, height,
2244 // after dropping: x-offset adjust, y-offset adjust, width adjust, height adjust
2245 const int edges[4][8] = { { 0, 0, wd, height, 0, 0, -wdc, 0 },
2246 { 0, 0, width, ht, 0, 0, 0, -htc },
2247 { width - wd, 0, wd, height, wdc, 0, -wdc, 0 },
2248 { 0, height - ht, width, ht, 0, htc, 0, -htc } };
2249
2250 for(int n = 0; n < 4; n++)
2251 {
2252 dt_iop_roi_t roi = { roi_in->x + edges[n][0], roi_in->y + edges[n][1], edges[n][2], edges[n][3], 1.0f };
2253
2254 size_t iorigin[] = { edges[n][0], edges[n][1], 0 };
2255 size_t oorigin[] = { 0, 0, 0 };
2256 size_t region[] = { edges[n][2], edges[n][3], 1 };
2257
2258 // reserve input buffer for image edge
2259 dev_edge_in = dt_opencl_alloc_device(devid, edges[n][2], edges[n][3], sizeof(float));
2260 if(dev_edge_in == NULL) goto error;
2261
2262 // reserve output buffer for VNG processing of edge
2263 dev_edge_out = dt_opencl_alloc_device(devid, edges[n][2], edges[n][3], sizeof(float) * 4);
2264 if(dev_edge_out == NULL) goto error;
2265
2266 // copy edge to input buffer
2267 err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_edge_in, iorigin, oorigin, region);
2268 if(err != CL_SUCCESS) goto error;
2269
2270 // VNG processing
2271 if(!process_vng_cl(self, pipe, piece, dev_edge_in, dev_edge_out, &roi, &roi, smooth, FALSE))
2272 goto error;
2273
2274 // adjust for "good" part, dropping linear border where possible
2275 iorigin[0] += edges[n][4];
2276 iorigin[1] += edges[n][5];
2277 oorigin[0] += edges[n][4];
2278 oorigin[1] += edges[n][5];
2279 region[0] += edges[n][6];
2280 region[1] += edges[n][7];
2281
2282 // copy output
2283 err = dt_opencl_enqueue_copy_image(devid, dev_edge_out, dev_tmp, oorigin, iorigin, region);
2284 if(err != CL_SUCCESS) goto error;
2285
2286 // release intermediate buffers
2287 dt_opencl_release_mem_object(dev_edge_in);
2288 dt_opencl_release_mem_object(dev_edge_out);
2289 dev_edge_in = dev_edge_out = NULL;
2290 }
2291 dt_dev_write_rawdetail_mask_cl(pipe, piece, dev_tmp, roi_in, DT_DEV_DETAIL_MASK_DEMOSAIC);
2292
2293
2294 // free remaining temporary buffers
2295 if(dev_tmp != dev_out) dt_opencl_release_mem_object(dev_tmp);
2296 dev_tmp = NULL;
2297
2298 dt_opencl_release_mem_object(dev_xtrans);
2299 dev_xtrans = NULL;
2300
2301
2302 // color smoothing
2303 if(data->color_smoothing)
2304 {
2305 if(!color_smoothing_cl(self, pipe, piece, dev_out, dev_out, roi_out, data->color_smoothing))
2306 goto error;
2307 }
2308
2309 return TRUE;
2310
2311error:
2312 if(dev_tmp != dev_out) dt_opencl_release_mem_object(dev_tmp);
2313
2314 for(int n = 0; n < 8; n++)
2316 for(int n = 0; n < 8; n++)
2318 for(int n = 0; n < 8; n++)
2320 for(int n = 0; n < 8; n++)
2321 dt_opencl_release_mem_object(dev_homosum[n]);
2322 dt_opencl_release_mem_object(dev_gminmax);
2323 dt_opencl_release_mem_object(dev_tmptmp);
2324 dt_opencl_release_mem_object(dev_xtrans);
2325 dt_opencl_release_mem_object(dev_allhex);
2326 dt_opencl_release_mem_object(dev_green_eq);
2328 dt_opencl_release_mem_object(dev_edge_in);
2329 dt_opencl_release_mem_object(dev_edge_out);
2330 dt_print(DT_DEBUG_OPENCL, "[opencl_demosaic] couldn't enqueue kernel! %d\n", err);
2331 return FALSE;
2332}
2333
2334#endif // OPENCL
2335
2336#undef PIX_SWAP
2337#undef PIX_SORT
2338#undef CCLIP
2339#undef TS
2340
2341// clang-format off
2342// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
2343// vim: shiftwidth=2 expandtab tabstop=2 cindent
2344// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
2345// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
const float i
Definition colorspaces_inline_conversions.h:669
const float h
Definition colorspaces_inline_conversions.h:1366
const float c
Definition colorspaces_inline_conversions.h:1365
const float g
Definition colorspaces_inline_conversions.h:925
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:256
const float d
Definition colorspaces_inline_conversions.h:931
static const float const float const float min
Definition colorspaces_inline_conversions.h:667
const float max
Definition colorspaces_inline_conversions.h:721
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
const float n
Definition colorspaces_inline_conversions.h:929
const float top
Definition colorspaces_inline_conversions.h:672
static const int row
Definition colorspaces_inline_conversions.h:175
static dt_aligned_pixel_t rgb
Definition colorspaces_inline_conversions.h:530
int dt_conf_get_int(const char *name)
Definition control/conf.c:241
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1530
static void memset_zero(void *const buffer, size_t size)
Set the memory buffer to zero as a pack of unsigned char.
Definition darktable.h:813
#define dt_pixelpipe_cache_alloc_perthread(n, objsize, padded_size)
Definition darktable.h:938
@ DT_DEBUG_OPENCL
Definition darktable.h:642
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:377
#define dt_get_perthread(buf, padsize)
Definition darktable.h:967
static int FCxtrans(const int row, const int col, global const unsigned char(*const xtrans)[6])
Definition data/kernels/common.h:54
@ DT_IOP_DEMOSAIC_MARKESTEIJN_3
Definition demosaic.c:123
@ DT_DEV_DETAIL_MASK_DEMOSAIC
Definition develop.h:140
static const float x
Definition iop_profile.h:239
static const float v
Definition iop_profile.h:223
#define TRANSLATE(n, size)
#define CONV_FILT(VAR, FILT)
static void xtrans_fdc_interpolate(struct dt_iop_module_t *self, float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint8_t(*const xtrans)[6])
Definition markesteijn.c:525
static void xtrans_markesteijn_interpolate(float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint8_t(*const xtrans)[6], const int passes)
Definition markesteijn.c:46
static const short * hexmap(const int row, const int col, short(*const allhex)[3][8])
Definition markesteijn.c:30
#define SQR(x)
Definition markesteijn.c:25
#define TS
Definition markesteijn.c:27
#define PIX_SORT(a, b)
#define CLAMPS(A, L, H)
Definition math.h:76
static int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.h:574
static int dt_opencl_set_kernel_arg(const int dev, const int kernel, const size_t size, const void *arg)
Definition opencl.h:570
static void dt_opencl_release_mem_object(void *mem)
Definition opencl.h:619
static int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.h:578
Definition pixelpipe_hb.h:95
struct dt_iop_module_t *void * data
Definition pixelpipe_hb.h:96
Definition pixelpipe_hb.h:216
dt_image_t image_storage
Definition develop.h:243
float exif_iso
Definition common/image.h:254
Definition demosaic.c:217
Definition demosaic.c:159
Definition imageop.h:216
struct dt_develop_t * dev
Definition imageop.h:261
dt_iop_global_data_t * global_data
Definition imageop.h:279
Definition imageop.h:67
int x
Definition imageop.h:68
int width
Definition imageop.h:68
int height
Definition imageop.h:68
#define MIN(a, b)
Definition thinplate.c:32