Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
vng.c
Go to the documentation of this file.
1
2
3// VNG interpolate adapted from dcraw 9.20
4
5/*
6 This algorithm is officially called:
7
8 "Interpolation using a Threshold-based variable number of gradients"
9
10 described in http://scien.stanford.edu/pages/labsite/1999/psych221/projects/99/tingchen/algodep/vargra.html
11
12 I've extended the basic idea to work with non-Bayer filter arrays.
13 Gradients are numbered clockwise from NW=0 to W=7.
14 */
15static void vng_interpolate(float *out, const float *const in,
16 const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in,
17 const uint32_t filters, const uint8_t (*const xtrans)[6], const int only_vng_linear)
18{
19 static const signed char terms[]
20 = { -2, -2, +0, -1, 1, 0x01, -2, -2, +0, +0, 2, 0x01, -2, -1, -1, +0, 1, 0x01, -2, -1, +0, -1, 1, 0x02,
21 -2, -1, +0, +0, 1, 0x03, -2, -1, +0, +1, 2, 0x01, -2, +0, +0, -1, 1, 0x06, -2, +0, +0, +0, 2, 0x02,
22 -2, +0, +0, +1, 1, 0x03, -2, +1, -1, +0, 1, 0x04, -2, +1, +0, -1, 2, 0x04, -2, +1, +0, +0, 1, 0x06,
23 -2, +1, +0, +1, 1, 0x02, -2, +2, +0, +0, 2, 0x04, -2, +2, +0, +1, 1, 0x04, -1, -2, -1, +0, 1, 0x80,
24 -1, -2, +0, -1, 1, 0x01, -1, -2, +1, -1, 1, 0x01, -1, -2, +1, +0, 2, 0x01, -1, -1, -1, +1, 1, 0x88,
25 -1, -1, +1, -2, 1, 0x40, -1, -1, +1, -1, 1, 0x22, -1, -1, +1, +0, 1, 0x33, -1, -1, +1, +1, 2, 0x11,
26 -1, +0, -1, +2, 1, 0x08, -1, +0, +0, -1, 1, 0x44, -1, +0, +0, +1, 1, 0x11, -1, +0, +1, -2, 2, 0x40,
27 -1, +0, +1, -1, 1, 0x66, -1, +0, +1, +0, 2, 0x22, -1, +0, +1, +1, 1, 0x33, -1, +0, +1, +2, 2, 0x10,
28 -1, +1, +1, -1, 2, 0x44, -1, +1, +1, +0, 1, 0x66, -1, +1, +1, +1, 1, 0x22, -1, +1, +1, +2, 1, 0x10,
29 -1, +2, +0, +1, 1, 0x04, -1, +2, +1, +0, 2, 0x04, -1, +2, +1, +1, 1, 0x04, +0, -2, +0, +0, 2, 0x80,
30 +0, -1, +0, +1, 2, 0x88, +0, -1, +1, -2, 1, 0x40, +0, -1, +1, +0, 1, 0x11, +0, -1, +2, -2, 1, 0x40,
31 +0, -1, +2, -1, 1, 0x20, +0, -1, +2, +0, 1, 0x30, +0, -1, +2, +1, 2, 0x10, +0, +0, +0, +2, 2, 0x08,
32 +0, +0, +2, -2, 2, 0x40, +0, +0, +2, -1, 1, 0x60, +0, +0, +2, +0, 2, 0x20, +0, +0, +2, +1, 1, 0x30,
33 +0, +0, +2, +2, 2, 0x10, +0, +1, +1, +0, 1, 0x44, +0, +1, +1, +2, 1, 0x10, +0, +1, +2, -1, 2, 0x40,
34 +0, +1, +2, +0, 1, 0x60, +0, +1, +2, +1, 1, 0x20, +0, +1, +2, +2, 1, 0x10, +1, -2, +1, +0, 1, 0x80,
35 +1, -1, +1, +1, 1, 0x88, +1, +0, +1, +2, 1, 0x08, +1, +0, +2, -1, 1, 0x40, +1, +0, +2, +1, 1, 0x10 };
36 static const signed char chood[]
37 = { -1, -1, -1, 0, -1, +1, 0, +1, +1, +1, +1, 0, +1, -1, 0, -1 };
38 int *ip, *code[16][16];
39 // ring buffer pointing to three most recent rows processed (brow[3]
40 // is only used for rotating the buffer
41 float(*brow[4])[4];
42 const int width = roi_out->width, height = roi_out->height;
43 const int prow = (filters == 9) ? 6 : 8;
44 const int pcol = (filters == 9) ? 6 : 2;
45 const int colors = (filters == 9) ? 3 : 4;
46
47 // separate out G1 and G2 in RGGB Bayer patterns
48 uint32_t filters4 = filters;
49 if(filters == 9 || FILTERS_ARE_4BAYER(filters)) // x-trans or CYGM/RGBE
50 filters4 = filters;
51 else if((filters & 3) == 1)
52 filters4 = filters | 0x03030303u;
53 else
54 filters4 = filters | 0x0c0c0c0cu;
55
56 lin_interpolate(out, in, roi_out, roi_in, filters4, xtrans);
57
58 // if only linear interpolation is requested we can stop it here
59 if(only_vng_linear) return;
60
61 char *buffer
62 = (char *)dt_alloc_align(sizeof(**brow) * width * 3 + sizeof(*ip) * prow * pcol * 320);
63 if(!buffer)
64 {
65 fprintf(stderr, "[demosaic] not able to allocate VNG buffer\n");
66 return;
67 }
68 for(int row = 0; row < 3; row++) brow[row] = (float(*)[4])buffer + row * width;
69 ip = (int *)(buffer + sizeof(**brow) * width * 3);
70
71 for(int row = 0; row < prow; row++) /* Precalculate for VNG */
72 for(int col = 0; col < pcol; col++)
73 {
74 code[row][col] = ip;
75 const signed char *cp = terms;
76 for(int t = 0; t < 64; t++)
77 {
78 const int y1 = *cp++, x1 = *cp++;
79 const int y2 = *cp++, x2 = *cp++;
80 const int weight = *cp++;
81 const int grads = *cp++;
82 const int color = fcol(row + y1, col + x1, filters4, xtrans);
83 if(fcol(row + y2, col + x2, filters4, xtrans) != color) continue;
84 const int diag
85 = (fcol(row, col + 1, filters4, xtrans) == color && fcol(row + 1, col, filters4, xtrans) == color)
86 ? 2
87 : 1;
88 if(abs(y1 - y2) == diag && abs(x1 - x2) == diag) continue;
89 *ip++ = (y1 * width + x1) * 4 + color;
90 *ip++ = (y2 * width + x2) * 4 + color;
91 *ip++ = weight;
92 for(int g = 0; g < 8; g++)
93 if(grads & 1 << g) *ip++ = g;
94 *ip++ = -1;
95 }
96 *ip++ = INT_MAX;
97 cp = chood;
98 for(int g = 0; g < 8; g++)
99 {
100 const int y = *cp++, x = *cp++;
101 *ip++ = (y * width + x) * 4;
102 const int color = fcol(row, col, filters4, xtrans);
103 if(fcol(row + y, col + x, filters4, xtrans) != color
104 && fcol(row + y * 2, col + x * 2, filters4, xtrans) == color)
105 *ip++ = (y * width + x) * 8 + color;
106 else
107 *ip++ = 0;
108 }
109 }
110
111 for(int row = 2; row < height - 2; row++) /* Do VNG interpolation */
112 {
113#ifdef _OPENMP
114#pragma omp parallel for default(none) \
115 dt_omp_firstprivate(colors, pcol, prow, roi_in, width, xtrans) \
116 shared(row, code, brow, out, filters4) \
117 private(ip) \
118 schedule(static)
119#endif
120 for(int col = 2; col < width - 2; col++)
121 {
122 int g;
123 float gval[8] = { 0.0f };
124 float *pix = out + 4 * (row * width + col);
125 ip = code[(row + roi_in->y) % prow][(col + roi_in->x) % pcol];
126 while((g = ip[0]) != INT_MAX) /* Calculate gradients */
127 {
128 float diff = fabsf(pix[g] - pix[ip[1]]) * ip[2];
129 gval[ip[3]] += diff;
130 ip += 5;
131 if((g = ip[-1]) == -1) continue;
132 gval[g] += diff;
133 while((g = *ip++) != -1) gval[g] += diff;
134 }
135 ip++;
136 float gmin = gval[0], gmax = gval[0]; /* Choose a threshold */
137 for(g = 1; g < 8; g++)
138 {
139 if(gmin > gval[g]) gmin = gval[g];
140 if(gmax < gval[g]) gmax = gval[g];
141 }
142 if(gmax == 0)
143 {
144 memcpy(brow[2][col], pix, sizeof(*out) * 4);
145 continue;
146 }
147 const float thold = gmin + (gmax * 0.5f);
148 dt_aligned_pixel_t sum = { 0.0f };
149 const int color = fcol(row + roi_in->y, col + roi_in->x, filters4, xtrans);
150 int num = 0;
151 for(g = 0; g < 8; g++, ip += 2) /* Average the neighbors */
152 {
153 if(gval[g] <= thold)
154 {
155 for(int c = 0; c < colors; c++)
156 if(c == color && ip[1])
157 sum[c] += (pix[c] + pix[ip[1]]) * 0.5f;
158 else
159 sum[c] += pix[ip[0] + c];
160 num++;
161 }
162 }
163 for(int c = 0; c < colors; c++) /* Save to buffer */
164 {
165 float tot = pix[color];
166 if(c != color) tot += (sum[c] - sum[color]) / num;
167 brow[2][col][c] = tot;
168 }
169 }
170 if(row > 3) /* Write buffer to image */
171 memcpy(out + 4 * ((row - 2) * width + 2), brow[0] + 2, sizeof(*out) * 4 * (width - 4));
172 // rotate ring buffer
173 for(int g = 0; g < 4; g++) brow[(g - 1) & 3] = brow[g];
174 }
175 // copy the final two rows to the image
176 memcpy(out + (4 * ((height - 4) * width + 2)), brow[0] + 2, sizeof(*out) * 4 * (width - 4));
177 memcpy(out + (4 * ((height - 3) * width + 2)), brow[1] + 2, sizeof(*out) * 4 * (width - 4));
178 dt_free_align(buffer);
179
180 if(filters != 9 && !FILTERS_ARE_4BAYER(filters)) // x-trans or CYGM/RGBE
181// for Bayer mix the two greens to make VNG4
182#ifdef _OPENMP
183#pragma omp parallel for default(none) \
184 dt_omp_firstprivate(height, width) \
185 shared(out) \
186 schedule(static)
187#endif
188 for(int i = 0; i < height * width; i++) out[i * 4 + 1] = (out[i * 4 + 1] + out[i * 4 + 3]) / 2.0f;
189}
190
191#ifdef HAVE_OPENCL
192
193static int process_vng_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in,
194 cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
195 const gboolean smooth, const int only_vng_linear)
196{
199 const dt_image_t *img = &self->dev->image_storage;
200
201 const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])piece->pipe->dsc.xtrans;
202
203 // separate out G1 and G2 in Bayer patterns
204 uint32_t filters4;
205 if(piece->pipe->dsc.filters == 9u)
206 filters4 = piece->pipe->dsc.filters;
207 else if((piece->pipe->dsc.filters & 3) == 1)
208 filters4 = piece->pipe->dsc.filters | 0x03030303u;
209 else
210 filters4 = piece->pipe->dsc.filters | 0x0c0c0c0cu;
211
212 const int size = (filters4 == 9u) ? 6 : 16;
213 const int colors = (filters4 == 9u) ? 3 : 4;
214 const int prow = (filters4 == 9u) ? 6 : 8;
215 const int pcol = (filters4 == 9u) ? 6 : 2;
216 const int devid = piece->pipe->devid;
217
218 const float processed_maximum[4]
219 = { piece->pipe->dsc.processed_maximum[0], piece->pipe->dsc.processed_maximum[1],
220 piece->pipe->dsc.processed_maximum[2], 1.0f };
221
222 const int qual_flags = demosaic_qual_flags(piece, img, roi_out);
223
224 int *ips = NULL;
225
226 cl_mem dev_tmp = NULL;
227 cl_mem dev_aux = NULL;
228 cl_mem dev_xtrans = NULL;
229 cl_mem dev_lookup = NULL;
230 cl_mem dev_code = NULL;
231 cl_mem dev_ips = NULL;
232 cl_mem dev_green_eq = NULL;
233 cl_int err = -999;
234
235 int32_t(*lookup)[16][32] = NULL;
236
237 if(piece->pipe->dsc.filters == 9u)
238 {
239 dev_xtrans
240 = dt_opencl_copy_host_to_device_constant(devid, sizeof(piece->pipe->dsc.xtrans), piece->pipe->dsc.xtrans);
241 if(dev_xtrans == NULL) goto error;
242 }
243
244 if(qual_flags & DEMOSAIC_FULL_SCALE)
245 {
246 // Full demosaic and then scaling if needed
247 const int scaled = (roi_out->width != roi_in->width || roi_out->height != roi_in->height);
248
249 // build interpolation lookup table for linear interpolation which for a given offset in the sensor
250 // lists neighboring pixels from which to interpolate:
251 // NUM_PIXELS # of neighboring pixels to read
252 // for (1..NUM_PIXELS):
253 // OFFSET # in bytes from current pixel
254 // WEIGHT # how much weight to give this neighbor
255 // COLOR # sensor color
256 // # weights of adjoining pixels not of this pixel's color
257 // COLORA TOT_WEIGHT
258 // COLORB TOT_WEIGHT
259 // COLORPIX # color of center pixel
260 const size_t lookup_size = (size_t)16 * 16 * 32 * sizeof(int32_t);
261 lookup = malloc(lookup_size);
262
263 for(int row = 0; row < size; row++)
264 for(int col = 0; col < size; col++)
265 {
266 int32_t *ip = &(lookup[row][col][1]);
267 int sum[4] = { 0 };
268 const int f = fcol(row + roi_in->y, col + roi_in->x, filters4, xtrans);
269 // make list of adjoining pixel offsets by weight & color
270 for(int y = -1; y <= 1; y++)
271 for(int x = -1; x <= 1; x++)
272 {
273 const int weight = 1 << ((y == 0) + (x == 0));
274 const int color = fcol(row + y + roi_in->y, col + x + roi_in->x, filters4, xtrans);
275 if(color == f) continue;
276 *ip++ = (y << 16) | (x & 0xffffu);
277 *ip++ = weight;
278 *ip++ = color;
279 sum[color] += weight;
280 }
281 lookup[row][col][0] = (ip - &(lookup[row][col][0])) / 3; /* # of neighboring pixels found */
282 for(int c = 0; c < colors; c++)
283 if(c != f)
284 {
285 *ip++ = c;
286 *ip++ = sum[c];
287 }
288 *ip = f;
289 }
290
291 // Precalculate for VNG
292 static const signed char terms[]
293 = { -2, -2, +0, -1, 1, 0x01, -2, -2, +0, +0, 2, 0x01, -2, -1, -1, +0, 1, 0x01, -2, -1, +0, -1, 1, 0x02,
294 -2, -1, +0, +0, 1, 0x03, -2, -1, +0, +1, 2, 0x01, -2, +0, +0, -1, 1, 0x06, -2, +0, +0, +0, 2, 0x02,
295 -2, +0, +0, +1, 1, 0x03, -2, +1, -1, +0, 1, 0x04, -2, +1, +0, -1, 2, 0x04, -2, +1, +0, +0, 1, 0x06,
296 -2, +1, +0, +1, 1, 0x02, -2, +2, +0, +0, 2, 0x04, -2, +2, +0, +1, 1, 0x04, -1, -2, -1, +0, 1, 0x80,
297 -1, -2, +0, -1, 1, 0x01, -1, -2, +1, -1, 1, 0x01, -1, -2, +1, +0, 2, 0x01, -1, -1, -1, +1, 1, 0x88,
298 -1, -1, +1, -2, 1, 0x40, -1, -1, +1, -1, 1, 0x22, -1, -1, +1, +0, 1, 0x33, -1, -1, +1, +1, 2, 0x11,
299 -1, +0, -1, +2, 1, 0x08, -1, +0, +0, -1, 1, 0x44, -1, +0, +0, +1, 1, 0x11, -1, +0, +1, -2, 2, 0x40,
300 -1, +0, +1, -1, 1, 0x66, -1, +0, +1, +0, 2, 0x22, -1, +0, +1, +1, 1, 0x33, -1, +0, +1, +2, 2, 0x10,
301 -1, +1, +1, -1, 2, 0x44, -1, +1, +1, +0, 1, 0x66, -1, +1, +1, +1, 1, 0x22, -1, +1, +1, +2, 1, 0x10,
302 -1, +2, +0, +1, 1, 0x04, -1, +2, +1, +0, 2, 0x04, -1, +2, +1, +1, 1, 0x04, +0, -2, +0, +0, 2, 0x80,
303 +0, -1, +0, +1, 2, 0x88, +0, -1, +1, -2, 1, 0x40, +0, -1, +1, +0, 1, 0x11, +0, -1, +2, -2, 1, 0x40,
304 +0, -1, +2, -1, 1, 0x20, +0, -1, +2, +0, 1, 0x30, +0, -1, +2, +1, 2, 0x10, +0, +0, +0, +2, 2, 0x08,
305 +0, +0, +2, -2, 2, 0x40, +0, +0, +2, -1, 1, 0x60, +0, +0, +2, +0, 2, 0x20, +0, +0, +2, +1, 1, 0x30,
306 +0, +0, +2, +2, 2, 0x10, +0, +1, +1, +0, 1, 0x44, +0, +1, +1, +2, 1, 0x10, +0, +1, +2, -1, 2, 0x40,
307 +0, +1, +2, +0, 1, 0x60, +0, +1, +2, +1, 1, 0x20, +0, +1, +2, +2, 1, 0x10, +1, -2, +1, +0, 1, 0x80,
308 +1, -1, +1, +1, 1, 0x88, +1, +0, +1, +2, 1, 0x08, +1, +0, +2, -1, 1, 0x40, +1, +0, +2, +1, 1, 0x10 };
309 static const signed char chood[]
310 = { -1, -1, -1, 0, -1, +1, 0, +1, +1, +1, +1, 0, +1, -1, 0, -1 };
311
312 const size_t ips_size = (size_t)prow * pcol * 352 * sizeof(int);
313 ips = malloc(ips_size);
314
315 int *ip = ips;
316 int code[16][16];
317
318 for(int row = 0; row < prow; row++)
319 for(int col = 0; col < pcol; col++)
320 {
321 code[row][col] = ip - ips;
322 const signed char *cp = terms;
323 for(int t = 0; t < 64; t++)
324 {
325 const int y1 = *cp++, x1 = *cp++;
326 const int y2 = *cp++, x2 = *cp++;
327 const int weight = *cp++;
328 const int grads = *cp++;
329 const int color = fcol(row + y1, col + x1, filters4, xtrans);
330 if(fcol(row + y2, col + x2, filters4, xtrans) != color) continue;
331 const int diag
332 = (fcol(row, col + 1, filters4, xtrans) == color && fcol(row + 1, col, filters4, xtrans) == color)
333 ? 2
334 : 1;
335 if(abs(y1 - y2) == diag && abs(x1 - x2) == diag) continue;
336 *ip++ = (y1 << 16) | (x1 & 0xffffu);
337 *ip++ = (y2 << 16) | (x2 & 0xffffu);
338 *ip++ = (color << 16) | (weight & 0xffffu);
339 for(int g = 0; g < 8; g++)
340 if(grads & 1 << g) *ip++ = g;
341 *ip++ = -1;
342 }
343 *ip++ = INT_MAX;
344 cp = chood;
345 for(int g = 0; g < 8; g++)
346 {
347 const int y = *cp++, x = *cp++;
348 *ip++ = (y << 16) | (x & 0xffffu);
349 const int color = fcol(row, col, filters4, xtrans);
350 if(fcol(row + y, col + x, filters4, xtrans) != color
351 && fcol(row + y * 2, col + x * 2, filters4, xtrans) == color)
352 {
353 *ip++ = (2*y << 16) | (2*x & 0xffffu);
354 *ip++ = color;
355 }
356 else
357 {
358 *ip++ = 0;
359 *ip++ = 0;
360 }
361 }
362 }
363
364
365 dev_lookup = dt_opencl_copy_host_to_device_constant(devid, lookup_size, lookup);
366 if(dev_lookup == NULL) goto error;
367
368 dev_code = dt_opencl_copy_host_to_device_constant(devid, sizeof(code), code);
369 if(dev_code == NULL) goto error;
370
371 dev_ips = dt_opencl_copy_host_to_device_constant(devid, ips_size, ips);
372 if(dev_ips == NULL) goto error;
373
374 // green equilibration for Bayer sensors
375 if(piece->pipe->dsc.filters != 9u && data->green_eq != DT_IOP_GREEN_EQ_NO)
376 {
377 dev_green_eq = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float));
378 if(dev_green_eq == NULL) goto error;
379
380 if(!green_equilibration_cl(self, piece, dev_in, dev_green_eq, roi_in))
381 goto error;
382
383 dev_in = dev_green_eq;
384 }
385
386 int width = roi_out->width;
387 int height = roi_out->height;
388
389 // need to reserve scaled auxiliary buffer or use dev_out
390 if(scaled)
391 {
392 dev_aux = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float) * 4);
393 if(dev_aux == NULL) goto error;
394 width = roi_in->width;
395 height = roi_in->height;
396 }
397 else
398 dev_aux = dev_out;
399
400 dev_tmp = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float) * 4);
401 if(dev_tmp == NULL) goto error;
402
403 {
404 // manage borders for linear interpolation part
405 const int border = 1;
406
407 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
408 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 0, sizeof(cl_mem), (void *)&dev_in);
409 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 1, sizeof(cl_mem), (void *)&dev_tmp);
410 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 2, sizeof(int), (void *)&width);
411 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 3, sizeof(int), (void *)&height);
412 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 4, sizeof(int), (void *)&border);
413 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 5, sizeof(int), (void *)&roi_in->x);
414 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 6, sizeof(int), (void *)&roi_in->y);
415 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 7, sizeof(uint32_t), (void *)&filters4);
416 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 8, sizeof(cl_mem), (void *)&dev_xtrans);
417 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_vng_border_interpolate, sizes);
418 if(err != CL_SUCCESS) goto error;
419 }
420
421 {
422 // do linear interpolation
423 dt_opencl_local_buffer_t locopt
424 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
425 .cellsize = 1 * sizeof(float), .overhead = 0,
426 .sizex = 1 << 8, .sizey = 1 << 8 };
427
428 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_vng_lin_interpolate, &locopt))
429 goto error;
430
431 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
432 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
433 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 0, sizeof(cl_mem), (void *)&dev_in);
434 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 1, sizeof(cl_mem), (void *)&dev_tmp);
435 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 2, sizeof(int), (void *)&width);
436 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 3, sizeof(int), (void *)&height);
437 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 4, sizeof(uint32_t), (void *)&filters4);
438 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 5, sizeof(cl_mem), (void *)&dev_lookup);
439 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 6,
440 sizeof(float) * (locopt.sizex + 2) * (locopt.sizey + 2), NULL);
441 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_vng_lin_interpolate, sizes, local);
442 if(err != CL_SUCCESS) goto error;
443 }
444
445
446 if(qual_flags & DEMOSAIC_ONLY_VNG_LINEAR)
447 {
448 // leave it at linear interpolation and skip VNG
449 size_t origin[] = { 0, 0, 0 };
450 size_t region[] = { width, height, 1 };
451 err = dt_opencl_enqueue_copy_image(devid, dev_tmp, dev_aux, origin, origin, region);
452 if(err != CL_SUCCESS) goto error;
453 }
454 else
455 {
456 // do full VNG interpolation
457 dt_opencl_local_buffer_t locopt
458 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
459 .cellsize = 4 * sizeof(float), .overhead = 0,
460 .sizex = 1 << 8, .sizey = 1 << 8 };
461
462 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_vng_interpolate, &locopt))
463 goto error;
464
465 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
466 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
467 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 0, sizeof(cl_mem), (void *)&dev_tmp);
468 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 1, sizeof(cl_mem), (void *)&dev_aux);
469 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 2, sizeof(int), (void *)&width);
470 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 3, sizeof(int), (void *)&height);
471 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 4, sizeof(int), (void *)&roi_in->x);
472 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 5, sizeof(int), (void *)&roi_in->y);
473 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 6, sizeof(uint32_t), (void *)&filters4);
474 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 7, 4*sizeof(float), (void *)processed_maximum);
475 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 8, sizeof(cl_mem), (void *)&dev_xtrans);
476 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 9, sizeof(cl_mem), (void *)&dev_ips);
477 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 10, sizeof(cl_mem), (void *)&dev_code);
478 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 11, sizeof(float) * 4 * (locopt.sizex + 4) * (locopt.sizey + 4), NULL);
479 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_vng_interpolate, sizes, local);
480 if(err != CL_SUCCESS) goto error;
481 }
482
483 {
484 // manage borders
485 const int border = 2;
486
487 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
488 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 0, sizeof(cl_mem), (void *)&dev_in);
489 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 1, sizeof(cl_mem), (void *)&dev_aux);
490 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 2, sizeof(int), (void *)&width);
491 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 3, sizeof(int), (void *)&height);
492 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 4, sizeof(int), (void *)&border);
493 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 5, sizeof(int), (void *)&roi_in->x);
494 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 6, sizeof(int), (void *)&roi_in->y);
495 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 7, sizeof(uint32_t), (void *)&filters4);
496 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 8, sizeof(cl_mem), (void *)&dev_xtrans);
497 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_vng_border_interpolate, sizes);
498 if(err != CL_SUCCESS) goto error;
499 }
500
501 if(filters4 != 9)
502 {
503 // for Bayer sensors mix the two green channels
504 size_t origin[] = { 0, 0, 0 };
505 size_t region[] = { width, height, 1 };
506 err = dt_opencl_enqueue_copy_image(devid, dev_aux, dev_tmp, origin, origin, region);
507 if(err != CL_SUCCESS) goto error;
508
509 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
510 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_green_equilibrate, 0, sizeof(cl_mem), (void *)&dev_tmp);
511 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_green_equilibrate, 1, sizeof(cl_mem), (void *)&dev_aux);
512 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_green_equilibrate, 2, sizeof(int), (void *)&width);
513 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_green_equilibrate, 3, sizeof(int), (void *)&height);
514 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_vng_green_equilibrate, sizes);
515 if(err != CL_SUCCESS) goto error;
516 }
517 dt_dev_write_rawdetail_mask_cl(piece, dev_aux, roi_in, DT_DEV_DETAIL_MASK_DEMOSAIC);
518
519 if(scaled)
520 {
521 // scale temp buffer to output buffer
522 err = dt_iop_clip_and_zoom_roi_cl(devid, dev_out, dev_aux, roi_out, roi_in);
523 if(err != CL_SUCCESS) goto error;
524 }
525 }
526 else
527 {
528 // sample half-size or third-size image
529 if(piece->pipe->dsc.filters == 9u)
530 {
531 const int width = roi_out->width;
532 const int height = roi_out->height;
533
534 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
535 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 0, sizeof(cl_mem), (void *)&dev_in);
536 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 1, sizeof(cl_mem), (void *)&dev_out);
537 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 2, sizeof(int), (void *)&width);
538 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 3, sizeof(int), (void *)&height);
539 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 4, sizeof(int), (void *)&roi_in->x);
540 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 5, sizeof(int), (void *)&roi_in->y);
541 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 6, sizeof(int), (void *)&roi_in->width);
542 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 7, sizeof(int), (void *)&roi_in->height);
543 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 8, sizeof(float), (void *)&roi_out->scale);
544 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_third_size, 9, sizeof(cl_mem), (void *)&dev_xtrans);
545 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_zoom_third_size, sizes);
546 if(err != CL_SUCCESS) goto error;
547 }
548 else
549 {
550 const int zero = 0;
551 const int width = roi_out->width;
552 const int height = roi_out->height;
553
554 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
555 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 0, sizeof(cl_mem), (void *)&dev_in);
556 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 1, sizeof(cl_mem), (void *)&dev_out);
557 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 2, sizeof(int), (void *)&width);
558 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 3, sizeof(int), (void *)&height);
559 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 4, sizeof(int), (void *)&zero);
560 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 5, sizeof(int), (void *)&zero);
561 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 6, sizeof(int), (void *)&roi_in->width);
562 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 7, sizeof(int), (void *)&roi_in->height);
563 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 8, sizeof(float), (void *)&roi_out->scale);
564 dt_opencl_set_kernel_arg(devid, gd->kernel_zoom_half_size, 9, sizeof(uint32_t),
565 (void *)&piece->pipe->dsc.filters);
566 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_zoom_half_size, sizes);
567 if(err != CL_SUCCESS) goto error;
568 }
569 }
570
571 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
572 dev_aux = NULL;
573
575 dev_tmp = NULL;
576
578 dev_xtrans = NULL;
579
581 dev_lookup = NULL;
582
583 free(lookup);
584
586 dev_code = NULL;
587
589 dev_ips = NULL;
590
591 dt_opencl_release_mem_object(dev_green_eq);
592 dev_green_eq = NULL;
593
594 free(ips);
595 ips = NULL;
596
597 // color smoothing
598 if((data->color_smoothing) && smooth)
599 {
600 if(!color_smoothing_cl(self, piece, dev_out, dev_out, roi_out, data->color_smoothing))
601 goto error;
602 }
603
604 return TRUE;
605
606error:
607 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
611 free(lookup);
614 dt_opencl_release_mem_object(dev_green_eq);
615 free(ips);
616 dt_print(DT_DEBUG_OPENCL, "[opencl_demosaic] couldn't enqueue kernel! %d\n", err);
617 return FALSE;
618}
619#endif // HAVE_OPENCL
620
621// clang-format off
622// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
623// vim: shiftwidth=2 expandtab tabstop=2 cindent
624// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
625// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:191
#define TRUE
Definition ashift_lsd.c:151
#define FALSE
Definition ashift_lsd.c:147
static void lin_interpolate(float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters, const uint8_t(*const xtrans)[6])
Definition basic.c:4
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
static float lookup(read_only image2d_t lut, const float x)
Definition color_conversion.h:70
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1395
@ DT_DEBUG_OPENCL
Definition darktable.h:478
#define dt_free_align(A)
Definition darktable.h:334
@ DEMOSAIC_FULL_SCALE
Definition demosaic.c:103
@ DEMOSAIC_ONLY_VNG_LINEAR
Definition demosaic.c:104
@ DT_IOP_GREEN_EQ_NO
Definition demosaic.c:93
static int demosaic_qual_flags(const dt_dev_pixelpipe_iop_t *const piece, const dt_image_t *const img, const dt_iop_roi_t *const roi_out)
Definition demosaic.c:239
@ DT_DEV_DETAIL_MASK_DEMOSAIC
Definition develop.h:110
static void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
Definition eaw.c:29
const dt_collection_filter_flag_t colors[6]
Definition filter.c:274
static float f(const float t, const float c, const float x)
Definition graduatednd.c:173
#define FILTERS_ARE_4BAYER(filters)
Definition imageio.h:40
static int fcol(const int row, const int col, const uint32_t filters, const uint8_t(*const xtrans)[6])
Definition imageop_math.h:222
size_t size
Definition mipmap_cache.c:3
c
Definition derive_filmic_v6_gamut_mapping.py:11
g
Definition derive_filmic_v6_gamut_mapping.py:18
static int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.h:560
static int dt_opencl_set_kernel_arg(const int dev, const int kernel, const size_t size, const void *arg)
Definition opencl.h:556
static void dt_opencl_release_mem_object(void *mem)
Definition opencl.h:601
static int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.h:564
Definition pixelpipe_hb.h:46
void * data
Definition pixelpipe_hb.h:49
Definition common/image.h:195
Definition demosaic.c:187
Definition demosaic.c:129
Definition imageop.h:182
Definition imageop.h:32
int x
Definition imageop.h:33
int width
Definition imageop.h:33
int height
Definition imageop.h:33
int y
Definition imageop.h:33
#define dt_alloc_align(B)
Definition tests/cache.c:22
static void vng_interpolate(float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters, const uint8_t(*const xtrans)[6], const int only_vng_linear)
Definition vng.c:15