Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
vng.c
Go to the documentation of this file.
1/*
2 This file is part of the Ansel project.
3 Copyright (C) 2023, 2025-2026 Aurélien PIERRE.
4 Copyright (C) 2024 Alynx Zhou.
5
6 Ansel is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 Ansel is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Ansel. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20
21// VNG interpolate adapted from dcraw 9.20
22
23/*
24 This algorithm is officially called:
25
26 "Interpolation using a Threshold-based variable number of gradients"
27
28 described in http://scien.stanford.edu/pages/labsite/1999/psych221/projects/99/tingchen/algodep/vargra.html
29
30 I've extended the basic idea to work with non-Bayer filter arrays.
31 Gradients are numbered clockwise from NW=0 to W=7.
32 */
33static int vng_interpolate(float *out, const float *const in,
34 const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in,
35 const uint32_t filters, const uint8_t (*const xtrans)[6], const int only_vng_linear)
36{
37 static const signed char terms[]
38 = { -2, -2, +0, -1, 1, 0x01, -2, -2, +0, +0, 2, 0x01, -2, -1, -1, +0, 1, 0x01, -2, -1, +0, -1, 1, 0x02,
39 -2, -1, +0, +0, 1, 0x03, -2, -1, +0, +1, 2, 0x01, -2, +0, +0, -1, 1, 0x06, -2, +0, +0, +0, 2, 0x02,
40 -2, +0, +0, +1, 1, 0x03, -2, +1, -1, +0, 1, 0x04, -2, +1, +0, -1, 2, 0x04, -2, +1, +0, +0, 1, 0x06,
41 -2, +1, +0, +1, 1, 0x02, -2, +2, +0, +0, 2, 0x04, -2, +2, +0, +1, 1, 0x04, -1, -2, -1, +0, 1, 0x80,
42 -1, -2, +0, -1, 1, 0x01, -1, -2, +1, -1, 1, 0x01, -1, -2, +1, +0, 2, 0x01, -1, -1, -1, +1, 1, 0x88,
43 -1, -1, +1, -2, 1, 0x40, -1, -1, +1, -1, 1, 0x22, -1, -1, +1, +0, 1, 0x33, -1, -1, +1, +1, 2, 0x11,
44 -1, +0, -1, +2, 1, 0x08, -1, +0, +0, -1, 1, 0x44, -1, +0, +0, +1, 1, 0x11, -1, +0, +1, -2, 2, 0x40,
45 -1, +0, +1, -1, 1, 0x66, -1, +0, +1, +0, 2, 0x22, -1, +0, +1, +1, 1, 0x33, -1, +0, +1, +2, 2, 0x10,
46 -1, +1, +1, -1, 2, 0x44, -1, +1, +1, +0, 1, 0x66, -1, +1, +1, +1, 1, 0x22, -1, +1, +1, +2, 1, 0x10,
47 -1, +2, +0, +1, 1, 0x04, -1, +2, +1, +0, 2, 0x04, -1, +2, +1, +1, 1, 0x04, +0, -2, +0, +0, 2, 0x80,
48 +0, -1, +0, +1, 2, 0x88, +0, -1, +1, -2, 1, 0x40, +0, -1, +1, +0, 1, 0x11, +0, -1, +2, -2, 1, 0x40,
49 +0, -1, +2, -1, 1, 0x20, +0, -1, +2, +0, 1, 0x30, +0, -1, +2, +1, 2, 0x10, +0, +0, +0, +2, 2, 0x08,
50 +0, +0, +2, -2, 2, 0x40, +0, +0, +2, -1, 1, 0x60, +0, +0, +2, +0, 2, 0x20, +0, +0, +2, +1, 1, 0x30,
51 +0, +0, +2, +2, 2, 0x10, +0, +1, +1, +0, 1, 0x44, +0, +1, +1, +2, 1, 0x10, +0, +1, +2, -1, 2, 0x40,
52 +0, +1, +2, +0, 1, 0x60, +0, +1, +2, +1, 1, 0x20, +0, +1, +2, +2, 1, 0x10, +1, -2, +1, +0, 1, 0x80,
53 +1, -1, +1, +1, 1, 0x88, +1, +0, +1, +2, 1, 0x08, +1, +0, +2, -1, 1, 0x40, +1, +0, +2, +1, 1, 0x10 };
54 static const signed char chood[]
55 = { -1, -1, -1, 0, -1, +1, 0, +1, +1, +1, +1, 0, +1, -1, 0, -1 };
56 int *ip, *code[16][16];
57 // ring buffer pointing to three most recent rows processed (brow[3]
58 // is only used for rotating the buffer
59 float(*brow[4])[4];
60 const int width = roi_out->width, height = roi_out->height;
61 const int prow = (filters == 9) ? 6 : 8;
62 const int pcol = (filters == 9) ? 6 : 2;
63 const int colors = (filters == 9) ? 3 : 4;
64
65 // separate out G1 and G2 in RGGB Bayer patterns
66 uint32_t filters4 = filters;
67 if(filters == 9 || FILTERS_ARE_4BAYER(filters)) // x-trans or CYGM/RGBE
68 filters4 = filters;
69 else if((filters & 3) == 1)
70 filters4 = filters | 0x03030303u;
71 else
72 filters4 = filters | 0x0c0c0c0cu;
73
74 lin_interpolate(out, in, roi_out, roi_in, filters4, xtrans);
75
76 // if only linear interpolation is requested we can stop it here
77 if(only_vng_linear) return 0;
78
79 char *buffer = (char *)dt_pixelpipe_cache_alloc_align_cache(
80 sizeof(**brow) * width * 3 + sizeof(*ip) * prow * pcol * 320,
81 0);
82 if(!buffer)
83 {
84 fprintf(stderr, "[demosaic] not able to allocate VNG buffer\n");
85 return 1;
86 }
87 for(int row = 0; row < 3; row++) brow[row] = (float(*)[4])buffer + row * width;
88 ip = (int *)(buffer + sizeof(**brow) * width * 3);
89
90 for(int row = 0; row < prow; row++) /* Precalculate for VNG */
91 for(int col = 0; col < pcol; col++)
92 {
93 code[row][col] = ip;
94 const signed char *cp = terms;
95 for(int t = 0; t < 64; t++)
96 {
97 const int y1 = *cp++, x1 = *cp++;
98 const int y2 = *cp++, x2 = *cp++;
99 const int weight = *cp++;
100 const int grads = *cp++;
101 const int color = fcol(row + y1, col + x1, filters4, xtrans);
102 if(fcol(row + y2, col + x2, filters4, xtrans) != color) continue;
103 const int diag
104 = (fcol(row, col + 1, filters4, xtrans) == color && fcol(row + 1, col, filters4, xtrans) == color)
105 ? 2
106 : 1;
107 if(abs(y1 - y2) == diag && abs(x1 - x2) == diag) continue;
108 *ip++ = (y1 * width + x1) * 4 + color;
109 *ip++ = (y2 * width + x2) * 4 + color;
110 *ip++ = weight;
111 for(int g = 0; g < 8; g++)
112 if(grads & 1 << g) *ip++ = g;
113 *ip++ = -1;
114 }
115 *ip++ = INT_MAX;
116 cp = chood;
117 for(int g = 0; g < 8; g++)
118 {
119 const int y = *cp++, x = *cp++;
120 *ip++ = (y * width + x) * 4;
121 const int color = fcol(row, col, filters4, xtrans);
122 if(fcol(row + y, col + x, filters4, xtrans) != color
123 && fcol(row + y * 2, col + x * 2, filters4, xtrans) == color)
124 *ip++ = (y * width + x) * 8 + color;
125 else
126 *ip++ = 0;
127 }
128 }
129
130 for(int row = 2; row < height - 2; row++) /* Do VNG interpolation */
131 {
132#ifdef _OPENMP
133#pragma omp parallel for default(none) \
134 dt_omp_firstprivate(colors, pcol, prow, roi_in, width, xtrans) \
135 shared(row, code, brow, out, filters4) \
136 private(ip) \
137 schedule(static)
138#endif
139 for(int col = 2; col < width - 2; col++)
140 {
141 int g;
142 float gval[8] = { 0.0f };
143 float *pix = out + 4 * (row * width + col);
144 ip = code[(row + roi_in->y) % prow][(col + roi_in->x) % pcol];
145 while((g = ip[0]) != INT_MAX) /* Calculate gradients */
146 {
147 float diff = fabsf(pix[g] - pix[ip[1]]) * ip[2];
148 gval[ip[3]] += diff;
149 ip += 5;
150 if((g = ip[-1]) == -1) continue;
151 gval[g] += diff;
152 while((g = *ip++) != -1) gval[g] += diff;
153 }
154 ip++;
155 float gmin = gval[0], gmax = gval[0]; /* Choose a threshold */
156 for(g = 1; g < 8; g++)
157 {
158 if(gmin > gval[g]) gmin = gval[g];
159 if(gmax < gval[g]) gmax = gval[g];
160 }
161 if(gmax == 0)
162 {
163 memcpy(brow[2][col], pix, sizeof(*out) * 4);
164 continue;
165 }
166 const float thold = gmin + (gmax * 0.5f);
167 dt_aligned_pixel_t sum = { 0.0f };
168 const int color = fcol(row + roi_in->y, col + roi_in->x, filters4, xtrans);
169 int num = 0;
170 for(g = 0; g < 8; g++, ip += 2) /* Average the neighbors */
171 {
172 if(gval[g] <= thold)
173 {
174 for(int c = 0; c < colors; c++)
175 if(c == color && ip[1])
176 sum[c] += (pix[c] + pix[ip[1]]) * 0.5f;
177 else
178 sum[c] += pix[ip[0] + c];
179 num++;
180 }
181 }
182 for(int c = 0; c < colors; c++) /* Save to buffer */
183 {
184 float tot = pix[color];
185 if(c != color) tot += (sum[c] - sum[color]) / num;
186 brow[2][col][c] = tot;
187 }
188 }
189 if(row > 3) /* Write buffer to image */
190 memcpy(out + 4 * ((row - 2) * width + 2), brow[0] + 2, sizeof(*out) * 4 * (width - 4));
191 // rotate ring buffer
192 for(int g = 0; g < 4; g++) brow[(g - 1) & 3] = brow[g];
193 }
194 // copy the final two rows to the image
195 memcpy(out + (4 * ((height - 4) * width + 2)), brow[0] + 2, sizeof(*out) * 4 * (width - 4));
196 memcpy(out + (4 * ((height - 3) * width + 2)), brow[1] + 2, sizeof(*out) * 4 * (width - 4));
198
199 if(filters != 9 && !FILTERS_ARE_4BAYER(filters)) // x-trans or CYGM/RGBE
200// for Bayer mix the two greens to make VNG4
201#ifdef _OPENMP
202#pragma omp parallel for default(none) \
203 dt_omp_firstprivate(height, width) \
204 shared(out) \
205 schedule(static)
206#endif
207 for(int i = 0; i < height * width; i++) out[i * 4 + 1] = (out[i * 4 + 1] + out[i * 4 + 3]) / 2.0f;
208 return 0;
209}
210
211#ifdef HAVE_OPENCL
212
// OpenCL path of the VNG demosaic.
//
// Builds two host-side tables (the neighbour table `lookup` for the linear
// interpolation and the gradient instruction list `ips` with its per-cell start
// offsets `code`), uploads them as constant device buffers and then enqueues,
// in this order: border interpolation (border 1) and linear interpolation into
// dev_tmp, VNG interpolation from dev_tmp into dev_out, border interpolation
// (border 2) into dev_out and, when filters4 != 9, a copy back to dev_tmp
// followed by the green mixing kernel. Green equilibration may run before and
// colour smoothing after these steps, depending on `data`.
//
// Returns TRUE on success, FALSE after jumping to the `error` label.
//
// NOTE(review): `only_vng_linear` is not referenced anywhere in the body shown
// here.
// NOTE(review): `data` and `gd` are used below but no declaration is visible in
// this listing (its embedded line numbering skips 218-219) - confirm against the
// actual file.
213static int process_vng_cl(struct dt_iop_module_t *self, const dt_dev_pixelpipe_t *pipe,
214 const dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
215 const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
216 const gboolean smooth, const int only_vng_linear)
217{
220
221 const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])piece->dsc_in.xtrans;
222
223 // separate out G1 and G2 in Bayer patterns
224 uint32_t filters4;
225 if(piece->dsc_in.filters == 9u)
226 filters4 = piece->dsc_in.filters;
227 else if((piece->dsc_in.filters & 3) == 1)
228 filters4 = piece->dsc_in.filters | 0x03030303u;
229 else
230 filters4 = piece->dsc_in.filters | 0x0c0c0c0cu;
231
    // size: extent of the `lookup` table actually filled; prow/pcol: extent of
    // the `code` table actually filled
232 const int size = (filters4 == 9u) ? 6 : 16;
233 const int colors = (filters4 == 9u) ? 3 : 4;
234 const int prow = (filters4 == 9u) ? 6 : 8;
235 const int pcol = (filters4 == 9u) ? 6 : 2;
236 const int devid = pipe->devid;
237
238 const float processed_maximum[4]
239 = { piece->dsc_in.processed_maximum[0], piece->dsc_in.processed_maximum[1],
240 piece->dsc_in.processed_maximum[2], 1.0f };
241
242 int *ips = NULL;
243
244 cl_mem dev_tmp = NULL;
245 cl_mem dev_aux = NULL;
246 cl_mem dev_xtrans = NULL;
247 cl_mem dev_lookup = NULL;
248 cl_mem dev_code = NULL;
249 cl_mem dev_ips = NULL;
250 cl_mem dev_green_eq = NULL;
    // err keeps this initial value whenever the jump to `error` is caused by
    // something other than a failed enqueue call (e.g. a NULL device buffer)
251 cl_int err = -999;
252
253 int32_t(*lookup)[16][32] = NULL;
254
255 if(piece->dsc_in.filters == 9u)
256 {
257 dev_xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(piece->dsc_in.xtrans), (void *)piece->dsc_in.xtrans);
258 if(dev_xtrans == NULL) goto error;
259 }
260
261 // build interpolation lookup table for linear interpolation which for a given offset in the sensor
262 // lists neighboring pixels from which to interpolate:
263 // NUM_PIXELS # of neighboring pixels to read
264 // for (1..NUM_PIXELS):
265 // OFFSET # in bytes from current pixel
266 // WEIGHT # how much weight to give this neighbor
267 // COLOR # sensor color
268 // # weights of adjoining pixels not of this pixel's color
269 // COLORA TOT_WEIGHT
270 // COLORB TOT_WEIGHT
271 // COLORPIX # color of center pixel
    // NOTE(review): the OFFSET actually stored below is the pair (y, x) packed as
    // y in the upper and x in the lower 16 bits, not a byte offset.
    // NOTE(review): the result of malloc() is written through without a NULL check.
272 const size_t lookup_size = (size_t)16 * 16 * 32 * sizeof(int32_t);
273 lookup = malloc(lookup_size);
274
275 for(int row = 0; row < size; row++)
276 for(int col = 0; col < size; col++)
277 {
278 int32_t *ip = &(lookup[row][col][1]);
279 int sum[4] = { 0 };
280 const int f = fcol(row + roi_in->y, col + roi_in->x, filters4, xtrans);
281 // make list of adjoining pixel offsets by weight & color
282 for(int y = -1; y <= 1; y++)
283 for(int x = -1; x <= 1; x++)
284 {
        // weight is 1 for diagonal neighbours, 2 for direct ones (4 for the
        // centre, which is always skipped since its colour equals f)
285 const int weight = 1 << ((y == 0) + (x == 0));
286 const int color = fcol(row + y + roi_in->y, col + x + roi_in->x, filters4, xtrans);
287 if(color == f) continue;
        // NOTE(review): y can be -1 here; left-shifting a negative int is
        // undefined behaviour in ISO C, this relies on the compiler
288 *ip++ = (y << 16) | (x & 0xffffu);
289 *ip++ = weight;
290 *ip++ = color;
291 sum[color] += weight;
292 }
293 lookup[row][col][0] = (ip - &(lookup[row][col][0])) / 3; /* # of neighboring pixels found */
294 for(int c = 0; c < colors; c++)
295 if(c != f)
296 {
297 *ip++ = c;
298 *ip++ = sum[c];
299 }
300 *ip = f;
301 }
302
303 // Precalculate for VNG
    // terms: 64 entries of (y1, x1, y2, x2, weight, gradient direction bitmask);
    // chood: (y, x) offsets of the 8 neighbours, one per gradient direction
304 static const signed char terms[]
305 = { -2, -2, +0, -1, 1, 0x01, -2, -2, +0, +0, 2, 0x01, -2, -1, -1, +0, 1, 0x01, -2, -1, +0, -1, 1, 0x02,
306 -2, -1, +0, +0, 1, 0x03, -2, -1, +0, +1, 2, 0x01, -2, +0, +0, -1, 1, 0x06, -2, +0, +0, +0, 2, 0x02,
307 -2, +0, +0, +1, 1, 0x03, -2, +1, -1, +0, 1, 0x04, -2, +1, +0, -1, 2, 0x04, -2, +1, +0, +0, 1, 0x06,
308 -2, +1, +0, +1, 1, 0x02, -2, +2, +0, +0, 2, 0x04, -2, +2, +0, +1, 1, 0x04, -1, -2, -1, +0, 1, 0x80,
309 -1, -2, +0, -1, 1, 0x01, -1, -2, +1, -1, 1, 0x01, -1, -2, +1, +0, 2, 0x01, -1, -1, -1, +1, 1, 0x88,
310 -1, -1, +1, -2, 1, 0x40, -1, -1, +1, -1, 1, 0x22, -1, -1, +1, +0, 1, 0x33, -1, -1, +1, +1, 2, 0x11,
311 -1, +0, -1, +2, 1, 0x08, -1, +0, +0, -1, 1, 0x44, -1, +0, +0, +1, 1, 0x11, -1, +0, +1, -2, 2, 0x40,
312 -1, +0, +1, -1, 1, 0x66, -1, +0, +1, +0, 2, 0x22, -1, +0, +1, +1, 1, 0x33, -1, +0, +1, +2, 2, 0x10,
313 -1, +1, +1, -1, 2, 0x44, -1, +1, +1, +0, 1, 0x66, -1, +1, +1, +1, 1, 0x22, -1, +1, +1, +2, 1, 0x10,
314 -1, +2, +0, +1, 1, 0x04, -1, +2, +1, +0, 2, 0x04, -1, +2, +1, +1, 1, 0x04, +0, -2, +0, +0, 2, 0x80,
315 +0, -1, +0, +1, 2, 0x88, +0, -1, +1, -2, 1, 0x40, +0, -1, +1, +0, 1, 0x11, +0, -1, +2, -2, 1, 0x40,
316 +0, -1, +2, -1, 1, 0x20, +0, -1, +2, +0, 1, 0x30, +0, -1, +2, +1, 2, 0x10, +0, +0, +0, +2, 2, 0x08,
317 +0, +0, +2, -2, 2, 0x40, +0, +0, +2, -1, 1, 0x60, +0, +0, +2, +0, 2, 0x20, +0, +0, +2, +1, 1, 0x30,
318 +0, +0, +2, +2, 2, 0x10, +0, +1, +1, +0, 1, 0x44, +0, +1, +1, +2, 1, 0x10, +0, +1, +2, -1, 2, 0x40,
319 +0, +1, +2, +0, 1, 0x60, +0, +1, +2, +1, 1, 0x20, +0, +1, +2, +2, 1, 0x10, +1, -2, +1, +0, 1, 0x80,
320 +1, -1, +1, +1, 1, 0x88, +1, +0, +1, +2, 1, 0x08, +1, +0, +2, -1, 1, 0x40, +1, +0, +2, +1, 1, 0x10 };
321 static const signed char chood[]
322 = { -1, -1, -1, 0, -1, +1, 0, +1, +1, +1, +1, 0, +1, -1, 0, -1 };
323
    // NOTE(review): the result of malloc() is written through without a NULL check.
324 const size_t ips_size = (size_t)prow * pcol * 352 * sizeof(int);
325 ips = malloc(ips_size);
326
327 int *ip = ips;
    // code[row][col] is the index into ips where the instructions of that pattern
    // cell start; only the prow x pcol corner is filled in, yet the whole array
    // is uploaded further down
328 int code[16][16];
329
330 for(int row = 0; row < prow; row++)
331 for(int col = 0; col < pcol; col++)
332 {
333 code[row][col] = ip - ips;
334 const signed char *cp = terms;
335 for(int t = 0; t < 64; t++)
336 {
337 const int y1 = *cp++, x1 = *cp++;
338 const int y2 = *cp++, x2 = *cp++;
339 const int weight = *cp++;
340 const int grads = *cp++;
        // a term is kept only if both of its pixels have the same colour
341 const int color = fcol(row + y1, col + x1, filters4, xtrans);
342 if(fcol(row + y2, col + x2, filters4, xtrans) != color) continue;
343 const int diag
344 = (fcol(row, col + 1, filters4, xtrans) == color && fcol(row + 1, col, filters4, xtrans) == color)
345 ? 2
346 : 1;
347 if(abs(y1 - y2) == diag && abs(x1 - x2) == diag) continue;
        // emitted term: packed (y1, x1), packed (y2, x2), colour in the upper and
        // weight in the lower 16 bits, the gradient directions, then -1
        // NOTE(review): y1/y2 can be negative; left-shifting a negative int is
        // undefined behaviour in ISO C
348 *ip++ = (y1 << 16) | (x1 & 0xffffu);
349 *ip++ = (y2 << 16) | (x2 & 0xffffu);
350 *ip++ = (color << 16) | (weight & 0xffffu);
351 for(int g = 0; g < 8; g++)
352 if(grads & 1 << g) *ip++ = g;
353 *ip++ = -1;
354 }
        // INT_MAX closes the list of gradient terms of this cell
355 *ip++ = INT_MAX;
356 cp = chood;
357 for(int g = 0; g < 8; g++)
358 {
        // per direction three ints: packed offset of the adjacent pixel, then
        // packed offset and colour of the pixel two steps away when that one (and
        // not the adjacent one) has the centre colour, else 0 and 0
359 const int y = *cp++, x = *cp++;
360 *ip++ = (y << 16) | (x & 0xffffu);
361 const int color = fcol(row, col, filters4, xtrans);
362 if(fcol(row + y, col + x, filters4, xtrans) != color
363 && fcol(row + y * 2, col + x * 2, filters4, xtrans) == color)
364 {
365 *ip++ = (2*y << 16) | (2*x & 0xffffu);
366 *ip++ = color;
367 }
368 else
369 {
370 *ip++ = 0;
371 *ip++ = 0;
372 }
373 }
374 }
375
376
    // upload the three host tables as constant device buffers
377 dev_lookup = dt_opencl_copy_host_to_device_constant(devid, lookup_size, lookup);
378 if(dev_lookup == NULL) goto error;
379
380 dev_code = dt_opencl_copy_host_to_device_constant(devid, sizeof(code), code);
381 if(dev_code == NULL) goto error;
382
383 dev_ips = dt_opencl_copy_host_to_device_constant(devid, ips_size, ips);
384 if(dev_ips == NULL) goto error;
385
386 // green equilibration for Bayer sensors
387 if(piece->dsc_in.filters != 9u && data->green_eq != DT_IOP_GREEN_EQ_NO)
388 {
389 dev_green_eq = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float));
390 if(dev_green_eq == NULL) goto error;
391
392 if(!green_equilibration_cl(self, pipe, piece, dev_in, dev_green_eq, roi_in))
393 goto error;
394
    // from here on the equilibrated image is the input of all kernels
395 dev_in = dev_green_eq;
396 }
397
398 int width = roi_out->width;
399 int height = roi_out->height;
400
    // dev_aux aliases dev_out, so the `dev_aux != dev_out` checks further down
    // never release it
401 dev_aux = dev_out;
402
403 dev_tmp = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, sizeof(float) * 4);
404 if(dev_tmp == NULL) goto error;
405
406 {
407 // manage borders for linear interpolation part
408 const int border = 1;
409
410 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
411 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 0, sizeof(cl_mem), (void *)&dev_in);
412 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 1, sizeof(cl_mem), (void *)&dev_tmp);
413 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 2, sizeof(int), (void *)&width);
414 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 3, sizeof(int), (void *)&height);
415 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 4, sizeof(int), (void *)&border);
416 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 5, sizeof(int), (void *)&roi_in->x);
417 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 6, sizeof(int), (void *)&roi_in->y);
418 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 7, sizeof(uint32_t), (void *)&filters4);
419 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 8, sizeof(cl_mem), (void *)&dev_xtrans);
420 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_vng_border_interpolate, sizes);
421 if(err != CL_SUCCESS) goto error;
422 }
423
424 {
425 // do linear interpolation
426 dt_opencl_local_buffer_t locopt
427 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
428 .cellsize = 1 * sizeof(float), .overhead = 0,
429 .sizex = 1 << 8, .sizey = 1 << 8 };
430
431 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_vng_lin_interpolate, &locopt))
432 goto error;
433
434 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
435 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
436 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 0, sizeof(cl_mem), (void *)&dev_in);
437 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 1, sizeof(cl_mem), (void *)&dev_tmp);
438 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 2, sizeof(int), (void *)&width);
439 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 3, sizeof(int), (void *)&height);
440 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 4, sizeof(uint32_t), (void *)&filters4);
441 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 5, sizeof(cl_mem), (void *)&dev_lookup);
    // size-only argument (NULL pointer): one float per work item of the tile plus
    // a 1 pixel margin on every side - presumably the kernel's local tile, confirm
    // against the kernel source
442 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_lin_interpolate, 6,
443 sizeof(float) * (locopt.sizex + 2) * (locopt.sizey + 2), NULL);
444 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_vng_lin_interpolate, sizes, local);
445 if(err != CL_SUCCESS) goto error;
446 }
447
448 {
449 // do full VNG interpolation
450 dt_opencl_local_buffer_t locopt
451 = (dt_opencl_local_buffer_t){ .xoffset = 2*2, .xfactor = 1, .yoffset = 2*2, .yfactor = 1,
452 .cellsize = 4 * sizeof(float), .overhead = 0,
453 .sizex = 1 << 8, .sizey = 1 << 8 };
454
455 if(!dt_opencl_local_buffer_opt(devid, gd->kernel_vng_interpolate, &locopt))
456 goto error;
457
458 size_t sizes[3] = { ROUNDUP(width, locopt.sizex), ROUNDUP(height, locopt.sizey), 1 };
459 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
460 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 0, sizeof(cl_mem), (void *)&dev_tmp);
461 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 1, sizeof(cl_mem), (void *)&dev_aux);
462 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 2, sizeof(int), (void *)&width);
463 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 3, sizeof(int), (void *)&height);
464 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 4, sizeof(int), (void *)&roi_in->x);
465 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 5, sizeof(int), (void *)&roi_in->y);
466 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 6, sizeof(uint32_t), (void *)&filters4);
467 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 7, 4*sizeof(float), (void *)processed_maximum);
468 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 8, sizeof(cl_mem), (void *)&dev_xtrans);
469 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 9, sizeof(cl_mem), (void *)&dev_ips);
470 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 10, sizeof(cl_mem), (void *)&dev_code);
471 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_interpolate, 11, sizeof(float) * 4 * (locopt.sizex + 4) * (locopt.sizey + 4), NULL);
472 err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_vng_interpolate, sizes, local);
473 if(err != CL_SUCCESS) goto error;
474 }
475
476 {
477 // manage borders
478 const int border = 2;
479
480 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
481 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 0, sizeof(cl_mem), (void *)&dev_in);
482 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 1, sizeof(cl_mem), (void *)&dev_aux);
483 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 2, sizeof(int), (void *)&width);
484 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 3, sizeof(int), (void *)&height);
485 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 4, sizeof(int), (void *)&border);
486 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 5, sizeof(int), (void *)&roi_in->x);
487 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 6, sizeof(int), (void *)&roi_in->y);
488 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 7, sizeof(uint32_t), (void *)&filters4);
489 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_border_interpolate, 8, sizeof(cl_mem), (void *)&dev_xtrans);
490 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_vng_border_interpolate, sizes);
491 if(err != CL_SUCCESS) goto error;
492 }
493
494 if(filters4 != 9)
495 {
496 // for Bayer sensors mix the two green channels
    // the kernel reads dev_tmp and writes dev_aux, hence the copy of the current
    // result back into dev_tmp first
497 size_t origin[] = { 0, 0, 0 };
498 size_t region[] = { width, height, 1 };
499 err = dt_opencl_enqueue_copy_image(devid, dev_aux, dev_tmp, origin, origin, region);
500 if(err != CL_SUCCESS) goto error;
501
502 size_t sizes[3] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
503 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_green_equilibrate, 0, sizeof(cl_mem), (void *)&dev_tmp);
504 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_green_equilibrate, 1, sizeof(cl_mem), (void *)&dev_aux);
505 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_green_equilibrate, 2, sizeof(int), (void *)&width);
506 dt_opencl_set_kernel_arg(devid, gd->kernel_vng_green_equilibrate, 3, sizeof(int), (void *)&height);
507 err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_vng_green_equilibrate, sizes);
508 if(err != CL_SUCCESS) goto error;
509 }
510 dt_dev_write_rawdetail_mask_cl(pipe, piece, dev_aux, roi_in, DT_DEV_DETAIL_MASK_DEMOSAIC);
511
    // NOTE(review): in this listing dev_tmp, dev_xtrans, dev_lookup, dev_code and
    // dev_ips are only reset to NULL, and `lookup` is never freed; the embedded
    // line numbering skips 515, 518, 521, 524, 526 and 529, so the matching
    // release calls were probably dropped by the listing - confirm against the
    // actual file.
512 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
513 dev_aux = NULL;
514
516 dev_tmp = NULL;
517
519 dev_xtrans = NULL;
520
522 dev_lookup = NULL;
523
525
527 dev_code = NULL;
528
530 dev_ips = NULL;
531
532 dt_opencl_release_mem_object(dev_green_eq);
533 dev_green_eq = NULL;
534
535 dt_free(ips);
536
537 // color smoothing
    // NOTE(review): a failure here jumps to `error`, which passes ips to dt_free()
    // a second time; that is only safe if dt_free() also resets the pointer -
    // confirm against its definition.
538 if((data->color_smoothing) && smooth)
539 {
540 if(!color_smoothing_cl(self, pipe, piece, dev_out, dev_out, roi_out, data->color_smoothing))
541 goto error;
542 }
543
544 return TRUE;
545
    // Common failure exit. The message below is printed for every failure,
    // including those that are not enqueue errors.
    // NOTE(review): only dev_aux, dev_green_eq and ips are released in the lines
    // shown; the embedded numbering skips 548-553 - confirm the other buffers
    // against the actual file.
546error:
547 if(dev_aux != dev_out) dt_opencl_release_mem_object(dev_aux);
554 dt_opencl_release_mem_object(dev_green_eq);
555 dt_free(ips);
556 dt_print(DT_DEBUG_OPENCL, "[opencl_demosaic] couldn't enqueue kernel! %d\n", err);
557 return FALSE;
558}
559#endif // HAVE_OPENCL
560
561// clang-format off
562// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
563// vim: shiftwidth=2 expandtab tabstop=2 cindent
564// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
565// clang-format on
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
static void lin_interpolate(float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters, const uint8_t(*const xtrans)[6])
Definition basic.c:21
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
static float lookup(read_only image2d_t lut, const float x)
Definition color_conversion.h:84
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
const float g
Definition colorspaces_inline_conversions.h:925
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:256
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
static const int row
Definition colorspaces_inline_conversions.h:175
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1530
@ DT_DEBUG_OPENCL
Definition darktable.h:642
#define dt_pixelpipe_cache_alloc_align_cache(size, id)
Definition darktable.h:357
#define dt_free(ptr)
Definition darktable.h:380
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:377
@ DT_IOP_GREEN_EQ_NO
Definition demosaic.c:132
@ DT_DEV_DETAIL_MASK_DEMOSAIC
Definition develop.h:140
static void weight(const float *c1, const float *c2, const float sharpen, dt_aligned_pixel_t weight)
Definition eaw.c:33
const dt_collection_filter_flag_t colors[6]
Definition filter.c:303
#define FILTERS_ARE_4BAYER(filters)
Definition imageio.h:54
static int fcol(const int row, const int col, const uint32_t filters, const uint8_t(*const xtrans)[6])
Definition imageop_math.h:233
static const float x
Definition iop_profile.h:239
const int t
Definition iop_profile.h:227
size_t size
Definition mipmap_cache.c:3
static int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.h:574
static int dt_opencl_set_kernel_arg(const int dev, const int kernel, const size_t size, const void *arg)
Definition opencl.h:570
static void dt_opencl_release_mem_object(void *mem)
Definition opencl.h:619
static int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.h:578
Definition pixelpipe_hb.h:95
struct dt_iop_module_t *void * data
Definition pixelpipe_hb.h:96
Definition pixelpipe_hb.h:216
Definition demosaic.c:217
Definition demosaic.c:159
Definition imageop.h:216
Definition imageop.h:67
int x
Definition imageop.h:68
int width
Definition imageop.h:68
int height
Definition imageop.h:68
int y
Definition imageop.h:68
static int vng_interpolate(float *out, const float *const in, const dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters, const uint8_t(*const xtrans)[6], const int only_vng_linear)
Definition vng.c:33