// Row stride of the per-tile working buffers: tile indices in this file are formed as row * RCD_TILESIZE + col.
54 #define RCD_TILESIZE 112
// Save the current GCC options and enable relaxed floating-point optimisations
// for the code that follows, up to the matching pop_options pragma.
74 #pragma GCC push_options
75 #pragma GCC optimize ("fast-math", "fp-contract=fast", "finite-math-only", "no-math-errno")
// Tile size with RCD_BORDER subtracted twice (RCD_BORDER is defined on a line not shown in this listing).
80#define RCD_TILEVALID (RCD_TILESIZE - 2 * RCD_BORDER)
// Index offsets of one to four tile rows, used to reach vertical neighbours (e.g. cfa[indx - w1]).
81#define w1 RCD_TILESIZE
82#define w2 (2 * RCD_TILESIZE)
83#define w3 (3 * RCD_TILESIZE)
84#define w4 (4 * RCD_TILESIZE)
// Returns the larger of 0 and a, multiplied by scale.
92 return fmaxf(0.0f,
a) * scale;
// border is three pixels larger than margin.
98 const int border = margin + 3;
// Walk every row of the image. NOTE(review): the loop over the column index i is not visible in this listing.
101 for(
int j = 0; j <
height; j++)
// Clear the eight float slots of sum[].
107 memset(sum, 0,
sizeof(
float) * 8);
// Visit the 3x3 neighbourhood centred on (j, i).
// NOTE(review): any bounds check on (y, x) would sit on lines missing from this listing.
108 for(
int y = j - 1; y != j + 2; y++)
110 for(
int x =
i - 1;
x !=
i + 2;
x++)
// CFA colour of the neighbour; its non-negative sample is accumulated in sum[f].
114 const int f =
FC(y,
x, filters);
115 sum[
f] += fmaxf(0.0f, in[(
size_t)y *
width +
x]);
// CFA colour of the centre pixel.
120 const int f =
FC(j,
i, filters);
// Fill channels 0..2 of the four-float output pixel.
121 for(
int c = 0;
c < 3;
c++)
// A channel other than the centre's own colour takes sum[c] / sum[c + 4] when sum[c + 4] is positive.
// NOTE(review): sum[c + 4] is presumably a per-colour sample count updated on a line missing here.
123 if(
c !=
f && sum[
c + 4] > 0.0f)
124 out[4 * ((size_t)j *
width +
i) +
c] = sum[
c] / sum[
c + 4];
// Presumably the else branch (the else line is not visible): use the non-negative centre sample.
126 out[4 * ((size_t)j *
width +
i) +
c] = fmaxf(0.0f, in[(
size_t)j *
width +
i]);
// Local alias of in.
131 const float *input = in;
// Rows 3 .. height - 4 are distributed across OpenMP threads.
134#pragma omp parallel for default(none) \
135 dt_omp_firstprivate(filters, out, width, height, border) \
139 for(
int j = 3; j <
height - 3; j++)
// Start at column 3 of row j: buf addresses the four-float output pixel, buf_in the single-float input sample.
141 float *buf =
out + (size_t)4 *
width * j + 4 * 3;
142 const float *buf_in = input + (size_t)
width * j + 3;
// Test for column i == border on a row inside [border, height - border).
// NOTE(review): the statement guarded by this test is not visible in this listing; presumably it moves i past the interior.
145 if(
i == border && j >= border && j <
height - border)
// Re-derive both pointers for the current column i.
148 buf =
out + (size_t)4 *
width * j + 4 *
i;
149 buf_in = input + (size_t)
width * j +
i;
// CFA colour at (j, i) and the pixel being assembled.
153 const int c =
FC(j,
i, filters);
154 dt_aligned_pixel_t color;
// Non-negative centre sample.
155 const float pc = fmaxf(0.0f, buf_in[0]);
// Non-negative neighbours one to three rows away (py*) and one to three columns away (px*);
// a lowercase m marks a negative offset, an uppercase M a positive one.
159 const float pym = fmaxf(0.0f, buf_in[-
width * 1]);
160 const float pym2 = fmaxf(0.0f, buf_in[-
width * 2]);
161 const float pym3 = fmaxf(0.0f, buf_in[-
width * 3]);
162 const float pyM = fmaxf(0.0f, buf_in[+
width * 1]);
163 const float pyM2 = fmaxf(0.0f, buf_in[+
width * 2]);
164 const float pyM3 = fmaxf(0.0f, buf_in[+
width * 3]);
165 const float pxm = fmaxf(0.0f, buf_in[-1]);
166 const float pxm2 = fmaxf(0.0f, buf_in[-2]);
167 const float pxm3 = fmaxf(0.0f, buf_in[-3]);
168 const float pxM = fmaxf(0.0f, buf_in[+1]);
169 const float pxM2 = fmaxf(0.0f, buf_in[+2]);
170 const float pxM3 = fmaxf(0.0f, buf_in[+3]);
// Horizontal estimate (four times the value later stored, see the * .25f below) and horizontal variation measure.
172 const float guessx = (pxm + pc + pxM) * 2.0f - pxM2 - pxm2;
173 const float diffx = (fabsf(pxm2 - pc) + fabsf(pxM2 - pc) + fabsf(pxm - pxM)) * 3.0f
174 + (fabsf(pxM3 - pxM) + fabsf(pxm3 - pxm)) * 2.0f;
// Vertical counterparts of guessx and diffx.
175 const float guessy = (pym + pc + pyM) * 2.0f - pyM2 - pym2;
176 const float diffy = (fabsf(pym2 - pc) + fabsf(pyM2 - pc) + fabsf(pym - pyM)) * 3.0f
177 + (fabsf(pyM3 - pyM) + fabsf(pym3 - pym)) * 2.0f;
// Vertical case: color[1] is guessy / 4 clamped to the range spanned by the two vertical neighbours.
// NOTE(review): the test that selects between the vertical and horizontal case is not visible in this listing.
181 const float m = fminf(pym, pyM);
182 const float M = fmaxf(pym, pyM);
183 color[1] = fmaxf(fminf(guessy * .25f,
M),
m);
// Horizontal case: color[1] is guessx / 4 clamped to the range spanned by the two horizontal neighbours.
187 const float m = fminf(pxm, pxM);
188 const float M = fmaxf(pxm, pxM);
189 color[1] = fmaxf(fminf(guessx * .25f,
M),
m);
// Store the four floats of color into the output pixel.
196 memcpy(buf, color,
sizeof(
float) * 4);
// Rows 1 .. height - 2 are distributed across OpenMP threads.
203#pragma omp parallel for default(none) \
204 dt_omp_firstprivate(filters, out, width, height, margin) \
207 for(
int j = 1; j <
height - 1; j++)
// buf starts at the output pixel in column 1 of row j.
209 float *buf =
out + (size_t)4 *
width * j + 4;
// Test for column i == margin on a row inside [margin, height - margin).
// NOTE(review): the statement guarded by this test is not visible in this listing.
212 if(
i == margin && j >= margin && j <
height - margin)
// CFA colour at (j, i) and a working copy of the current output pixel.
217 const int c =
FC(j,
i, filters);
218 dt_aligned_pixel_t color = { buf[0], buf[1], buf[2], buf[3] };
// Number of floats in one output row.
219 const int linesize = 4 *
width;
// Branch taken when the colour index is odd; hinted to the compiler as the likely case.
222 if(__builtin_expect(
c & 1, 1))
// Output pixels one row before / after and one pixel before / after the current one.
226 const float *nt = buf - linesize;
227 const float *nb = buf + linesize;
228 const float *nl = buf - 4;
229 const float *nr = buf + 4;
// When the CFA colour at column i + 1 is 0: channel 2 comes from the row neighbours and channel 0 from the
// column neighbours, each as the mean neighbour value corrected by the channel-1 difference to the centre.
230 if(
FC(j,
i + 1, filters) == 0)
232 color[2] = (nt[2] + nb[2] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
233 color[0] = (nl[0] + nr[0] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
// Same formulas with channels 0 and 2 swapped; presumably the else branch (the else line is not visible).
238 color[0] = (nt[0] + nb[0] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
239 color[2] = (nl[2] + nr[2] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
// Diagonal neighbours of the current output pixel.
// NOTE(review): presumably this starts the branch for an even colour index; the branch structure is not visible.
245 const float *ntl = buf - 4 - linesize;
246 const float *ntr = buf + 4 - linesize;
247 const float *nbl = buf - 4 + linesize;
248 const float *nbr = buf + 4 + linesize;
// Channel 2 along both diagonals: diff* measures variation, guess* is twice the estimate.
253 const float diff1 = fabsf(ntl[2] - nbr[2]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
254 const float guess1 = ntl[2] + nbr[2] + 2.0f * color[1] - ntl[1] - nbr[1];
255 const float diff2 = fabsf(ntr[2] - nbl[2]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
256 const float guess2 = ntr[2] + nbl[2] + 2.0f * color[1] - ntr[1] - nbl[1];
// Select guess2, guess1, or the mean of both. NOTE(review): the first test and the final else are not visible here.
258 color[2] = guess2 * .5f;
259 else if(diff1 < diff2)
260 color[2] = guess1 * .5f;
262 color[2] = (guess1 + guess2) * .25f;
// Channel 0 along both diagonals, same scheme as for channel 2 above.
266 const float diff1 = fabsf(ntl[0] - nbr[0]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
267 const float guess1 = ntl[0] + nbr[0] + 2.0f * color[1] - ntl[1] - nbr[1];
268 const float diff2 = fabsf(ntr[0] - nbl[0]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
269 const float guess2 = ntr[0] + nbl[0] + 2.0f * color[1] - ntr[1] - nbl[1];
271 color[0] = guess2 * .5f;
272 else if(diff1 < diff2)
273 color[0] = guess1 * .5f;
275 color[0] = (guess1 + guess2) * .25f;
// Write the updated pixel back.
278 memcpy(buf, color,
sizeof(
float) * 4);
// Requests a SIMD variant of the function declared next, with in and out declared as aligned.
285 #pragma omp declare simd aligned(in, out)
288 const dt_iop_roi_t *
const roi_in,
const uint32_t filters)
// Reciprocal of scaler (scaler itself is set on a line not visible here); applied to every sample on load.
302 const float revscaler = 1.0f / scaler;
// Open an OpenMP parallel region; the listed variables are passed through dt_omp_firstprivate.
308 #pragma omp parallel \
309 dt_omp_firstprivate(width, height, filters, out, in, scaler, revscaler)
// The two tile loops are collapsed into a single iteration space, scheduled dynamically in chunks of 6,
// with no barrier at the end of the worksharing loop.
332 #pragma omp for schedule(simd:dynamic, 6) collapse(2) nowait
334 for(
int tile_vertical = 0; tile_vertical < num_vertical; tile_vertical++)
336 for(
int tile_horizontal = 0; tile_horizontal < num_horizontal; tile_horizontal++)
// Load rows rowStart .. rowEnd - 1 of the input into the tile buffers.
356 for(
int row = rowStart;
row < rowEnd;
row++)
// CFA colours of the first two columns of this row.
358 const int c0 =
FC(
row, colStart, filters);
359 const int c1 =
FC(
row, colStart + 1, filters);
// indx walks the tile buffers (stride RCD_TILESIZE per row), in_indx the full-width input.
360 for(
int col = colStart, indx = (
row - rowStart) *
RCD_TILESIZE, in_indx =
row *
width + colStart; col < colEnd; col++, indx++, in_indx++)
// The clamped, rescaled sample is stored in cfa and also in rgb[c0] and rgb[c1] at the same index.
362 cfa[indx] =
rgb[c0][indx] =
rgb[
c1][indx] =
safe_in(in[in_indx], revscaler);
// Pre-fill bufferV (slot row - 3) with sqf() of a 7-tap vertical filter on cfa, coefficients
// (1, -3, -1, 6, -1, -3, 1) at row offsets -3 .. +3. sqf is defined elsewhere; presumably it squares its argument.
// NOTE(review): the enclosing row loop is not visible in this listing.
371 for(
int col = 4, indx =
row *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
373 bufferV[
row - 3][col - 4] =
sqf((cfa[indx -
w3] - cfa[indx -
w1] - cfa[indx +
w1] + cfa[indx +
w3]) - 3.0f * (cfa[indx -
w2] + cfa[indx +
w2]) + 6.0f * cfa[indx]);
// Three row pointers into bufferV; they are rotated at the end of every row.
382 float* V0 = bufferV[0];
383 float* V1 = bufferV[1];
384 float* V2 = bufferV[2];
// Main loop over tile rows 4 .. tileRows - 5.
385 for(
int row = 4;
row < tileRows - 4;
row++ )
// Same filter applied horizontally for columns 3 .. tileCols - 4 of this row; bufferH is indexed by col - 3.
387 for(
int col = 3, indx =
row *
RCD_TILESIZE + col; col < tileCols - 3; col++, indx++)
389 bufferH[col - 3] =
sqf((cfa[indx - 3] - cfa[indx - 1] - cfa[indx + 1] + cfa[indx + 3]) - 3.0f * (cfa[indx - 2] + cfa[indx + 2]) + 6.0f * cfa[indx]);
// Vertical filter for the following row (row + 1), written into V2.
391 for(
int col = 4, indx = (
row + 1) *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++)
393 V2[col - 4] =
sqf((cfa[indx -
w3] - cfa[indx -
w1] - cfa[indx +
w1] + cfa[indx +
w3]) - 3.0f * (cfa[indx -
w2] + cfa[indx +
w2]) + 6.0f * cfa[indx]);
// Combine the filter responses into one direction value per pixel of this row.
395 for(
int col = 4, indx =
row *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
// V_Stat sums the V0, V1 and V2 entries of this column; H_Stat sums the bufferH entries of columns
// col - 1, col and col + 1. Both are floored at epssq.
397 const float V_Stat = fmaxf(
epssq, V0[col - 4] + V1[col - 4] + V2[col - 4]);
398 const float H_Stat = fmaxf(
epssq, bufferH[col - 4] + bufferH[col - 3] + bufferH[col - 2]);
// Share of the vertical statistic in the sum of both.
399 VH_Dir[indx] = V_Stat / ( V_Stat + H_Stat );
// Rotate the row pointers: the oldest row becomes V2 and is overwritten on the next iteration.
402 float* tmp = V0; V0 = V1; V1 = V2; V2 = tmp;
// Weighted 3x3 sum of cfa at every second pixel of rows 2 .. tileRows - 3, stored at half density in lpf.
407 for(
int row = 2;
row < tileRows - 2;
row++)
// The first column is 2 or 3 depending on the low bit of the CFA colour at column 0 of this row;
// columns advance by 2 and lp_indx (= indx / 2 at the start) advances by 1.
409 for(
int col = 2 + (
FC(
row, 0, filters) & 1), indx =
row *
RCD_TILESIZE + col, lp_indx = indx / 2; col < tileCols - 2; col += 2, indx +=2, lp_indx++)
// Weights: 1 for the centre, 0.5 for the four edge neighbours, 0.25 for the four diagonal neighbours.
411 lpf[lp_indx] = cfa[indx]
412 + 0.5f * (cfa[indx -
w1] + cfa[indx +
w1] + cfa[indx - 1] + cfa[indx + 1])
413 + 0.25f * (cfa[indx -
w1 - 1] + cfa[indx -
w1 + 1] + cfa[indx +
w1 - 1] + cfa[indx +
w1 + 1]);
// Fill rgb[1] at every second pixel of rows 4 .. tileRows - 5.
419 for(
int row = 4;
row < tileRows - 4;
row++)
// The first column is 4 or 5 depending on the low bit of the CFA colour at column 0; lpindx is the half-density index.
421 for(
int col = 4 + (
FC(
row, 0, filters) & 1), indx =
row *
RCD_TILESIZE + col, lpindx = indx / 2; col < tileCols - 4; col += 2, indx += 2, lpindx++)
// Centre sample.
423 const float cfai = cfa[indx];
// One gradient per direction: eps plus absolute cfa differences reaching up to four pixels to that side.
426 const float N_Grad =
eps + fabs(cfa[indx -
w1] - cfa[indx +
w1]) + fabs(cfai - cfa[indx -
w2]) + fabs(cfa[indx -
w1] - cfa[indx -
w3]) + fabs(cfa[indx -
w2] - cfa[indx -
w4]);
427 const float S_Grad =
eps + fabs(cfa[indx -
w1] - cfa[indx +
w1]) + fabs(cfai - cfa[indx +
w2]) + fabs(cfa[indx +
w1] - cfa[indx +
w3]) + fabs(cfa[indx +
w2] - cfa[indx +
w4]);
428 const float W_Grad =
eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx - 2]) + fabs(cfa[indx - 1] - cfa[indx - 3]) + fabs(cfa[indx - 2] - cfa[indx - 4]);
429 const float E_Grad =
eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx + 2]) + fabs(cfa[indx + 1] - cfa[indx + 3]) + fabs(cfa[indx + 2] - cfa[indx + 4]);
// Directional estimates: the adjacent cfa sample scaled by 2 * lpfi / (eps + lpfi + neighbouring lpf entry).
// lpf is indexed at half density, so lpindx +/- w1 and lpindx +/- 1 are twice as far apart in full-density index terms.
432 const float lpfi = lpf[lpindx];
433 const float N_Est = cfa[indx -
w1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx -
w1]);
434 const float S_Est = cfa[indx +
w1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx +
w1]);
435 const float W_Est = cfa[indx - 1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx - 1]);
436 const float E_Est = cfa[indx + 1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx + 1]);
// Blend opposite estimates; each one is weighted by the gradient of the opposite side.
439 const float V_Est = (S_Grad * N_Est + N_Grad * S_Est) / (N_Grad + S_Grad);
440 const float H_Est = (W_Grad * E_Est + E_Grad * W_Est) / (E_Grad + W_Grad);
// Of the centre direction value and the mean of its four diagonal neighbours, keep the one further from 0.5
// (the centre value wins a tie).
444 const float VH_Central_Value = VH_Dir[indx];
445 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx -
w1 - 1] + VH_Dir[indx -
w1 + 1] + VH_Dir[indx +
w1 - 1] + VH_Dir[indx +
w1 + 1]);
446 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value)) ? VH_Neighbourhood_Value : VH_Central_Value;
// Combine H_Est and V_Est through intp (defined elsewhere), controlled by VH_Disc.
448 rgb[1][indx] =
intp(VH_Disc, H_Est, V_Est);
// sqf() of the 7-tap filter (1, -3, -1, 6, -1, -3, 1) taken along the two diagonals of cfa, for columns
// 3, 5, 7, ... of rows 3 .. tileRows - 4; results are stored at the half-density index indx2.
455 for(
int row = 3;
row < tileRows - 3;
row++)
457 for(
int col = 3, indx =
row *
RCD_TILESIZE + col, indx2 = indx / 2; col < tileCols - 3; col+=2, indx+=2, indx2++)
// P runs along offsets (-k rows, -k cols) .. (+k rows, +k cols); Q along (-k rows, +k cols) .. (+k rows, -k cols).
459 P_CDiff_Hpf[indx2] =
sqf((cfa[indx -
w3 - 3] - cfa[indx -
w1 - 1] - cfa[indx +
w1 + 1] + cfa[indx +
w3 + 3]) - 3.0f * (cfa[indx -
w2 - 2] + cfa[indx +
w2 + 2]) + 6.0f * cfa[indx]);
460 Q_CDiff_Hpf[indx2] =
sqf((cfa[indx -
w3 + 3] - cfa[indx -
w1 + 1] - cfa[indx +
w1 - 1] + cfa[indx +
w3 - 3]) - 3.0f * (cfa[indx -
w2 + 2] + cfa[indx +
w2 - 2]) + 6.0f * cfa[indx]);
// Turn the filter responses into one P/Q direction value per processed pixel.
464 for(
int row = 4;
row < tileRows - 4;
row++)
// indx2, indx3 and indx4 are half-density indices derived from indx, indx - w1 - 1 and indx + w1 - 1.
466 for(
int col = 4 + (
FC(
row, 0, filters) & 1), indx =
row *
RCD_TILESIZE + col, indx2 = indx / 2, indx3 = (indx -
w1 - 1) / 2, indx4 = (indx +
w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ )
// Each statistic sums three filter entries and is floored at epssq.
468 const float P_Stat = fmaxf(
epssq, P_CDiff_Hpf[indx3] + P_CDiff_Hpf[indx2] + P_CDiff_Hpf[indx4 + 1]);
469 const float Q_Stat = fmaxf(
epssq, Q_CDiff_Hpf[indx3 + 1] + Q_CDiff_Hpf[indx2] + Q_CDiff_Hpf[indx4]);
// Share of P in the sum of both, stored at the half-density index.
470 PQ_Dir[indx2] = P_Stat / (P_Stat + Q_Stat);
// Fill channel c of rgb at every second pixel of rows 4 .. tileRows - 5, using the diagonal neighbours.
475 for(
int row = 4;
row < tileRows - 4;
row++)
// c = 2 - FC(row, col, filters) is evaluated once per row at the starting column; the pq* indices are
// half-density indices derived from indx, indx - w1 - 1 and indx + w1 - 1.
477 for(
int col = 4 + (
FC(
row, 0, filters) & 1), indx =
row *
RCD_TILESIZE + col,
c = 2 -
FC(
row, col, filters), pqindx = indx / 2, pqindx2 = (indx -
w1 - 1) / 2, pqindx3 = (indx +
w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, pqindx++, pqindx2++, pqindx3++)
// Of the centre P/Q value and the mean of four neighbouring entries, keep the one further from 0.5
// (the centre value wins a tie).
480 const float PQ_Central_Value = PQ_Dir[pqindx];
481 const float PQ_Neighbourhood_Value = 0.25f * (PQ_Dir[pqindx2] + PQ_Dir[pqindx2 + 1] + PQ_Dir[pqindx3] + PQ_Dir[pqindx3 + 1]);
483 const float PQ_Disc = (fabs(0.5f - PQ_Central_Value) < fabs(0.5f - PQ_Neighbourhood_Value)) ? PQ_Neighbourhood_Value : PQ_Central_Value;
// One gradient per diagonal direction: eps plus absolute differences of rgb[c] and of rgb[1] along that diagonal.
486 const float NW_Grad =
eps + fabs(
rgb[
c][indx -
w1 - 1] -
rgb[
c][indx +
w1 + 1]) + fabs(
rgb[
c][indx -
w1 - 1] -
rgb[
c][indx -
w3 - 3]) + fabs(
rgb[1][indx] -
rgb[1][indx -
w2 - 2]);
487 const float NE_Grad =
eps + fabs(
rgb[
c][indx -
w1 + 1] -
rgb[
c][indx +
w1 - 1]) + fabs(
rgb[
c][indx -
w1 + 1] -
rgb[
c][indx -
w3 + 3]) + fabs(
rgb[1][indx] -
rgb[1][indx -
w2 + 2]);
488 const float SW_Grad =
eps + fabs(
rgb[
c][indx -
w1 + 1] -
rgb[
c][indx +
w1 - 1]) + fabs(
rgb[
c][indx +
w1 - 1] -
rgb[
c][indx +
w3 - 3]) + fabs(
rgb[1][indx] -
rgb[1][indx +
w2 - 2]);
489 const float SE_Grad =
eps + fabs(
rgb[
c][indx -
w1 - 1] -
rgb[
c][indx +
w1 + 1]) + fabs(
rgb[
c][indx +
w1 + 1] -
rgb[
c][indx +
w3 + 3]) + fabs(
rgb[1][indx] -
rgb[1][indx +
w2 + 2]);
// Differences rgb[c] - rgb[1] at the four diagonal neighbours.
492 const float NW_Est =
rgb[
c][indx -
w1 - 1] -
rgb[1][indx -
w1 - 1];
493 const float NE_Est =
rgb[
c][indx -
w1 + 1] -
rgb[1][indx -
w1 + 1];
494 const float SW_Est =
rgb[
c][indx +
w1 - 1] -
rgb[1][indx +
w1 - 1];
495 const float SE_Est =
rgb[
c][indx +
w1 + 1] -
rgb[1][indx +
w1 + 1];
// Blend opposite diagonal estimates; each one is weighted by the gradient of the opposite side.
498 const float P_Est = (NW_Grad * SE_Est + SE_Grad * NW_Est) / (NW_Grad + SE_Grad);
499 const float Q_Est = (NE_Grad * SW_Est + SW_Grad * NE_Est) / (NE_Grad + SW_Grad);
// rgb[1] at this pixel plus the intp combination (defined elsewhere) of Q_Est and P_Est, controlled by PQ_Disc.
502 rgb[
c][indx] =
rgb[1][indx] +
intp(PQ_Disc, Q_Est, P_Est);
// Fill channels 0 and 2 of rgb at every second pixel of rows 4 .. tileRows - 5, using the four adjacent pixels.
507 for(
int row = 4;
row < tileRows - 4;
row++)
// The first column is 4 or 5 depending on the low bit of the CFA colour at column 1 of this row.
509 for(
int col = 4 + (
FC(
row, 1, filters) & 1), indx =
row *
RCD_TILESIZE + col; col < tileCols - 4; col += 2, indx +=2)
// Of the centre V/H value and the mean of its four diagonal neighbours, keep the one further from 0.5
// (the centre value wins a tie).
512 const float VH_Central_Value = VH_Dir[indx];
513 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx -
w1 - 1] + VH_Dir[indx -
w1 + 1] + VH_Dir[indx +
w1 - 1] + VH_Dir[indx +
w1 + 1]);
514 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value) ) ? VH_Neighbourhood_Value : VH_Central_Value;
// Centre rgb[1] value, and eps plus its absolute difference to rgb[1] two pixels away in each direction.
515 const float rgb1 =
rgb[1][indx];
516 const float N1 =
eps + fabs(rgb1 -
rgb[1][indx -
w2]);
517 const float S1 =
eps + fabs(rgb1 -
rgb[1][indx +
w2]);
518 const float W1 =
eps + fabs(rgb1 -
rgb[1][indx - 2]);
519 const float E1 =
eps + fabs(rgb1 -
rgb[1][indx + 2]);
// rgb[1] at the four adjacent pixels, read once and reused for both channels.
521 const float rgb1mw1 =
rgb[1][indx -
w1];
522 const float rgb1pw1 =
rgb[1][indx +
w1];
523 const float rgb1m1 =
rgb[1][indx - 1];
524 const float rgb1p1 =
rgb[1][indx + 1];
// Run once for channel 0 and once for channel 2.
526 for(
int c = 0;
c <= 2;
c += 2)
// Absolute differences of channel c across the centre, vertically and horizontally.
528 const float SNabs = fabs(
rgb[
c][indx -
w1] -
rgb[
c][indx +
w1]);
529 const float EWabs = fabs(
rgb[
c][indx - 1] -
rgb[
c][indx + 1]);
// One gradient per direction: the rgb[1] term, the across-centre term, and the difference between the
// channel-c values one and three pixels away on that side.
532 const float N_Grad = N1 + SNabs + fabs(
rgb[
c][indx -
w1] -
rgb[
c][indx -
w3]);
533 const float S_Grad = S1 + SNabs + fabs(
rgb[
c][indx +
w1] -
rgb[
c][indx +
w3]);
534 const float W_Grad = W1 + EWabs + fabs(
rgb[
c][indx - 1] -
rgb[
c][indx - 3]);
535 const float E_Grad = E1 + EWabs + fabs(
rgb[
c][indx + 1] -
rgb[
c][indx + 3]);
// Differences rgb[c] - rgb[1] at the four adjacent pixels.
538 const float N_Est =
rgb[
c][indx -
w1] - rgb1mw1;
539 const float S_Est =
rgb[
c][indx +
w1] - rgb1pw1;
540 const float W_Est =
rgb[
c][indx - 1] - rgb1m1;
541 const float E_Est =
rgb[
c][indx + 1] - rgb1p1;
// Blend opposite estimates; each one is weighted by the gradient of the opposite side.
544 const float V_Est = (N_Grad * S_Est + S_Grad * N_Est) / (N_Grad + S_Grad);
545 const float H_Est = (E_Grad * W_Est + W_Grad * E_Est) / (E_Grad + W_Grad);
// Centre rgb[1] plus the intp combination (defined elsewhere) of H_Est and V_Est, controlled by VH_Disc.
548 rgb[
c][indx] = rgb1 +
intp(VH_Disc, H_Est, V_Est);
// The last tile in each direction stops RCD_MARGIN before the tile end; every other tile stops RCD_BORDER before it.
555 const int last_vertical = rowEnd - ((tile_vertical == num_vertical - 1) ?
RCD_MARGIN :
RCD_BORDER);
557 const int last_horizontal = colEnd - ((tile_horizontal == num_horizontal - 1) ?
RCD_MARGIN :
RCD_BORDER);
// Copy this part of the tile to the output (first_vertical and first_horizontal are set on lines not visible here).
558 for(
int row = first_vertical;
row < last_vertical;
row++)
// idx indexes the tile buffers, o_idx the first float of the four-float output pixel.
560 for(
int col = first_horizontal, idx = (
row - rowStart) *
RCD_TILESIZE + col - colStart, o_idx = (
row *
width + col) * 4; col < last_horizontal; col++, o_idx += 4, idx++)
// Clamp each channel to be non-negative and multiply by scaler; the visible lines write channels 0..2 only.
562 out[o_idx] = scaler * fmaxf(0.0f,
rgb[0][idx]);
563 out[o_idx+1] = scaler * fmaxf(0.0f,
rgb[1][idx]);
564 out[o_idx+2] = scaler * fmaxf(0.0f,
rgb[2][idx]);
// NOTE(review): only the last parameter of this function's signature is visible in this listing, and the
// kernel argument / launch calls between the visible lines are missing as well.
584 const gboolean smooth)
// Device id taken from the pipe.
589 const int devid = pipe->devid;
// Device memory handles, initialised to NULL.
591 cl_mem dev_aux = NULL;
592 cl_mem dev_tmp = NULL;
593 cl_mem dev_green_eq = NULL;
598 cl_mem VH_dir = NULL;
599 cl_mem PQ_dir = NULL;
600 cl_mem VP_diff = NULL;
601 cl_mem HQ_diff = NULL;
// Output dimensions.
605 int width = roi_out->width;
606 int height = roi_out->height;
// Allocate a roi_in-sized device image (element size sizeof(float)), run green_equilibration_cl into it and
// use the result as dev_in from here on. NOTE(review): any condition guarding this step is not visible here.
611 dev_green_eq = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height,
sizeof(
float));
612 if(dev_green_eq == NULL)
goto error;
613 if(!green_equilibration_cl(self, pipe, piece, dev_in, dev_green_eq, roi_in))
goto error;
614 dev_in = dev_green_eq;
// Temporary roi_in-sized device image with element size 4 * sizeof(float).
620 dev_tmp = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height,
sizeof(
float) * 4);
621 if(dev_tmp == NULL)
goto error;
// First stage visible here: myborder = 3, sizes derived from width and height through ROUNDUPDWD / ROUNDUPDHT.
624 const int myborder = 3;
625 size_t sizes[3] = { ROUNDUPDWD(
width, devid), ROUNDUPDHT(
height, devid), 1 };
633 if(err != CL_SUCCESS)
goto error;
// Local-buffer request: offsets 2*3 in x and y, cell size of one float, 64x64 starting size.
637 dt_opencl_local_buffer_t locopt
638 = (dt_opencl_local_buffer_t){ .xoffset = 2*3, .xfactor = 1, .yoffset = 2*3, .yfactor = 1,
639 .cellsize =
sizeof(float) * 1, .overhead = 0,
640 .sizex = 64, .sizey = 64 };
// Second stage: myborder = 32; sizes are rounded up to multiples of the local sizes in locopt.
643 const int myborder = 32;
644 size_t sizes[3] = { ROUNDUP(
width, locopt.sizex), ROUNDUP(
height, locopt.sizey), 1 };
645 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
654 if(err != CL_SUCCESS)
goto error;
// Local-buffer request: offsets 2*1 in x and y, cell size of four floats, 64x64 starting size.
658 dt_opencl_local_buffer_t locopt
659 = (dt_opencl_local_buffer_t){ .xoffset = 2*1, .xfactor = 1, .yoffset = 2*1, .yfactor = 1,
660 .cellsize = 4 *
sizeof(float), .overhead = 0,
661 .sizex = 64, .sizey = 64 };
// Third stage: myborder = 16; sizes are again rounded up to multiples of the local sizes.
664 const int myborder = 16;
665 size_t sizes[3] = { ROUNDUP(
width, locopt.sizex), ROUNDUP(
height, locopt.sizey), 1 };
666 size_t local[3] = { locopt.sizex, locopt.sizey, 1 };
675 if(err != CL_SUCCESS)
goto error;
// Eight device buffers of sizeof(float) * roi_in->width * roi_in->height bytes each;
// any failed allocation jumps to error.
680 cfa = dt_opencl_alloc_device_buffer(devid,
sizeof(
float) * roi_in->width * roi_in->height);
681 if(cfa == NULL)
goto error;
682 VH_dir = dt_opencl_alloc_device_buffer(devid,
sizeof(
float) * roi_in->width * roi_in->height);
683 if(VH_dir == NULL)
goto error;
684 PQ_dir = dt_opencl_alloc_device_buffer(devid,
sizeof(
float) * roi_in->width * roi_in->height);
685 if(PQ_dir == NULL)
goto error;
686 VP_diff = dt_opencl_alloc_device_buffer(devid,
sizeof(
float) * roi_in->width * roi_in->height);
687 if(VP_diff == NULL)
goto error;
688 HQ_diff = dt_opencl_alloc_device_buffer(devid,
sizeof(
float) * roi_in->width * roi_in->height);
689 if(HQ_diff == NULL)
goto error;
690 rgb0 = dt_opencl_alloc_device_buffer(devid,
sizeof(
float) * roi_in->width * roi_in->height);
691 if(rgb0 == NULL)
goto error;
692 rgb1 = dt_opencl_alloc_device_buffer(devid,
sizeof(
float) * roi_in->width * roi_in->height);
693 if(rgb1 == NULL)
goto error;
694 rgb2 = dt_opencl_alloc_device_buffer(devid,
sizeof(
float) * roi_in->width * roi_in->height);
695 if(rgb2 == NULL)
goto error;
// Full-size launch geometry; scaler here is the reciprocal of the largest of the first three
// processed_maximum entries.
699 size_t sizes[3] = { ROUNDUPDWD(
width, devid), ROUNDUPDHT(
height, devid), 1 };
700 const float scaler = 1.0f / fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
711 if(err != CL_SUCCESS)
goto error;
// Two more stages with full-size geometry.
716 size_t sizes[3] = { ROUNDUPDWD(
width, devid), ROUNDUPDHT(
height, devid), 1 };
723 if(err != CL_SUCCESS)
goto error;
728 size_t sizes[3] = { ROUNDUPDWD(
width, devid), ROUNDUPDHT(
height, devid), 1 };
735 if(err != CL_SUCCESS)
goto error;
// The following six stages use width / 2 for the first dimension.
740 size_t sizes[3] = { ROUNDUPDWD(
width / 2, devid), ROUNDUPDHT(
height, devid), 1 };
747 if(err != CL_SUCCESS)
goto error;
752 size_t sizes[3] = { ROUNDUPDWD(
width / 2, devid), ROUNDUPDHT(
height, devid), 1 };
761 if(err != CL_SUCCESS)
goto error;
766 size_t sizes[3] = { ROUNDUPDWD(
width / 2, devid), ROUNDUPDHT(
height, devid), 1 };
774 if(err != CL_SUCCESS)
goto error;
779 size_t sizes[3] = { ROUNDUPDWD(
width / 2, devid), ROUNDUPDHT(
height, devid), 1 };
787 if(err != CL_SUCCESS)
goto error;
792 size_t sizes[3] = { ROUNDUPDWD(
width / 2, devid), ROUNDUPDHT(
height, devid), 1 };
801 if(err != CL_SUCCESS)
goto error;
806 size_t sizes[3] = { ROUNDUPDWD(
width / 2, devid), ROUNDUPDHT(
height, devid), 1 };
815 if(err != CL_SUCCESS)
goto error;
// Largest of the first three processed_maximum entries (not inverted this time).
817 const float scaler = fmaxf(piece->dsc_in.processed_maximum[0], fmaxf(piece->dsc_in.processed_maximum[1], piece->dsc_in.processed_maximum[2]));
// Final full-size stage visible here, with myborder = 6.
821 const int myborder = 6;
822 size_t sizes[3] = { ROUNDUPDWD(
width, devid), ROUNDUPDHT(
height, devid), 1 };
832 if(err != CL_SUCCESS)
goto error;
// Reset the working handles to NULL. NOTE(review): the release calls are presumably on lines missing from this listing.
844 dev_green_eq = cfa = rgb0 = rgb1 = rgb2 = VH_dir = PQ_dir = VP_diff = HQ_diff = NULL;
// Optional colour smoothing, applied with dev_out as both source and destination when
// data->color_smoothing and smooth are both set.
852 if((data->color_smoothing) && smooth)
854 if(!color_smoothing_cl(self, pipe, piece, dev_out, dev_out, roi_out, data->color_smoothing))
// Reset all handles to NULL; this line follows the error-related lines above and is presumably part of the cleanup path.
872 dev_aux = dev_green_eq = dev_tmp = cfa = rgb0 = rgb1 = rgb2 = VH_dir = PQ_dir = VP_diff = HQ_diff = NULL;
// Restore the GCC options saved by the earlier push_options pragma.
880 #pragma GCC pop_options
static void error(char *msg)
Definition ashift_lsd.c:202
#define TRUE
Definition ashift_lsd.c:162
#define FALSE
Definition ashift_lsd.c:158
#define m
Definition basecurve.c:277
int width
Definition bilateral.h:1
int height
Definition bilateral.h:1
#define INLINE
Definition cacorrect.c:167
static float intp(const float a, const float b, const float c)
Definition cacorrect.c:180
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:256
static const dt_colormatrix_t M
Definition colorspaces_inline_conversions.h:933
const float a
Definition colorspaces_inline_conversions.h:1292
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
static const int row
Definition colorspaces_inline_conversions.h:175
static dt_aligned_pixel_t rgb
Definition colorspaces_inline_conversions.h:530
void dt_control_log(const char *msg,...)
Definition control.c:530
void dt_print(dt_debug_thread_t thread, const char *msg,...)
Definition darktable.c:1530
#define DT_ALIGNED_PIXEL
Definition darktable.h:313
@ DT_DEBUG_OPENCL
Definition darktable.h:642
#define dt_pixelpipe_cache_alloc_align_float_cache(pixels, id)
Definition darktable.h:371
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:377
static int FC(const int row, const int col, const unsigned int filters)
Definition data/kernels/common.h:47
@ DT_IOP_GREEN_EQ_NO
Definition demosaic.c:132
@ DT_DEV_DETAIL_MASK_DEMOSAIC
Definition develop.h:140
static const float x
Definition iop_profile.h:239
static float sqf(const float x)
Definition math.h:223
static int dt_opencl_enqueue_kernel_2d(const int dev, const int kernel, const size_t *sizes)
Definition opencl.h:574
static int dt_opencl_set_kernel_arg(const int dev, const int kernel, const size_t size, const void *arg)
Definition opencl.h:570
static void dt_opencl_release_mem_object(void *mem)
Definition opencl.h:619
static int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, const size_t *sizes, const size_t *local)
Definition opencl.h:578
static INLINE float safe_in(float a, float scale)
Definition rcd.c:90
#define w2
Definition rcd.c:82
#define RCD_TILESIZE
Definition rcd.c:54
static void rcd_ppg_border(float *const out, const float *const in, const int width, const int height, const uint32_t filters, const int margin)
Definition rcd.c:96
#define w1
Definition rcd.c:81
#define w4
Definition rcd.c:84
#define eps
Definition rcd.c:86
#define w3
Definition rcd.c:83
#define epssq
Definition rcd.c:87
static void rcd_demosaic(const dt_dev_pixelpipe_iop_t *piece, float *const restrict out, const float *const restrict in, dt_iop_roi_t *const roi_out, const dt_iop_roi_t *const roi_in, const uint32_t filters)
Definition rcd.c:287
#define RCD_BORDER
Definition rcd.c:78
#define RCD_TILEVALID
Definition rcd.c:80
#define RCD_MARGIN
Definition rcd.c:79
Definition pixelpipe_hb.h:95
dt_iop_buffer_dsc_t dsc_in
Definition pixelpipe_hb.h:140
struct dt_iop_module_t *void * data
Definition pixelpipe_hb.h:96
Definition pixelpipe_hb.h:216
dt_aligned_pixel_t processed_maximum
Definition develop/format.h:73
Definition demosaic.c:217
Definition demosaic.c:159
int kernel_rcd_step_5_1
Definition demosaic.c:206
int kernel_rcd_write_output
Definition demosaic.c:199
int kernel_rcd_step_1_1
Definition demosaic.c:200
int kernel_rcd_step_4_2
Definition demosaic.c:205
int kernel_rcd_step_5_2
Definition demosaic.c:207
int kernel_rcd_step_4_1
Definition demosaic.c:204
int kernel_rcd_border_redblue
Definition demosaic.c:208
int kernel_rcd_step_3_1
Definition demosaic.c:203
int kernel_rcd_step_1_2
Definition demosaic.c:201
int kernel_rcd_border_green
Definition demosaic.c:209
int kernel_rcd_step_2_1
Definition demosaic.c:202
int kernel_rcd_populate
Definition demosaic.c:198
int kernel_border_interpolate
Definition demosaic.c:172
int width
Definition imageop.h:68
int height
Definition imageop.h:68
#define c1
Definition colorspaces_inline_conversions.h:1054
#define MIN(a, b)
Definition thinplate.c:32