94 const int border = margin + 3;
98 for(
int j = 0; j <
height; j++)
103 memset(sum, 0,
sizeof(
float) * 8);
104 for(
int y = j - 1; y != j + 2; y++)
106 for(
int x =
i - 1;
x !=
i + 2;
x++)
110 const int f =
FC(y,
x, filters);
111 sum[
f] += fmaxf(0.0f, in[(
size_t)y *
width +
x]);
116 const int f =
FC(j,
i, filters);
117 for(
int c = 0; c < 3; c++)
119 if(c !=
f && sum[c + 4] > 0.0f)
120 out[4 * ((size_t)j *
width +
i) + c] = sum[c] / sum[c + 4];
122 out[4 * ((size_t)j *
width +
i) + c] = fmaxf(0.0f, in[(
size_t)j *
width +
i]);
128 const float *input = in;
130 for(
int j = 3; j <
height - 3; j++)
132 float *buf =
out + (size_t)4 *
width * j + 4 * 3;
133 const float *buf_in = input + (size_t)
width * j + 3;
136 if(
i == border && j >= border && j <
height - border)
139 buf =
out + (size_t)4 *
width * j + 4 *
i;
140 buf_in = input + (size_t)
width * j +
i;
144 const int c =
FC(j,
i, filters);
146 const float pc = fmaxf(0.0f, buf_in[0]);
150 const float pym = fmaxf(0.0f, buf_in[-
width * 1]);
151 const float pym2 = fmaxf(0.0f, buf_in[-
width * 2]);
152 const float pym3 = fmaxf(0.0f, buf_in[-
width * 3]);
153 const float pyM = fmaxf(0.0f, buf_in[+
width * 1]);
154 const float pyM2 = fmaxf(0.0f, buf_in[+
width * 2]);
155 const float pyM3 = fmaxf(0.0f, buf_in[+
width * 3]);
156 const float pxm = fmaxf(0.0f, buf_in[-1]);
157 const float pxm2 = fmaxf(0.0f, buf_in[-2]);
158 const float pxm3 = fmaxf(0.0f, buf_in[-3]);
159 const float pxM = fmaxf(0.0f, buf_in[+1]);
160 const float pxM2 = fmaxf(0.0f, buf_in[+2]);
161 const float pxM3 = fmaxf(0.0f, buf_in[+3]);
163 const float guessx = (pxm + pc + pxM) * 2.0f - pxM2 - pxm2;
164 const float diffx = (fabsf(pxm2 - pc) + fabsf(pxM2 - pc) + fabsf(pxm - pxM)) * 3.0f
165 + (fabsf(pxM3 - pxM) + fabsf(pxm3 - pxm)) * 2.0f;
166 const float guessy = (pym + pc + pyM) * 2.0f - pyM2 - pym2;
167 const float diffy = (fabsf(pym2 - pc) + fabsf(pyM2 - pc) + fabsf(pym - pyM)) * 3.0f
168 + (fabsf(pyM3 - pyM) + fabsf(pym3 - pym)) * 2.0f;
172 const float m = fminf(pym, pyM);
173 const float M = fmaxf(pym, pyM);
174 color[1] = fmaxf(fminf(guessy * .25f,
M),
m);
178 const float m = fminf(pxm, pxM);
179 const float M = fmaxf(pxm, pxM);
180 color[1] = fmaxf(fminf(guessx * .25f,
M),
m);
187 memcpy(buf, color,
sizeof(
float) * 4);
196 for(
int j = 1; j <
height - 1; j++)
198 float *buf =
out + (size_t)4 *
width * j + 4;
201 if(
i == margin && j >= margin && j <
height - margin)
206 const int c =
FC(j,
i, filters);
208 const int linesize = 4 *
width;
211 if(__builtin_expect(c & 1, 1))
215 const float *nt = buf - linesize;
216 const float *nb = buf + linesize;
217 const float *nl = buf - 4;
218 const float *nr = buf + 4;
219 if(
FC(j,
i + 1, filters) == 0)
221 color[2] = (nt[2] + nb[2] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
222 color[0] = (nl[0] + nr[0] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
227 color[0] = (nt[0] + nb[0] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
228 color[2] = (nl[2] + nr[2] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
234 const float *ntl = buf - 4 - linesize;
235 const float *ntr = buf + 4 - linesize;
236 const float *nbl = buf - 4 + linesize;
237 const float *nbr = buf + 4 + linesize;
242 const float diff1 = fabsf(ntl[2] - nbr[2]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
243 const float guess1 = ntl[2] + nbr[2] + 2.0f * color[1] - ntl[1] - nbr[1];
244 const float diff2 = fabsf(ntr[2] - nbl[2]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
245 const float guess2 = ntr[2] + nbl[2] + 2.0f * color[1] - ntr[1] - nbl[1];
247 color[2] = guess2 * .5f;
248 else if(diff1 < diff2)
249 color[2] = guess1 * .5f;
251 color[2] = (guess1 + guess2) * .25f;
255 const float diff1 = fabsf(ntl[0] - nbr[0]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
256 const float guess1 = ntl[0] + nbr[0] + 2.0f * color[1] - ntl[1] - nbr[1];
257 const float diff2 = fabsf(ntr[0] - nbl[0]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
258 const float guess2 = ntr[0] + nbl[0] + 2.0f * color[1] - ntr[1] - nbl[1];
260 color[0] = guess2 * .5f;
261 else if(diff1 < diff2)
262 color[0] = guess1 * .5f;
264 color[0] = (guess1 + guess2) * .25f;
267 memcpy(buf, color,
sizeof(
float) * 4);
275 const dt_iop_roi_t *
const roi_in,
const uint32_t filters)
289 const float revscaler = 1.0f / scaler;
319 #pragma omp for schedule(simd:dynamic, 6) collapse(2) nowait
321 for(
int tile_vertical = 0; tile_vertical < num_vertical; tile_vertical++)
323 for(
int tile_horizontal = 0; tile_horizontal < num_horizontal; tile_horizontal++)
343 for(
int row = rowStart;
row < rowEnd;
row++)
345 const int c0 =
FC(
row, colStart, filters);
346 const int c1 =
FC(
row, colStart + 1, filters);
347 for(
int col = colStart, indx = (
row - rowStart) *
RCD_TILESIZE, in_indx =
row *
width + colStart; col < colEnd; col++, indx++, in_indx++)
349 cfa[indx] =
rgb[c0][indx] =
rgb[
c1][indx] =
safe_in(in[in_indx], revscaler);
358 for(
int col = 4, indx =
row *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
360 bufferV[
row - 3][col - 4] = sqf((cfa[indx -
w3] - cfa[indx -
w1] - cfa[indx +
w1] + cfa[indx +
w3]) - 3.0f * (cfa[indx -
w2] + cfa[indx +
w2]) + 6.0f * cfa[indx]);
369 float* V0 = bufferV[0];
370 float* V1 = bufferV[1];
371 float* V2 = bufferV[2];
372 for(
int row = 4;
row < tileRows - 4;
row++ )
374 for(
int col = 3, indx =
row *
RCD_TILESIZE + col; col < tileCols - 3; col++, indx++)
376 bufferH[col - 3] = sqf((cfa[indx - 3] - cfa[indx - 1] - cfa[indx + 1] + cfa[indx + 3]) - 3.0f * (cfa[indx - 2] + cfa[indx + 2]) + 6.0f * cfa[indx]);
378 for(
int col = 4, indx = (
row + 1) *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++)
380 V2[col - 4] = sqf((cfa[indx -
w3] - cfa[indx -
w1] - cfa[indx +
w1] + cfa[indx +
w3]) - 3.0f * (cfa[indx -
w2] + cfa[indx +
w2]) + 6.0f * cfa[indx]);
382 for(
int col = 4, indx =
row *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
384 const float V_Stat = fmaxf(
epssq, V0[col - 4] + V1[col - 4] + V2[col - 4]);
385 const float H_Stat = fmaxf(
epssq, bufferH[col - 4] + bufferH[col - 3] + bufferH[col - 2]);
386 VH_Dir[indx] = V_Stat / ( V_Stat + H_Stat );
389 float* tmp = V0; V0 = V1; V1 = V2; V2 = tmp;
394 for(
int row = 2;
row < tileRows - 2;
row++)
396 for(
int col = 2 + (
FC(
row, 0, filters) & 1), indx =
row *
RCD_TILESIZE + col, lp_indx = indx / 2; col < tileCols - 2; col += 2, indx +=2, lp_indx++)
398 lpf[lp_indx] = cfa[indx]
399 + 0.5f * (cfa[indx -
w1] + cfa[indx +
w1] + cfa[indx - 1] + cfa[indx + 1])
400 + 0.25f * (cfa[indx -
w1 - 1] + cfa[indx -
w1 + 1] + cfa[indx +
w1 - 1] + cfa[indx +
w1 + 1]);
406 for(
int row = 4;
row < tileRows - 4;
row++)
408 for(
int col = 4 + (
FC(
row, 0, filters) & 1), indx =
row *
RCD_TILESIZE + col, lpindx = indx / 2; col < tileCols - 4; col += 2, indx += 2, lpindx++)
410 const float cfai = cfa[indx];
413 const float N_Grad =
eps + fabs(cfa[indx -
w1] - cfa[indx +
w1]) + fabs(cfai - cfa[indx -
w2]) + fabs(cfa[indx -
w1] - cfa[indx -
w3]) + fabs(cfa[indx -
w2] - cfa[indx -
w4]);
414 const float S_Grad =
eps + fabs(cfa[indx -
w1] - cfa[indx +
w1]) + fabs(cfai - cfa[indx +
w2]) + fabs(cfa[indx +
w1] - cfa[indx +
w3]) + fabs(cfa[indx +
w2] - cfa[indx +
w4]);
415 const float W_Grad =
eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx - 2]) + fabs(cfa[indx - 1] - cfa[indx - 3]) + fabs(cfa[indx - 2] - cfa[indx - 4]);
416 const float E_Grad =
eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx + 2]) + fabs(cfa[indx + 1] - cfa[indx + 3]) + fabs(cfa[indx + 2] - cfa[indx + 4]);
419 const float lpfi = lpf[lpindx];
420 const float N_Est = cfa[indx -
w1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx -
w1]);
421 const float S_Est = cfa[indx +
w1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx +
w1]);
422 const float W_Est = cfa[indx - 1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx - 1]);
423 const float E_Est = cfa[indx + 1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx + 1]);
426 const float V_Est = (S_Grad * N_Est + N_Grad * S_Est) / (N_Grad + S_Grad);
427 const float H_Est = (W_Grad * E_Est + E_Grad * W_Est) / (E_Grad + W_Grad);
431 const float VH_Central_Value = VH_Dir[indx];
432 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx -
w1 - 1] + VH_Dir[indx -
w1 + 1] + VH_Dir[indx +
w1 - 1] + VH_Dir[indx +
w1 + 1]);
433 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value)) ? VH_Neighbourhood_Value : VH_Central_Value;
435 rgb[1][indx] =
intp(VH_Disc, H_Est, V_Est);
442 for(
int row = 3;
row < tileRows - 3;
row++)
444 for(
int col = 3, indx =
row *
RCD_TILESIZE + col, indx2 = indx / 2; col < tileCols - 3; col+=2, indx+=2, indx2++)
446 P_CDiff_Hpf[indx2] = sqf((cfa[indx -
w3 - 3] - cfa[indx -
w1 - 1] - cfa[indx +
w1 + 1] + cfa[indx +
w3 + 3]) - 3.0f * (cfa[indx -
w2 - 2] + cfa[indx +
w2 + 2]) + 6.0f * cfa[indx]);
447 Q_CDiff_Hpf[indx2] = sqf((cfa[indx -
w3 + 3] - cfa[indx -
w1 + 1] - cfa[indx +
w1 - 1] + cfa[indx +
w3 - 3]) - 3.0f * (cfa[indx -
w2 + 2] + cfa[indx +
w2 - 2]) + 6.0f * cfa[indx]);
451 for(
int row = 4;
row < tileRows - 4;
row++)
453 for(
int col = 4 + (
FC(
row, 0, filters) & 1), indx =
row *
RCD_TILESIZE + col, indx2 = indx / 2, indx3 = (indx -
w1 - 1) / 2, indx4 = (indx +
w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ )
455 const float P_Stat = fmaxf(
epssq, P_CDiff_Hpf[indx3] + P_CDiff_Hpf[indx2] + P_CDiff_Hpf[indx4 + 1]);
456 const float Q_Stat = fmaxf(
epssq, Q_CDiff_Hpf[indx3 + 1] + Q_CDiff_Hpf[indx2] + Q_CDiff_Hpf[indx4]);
457 PQ_Dir[indx2] = P_Stat / (P_Stat + Q_Stat);
462 for(
int row = 4;
row < tileRows - 4;
row++)
464 for(
int col = 4 + (
FC(
row, 0, filters) & 1), indx =
row *
RCD_TILESIZE + col, c = 2 -
FC(
row, col, filters), pqindx = indx / 2, pqindx2 = (indx -
w1 - 1) / 2, pqindx3 = (indx +
w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, pqindx++, pqindx2++, pqindx3++)
467 const float PQ_Central_Value = PQ_Dir[pqindx];
468 const float PQ_Neighbourhood_Value = 0.25f * (PQ_Dir[pqindx2] + PQ_Dir[pqindx2 + 1] + PQ_Dir[pqindx3] + PQ_Dir[pqindx3 + 1]);
470 const float PQ_Disc = (fabs(0.5f - PQ_Central_Value) < fabs(0.5f - PQ_Neighbourhood_Value)) ? PQ_Neighbourhood_Value : PQ_Central_Value;
473 const float NW_Grad =
eps + fabs(
rgb[c][indx -
w1 - 1] -
rgb[c][indx +
w1 + 1]) + fabs(
rgb[c][indx -
w1 - 1] -
rgb[c][indx -
w3 - 3]) + fabs(
rgb[1][indx] -
rgb[1][indx -
w2 - 2]);
474 const float NE_Grad =
eps + fabs(
rgb[c][indx -
w1 + 1] -
rgb[c][indx +
w1 - 1]) + fabs(
rgb[c][indx -
w1 + 1] -
rgb[c][indx -
w3 + 3]) + fabs(
rgb[1][indx] -
rgb[1][indx -
w2 + 2]);
475 const float SW_Grad =
eps + fabs(
rgb[c][indx -
w1 + 1] -
rgb[c][indx +
w1 - 1]) + fabs(
rgb[c][indx +
w1 - 1] -
rgb[c][indx +
w3 - 3]) + fabs(
rgb[1][indx] -
rgb[1][indx +
w2 - 2]);
476 const float SE_Grad =
eps + fabs(
rgb[c][indx -
w1 - 1] -
rgb[c][indx +
w1 + 1]) + fabs(
rgb[c][indx +
w1 + 1] -
rgb[c][indx +
w3 + 3]) + fabs(
rgb[1][indx] -
rgb[1][indx +
w2 + 2]);
479 const float NW_Est =
rgb[c][indx -
w1 - 1] -
rgb[1][indx -
w1 - 1];
480 const float NE_Est =
rgb[c][indx -
w1 + 1] -
rgb[1][indx -
w1 + 1];
481 const float SW_Est =
rgb[c][indx +
w1 - 1] -
rgb[1][indx +
w1 - 1];
482 const float SE_Est =
rgb[c][indx +
w1 + 1] -
rgb[1][indx +
w1 + 1];
485 const float P_Est = (NW_Grad * SE_Est + SE_Grad * NW_Est) / (NW_Grad + SE_Grad);
486 const float Q_Est = (NE_Grad * SW_Est + SW_Grad * NE_Est) / (NE_Grad + SW_Grad);
489 rgb[c][indx] =
rgb[1][indx] +
intp(PQ_Disc, Q_Est, P_Est);
494 for(
int row = 4;
row < tileRows - 4;
row++)
496 for(
int col = 4 + (
FC(
row, 1, filters) & 1), indx =
row *
RCD_TILESIZE + col; col < tileCols - 4; col += 2, indx +=2)
499 const float VH_Central_Value = VH_Dir[indx];
500 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx -
w1 - 1] + VH_Dir[indx -
w1 + 1] + VH_Dir[indx +
w1 - 1] + VH_Dir[indx +
w1 + 1]);
501 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value) ) ? VH_Neighbourhood_Value : VH_Central_Value;
502 const float rgb1 =
rgb[1][indx];
503 const float N1 =
eps + fabs(rgb1 -
rgb[1][indx -
w2]);
504 const float S1 =
eps + fabs(rgb1 -
rgb[1][indx +
w2]);
505 const float W1 =
eps + fabs(rgb1 -
rgb[1][indx - 2]);
506 const float E1 =
eps + fabs(rgb1 -
rgb[1][indx + 2]);
508 const float rgb1mw1 =
rgb[1][indx -
w1];
509 const float rgb1pw1 =
rgb[1][indx +
w1];
510 const float rgb1m1 =
rgb[1][indx - 1];
511 const float rgb1p1 =
rgb[1][indx + 1];
513 for(
int c = 0; c <= 2; c += 2)
515 const float SNabs = fabs(
rgb[c][indx -
w1] -
rgb[c][indx +
w1]);
516 const float EWabs = fabs(
rgb[c][indx - 1] -
rgb[c][indx + 1]);
519 const float N_Grad = N1 + SNabs + fabs(
rgb[c][indx -
w1] -
rgb[c][indx -
w3]);
520 const float S_Grad = S1 + SNabs + fabs(
rgb[c][indx +
w1] -
rgb[c][indx +
w3]);
521 const float W_Grad = W1 + EWabs + fabs(
rgb[c][indx - 1] -
rgb[c][indx - 3]);
522 const float E_Grad = E1 + EWabs + fabs(
rgb[c][indx + 1] -
rgb[c][indx + 3]);
525 const float N_Est =
rgb[c][indx -
w1] - rgb1mw1;
526 const float S_Est =
rgb[c][indx +
w1] - rgb1pw1;
527 const float W_Est =
rgb[c][indx - 1] - rgb1m1;
528 const float E_Est =
rgb[c][indx + 1] - rgb1p1;
531 const float V_Est = (N_Grad * S_Est + S_Grad * N_Est) / (N_Grad + S_Grad);
532 const float H_Est = (E_Grad * W_Est + W_Grad * E_Est) / (E_Grad + W_Grad);
535 rgb[c][indx] = rgb1 +
intp(VH_Disc, H_Est, V_Est);
542 const int last_vertical = rowEnd - ((tile_vertical == num_vertical - 1) ?
RCD_MARGIN :
RCD_BORDER);
544 const int last_horizontal = colEnd - ((tile_horizontal == num_horizontal - 1) ?
RCD_MARGIN :
RCD_BORDER);
545 for(
int row = first_vertical;
row < last_vertical;
row++)
547 for(
int col = first_horizontal, idx = (
row - rowStart) *
RCD_TILESIZE + col - colStart, o_idx = (
row *
width + col) * 4; col < last_horizontal; col++, o_idx += 4, idx++)
549 out[o_idx] = scaler * fmaxf(0.0f,
rgb[0][idx]);
550 out[o_idx+1] = scaler * fmaxf(0.0f,
rgb[1][idx]);
551 out[o_idx+2] = scaler * fmaxf(0.0f,
rgb[2][idx]);