91static void rcd_ppg_border(
float *
const out,
const float *
const in,
const int width,
const int height,
const uint32_t filters,
const int margin)
93 const int border = margin + 3;
96 for(
int j = 0; j <
height; j++)
98 for(
int i = 0; i <
width; i++)
100 if(i == 3 && j >= 3 && j <
height - 3) i =
width - 3;
101 if(i ==
width)
break;
102 memset(sum, 0,
sizeof(
float) * 8);
103 for(
int y = j - 1; y != j + 2; y++)
105 for(
int x = i - 1; x != i + 2; x++)
107 if((y >= 0) && (x >= 0) && (y <
height) && (x <
width))
109 const int f =
FC(y, x, filters);
110 sum[
f] += fmaxf(0.0f, in[(
size_t)y *
width + x]);
115 const int f =
FC(j, i, filters);
116 for(
int c = 0; c < 3; c++)
118 if(c !=
f && sum[c + 4] > 0.0f)
119 out[4 * ((size_t)j *
width + i) + c] = sum[c] / sum[c + 4];
121 out[4 * ((size_t)j *
width + i) + c] = fmaxf(0.0f, in[(
size_t)j *
width + i]);
126 const float *input = in;
129#pragma omp parallel for default(none) \
130 dt_omp_firstprivate(filters, out, width, height, border) \
134 for(
int j = 3; j <
height - 3; j++)
136 float *buf = out + (size_t)4 *
width * j + 4 * 3;
137 const float *buf_in = input + (size_t)
width * j + 3;
138 for(
int i = 3; i <
width - 3; i++)
140 if(i == border && j >= border && j <
height - border)
143 buf = out + (size_t)4 *
width * j + 4 * i;
144 buf_in = input + (size_t)
width * j + i;
146 if(i ==
width)
break;
148 const int c =
FC(j, i, filters);
149 dt_aligned_pixel_t color;
150 const float pc = fmaxf(0.0f, buf_in[0]);
154 const float pym = fmaxf(0.0f, buf_in[-
width * 1]);
155 const float pym2 = fmaxf(0.0f, buf_in[-
width * 2]);
156 const float pym3 = fmaxf(0.0f, buf_in[-
width * 3]);
157 const float pyM = fmaxf(0.0f, buf_in[+
width * 1]);
158 const float pyM2 = fmaxf(0.0f, buf_in[+
width * 2]);
159 const float pyM3 = fmaxf(0.0f, buf_in[+
width * 3]);
160 const float pxm = fmaxf(0.0f, buf_in[-1]);
161 const float pxm2 = fmaxf(0.0f, buf_in[-2]);
162 const float pxm3 = fmaxf(0.0f, buf_in[-3]);
163 const float pxM = fmaxf(0.0f, buf_in[+1]);
164 const float pxM2 = fmaxf(0.0f, buf_in[+2]);
165 const float pxM3 = fmaxf(0.0f, buf_in[+3]);
167 const float guessx = (pxm + pc + pxM) * 2.0f - pxM2 - pxm2;
168 const float diffx = (fabsf(pxm2 - pc) + fabsf(pxM2 - pc) + fabsf(pxm - pxM)) * 3.0f
169 + (fabsf(pxM3 - pxM) + fabsf(pxm3 - pxm)) * 2.0f;
170 const float guessy = (pym + pc + pyM) * 2.0f - pyM2 - pym2;
171 const float diffy = (fabsf(pym2 - pc) + fabsf(pyM2 - pc) + fabsf(pym - pyM)) * 3.0f
172 + (fabsf(pyM3 - pyM) + fabsf(pym3 - pym)) * 2.0f;
176 const float m = fminf(pym, pyM);
177 const float M = fmaxf(pym, pyM);
178 color[1] = fmaxf(fminf(guessy * .25f, M),
m);
182 const float m = fminf(pxm, pxM);
183 const float M = fmaxf(pxm, pxM);
184 color[1] = fmaxf(fminf(guessx * .25f, M),
m);
191 memcpy(buf, color,
sizeof(
float) * 4);
198#pragma omp parallel for default(none) \
199 dt_omp_firstprivate(filters, out, width, height, margin) \
202 for(
int j = 1; j <
height - 1; j++)
204 float *buf = out + (size_t)4 *
width * j + 4;
205 for(
int i = 1; i <
width - 1; i++)
207 if(i == margin && j >= margin && j <
height - margin)
210 buf = out + (size_t)4 * (
width * j + i);
212 const int c =
FC(j, i, filters);
213 dt_aligned_pixel_t color = { buf[0], buf[1], buf[2], buf[3] };
214 const int linesize = 4 *
width;
217 if(__builtin_expect(c & 1, 1))
221 const float *nt = buf - linesize;
222 const float *nb = buf + linesize;
223 const float *nl = buf - 4;
224 const float *nr = buf + 4;
225 if(
FC(j, i + 1, filters) == 0)
227 color[2] = (nt[2] + nb[2] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
228 color[0] = (nl[0] + nr[0] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
233 color[0] = (nt[0] + nb[0] + 2.0f * color[1] - nt[1] - nb[1]) * .5f;
234 color[2] = (nl[2] + nr[2] + 2.0f * color[1] - nl[1] - nr[1]) * .5f;
240 const float *ntl = buf - 4 - linesize;
241 const float *ntr = buf + 4 - linesize;
242 const float *nbl = buf - 4 + linesize;
243 const float *nbr = buf + 4 + linesize;
248 const float diff1 = fabsf(ntl[2] - nbr[2]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
249 const float guess1 = ntl[2] + nbr[2] + 2.0f * color[1] - ntl[1] - nbr[1];
250 const float diff2 = fabsf(ntr[2] - nbl[2]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
251 const float guess2 = ntr[2] + nbl[2] + 2.0f * color[1] - ntr[1] - nbl[1];
253 color[2] = guess2 * .5f;
254 else if(diff1 < diff2)
255 color[2] = guess1 * .5f;
257 color[2] = (guess1 + guess2) * .25f;
261 const float diff1 = fabsf(ntl[0] - nbr[0]) + fabsf(ntl[1] - color[1]) + fabsf(nbr[1] - color[1]);
262 const float guess1 = ntl[0] + nbr[0] + 2.0f * color[1] - ntl[1] - nbr[1];
263 const float diff2 = fabsf(ntr[0] - nbl[0]) + fabsf(ntr[1] - color[1]) + fabsf(nbl[1] - color[1]);
264 const float guess2 = ntr[0] + nbl[0] + 2.0f * color[1] - ntr[1] - nbl[1];
266 color[0] = guess2 * .5f;
267 else if(diff1 < diff2)
268 color[0] = guess1 * .5f;
270 color[0] = (guess1 + guess2) * .25f;
273 memcpy(buf, color,
sizeof(
float) * 4);
283 const dt_iop_roi_t *
const roi_in,
const uint32_t filters)
296 const float scaler = fmaxf(piece->
pipe->dsc.processed_maximum[0], fmaxf(piece->
pipe->dsc.processed_maximum[1], piece->
pipe->dsc.processed_maximum[2]));
297 const float revscaler = 1.0f / scaler;
303 #pragma omp parallel \
304 dt_omp_firstprivate(width, height, filters, out, in, scaler, revscaler)
321 float *
const lpf = PQ_Dir;
326 #pragma omp for schedule(simd:dynamic, 6) collapse(2) nowait
328 for(
int tile_vertical = 0; tile_vertical < num_vertical; tile_vertical++)
330 for(
int tile_horizontal = 0; tile_horizontal < num_horizontal; tile_horizontal++)
350 for(
int row = rowStart; row < rowEnd; row++)
352 const int c0 =
FC(row, colStart, filters);
353 const int c1 =
FC(row, colStart + 1, filters);
354 for(
int col = colStart, indx = (row - rowStart) *
RCD_TILESIZE, in_indx = row *
width + colStart; col < colEnd; col++, indx++, in_indx++)
356 cfa[indx] = rgb[c0][indx] = rgb[
c1][indx] =
safe_in(in[in_indx], revscaler);
363 for(
int row = 3; row <
MIN(tileRows - 3, 5); row++ )
365 for(
int col = 4, indx = row *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
367 bufferV[row - 3][col - 4] =
sqf((cfa[indx -
w3] - cfa[indx -
w1] - cfa[indx +
w1] + cfa[indx +
w3]) - 3.0f * (cfa[indx -
w2] + cfa[indx +
w2]) + 6.0f * cfa[indx]);
376 float* V0 = bufferV[0];
377 float* V1 = bufferV[1];
378 float* V2 = bufferV[2];
379 for(
int row = 4; row < tileRows - 4; row++ )
381 for(
int col = 3, indx = row *
RCD_TILESIZE + col; col < tileCols - 3; col++, indx++)
383 bufferH[col - 3] =
sqf((cfa[indx - 3] - cfa[indx - 1] - cfa[indx + 1] + cfa[indx + 3]) - 3.0f * (cfa[indx - 2] + cfa[indx + 2]) + 6.0f * cfa[indx]);
385 for(
int col = 4, indx = (row + 1) *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++)
387 V2[col - 4] =
sqf((cfa[indx -
w3] - cfa[indx -
w1] - cfa[indx +
w1] + cfa[indx +
w3]) - 3.0f * (cfa[indx -
w2] + cfa[indx +
w2]) + 6.0f * cfa[indx]);
389 for(
int col = 4, indx = row *
RCD_TILESIZE + col; col < tileCols - 4; col++, indx++ )
391 const float V_Stat = fmaxf(
epssq, V0[col - 4] + V1[col - 4] + V2[col - 4]);
392 const float H_Stat = fmaxf(
epssq, bufferH[col - 4] + bufferH[col - 3] + bufferH[col - 2]);
393 VH_Dir[indx] = V_Stat / ( V_Stat + H_Stat );
396 float* tmp = V0; V0 = V1; V1 = V2; V2 = tmp;
401 for(
int row = 2; row < tileRows - 2; row++)
403 for(
int col = 2 + (
FC(row, 0, filters) & 1), indx = row *
RCD_TILESIZE + col, lp_indx = indx / 2; col < tileCols - 2; col += 2, indx +=2, lp_indx++)
405 lpf[lp_indx] = cfa[indx]
406 + 0.5f * (cfa[indx -
w1] + cfa[indx +
w1] + cfa[indx - 1] + cfa[indx + 1])
407 + 0.25f * (cfa[indx -
w1 - 1] + cfa[indx -
w1 + 1] + cfa[indx +
w1 - 1] + cfa[indx +
w1 + 1]);
413 for(
int row = 4; row < tileRows - 4; row++)
415 for(
int col = 4 + (
FC(row, 0, filters) & 1), indx = row *
RCD_TILESIZE + col, lpindx = indx / 2; col < tileCols - 4; col += 2, indx += 2, lpindx++)
417 const float cfai = cfa[indx];
420 const float N_Grad =
eps + fabs(cfa[indx -
w1] - cfa[indx +
w1]) + fabs(cfai - cfa[indx -
w2]) + fabs(cfa[indx -
w1] - cfa[indx -
w3]) + fabs(cfa[indx -
w2] - cfa[indx -
w4]);
421 const float S_Grad =
eps + fabs(cfa[indx -
w1] - cfa[indx +
w1]) + fabs(cfai - cfa[indx +
w2]) + fabs(cfa[indx +
w1] - cfa[indx +
w3]) + fabs(cfa[indx +
w2] - cfa[indx +
w4]);
422 const float W_Grad =
eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx - 2]) + fabs(cfa[indx - 1] - cfa[indx - 3]) + fabs(cfa[indx - 2] - cfa[indx - 4]);
423 const float E_Grad =
eps + fabs(cfa[indx - 1] - cfa[indx + 1]) + fabs(cfai - cfa[indx + 2]) + fabs(cfa[indx + 1] - cfa[indx + 3]) + fabs(cfa[indx + 2] - cfa[indx + 4]);
426 const float lpfi = lpf[lpindx];
427 const float N_Est = cfa[indx -
w1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx -
w1]);
428 const float S_Est = cfa[indx +
w1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx +
w1]);
429 const float W_Est = cfa[indx - 1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx - 1]);
430 const float E_Est = cfa[indx + 1] * (lpfi + lpfi) / (
eps + lpfi + lpf[lpindx + 1]);
433 const float V_Est = (S_Grad * N_Est + N_Grad * S_Est) / (N_Grad + S_Grad);
434 const float H_Est = (W_Grad * E_Est + E_Grad * W_Est) / (E_Grad + W_Grad);
438 const float VH_Central_Value = VH_Dir[indx];
439 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx -
w1 - 1] + VH_Dir[indx -
w1 + 1] + VH_Dir[indx +
w1 - 1] + VH_Dir[indx +
w1 + 1]);
440 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value)) ? VH_Neighbourhood_Value : VH_Central_Value;
442 rgb[1][indx] =
intp(VH_Disc, H_Est, V_Est);
449 for(
int row = 3; row < tileRows - 3; row++)
451 for(
int col = 3, indx = row *
RCD_TILESIZE + col, indx2 = indx / 2; col < tileCols - 3; col+=2, indx+=2, indx2++)
453 P_CDiff_Hpf[indx2] =
sqf((cfa[indx -
w3 - 3] - cfa[indx -
w1 - 1] - cfa[indx +
w1 + 1] + cfa[indx +
w3 + 3]) - 3.0f * (cfa[indx -
w2 - 2] + cfa[indx +
w2 + 2]) + 6.0f * cfa[indx]);
454 Q_CDiff_Hpf[indx2] =
sqf((cfa[indx -
w3 + 3] - cfa[indx -
w1 + 1] - cfa[indx +
w1 - 1] + cfa[indx +
w3 - 3]) - 3.0f * (cfa[indx -
w2 + 2] + cfa[indx +
w2 - 2]) + 6.0f * cfa[indx]);
458 for(
int row = 4; row < tileRows - 4; row++)
460 for(
int col = 4 + (
FC(row, 0, filters) & 1), indx = row *
RCD_TILESIZE + col, indx2 = indx / 2, indx3 = (indx -
w1 - 1) / 2, indx4 = (indx +
w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ )
462 const float P_Stat = fmaxf(
epssq, P_CDiff_Hpf[indx3] + P_CDiff_Hpf[indx2] + P_CDiff_Hpf[indx4 + 1]);
463 const float Q_Stat = fmaxf(
epssq, Q_CDiff_Hpf[indx3 + 1] + Q_CDiff_Hpf[indx2] + Q_CDiff_Hpf[indx4]);
464 PQ_Dir[indx2] = P_Stat / (P_Stat + Q_Stat);
469 for(
int row = 4; row < tileRows - 4; row++)
471 for(
int col = 4 + (
FC(row, 0, filters) & 1), indx = row *
RCD_TILESIZE + col, c = 2 -
FC(row, col, filters), pqindx = indx / 2, pqindx2 = (indx -
w1 - 1) / 2, pqindx3 = (indx +
w1 - 1) / 2; col < tileCols - 4; col += 2, indx += 2, pqindx++, pqindx2++, pqindx3++)
474 const float PQ_Central_Value = PQ_Dir[pqindx];
475 const float PQ_Neighbourhood_Value = 0.25f * (PQ_Dir[pqindx2] + PQ_Dir[pqindx2 + 1] + PQ_Dir[pqindx3] + PQ_Dir[pqindx3 + 1]);
477 const float PQ_Disc = (fabs(0.5f - PQ_Central_Value) < fabs(0.5f - PQ_Neighbourhood_Value)) ? PQ_Neighbourhood_Value : PQ_Central_Value;
480 const float NW_Grad =
eps + fabs(rgb[c][indx -
w1 - 1] - rgb[c][indx +
w1 + 1]) + fabs(rgb[c][indx -
w1 - 1] - rgb[c][indx -
w3 - 3]) + fabs(rgb[1][indx] - rgb[1][indx -
w2 - 2]);
481 const float NE_Grad =
eps + fabs(rgb[c][indx -
w1 + 1] - rgb[c][indx +
w1 - 1]) + fabs(rgb[c][indx -
w1 + 1] - rgb[c][indx -
w3 + 3]) + fabs(rgb[1][indx] - rgb[1][indx -
w2 + 2]);
482 const float SW_Grad =
eps + fabs(rgb[c][indx -
w1 + 1] - rgb[c][indx +
w1 - 1]) + fabs(rgb[c][indx +
w1 - 1] - rgb[c][indx +
w3 - 3]) + fabs(rgb[1][indx] - rgb[1][indx +
w2 - 2]);
483 const float SE_Grad =
eps + fabs(rgb[c][indx -
w1 - 1] - rgb[c][indx +
w1 + 1]) + fabs(rgb[c][indx +
w1 + 1] - rgb[c][indx +
w3 + 3]) + fabs(rgb[1][indx] - rgb[1][indx +
w2 + 2]);
486 const float NW_Est = rgb[c][indx -
w1 - 1] - rgb[1][indx -
w1 - 1];
487 const float NE_Est = rgb[c][indx -
w1 + 1] - rgb[1][indx -
w1 + 1];
488 const float SW_Est = rgb[c][indx +
w1 - 1] - rgb[1][indx +
w1 - 1];
489 const float SE_Est = rgb[c][indx +
w1 + 1] - rgb[1][indx +
w1 + 1];
492 const float P_Est = (NW_Grad * SE_Est + SE_Grad * NW_Est) / (NW_Grad + SE_Grad);
493 const float Q_Est = (NE_Grad * SW_Est + SW_Grad * NE_Est) / (NE_Grad + SW_Grad);
496 rgb[c][indx] = rgb[1][indx] +
intp(PQ_Disc, Q_Est, P_Est);
501 for(
int row = 4; row < tileRows - 4; row++)
503 for(
int col = 4 + (
FC(row, 1, filters) & 1), indx = row *
RCD_TILESIZE + col; col < tileCols - 4; col += 2, indx +=2)
506 const float VH_Central_Value = VH_Dir[indx];
507 const float VH_Neighbourhood_Value = 0.25f * (VH_Dir[indx -
w1 - 1] + VH_Dir[indx -
w1 + 1] + VH_Dir[indx +
w1 - 1] + VH_Dir[indx +
w1 + 1]);
508 const float VH_Disc = (fabs(0.5f - VH_Central_Value) < fabs(0.5f - VH_Neighbourhood_Value) ) ? VH_Neighbourhood_Value : VH_Central_Value;
509 const float rgb1 = rgb[1][indx];
510 const float N1 =
eps + fabs(rgb1 - rgb[1][indx -
w2]);
511 const float S1 =
eps + fabs(rgb1 - rgb[1][indx +
w2]);
512 const float W1 =
eps + fabs(rgb1 - rgb[1][indx - 2]);
513 const float E1 =
eps + fabs(rgb1 - rgb[1][indx + 2]);
515 const float rgb1mw1 = rgb[1][indx -
w1];
516 const float rgb1pw1 = rgb[1][indx +
w1];
517 const float rgb1m1 = rgb[1][indx - 1];
518 const float rgb1p1 = rgb[1][indx + 1];
520 for(
int c = 0; c <= 2; c += 2)
522 const float SNabs = fabs(rgb[c][indx -
w1] - rgb[c][indx +
w1]);
523 const float EWabs = fabs(rgb[c][indx - 1] - rgb[c][indx + 1]);
526 const float N_Grad = N1 + SNabs + fabs(rgb[c][indx -
w1] - rgb[c][indx -
w3]);
527 const float S_Grad = S1 + SNabs + fabs(rgb[c][indx +
w1] - rgb[c][indx +
w3]);
528 const float W_Grad = W1 + EWabs + fabs(rgb[c][indx - 1] - rgb[c][indx - 3]);
529 const float E_Grad = E1 + EWabs + fabs(rgb[c][indx + 1] - rgb[c][indx + 3]);
532 const float N_Est = rgb[c][indx -
w1] - rgb1mw1;
533 const float S_Est = rgb[c][indx +
w1] - rgb1pw1;
534 const float W_Est = rgb[c][indx - 1] - rgb1m1;
535 const float E_Est = rgb[c][indx + 1] - rgb1p1;
538 const float V_Est = (N_Grad * S_Est + S_Grad * N_Est) / (N_Grad + S_Grad);
539 const float H_Est = (E_Grad * W_Est + W_Grad * E_Est) / (E_Grad + W_Grad);
542 rgb[c][indx] = rgb1 +
intp(VH_Disc, H_Est, V_Est);
549 const int last_vertical = rowEnd - ((tile_vertical == num_vertical - 1) ?
RCD_MARGIN :
RCD_BORDER);
551 const int last_horizontal = colEnd - ((tile_horizontal == num_horizontal - 1) ?
RCD_MARGIN :
RCD_BORDER);
552 for(
int row = first_vertical; row < last_vertical; row++)
554 for(
int col = first_horizontal, idx = (row - rowStart) *
RCD_TILESIZE + col - colStart, o_idx = (row *
width + col) * 4; col < last_horizontal; col++, o_idx += 4, idx++)
556 out[o_idx] = scaler * fmaxf(0.0f, rgb[0][idx]);
557 out[o_idx+1] = scaler * fmaxf(0.0f, rgb[1][idx]);
558 out[o_idx+2] = scaler * fmaxf(0.0f, rgb[2][idx]);