135 const dt_iop_roi_t *
const roi_in,
const uint32_t filters,
const uint32_t mode,
float *
const restrict gamma_in,
float *
const restrict gamma_out)
147 float h1 = expf( -1.0f / 8.0f);
148 float h2 = expf( -4.0f / 8.0f);
149 float h3 = expf( -9.0f / 8.0f);
150 float h4 = expf(-16.0f / 8.0f);
151 float hs = h0 + 2.0f * (h1 + h2 + h3 + h4);
159 const int medians = (mode < 2) ? mode : 3;
161 const int refine = (mode > 2) ? mode - 2 : 0;
163 const float scaler = fmaxf(piece->
pipe->dsc.processed_maximum[0], fmaxf(piece->
pipe->dsc.processed_maximum[1], piece->
pipe->dsc.processed_maximum[2]));
164 const float revscaler = 1.0f / scaler;
169 #pragma omp parallel \
170 dt_omp_firstprivate(width, height, out, in, scaler, revscaler, filters)
177 for(
int i = 1; i < 6; i++)
184 #pragma omp for schedule(simd:dynamic, 6) collapse(2)
186 for(
int tile_vertical = 0; tile_vertical < num_vertical; tile_vertical++)
188 for(
int tile_horizontal = 0; tile_horizontal < num_horizontal; tile_horizontal++)
206 int idx = row *
width + colStart;
209 cfa[0] =
calc_gamma(revscaler * in[idx], gamma_in);
214 for(
int rr = 2; rr < last_rr - 2; rr++)
217 for(
int cc = 2 + (
FC(rr, 2, filters) & 1); cc < last_cc - 2; cc += 2)
219 float *cfa = qix[5] + rr *
LMMSE_GRP + cc;
220 const float v0 = 0.0625f * (cfa[-
w1 - 1] + cfa[-
w1 + 1] + cfa[
w1 - 1] + cfa[
w1 + 1]) + 0.25f * cfa[0];
222 float *hdiff = qix[0] + rr *
LMMSE_GRP + cc;
223 hdiff[0] = -0.25f * (cfa[ -2] + cfa[ 2]) + 0.5f * (cfa[ -1] + cfa[0] + cfa[ 1]);
224 const float Y0 = v0 + 0.5f * hdiff[0];
225 hdiff[0] = (cfa[0] > 1.75f * Y0) ?
median3f(hdiff[0], cfa[ -1], cfa[ 1]) :
limf(hdiff[0], 0.0f, 1.0f);
229 float *vdiff = qix[1] + rr *
LMMSE_GRP + cc;
230 vdiff[0] = -0.25f * (cfa[-
w2] + cfa[
w2]) + 0.5f * (cfa[-
w1] + cfa[0] + cfa[
w1]);
231 const float Y1 = v0 + 0.5f * vdiff[0];
232 vdiff[0] = (cfa[0] > 1.75f * Y1) ?
median3f(vdiff[0], cfa[-
w1], cfa[
w1]) :
limf(vdiff[0], 0.0f, 1.0f);
237 for(
int ccc = 2 + (
FC(rr, 3, filters) & 1); ccc < last_cc - 2; ccc += 2)
239 float *cfa = qix[5] + rr *
LMMSE_GRP + ccc;
240 float *hdiff = qix[0] + rr *
LMMSE_GRP + ccc;
241 float *vdiff = qix[1] + rr *
LMMSE_GRP + ccc;
242 hdiff[0] = 0.25f * (cfa[ -2] + cfa[ 2]) - 0.5f * (cfa[ -1] + cfa[0] + cfa[ 1]);
243 vdiff[0] = 0.25f * (cfa[-
w2] + cfa[
w2]) - 0.5f * (cfa[-
w1] + cfa[0] + cfa[
w1]);
244 hdiff[0] =
limf(hdiff[0], -1.0f, 0.0f) + cfa[0];
245 vdiff[0] =
limf(vdiff[0], -1.0f, 0.0f) + cfa[0];
250 for (
int rr = 4; rr < last_rr - 4; rr++)
252 for(
int cc = 4; cc < last_cc - 4; cc++)
254 float *hdiff = qix[0] + rr *
LMMSE_GRP + cc;
255 float *vdiff = qix[1] + rr *
LMMSE_GRP + cc;
256 float *hlp = qix[2] + rr *
LMMSE_GRP + cc;
257 float *vlp = qix[3] + rr *
LMMSE_GRP + cc;
258 hlp[0] = h0 * hdiff[0] + h1 * (hdiff[ -1] + hdiff[ 1]) + h2 * (hdiff[ -2] + hdiff[ 2]) + h3 * (hdiff[ -3] + hdiff[ 3]) + h4 * (hdiff[ -4] + hdiff[ 4]);
259 vlp[0] = h0 * vdiff[0] + h1 * (vdiff[-
w1] + vdiff[
w1]) + h2 * (vdiff[-
w2] + vdiff[
w2]) + h3 * (vdiff[-
w3] + vdiff[
w3]) + h4 * (vdiff[-
w4] + vdiff[
w4]);
263 for(
int rr = 4; rr < last_rr - 4; rr++)
265 for(
int cc = 4 + (
FC(rr, 4, filters) & 1); cc < last_cc - 4; cc += 2)
267 float *hdiff = qix[0] + rr *
LMMSE_GRP + cc;
268 float *vdiff = qix[1] + rr *
LMMSE_GRP + cc;
269 float *hlp = qix[2] + rr *
LMMSE_GRP + cc;
270 float *vlp = qix[3] + rr *
LMMSE_GRP + cc;
271 float *interp = qix[4] + rr *
LMMSE_GRP + cc;
282 float mu = (p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9) / 9.0f;
283 float vx = 1e-7f +
sqf(p1 - mu) +
sqf(p2 - mu) +
sqf(p3 - mu) +
sqf(p4 - mu) +
sqf(p5 - mu) +
sqf(p6 - mu) +
sqf(p7 - mu) +
sqf(p8 - mu) +
sqf(p9 - mu);
294 float xh = (hdiff[0] * vx + hlp[0] * vn) / (vx + vn);
295 float vh = vx * vn / (vx + vn);
307 mu = (p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9) / 9.0f;
308 vx = 1e-7f +
sqf(p1 - mu) +
sqf(p2 - mu) +
sqf(p3 - mu) +
sqf(p4 - mu) +
sqf(p5 - mu) +
sqf(p6 - mu) +
sqf(p7 - mu) +
sqf(p8 - mu) +
sqf(p9 - mu);
319 float xv = (vdiff[0] * vx + vlp[0] * vn) / (vx + vn);
320 float vv = vx * vn / (vx + vn);
322 interp[0] = (xh * vv + xv * vh) / (vh + vv);
327 for(
int rr = 0, row_in = rowStart -
BORDER_AROUND; rr < last_rr; rr++, row_in++)
329 for(
int cc = 0, col_in = colStart -
BORDER_AROUND; cc < last_cc; cc++, col_in++)
331 const int c =
FC(rr, cc, filters);
332 const gboolean inside = ((row_in >= 0) && (row_in <
height) && (col_in >= 0) && (col_in <
width));
333 float *colc = qix[c] + rr *
LMMSE_GRP + cc;
334 colc[0] = (inside) ? qix[5][rr *
LMMSE_GRP + cc] : 0.0f;
337 float *col1 = qix[1] + rr *
LMMSE_GRP + cc;
338 float *interp = qix[4] + rr *
LMMSE_GRP + cc;
339 col1[0] = (inside) ? colc[0] + interp[0] : 0.0f;
346 for(
int rr = 1; rr < last_rr - 1; rr++)
348 for(
int cc = 1 + (
FC(rr, 2, filters) & 1), c =
FC(rr, cc + 1, filters); cc < last_cc - 1; cc += 2)
350 float *colc = qix[c] + rr *
LMMSE_GRP + cc;
351 float *col1 = qix[1] + rr *
LMMSE_GRP + cc;
352 colc[0] = col1[0] + 0.5f * (colc[ -1] - col1[ -1] + colc[ 1] - col1[ 1]);
355 colc[0] = col1[0] + 0.5f * (colc[-
w1] - col1[-
w1] + colc[
w1] - col1[
w1]);
361 for(
int rr = 1; rr < last_rr - 1; rr++)
363 for(
int cc = 1 + (
FC(rr, 1, filters) & 1), c = 2 -
FC(rr, cc, filters); cc < last_cc - 1; cc += 2)
365 float *colc = qix[c] + rr *
LMMSE_GRP + cc;
366 float *col1 = qix[1] + rr *
LMMSE_GRP + cc;
367 colc[0] = col1[0] + 0.25f * (colc[-
w1] - col1[-
w1] + colc[ -1] - col1[ -1] + colc[ 1] - col1[ 1] + colc[
w1] - col1[
w1]);
373 const int ccmin = (tile_horizontal == 0) ? 6 : 0 ;
374 const int ccmax = last_cc - ((tile_horizontal == num_horizontal - 1) ? 6 : 0);
375 const int rrmin = (tile_vertical == 0) ? 6 : 0 ;
376 const int rrmax = last_rr - ((tile_vertical == num_vertical - 1) ? 6 : 0);
379 for(
int pass = 0; pass < medians; pass++)
383 for(
int rr = 1; rr < last_rr - 1; rr++)
385 for(
int c = 0; c < 3; c += 2)
387 const int d = c + 3 - (c == 0 ? 0 : 1);
388 for(
int cc = 1; cc < last_cc - 1; cc++)
390 float *corr = qix[d] + rr *
LMMSE_GRP + cc;
391 float *colc = qix[c] + rr *
LMMSE_GRP + cc;
392 float *col1 = qix[1] + rr *
LMMSE_GRP + cc;
395 colc[-
w1 ] - col1[-
w1 ],
396 colc[-
w1+1] - col1[-
w1+1],
397 colc[ -1] - col1[ -1],
400 colc[
w1-1] - col1[
w1-1],
401 colc[
w1 ] - col1[
w1 ],
402 colc[
w1+1] - col1[
w1+1]);
408 for(
int rr = rrmin; rr < rrmax - 1; rr++)
410 float *col0 = qix[0] + rr *
LMMSE_GRP + ccmin;
411 float *col1 = qix[1] + rr *
LMMSE_GRP + ccmin;
412 float *col2 = qix[2] + rr *
LMMSE_GRP + ccmin;
413 float *corr3 = qix[3] + rr *
LMMSE_GRP + ccmin;
414 float *corr4 = qix[4] + rr *
LMMSE_GRP + ccmin;
415 int c0 =
FC(rr, 0, filters);
416 int c1 =
FC(rr, 1, filters);
421 const int d =
c1 + 3 - (
c1 == 0 ? 0 : 1);
424 float *corr_d = qix[d] + rr *
LMMSE_GRP + ccmin;
425 for(cc = ccmin; cc < ccmax - 1; cc += 2)
427 col0[0] = col1[0] + corr3[0];
428 col2[0] = col1[0] + corr4[0];
436 col_c1[0] = col1[0] + corr_d[0];
437 col1[0] = 0.5f * (col0[0] - corr3[0] + col2[0] - corr4[0]);
449 col0[0] = col1[0] + corr3[0];
450 col2[0] = col1[0] + corr4[0];
456 const int d = c0 + 3 - (c0 == 0 ? 0 : 1);
457 float *col_c0 = qix[c0] + rr *
LMMSE_GRP + ccmin;
458 float *corr_d = qix[d] + rr *
LMMSE_GRP + ccmin;
460 for(cc = ccmin; cc < ccmax - 1; cc += 2)
462 col_c0[0] = col1[0] + corr_d[0];
463 col1[0] = 0.5f * (col0[0] - corr3[0] + col2[0] - corr4[0]);
471 col0[0] = col1[0] + corr3[0];
472 col2[0] = col1[0] + corr4[0];
484 col_c0[0] = col1[0] + corr_d[0];
485 col1[0] = 0.5f * (col0[0] - corr3[0] + col2[0] - corr4[0]);
492 for(
int rrr = 4; rrr < last_rr - 4; rrr++)
494 for(
int ccc = 4; ccc < last_cc - 4; ccc++)
497 const int c =
FC(rrr, ccc, filters);
498 qix[c][idx] = qix[5][idx];
503 for(
int step = 0; step <
refine; step++)
506 for(
int rr = rrmin + 2; rr < rrmax - 2; rr++)
508 for(
int cc = ccmin + 2 + (
FC(rr, 2, filters) & 1), c =
FC(rr, cc, filters); cc < ccmax - 2; cc += 2)
510 float *rgb1 = qix[1] + rr *
LMMSE_GRP + cc;
511 float *rgbc = qix[c] + rr *
LMMSE_GRP + cc;
513 const float dL = 1.0f / (1.0f + fabsf(rgbc[ -2] - rgbc[0]) + fabsf(rgb1[ 1] - rgb1[ -1]));
514 const float dR = 1.0f / (1.0f + fabsf(rgbc[ 2] - rgbc[0]) + fabsf(rgb1[ 1] - rgb1[ -1]));
515 const float dU = 1.0f / (1.0f + fabsf(rgbc[-
w2] - rgbc[0]) + fabsf(rgb1[
w1] - rgb1[-
w1]));
516 const float dD = 1.0f / (1.0f + fabsf(rgbc[
w2] - rgbc[0]) + fabsf(rgb1[
w1] - rgb1[-
w1]));
517 rgb1[0] = (rgbc[0] + ((rgb1[-1] - rgbc[-1]) * dL + (rgb1[1] - rgbc[1]) * dR + (rgb1[-
w1] - rgbc[-
w1]) * dU + (rgb1[
w1] - rgbc[
w1]) * dD ) / (dL + dR + dU + dD));
521 for(
int rr = rrmin + 2; rr < rrmax - 2; rr++)
523 for(
int cc = ccmin + 2 + (
FC(rr, 3, filters) & 1), c =
FC(rr, cc + 1, filters); cc < ccmax - 2; cc += 2)
525 for(
int i = 0; i < 2; c = 2 - c, i++)
527 float *rgb1 = qix[1] + rr *
LMMSE_GRP + cc;
528 float *rgbc = qix[c] + rr *
LMMSE_GRP + cc;
530 const float dL = 1.0f / (1.0f + fabsf(rgb1[ -2] - rgb1[0]) + fabsf(rgbc[ 1] - rgbc[ -1]));
531 const float dR = 1.0f / (1.0f + fabsf(rgb1[ 2] - rgb1[0]) + fabsf(rgbc[ 1] - rgbc[ -1]));
532 const float dU = 1.0f / (1.0f + fabsf(rgb1[-
w2] - rgb1[0]) + fabsf(rgbc[
w1] - rgbc[-
w1]));
533 const float dD = 1.0f / (1.0f + fabsf(rgb1[
w2] - rgb1[0]) + fabsf(rgbc[
w1] - rgbc[-
w1]));
534 rgbc[0] = (rgb1[0] - ((rgb1[-1] - rgbc[-1]) * dL + (rgb1[1] - rgbc[1]) * dR + (rgb1[-
w1] - rgbc[-
w1]) * dU + (rgb1[
w1] - rgbc[
w1]) * dD ) / (dL + dR + dU + dD));
539 for(
int rr = rrmin + 2; rr < rrmax - 2; rr++)
541 for(
int cc = ccmin + 2 + (
FC(rr, 2, filters) & 1), c = 2 -
FC(rr, cc, filters); cc < ccmax - 2; cc += 2)
544 float *rgb1 = qix[1] + rr *
LMMSE_GRP + cc;
545 float *rgbc = qix[c] + rr *
LMMSE_GRP + cc;
546 float *rgbd = qix[d] + rr *
LMMSE_GRP + cc;
548 const float dL = 1.0f / (1.0f + fabsf(rgbd[ -2] - rgbd[0]) + fabsf(rgb1[ 1] - rgb1[ -1]));
549 const float dR = 1.0f / (1.0f + fabsf(rgbd[ 2] - rgbd[0]) + fabsf(rgb1[ 1] - rgb1[ -1]));
550 const float dU = 1.0f / (1.0f + fabsf(rgbd[-
w2] - rgbd[0]) + fabsf(rgb1[
w1] - rgb1[-
w1]));
551 const float dD = 1.0f / (1.0f + fabsf(rgbd[
w2] - rgbd[0]) + fabsf(rgb1[
w1] - rgb1[-
w1]));
552 rgbc[0] = (rgb1[0] - ((rgb1[-1] - rgbc[-1]) * dL + (rgb1[1] - rgbc[1]) * dR + (rgb1[-
w1] - rgbc[-
w1]) * dU + (rgb1[
w1] - rgbc[
w1]) * dD ) / (dL + dR + dU + dD));
559 const int first_vertical = rowStart + ((tile_vertical == 0) ? 0 :
LMMSE_OVERLAP);
560 const int last_vertical = rowEnd - ((tile_vertical == num_vertical - 1) ? 0 :
LMMSE_OVERLAP);
561 const int first_horizontal = colStart + ((tile_horizontal == 0) ? 0 :
LMMSE_OVERLAP);
562 const int last_horizontal = colEnd - ((tile_horizontal == num_horizontal - 1) ? 0 :
LMMSE_OVERLAP);
563 for(
int row = first_vertical, rr = row - rowStart +
BORDER_AROUND; row < last_vertical; row++, rr++)
565 float *dest = out + 4 * (row *
width + first_horizontal);
567 float *col0 = qix[0] + idx;
568 float *col1 = qix[1] + idx;
569 float *col2 = qix[2] + idx;
570 for(
int col = first_horizontal; col < last_horizontal; col++, dest +=4, col0++, col1++, col2++)
572 dest[0] = scaler *
calc_gamma(col0[0], gamma_out);
573 dest[1] = scaler *
calc_gamma(col1[0], gamma_out);
574 dest[2] = scaler *
calc_gamma(col2[0], gamma_out);