183 int winx = roi_out->
x;
184 int winy = roi_out->
y;
185 int winw = roi_in->
width;
186 int winh = roi_in->
height;
191 const float clip_pt8 = 0.8f * clip_pt;
202 constexpr int tsh = ts / 2;
208 if(
FC(0, 0, filters) == 1)
210 if(
FC(0, 1, filters) == 0)
223 if(
FC(0, 0, filters) == 0)
236 constexpr int v1 = ts, v2 = 2 * ts, v3 = 3 * ts, p1 = -ts + 1, p2 = -2 * ts + 2, p3 = -3 * ts + 3,
237 m1 = ts + 1, m2 = 2 * ts + 2, m3 = 3 * ts + 3;
240 constexpr float eps = 1e-5,
epssq = 1e-10;
243 constexpr float arthresh = 0.75;
246 constexpr float gaussodd[4]
247 = { 0.14659727707323927f, 0.103592713382435f, 0.0732036125103057f, 0.0365543548389495f };
249 constexpr float nyqthresh = 0.5;
252 constexpr float gaussgrad[6] = { nyqthresh * 0.07384411893421103f, nyqthresh * 0.06207511968171489f,
253 nyqthresh * 0.0521818194747806f, nyqthresh * 0.03687419286733595f,
254 nyqthresh * 0.03099732204057846f, nyqthresh * 0.018413194161458882f };
256 constexpr float gausseven[2] = { 0.13719494435797422f, 0.05640252782101291f };
258 constexpr float gquinc[4] = { 0.169917f, 0.108947f, 0.069855f, 0.0287182f };
272 constexpr int cldf = 2;
275 = (
char *)calloc(
sizeof(
float) * 14 * ts * ts +
sizeof(char) * ts * tsh + 18 * cldf * 64 + 63, 1);
277 char *data = (
char *)((uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
280 float *rgbgreen = (float(*))data;
282 float *delhvsqsum = (float(*))((
char *)rgbgreen +
sizeof(float) * ts * ts + cldf * 64);
284 float *dirwts0 = (float(*))((
char *)delhvsqsum +
sizeof(float) * ts * ts + cldf * 64);
285 float *dirwts1 = (float(*))((
char *)dirwts0 +
sizeof(float) * ts * ts + cldf * 64);
287 float *vcd = (float(*))((
char *)dirwts1 +
sizeof(float) * ts * ts + cldf * 64);
289 float *hcd = (float(*))((
char *)vcd +
sizeof(float) * ts * ts + cldf * 64);
291 float *vcdalt = (float(*))((
char *)hcd +
sizeof(float) * ts * ts + cldf * 64);
293 float *hcdalt = (float(*))((
char *)vcdalt +
sizeof(float) * ts * ts + cldf * 64);
295 float *cddiffsq = (float(*))((
char *)hcdalt +
sizeof(float) * ts * ts + cldf * 64);
297 float *hvwt = (float(*))((
char *)cddiffsq +
sizeof(float) * ts * ts + 2 * cldf * 64);
299 float(*Dgrb)[ts * tsh] = (float(*)[ts * tsh])vcdalt;
301 float *delp = (float(*))cddiffsq;
303 float *delm = (float(*))((
char *)delp +
sizeof(float) * ts * tsh + cldf * 64);
305 float *rbint = (float(*))delm;
308 s_hv *Dgrb2 = (s_hv(*))((
char *)hvwt +
sizeof(float) * ts * tsh + cldf * 64);
310 float *dgintv = (float(*))Dgrb2;
312 float *dginth = (float(*))((
char *)dgintv +
sizeof(float) * ts * ts + cldf * 64);
314 float *Dgrbsq1m = (float(*))((
char *)dginth +
sizeof(float) * ts * ts + cldf * 64);
315 float *Dgrbsq1p = (float(*))((
char *)Dgrbsq1m +
sizeof(float) * ts * tsh + cldf * 64);
317 float *cfa = (float(*))((
char *)Dgrbsq1p +
sizeof(float) * ts * tsh + cldf * 64);
319 float *pmwt = (float(*))delhvsqsum;
321 float *rbm = (float(*))vcd;
322 float *rbp = (float(*))((
char *)rbm +
sizeof(float) * ts * tsh + cldf * 64);
324 unsigned char *nyquist = (
unsigned char(*))((
char *)cfa +
sizeof(float) * ts * ts + cldf * 64);
325 unsigned char *nyquist2 = (
unsigned char(*))cddiffsq;
326 float *nyqutest = (float(*))((
char *)nyquist +
sizeof(
unsigned char) * ts * tsh + cldf * 64);
335 for(
int left = winx - 16; left < winx +
width; left += ts - 32)
337 memset(&nyquist[3 * tsh], 0,
sizeof(
unsigned char) * (ts - 6) * tsh);
341 int right =
MIN(left + ts, winx +
width + 16);
343 int rr1 = bottom -
top;
345 int cc1 = right - left;
348 int rrmin =
top < winy ? 16 : 0;
349 int ccmin = left < winx ? 16 : 0;
351 int ccmax = right > (winx +
width) ? winx +
width - left : cc1;
362 for(
int rr = 0; rr < 16; rr++)
363 for(
int cc = ccmin,
row = 32 - rr +
top; cc < ccmax; cc++)
365 cfa[rr * ts + cc] = (in[
row *
width + (cc + left)]);
366 rgbgreen[rr * ts + cc] = cfa[rr * ts + cc];
371 for(
int rr = rrmin; rr < rrmax; rr++)
375 for(
int cc = ccmin; cc < ccmax; cc++)
377 int indx1 = rr * ts + cc;
378 cfa[indx1] = (in[
row *
width + (cc + left)]);
379 rgbgreen[indx1] = cfa[indx1];
386 for(
int rr = 0; rr < 16; rr++)
387 for(
int cc = ccmin; cc < ccmax; cc++)
389 cfa[(rrmax + rr) * ts + cc] = (in[(winy +
height - rr - 2) *
width + (left + cc)]);
390 rgbgreen[(rrmax + rr) * ts + cc] = cfa[(rrmax + rr) * ts + cc];
399 for(
int rr = rrmin; rr < rrmax; rr++)
400 for(
int cc = 0,
row = rr +
top; cc < 16; cc++)
402 cfa[rr * ts + cc] = (in[
row *
width + (32 - cc + left)]);
403 rgbgreen[rr * ts + cc] = cfa[rr * ts + cc];
410 for(
int rr = rrmin; rr < rrmax; rr++)
411 for(
int cc = 0; cc < 16; cc++)
413 cfa[rr * ts + ccmax + cc] = (in[(
top + rr) *
width + ((winx +
width - cc - 2))]);
414 rgbgreen[rr * ts + ccmax + cc] = cfa[rr * ts + ccmax + cc];
419 if(rrmin > 0 && ccmin > 0)
421 for(
int rr = 0; rr < 16; rr++)
422 for(
int cc = 0; cc < 16; cc++)
424 cfa[(rr)*ts + cc] = (in[(winy + 32 - rr) *
width + (winx + 32 - cc)]);
425 rgbgreen[(rr)*ts + cc] = cfa[(rr)*ts + cc];
429 if(rrmax < rr1 && ccmax < cc1)
431 for(
int rr = 0; rr < 16; rr++)
432 for(
int cc = 0; cc < 16; cc++)
434 cfa[(rrmax + rr) * ts + ccmax + cc]
436 rgbgreen[(rrmax + rr) * ts + ccmax + cc] = cfa[(rrmax + rr) * ts + ccmax + cc];
440 if(rrmin > 0 && ccmax < cc1)
442 for(
int rr = 0; rr < 16; rr++)
443 for(
int cc = 0; cc < 16; cc++)
445 cfa[(rr)*ts + ccmax + cc] = (in[(winy + 32 - rr) *
width + ((winx +
width - cc - 2))]);
446 rgbgreen[(rr)*ts + ccmax + cc] = cfa[(rr)*ts + ccmax + cc];
450 if(rrmax < rr1 && ccmin > 0)
452 for(
int rr = 0; rr < 16; rr++)
453 for(
int cc = 0; cc < 16; cc++)
455 cfa[(rrmax + rr) * ts + cc] = (in[(winy +
height - rr - 2) *
width + ((winx + 32 - cc))]);
456 rgbgreen[(rrmax + rr) * ts + cc] = cfa[(rrmax + rr) * ts + cc];
463 for(
int rr = 2; rr < rr1 - 2; rr++)
464 for(
int cc = 2, indx = (rr)*ts + cc; cc < cc1 - 2; cc++, indx++)
466 float delh = fabsf(cfa[indx + 1] - cfa[indx - 1]);
467 float delv = fabsf(cfa[indx + v1] - cfa[indx - v1]);
469 =
eps + fabsf(cfa[indx + v2] - cfa[indx]) + fabsf(cfa[indx] - cfa[indx - v2]) + delv;
470 dirwts1[indx] =
eps + fabsf(cfa[indx + 2] - cfa[indx]) + fabsf(cfa[indx] - cfa[indx - 2]) + delh;
471 delhvsqsum[indx] =
SQR(delh) +
SQR(delv);
476 for(
int rr = 4; rr < rr1 - 4; rr++)
478 bool fcswitch =
FC(rr, 4, filters) & 1;
480 for(
int cc = 4, indx = rr * ts + cc; cc < cc1 - 4; cc++, indx++)
484 float cru = cfa[indx - v1] * (dirwts0[indx - v2] + dirwts0[indx])
485 / (dirwts0[indx - v2] * (
eps + cfa[indx]) + dirwts0[indx] * (
eps + cfa[indx - v2]));
486 float crd = cfa[indx + v1] * (dirwts0[indx + v2] + dirwts0[indx])
487 / (dirwts0[indx + v2] * (
eps + cfa[indx]) + dirwts0[indx] * (
eps + cfa[indx + v2]));
488 float crl = cfa[indx - 1] * (dirwts1[indx - 2] + dirwts1[indx])
489 / (dirwts1[indx - 2] * (
eps + cfa[indx]) + dirwts1[indx] * (
eps + cfa[indx - 2]));
490 float crr = cfa[indx + 1] * (dirwts1[indx + 2] + dirwts1[indx])
491 / (dirwts1[indx + 2] * (
eps + cfa[indx]) + dirwts1[indx] * (
eps + cfa[indx + 2]));
494 float guha = cfa[indx - v1] +
xdiv2f(cfa[indx] - cfa[indx - v2]);
495 float gdha = cfa[indx + v1] +
xdiv2f(cfa[indx] - cfa[indx + v2]);
496 float glha = cfa[indx - 1] +
xdiv2f(cfa[indx] - cfa[indx - 2]);
497 float grha = cfa[indx + 1] +
xdiv2f(cfa[indx] - cfa[indx + 2]);
500 float guar, gdar, glar, grar;
502 if(fabsf(1.f - cru) < arthresh)
504 guar = cfa[indx] * cru;
511 if(fabsf(1.f - crd) < arthresh)
513 gdar = cfa[indx] * crd;
520 if(fabsf(1.f - crl) < arthresh)
522 glar = cfa[indx] * crl;
529 if(fabsf(1.f - crr) < arthresh)
531 grar = cfa[indx] * crr;
539 float hwt = dirwts1[indx - 1] / (dirwts1[indx - 1] + dirwts1[indx + 1]);
540 float vwt = dirwts0[indx - v1] / (dirwts0[indx + v1] + dirwts0[indx - v1]);
543 float Gintvha = vwt * gdha + (1.f - vwt) * guha;
544 float Ginthha = hwt * grha + (1.f - hwt) * glha;
549 vcd[indx] = cfa[indx] - (vwt * gdar + (1.f - vwt) * guar);
550 hcd[indx] = cfa[indx] - (hwt * grar + (1.f - hwt) * glar);
551 vcdalt[indx] = cfa[indx] - Gintvha;
552 hcdalt[indx] = cfa[indx] - Ginthha;
557 vcd[indx] = (vwt * gdar + (1.f - vwt) * guar) - cfa[indx];
558 hcd[indx] = (hwt * grar + (1.f - hwt) * glar) - cfa[indx];
559 vcdalt[indx] = Gintvha - cfa[indx];
560 hcdalt[indx] = Ginthha - cfa[indx];
563 fcswitch = !fcswitch;
565 if(cfa[indx] > clip_pt8 || Gintvha > clip_pt8 || Ginthha > clip_pt8)
572 vcd[indx] = vcdalt[indx];
573 hcd[indx] = hcdalt[indx];
577 dgintv[indx] =
MIN(
SQR(guha - gdha),
SQR(guar - gdar));
578 dginth[indx] =
MIN(
SQR(glha - grha),
SQR(glar - grar));
583 for(
int rr = 4; rr < rr1 - 4; rr++)
585 for(
int cc = 4, indx = rr * ts + cc, c =
FC(rr, cc, filters) & 1; cc < cc1 - 4; cc++, indx++)
587 float hcdvar = 3.f * (
SQR(hcd[indx - 2]) +
SQR(hcd[indx]) +
SQR(hcd[indx + 2]))
588 -
SQR(hcd[indx - 2] + hcd[indx] + hcd[indx + 2]);
589 float hcdaltvar = 3.f * (
SQR(hcdalt[indx - 2]) +
SQR(hcdalt[indx]) +
SQR(hcdalt[indx + 2]))
590 -
SQR(hcdalt[indx - 2] + hcdalt[indx] + hcdalt[indx + 2]);
591 float vcdvar = 3.f * (
SQR(vcd[indx - v2]) +
SQR(vcd[indx]) +
SQR(vcd[indx + v2]))
592 -
SQR(vcd[indx - v2] + vcd[indx] + vcd[indx + v2]);
593 float vcdaltvar = 3.f * (
SQR(vcdalt[indx - v2]) +
SQR(vcdalt[indx]) +
SQR(vcdalt[indx + v2]))
594 -
SQR(vcdalt[indx - v2] + vcdalt[indx] + vcdalt[indx + v2]);
597 if(hcdaltvar < hcdvar)
599 hcd[indx] = hcdalt[indx];
602 if(vcdaltvar < vcdvar)
604 vcd[indx] = vcdalt[indx];
613 Ginth = -hcd[indx] + cfa[indx];
614 Gintv = -vcd[indx] + cfa[indx];
618 if(3.f * hcd[indx] > (Ginth + cfa[indx]))
620 hcd[indx] = -
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) + cfa[indx];
624 float hwt = 1.f - 3.f * hcd[indx] / (
eps + Ginth + cfa[indx]);
625 hcd[indx] = hwt * hcd[indx]
626 + (1.f - hwt) * (-
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) + cfa[indx]);
632 if(3.f * vcd[indx] > (Gintv + cfa[indx]))
634 vcd[indx] = -
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) + cfa[indx];
638 float vwt = 1.f - 3.f * vcd[indx] / (
eps + Gintv + cfa[indx]);
639 vcd[indx] = vwt * vcd[indx]
640 + (1.f - vwt) * (-
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) + cfa[indx]);
646 hcd[indx] = -
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) + cfa[indx];
651 vcd[indx] = -
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) + cfa[indx];
657 Ginth = hcd[indx] + cfa[indx];
658 Gintv = vcd[indx] + cfa[indx];
662 if(3.f * hcd[indx] < -(Ginth + cfa[indx]))
664 hcd[indx] =
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) - cfa[indx];
668 float hwt = 1.f + 3.f * hcd[indx] / (
eps + Ginth + cfa[indx]);
669 hcd[indx] = hwt * hcd[indx]
670 + (1.f - hwt) * (
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) - cfa[indx]);
676 if(3.f * vcd[indx] < -(Gintv + cfa[indx]))
678 vcd[indx] =
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) - cfa[indx];
682 float vwt = 1.f + 3.f * vcd[indx] / (
eps + Gintv + cfa[indx]);
683 vcd[indx] = vwt * vcd[indx]
684 + (1.f - vwt) * (
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) - cfa[indx]);
690 hcd[indx] =
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]) - cfa[indx];
695 vcd[indx] =
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]) - cfa[indx];
698 cddiffsq[indx] =
SQR(vcd[indx] - hcd[indx]);
705 for(
int rr = 6; rr < rr1 - 6; rr++)
707 for(
int cc = 6 + (
FC(rr, 2, filters) & 1), indx = rr * ts + cc; cc < cc1 - 6; cc += 2, indx += 2)
712 float uave = vcd[indx] + vcd[indx - v1] + vcd[indx - v2] + vcd[indx - v3];
713 float dave = vcd[indx] + vcd[indx + v1] + vcd[indx + v2] + vcd[indx + v3];
714 float lave = hcd[indx] + hcd[indx - 1] + hcd[indx - 2] + hcd[indx - 3];
715 float rave = hcd[indx] + hcd[indx + 1] + hcd[indx + 2] + hcd[indx + 3];
718 float Dgrbvvaru =
SQR(vcd[indx] - uave) +
SQR(vcd[indx - v1] - uave) +
SQR(vcd[indx - v2] - uave)
719 +
SQR(vcd[indx - v3] - uave);
720 float Dgrbvvard =
SQR(vcd[indx] - dave) +
SQR(vcd[indx + v1] - dave) +
SQR(vcd[indx + v2] - dave)
721 +
SQR(vcd[indx + v3] - dave);
722 float Dgrbhvarl =
SQR(hcd[indx] - lave) +
SQR(hcd[indx - 1] - lave) +
SQR(hcd[indx - 2] - lave)
723 +
SQR(hcd[indx - 3] - lave);
724 float Dgrbhvarr =
SQR(hcd[indx] - rave) +
SQR(hcd[indx + 1] - rave) +
SQR(hcd[indx + 2] - rave)
725 +
SQR(hcd[indx + 3] - rave);
727 float hwt = dirwts1[indx - 1] / (dirwts1[indx - 1] + dirwts1[indx + 1]);
728 float vwt = dirwts0[indx - v1] / (dirwts0[indx + v1] + dirwts0[indx - v1]);
730 float vcdvar =
epssq + vwt * Dgrbvvard + (1.f - vwt) * Dgrbvvaru;
731 float hcdvar =
epssq + hwt * Dgrbhvarr + (1.f - hwt) * Dgrbhvarl;
734 Dgrbvvaru = (dgintv[indx]) + (dgintv[indx - v1]) + (dgintv[indx - v2]);
735 Dgrbvvard = (dgintv[indx]) + (dgintv[indx + v1]) + (dgintv[indx + v2]);
736 Dgrbhvarl = (dginth[indx]) + (dginth[indx - 1]) + (dginth[indx - 2]);
737 Dgrbhvarr = (dginth[indx]) + (dginth[indx + 1]) + (dginth[indx + 2]);
739 float vcdvar1 =
epssq + vwt * Dgrbvvard + (1.f - vwt) * Dgrbvvaru;
740 float hcdvar1 =
epssq + hwt * Dgrbhvarr + (1.f - hwt) * Dgrbhvarl;
743 float varwt = hcdvar / (vcdvar + hcdvar);
744 float diffwt = hcdvar1 / (vcdvar1 + hcdvar1);
749 if((0.5 - varwt) * (0.5 - diffwt) > 0 && fabsf(0.5f - diffwt) < fabsf(0.5f - varwt))
751 hvwt[indx >> 1] = varwt;
755 hvwt[indx >> 1] = diffwt;
761 for(
int rr = 6; rr < rr1 - 6; rr++)
763 int cc = 6 + (
FC(rr, 2, filters) & 1);
764 int indx = rr * ts + cc;
766 for(; cc < cc1 - 6; cc += 2, indx += 2)
769 = (gaussodd[0] * cddiffsq[indx]
770 + gaussodd[1] * (cddiffsq[(indx - m1)] + cddiffsq[(indx + p1)] + cddiffsq[(indx - p1)]
771 + cddiffsq[(indx + m1)])
772 + gaussodd[2] * (cddiffsq[(indx - v2)] + cddiffsq[(indx - 2)] + cddiffsq[(indx + 2)]
773 + cddiffsq[(indx + v2)])
774 + gaussodd[3] * (cddiffsq[(indx - m2)] + cddiffsq[(indx + p2)] + cddiffsq[(indx - p2)]
775 + cddiffsq[(indx + m2)]))
776 - (gaussgrad[0] * delhvsqsum[indx]
777 + gaussgrad[1] * (delhvsqsum[indx - v1] + delhvsqsum[indx + 1] + delhvsqsum[indx - 1]
778 + delhvsqsum[indx + v1])
779 + gaussgrad[2] * (delhvsqsum[indx - m1] + delhvsqsum[indx + p1] + delhvsqsum[indx - p1]
780 + delhvsqsum[indx + m1])
781 + gaussgrad[3] * (delhvsqsum[indx - v2] + delhvsqsum[indx - 2] + delhvsqsum[indx + 2]
782 + delhvsqsum[indx + v2])
783 + gaussgrad[4] * (delhvsqsum[indx - v2 - 1] + delhvsqsum[indx - v2 + 1]
784 + delhvsqsum[indx - ts - 2] + delhvsqsum[indx - ts + 2]
785 + delhvsqsum[indx + ts - 2] + delhvsqsum[indx + ts + 2]
786 + delhvsqsum[indx + v2 - 1] + delhvsqsum[indx + v2 + 1])
787 + gaussgrad[5] * (delhvsqsum[indx - m2] + delhvsqsum[indx + p2] + delhvsqsum[indx - p2]
788 + delhvsqsum[indx + m2]));
795 int nystartcol = ts + 1;
798 for(
int rr = 6; rr < rr1 - 6; rr++)
800 for(
int cc = 6 + (
FC(rr, 2, filters) & 1), indx = rr * ts + cc; cc < cc1 - 6; cc += 2, indx += 2)
805 if(nyqutest[indx >> 1] > 0.f)
807 nyquist[indx >> 1] = 1;
808 nystartrow = nystartrow ? nystartrow : rr;
810 nystartcol = nystartcol > cc ? cc : nystartcol;
811 nyendcol = nyendcol < cc ? cc : nyendcol;
817 bool doNyquist = nystartrow != nyendrow && nystartcol != nyendcol;
823 nystartcol -= (nystartcol & 1);
824 nystartrow = std::max(8, nystartrow);
825 nyendrow = std::min(rr1 - 8, nyendrow);
826 nystartcol = std::max(8, nystartcol);
827 nyendcol = std::min(cc1 - 8, nyendcol);
828 memset(&nyquist2[4 * tsh], 0,
sizeof(
char) * (ts - 8) * tsh);
830 for(
int rr = nystartrow; rr < nyendrow; rr++)
832 for(
int indx = rr * ts + nystartcol + (
FC(rr, 2, filters) & 1); indx < rr * ts + nyendcol;
835 unsigned int nyquisttemp
836 = (nyquist[(indx - v2) >> 1] + nyquist[(indx - m1) >> 1] + nyquist[(indx + p1) >> 1]
837 + nyquist[(indx - 2) >> 1] + nyquist[(indx + 2) >> 1] + nyquist[(indx - p1) >> 1]
838 + nyquist[(indx + m1) >> 1] + nyquist[(indx + v2) >> 1]);
840 nyquist2[indx >> 1] = nyquisttemp > 4 ? 1 : (nyquisttemp < 4 ? 0 : nyquist[indx >> 1]);
847 for(
int rr = nystartrow; rr < nyendrow; rr++)
848 for(
int indx = rr * ts + nystartcol + (
FC(rr, 2, filters) & 1); indx < rr * ts + nyendcol;
852 if(nyquist2[indx >> 1])
856 float sumcfa = 0.f, sumh = 0.f, sumv = 0.f, sumsqh = 0.f, sumsqv = 0.f, areawt = 0.f;
858 for(
int i = -6;
i < 7;
i += 2)
860 int indx1 = indx + (
i * ts) - 6;
862 for(
int j = -6; j < 7; j += 2, indx1 += 2)
864 if(nyquist2[indx1 >> 1])
866 float cfatemp = cfa[indx1];
868 sumh += (cfa[indx1 - 1] + cfa[indx1 + 1]);
869 sumv += (cfa[indx1 - v1] + cfa[indx1 + v1]);
870 sumsqh +=
SQR(cfatemp - cfa[indx1 - 1]) +
SQR(cfatemp - cfa[indx1 + 1]);
871 sumsqv +=
SQR(cfatemp - cfa[indx1 - v1]) +
SQR(cfatemp - cfa[indx1 + v1]);
878 sumh = sumcfa -
xdiv2f(sumh);
879 sumv = sumcfa -
xdiv2f(sumv);
881 float hcdvar =
epssq + fabsf(areawt * sumsqh - sumh * sumh);
882 float vcdvar =
epssq + fabsf(areawt * sumsqv - sumv * sumv);
883 hvwt[indx >> 1] = hcdvar / (vcdvar + hcdvar);
892 for(
int rr = 8; rr < rr1 - 8; rr++)
893 for(
int indx = rr * ts + 8 + (
FC(rr, 2, filters) & 1); indx < rr * ts + cc1 - 8; indx += 2)
897 float hvwtalt =
xdivf(hvwt[(indx - m1) >> 1] + hvwt[(indx + p1) >> 1] + hvwt[(indx - p1) >> 1]
898 + hvwt[(indx + m1) >> 1],
902 = fabsf(0.5f - hvwt[indx >> 1]) < fabsf(0.5f - hvwtalt) ? hvwtalt : hvwt[indx >> 1];
905 Dgrb[0][indx >> 1] =
intp(hvwt[indx >> 1], vcd[indx], hcd[indx]);
907 rgbgreen[indx] = cfa[indx] + Dgrb[0][indx >> 1];
910 Dgrb2[indx >> 1].h = nyquist2[indx >> 1]
911 ?
SQR(rgbgreen[indx] -
xdiv2f(rgbgreen[indx - 1] + rgbgreen[indx + 1]))
913 Dgrb2[indx >> 1].v = nyquist2[indx >> 1]
914 ?
SQR(rgbgreen[indx] -
xdiv2f(rgbgreen[indx - v1] + rgbgreen[indx + v1]))
924 for(
int rr = nystartrow; rr < nyendrow; rr++)
925 for(
int indx = rr * ts + nystartcol + (
FC(rr, 2, filters) & 1); indx < rr * ts + nyendcol;
929 if(nyquist2[indx >> 1])
933 =
epssq + (gquinc[0] * Dgrb2[indx >> 1].h
934 + gquinc[1] * (Dgrb2[(indx - m1) >> 1].h + Dgrb2[(indx + p1) >> 1].h
935 + Dgrb2[(indx - p1) >> 1].h + Dgrb2[(indx + m1) >> 1].h)
936 + gquinc[2] * (Dgrb2[(indx - v2) >> 1].h + Dgrb2[(indx - 2) >> 1].h
937 + Dgrb2[(indx + 2) >> 1].h + Dgrb2[(indx + v2) >> 1].h)
938 + gquinc[3] * (Dgrb2[(indx - m2) >> 1].h + Dgrb2[(indx + p2) >> 1].h
939 + Dgrb2[(indx - p2) >> 1].h + Dgrb2[(indx + m2) >> 1].h));
941 =
epssq + (gquinc[0] * Dgrb2[indx >> 1].v
942 + gquinc[1] * (Dgrb2[(indx - m1) >> 1].
v + Dgrb2[(indx + p1) >> 1].v
943 + Dgrb2[(indx - p1) >> 1].
v + Dgrb2[(indx + m1) >> 1].v)
944 + gquinc[2] * (Dgrb2[(indx - v2) >> 1].v + Dgrb2[(indx - 2) >> 1].
v
945 + Dgrb2[(indx + 2) >> 1].v + Dgrb2[(indx + v2) >> 1].
v)
946 + gquinc[3] * (Dgrb2[(indx - m2) >> 1].
v + Dgrb2[(indx + p2) >> 1].v
947 + Dgrb2[(indx - p2) >> 1].
v + Dgrb2[(indx + m2) >> 1].v));
949 Dgrb[0][indx >> 1] = (hcd[indx] * gvarv + vcd[indx] * gvarh) / (gvarv + gvarh);
950 rgbgreen[indx] = cfa[indx] + Dgrb[0][indx >> 1];
955 for(
int rr = 6; rr < rr1 - 6; rr++)
957 if((
FC(rr, 2, filters) & 1) == 0)
959 for(
int cc = 6, indx = rr * ts + cc; cc < cc1 - 6; cc += 2, indx += 2)
961 delp[indx >> 1] = fabsf(cfa[indx + p1] - cfa[indx - p1]);
962 delm[indx >> 1] = fabsf(cfa[indx + m1] - cfa[indx - m1]);
964 = (
SQR(cfa[indx + 1] - cfa[indx + 1 - p1]) +
SQR(cfa[indx + 1] - cfa[indx + 1 + p1]));
966 = (
SQR(cfa[indx + 1] - cfa[indx + 1 - m1]) +
SQR(cfa[indx + 1] - cfa[indx + 1 + m1]));
971 for(
int cc = 6, indx = rr * ts + cc; cc < cc1 - 6; cc += 2, indx += 2)
973 Dgrbsq1p[indx >> 1] = (
SQR(cfa[indx] - cfa[indx - p1]) +
SQR(cfa[indx] - cfa[indx + p1]));
974 Dgrbsq1m[indx >> 1] = (
SQR(cfa[indx] - cfa[indx - m1]) +
SQR(cfa[indx] - cfa[indx + m1]));
975 delp[indx >> 1] = fabsf(cfa[indx + 1 + p1] - cfa[indx + 1 - p1]);
976 delm[indx >> 1] = fabsf(cfa[indx + 1 + m1] - cfa[indx + 1 - m1]);
984 for(
int rr = 8; rr < rr1 - 8; rr++)
986 for(
int cc = 8 + (
FC(rr, 2, filters) & 1), indx = rr * ts + cc, indx1 = indx >> 1; cc < cc1 - 8;
987 cc += 2, indx += 2, indx1++)
991 float crse =
xmul2f(cfa[indx + m1]) / (
eps + cfa[indx] + (cfa[indx + m2]));
992 float crnw =
xmul2f(cfa[indx - m1]) / (
eps + cfa[indx] + (cfa[indx - m2]));
993 float crne =
xmul2f(cfa[indx + p1]) / (
eps + cfa[indx] + (cfa[indx + p2]));
994 float crsw =
xmul2f(cfa[indx - p1]) / (
eps + cfa[indx] + (cfa[indx - p2]));
996 float rbse, rbnw, rbne, rbsw;
999 if(fabsf(1.f - crse) < arthresh)
1001 rbse = cfa[indx] * crse;
1005 rbse = (cfa[indx + m1]) +
xdiv2f(cfa[indx] - cfa[indx + m2]);
1008 if(fabsf(1.f - crnw) < arthresh)
1010 rbnw = cfa[indx] * crnw;
1014 rbnw = (cfa[indx - m1]) +
xdiv2f(cfa[indx] - cfa[indx - m2]);
1017 if(fabsf(1.f - crne) < arthresh)
1019 rbne = cfa[indx] * crne;
1023 rbne = (cfa[indx + p1]) +
xdiv2f(cfa[indx] - cfa[indx + p2]);
1026 if(fabsf(1.f - crsw) < arthresh)
1028 rbsw = cfa[indx] * crsw;
1032 rbsw = (cfa[indx - p1]) +
xdiv2f(cfa[indx] - cfa[indx - p2]);
1035 float wtse =
eps + delm[indx1] + delm[(indx + m1) >> 1]
1036 + delm[(indx + m2) >> 1];
1037 float wtnw =
eps + delm[indx1] + delm[(indx - m1) >> 1] + delm[(indx - m2) >> 1];
1038 float wtne =
eps + delp[indx1] + delp[(indx + p1) >> 1] + delp[(indx + p2) >> 1];
1039 float wtsw =
eps + delp[indx1] + delp[(indx - p1) >> 1] + delp[(indx - p2) >> 1];
1042 rbm[indx1] = (wtse * rbnw + wtnw * rbse) / (wtse + wtnw);
1043 rbp[indx1] = (wtne * rbsw + wtsw * rbne) / (wtne + wtsw);
1048 + (gausseven[0] * (Dgrbsq1m[(indx - v1) >> 1] + Dgrbsq1m[(indx - 1) >> 1]
1049 + Dgrbsq1m[(indx + 1) >> 1] + Dgrbsq1m[(indx + v1) >> 1])
1050 + gausseven[1] * (Dgrbsq1m[(indx - v2 - 1) >> 1] + Dgrbsq1m[(indx - v2 + 1) >> 1]
1051 + Dgrbsq1m[(indx - 2 - v1) >> 1] + Dgrbsq1m[(indx + 2 - v1) >> 1]
1052 + Dgrbsq1m[(indx - 2 + v1) >> 1] + Dgrbsq1m[(indx + 2 + v1) >> 1]
1053 + Dgrbsq1m[(indx + v2 - 1) >> 1] + Dgrbsq1m[(indx + v2 + 1) >> 1]));
1056 / ((
epssq + (gausseven[0] * (Dgrbsq1p[(indx - v1) >> 1] + Dgrbsq1p[(indx - 1) >> 1]
1057 + Dgrbsq1p[(indx + 1) >> 1] + Dgrbsq1p[(indx + v1) >> 1])
1059 * (Dgrbsq1p[(indx - v2 - 1) >> 1] + Dgrbsq1p[(indx - v2 + 1) >> 1]
1060 + Dgrbsq1p[(indx - 2 - v1) >> 1] + Dgrbsq1p[(indx + 2 - v1) >> 1]
1061 + Dgrbsq1p[(indx - 2 + v1) >> 1] + Dgrbsq1p[(indx + 2 + v1) >> 1]
1062 + Dgrbsq1p[(indx + v2 - 1) >> 1] + Dgrbsq1p[(indx + v2 + 1) >> 1])))
1067 if(rbp[indx1] < cfa[indx])
1069 if(
xmul2f(rbp[indx1]) < cfa[indx])
1071 rbp[indx1] =
ULIM(rbp[indx1], cfa[indx - p1], cfa[indx + p1]);
1075 float pwt =
xmul2f(cfa[indx] - rbp[indx1]) / (
eps + rbp[indx1] + cfa[indx]);
1077 = pwt * rbp[indx1] + (1.f - pwt) *
ULIM(rbp[indx1], cfa[indx - p1], cfa[indx + p1]);
1081 if(rbm[indx1] < cfa[indx])
1083 if(
xmul2f(rbm[indx1]) < cfa[indx])
1085 rbm[indx1] =
ULIM(rbm[indx1], cfa[indx - m1], cfa[indx + m1]);
1089 float mwt =
xmul2f(cfa[indx] - rbm[indx1]) / (
eps + rbm[indx1] + cfa[indx]);
1091 = mwt * rbm[indx1] + (1.f - mwt) *
ULIM(rbm[indx1], cfa[indx - m1], cfa[indx + m1]);
1095 if(rbp[indx1] > clip_pt)
1097 rbp[indx1] =
ULIM(rbp[indx1], cfa[indx - p1], cfa[indx + p1]);
1100 if(rbm[indx1] > clip_pt)
1102 rbm[indx1] =
ULIM(rbm[indx1], cfa[indx - m1], cfa[indx + m1]);
1107 for(
int rr = 10; rr < rr1 - 10; rr++)
1108 for(
int cc = 10 + (
FC(rr, 2, filters) & 1), indx = rr * ts + cc, indx1 = indx >> 1; cc < cc1 - 10;
1109 cc += 2, indx += 2, indx1++)
1113 float pmwtalt =
xdivf(pmwt[(indx - m1) >> 1] + pmwt[(indx + p1) >> 1] + pmwt[(indx - p1) >> 1]
1114 + pmwt[(indx + m1) >> 1],
1117 if(fabsf(0.5f - pmwt[indx1]) < fabsf(0.5f - pmwtalt))
1119 pmwt[indx1] = pmwtalt;
1122 rbint[indx1] =
xdiv2f(cfa[indx] + rbm[indx1] * (1.f - pmwt[indx1])
1123 + rbp[indx1] * pmwt[indx1]);
1127 for(
int rr = 12; rr < rr1 - 12; rr++)
1128 for(
int cc = 12 + (
FC(rr, 2, filters) & 1), indx = rr * ts + cc, indx1 = indx >> 1; cc < cc1 - 12;
1129 cc += 2, indx += 2, indx1++)
1132 if(fabsf(0.5f - pmwt[indx >> 1]) < fabsf(0.5f - hvwt[indx >> 1]))
1141 float cru = cfa[indx - v1] * 2.0 / (
eps + rbint[indx1] + rbint[(indx1 - v1)]);
1142 float crd = cfa[indx + v1] * 2.0 / (
eps + rbint[indx1] + rbint[(indx1 + v1)]);
1143 float crl = cfa[indx - 1] * 2.0 / (
eps + rbint[indx1] + rbint[(indx1 - 1)]);
1144 float crr = cfa[indx + 1] * 2.0 / (
eps + rbint[indx1] + rbint[(indx1 + 1)]);
1147 float gu, gd, gl, gr;
1150 if(fabsf(1.f - cru) < arthresh)
1152 gu = rbint[indx1] * cru;
1156 gu = cfa[indx - v1] +
xdiv2f(rbint[indx1] - rbint[(indx1 - v1)]);
1159 if(fabsf(1.f - crd) < arthresh)
1161 gd = rbint[indx1] * crd;
1165 gd = cfa[indx + v1] +
xdiv2f(rbint[indx1] - rbint[(indx1 + v1)]);
1168 if(fabsf(1.f - crl) < arthresh)
1170 gl = rbint[indx1] * crl;
1174 gl = cfa[indx - 1] +
xdiv2f(rbint[indx1] - rbint[(indx1 - 1)]);
1177 if(fabsf(1.f - crr) < arthresh)
1179 gr = rbint[indx1] * crr;
1183 gr = cfa[indx + 1] +
xdiv2f(rbint[indx1] - rbint[(indx1 + 1)]);
1187 float Gintv = (dirwts0[indx - v1] * gd + dirwts0[indx + v1] * gu)
1188 / (dirwts0[indx + v1] + dirwts0[indx - v1]);
1190 = (dirwts1[indx - 1] * gr + dirwts1[indx + 1] * gl) / (dirwts1[indx - 1] + dirwts1[indx + 1]);
1193 if(Gintv < rbint[indx1])
1195 if(2 * Gintv < rbint[indx1])
1197 Gintv =
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]);
1201 float vwt = 2.0 * (rbint[indx1] - Gintv) / (
eps + Gintv + rbint[indx1]);
1202 Gintv = vwt * Gintv + (1.f - vwt) *
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]);
1206 if(Ginth < rbint[indx1])
1208 if(2 * Ginth < rbint[indx1])
1210 Ginth =
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]);
1214 float hwt = 2.0 * (rbint[indx1] - Ginth) / (
eps + Ginth + rbint[indx1]);
1215 Ginth = hwt * Ginth + (1.f - hwt) *
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]);
1221 Ginth =
ULIM(Ginth, cfa[indx - 1], cfa[indx + 1]);
1226 Gintv =
ULIM(Gintv, cfa[indx - v1], cfa[indx + v1]);
1229 rgbgreen[indx] = Ginth * (1.f - hvwt[indx1]) + Gintv * hvwt[indx1];
1230 Dgrb[0][indx >> 1] = rgbgreen[indx] - cfa[indx];
1237 for(
int rr = 13 - ey; rr < rr1 - 12; rr += 2)
1238 for(
int indx1 = (rr * ts + 13 - ex) >> 1; indx1<(rr * ts + cc1 - 12)>> 1; indx1++)
1240 Dgrb[1][indx1] = Dgrb[0][indx1];
1244 for(
int rr = 14; rr < rr1 - 14; rr++)
1245 for(
int cc = 14 + (
FC(rr, 2, filters) & 1), indx = rr * ts + cc, c = 1 -
FC(rr, cc, filters) / 2;
1246 cc < cc1 - 14; cc += 2, indx += 2)
1248 float wtnw = 1.f / (
eps + fabsf(Dgrb[c][(indx - m1) >> 1] - Dgrb[c][(indx + m1) >> 1])
1249 + fabsf(Dgrb[c][(indx - m1) >> 1] - Dgrb[c][(indx - m3) >> 1])
1250 + fabsf(Dgrb[c][(indx + m1) >> 1] - Dgrb[c][(indx - m3) >> 1]));
1251 float wtne = 1.f / (
eps + fabsf(Dgrb[c][(indx + p1) >> 1] - Dgrb[c][(indx - p1) >> 1])
1252 + fabsf(Dgrb[c][(indx + p1) >> 1] - Dgrb[c][(indx + p3) >> 1])
1253 + fabsf(Dgrb[c][(indx - p1) >> 1] - Dgrb[c][(indx + p3) >> 1]));
1254 float wtsw = 1.f / (
eps + fabsf(Dgrb[c][(indx - p1) >> 1] - Dgrb[c][(indx + p1) >> 1])
1255 + fabsf(Dgrb[c][(indx - p1) >> 1] - Dgrb[c][(indx + m3) >> 1])
1256 + fabsf(Dgrb[c][(indx + p1) >> 1] - Dgrb[c][(indx - p3) >> 1]));
1257 float wtse = 1.f / (
eps + fabsf(Dgrb[c][(indx + m1) >> 1] - Dgrb[c][(indx - m1) >> 1])
1258 + fabsf(Dgrb[c][(indx + m1) >> 1] - Dgrb[c][(indx - p3) >> 1])
1259 + fabsf(Dgrb[c][(indx - m1) >> 1] - Dgrb[c][(indx + m3) >> 1]));
1262 = (wtnw * (1.325f * Dgrb[c][(indx - m1) >> 1] - 0.175f * Dgrb[c][(indx - m3) >> 1]
1263 - 0.075f * Dgrb[c][(indx - m1 - 2) >> 1] - 0.075f * Dgrb[c][(indx - m1 - v2) >> 1])
1264 + wtne * (1.325f * Dgrb[c][(indx + p1) >> 1] - 0.175f * Dgrb[c][(indx + p3) >> 1]
1265 - 0.075f * Dgrb[c][(indx + p1 + 2) >> 1]
1266 - 0.075f * Dgrb[c][(indx + p1 + v2) >> 1])
1267 + wtsw * (1.325f * Dgrb[c][(indx - p1) >> 1] - 0.175f * Dgrb[c][(indx - p3) >> 1]
1268 - 0.075f * Dgrb[c][(indx - p1 - 2) >> 1]
1269 - 0.075f * Dgrb[c][(indx - p1 - v2) >> 1])
1270 + wtse * (1.325f * Dgrb[c][(indx + m1) >> 1] - 0.175f * Dgrb[c][(indx + m3) >> 1]
1271 - 0.075f * Dgrb[c][(indx + m1 + 2) >> 1]
1272 - 0.075f * Dgrb[c][(indx + m1 + v2) >> 1]))
1273 / (wtnw + wtne + wtsw + wtse);
1276 for(
int rr = 16; rr < rr1 - 16; rr++)
1279 int col = left + 16;
1280 int indx = rr * ts + 16;
1282 if((
FC(rr, 2, filters) & 1) == 1)
1284 for(; indx < rr * ts + cc1 - 16 - (cc1 & 1); indx++, col++)
1288 float temp = 1.f / (hvwt[(indx - v1) >> 1] + 2.f - hvwt[(indx + 1) >> 1]
1289 - hvwt[(indx - 1) >> 1] + hvwt[(indx + v1) >> 1]);
1292 - ((hvwt[(indx - v1) >> 1]) * Dgrb[0][(indx - v1) >> 1]
1293 + (1.f - hvwt[(indx + 1) >> 1]) * Dgrb[0][(indx + 1) >> 1]
1294 + (1.f - hvwt[(indx - 1) >> 1]) * Dgrb[0][(indx - 1) >> 1]
1295 + (hvwt[(indx + v1) >> 1]) * Dgrb[0][(indx + v1) >> 1])
1300 - ((hvwt[(indx - v1) >> 1]) * Dgrb[1][(indx - v1) >> 1]
1301 + (1.f - hvwt[(indx + 1) >> 1]) * Dgrb[1][(indx + 1) >> 1]
1302 + (1.f - hvwt[(indx - 1) >> 1]) * Dgrb[1][(indx - 1) >> 1]
1303 + (hvwt[(indx + v1) >> 1]) * Dgrb[1][(indx + v1) >> 1])
1313 =
clampnan(rgbgreen[indx] - Dgrb[0][indx >> 1], 0.0, 1.0);
1315 =
clampnan(rgbgreen[indx] - Dgrb[1][indx >> 1], 0.0, 1.0);
1323 float temp = 1.f / (hvwt[(indx - v1) >> 1] + 2.f - hvwt[(indx + 1) >> 1]
1324 - hvwt[(indx - 1) >> 1] + hvwt[(indx + v1) >> 1]);
1327 - ((hvwt[(indx - v1) >> 1]) * Dgrb[0][(indx - v1) >> 1]
1328 + (1.f - hvwt[(indx + 1) >> 1]) * Dgrb[0][(indx + 1) >> 1]
1329 + (1.f - hvwt[(indx - 1) >> 1]) * Dgrb[0][(indx - 1) >> 1]
1330 + (hvwt[(indx + v1) >> 1]) * Dgrb[0][(indx + v1) >> 1])
1335 - ((hvwt[(indx - v1) >> 1]) * Dgrb[1][(indx - v1) >> 1]
1336 + (1.f - hvwt[(indx + 1) >> 1]) * Dgrb[1][(indx + 1) >> 1]
1337 + (1.f - hvwt[(indx - 1) >> 1]) * Dgrb[1][(indx - 1) >> 1]
1338 + (hvwt[(indx + v1) >> 1]) * Dgrb[1][(indx + v1) >> 1])
1346 for(; indx < rr * ts + cc1 - 16 - (cc1 & 1); indx++, col++)
1351 =
clampnan(rgbgreen[indx] - Dgrb[0][indx >> 1], 0.0f, 1.0f);
1353 =
clampnan(rgbgreen[indx] - Dgrb[1][indx >> 1], 0.0f, 1.0f);
1360 float temp = 1.f / (hvwt[(indx - v1) >> 1] + 2.f - hvwt[(indx + 1) >> 1]
1361 - hvwt[(indx - 1) >> 1] + hvwt[(indx + v1) >> 1]);
1365 - ((hvwt[(indx - v1) >> 1]) * Dgrb[0][(indx - v1) >> 1]
1366 + (1.0f - hvwt[(indx + 1) >> 1]) * Dgrb[0][(indx + 1) >> 1]
1367 + (1.0f - hvwt[(indx - 1) >> 1]) * Dgrb[0][(indx - 1) >> 1]
1368 + (hvwt[(indx + v1) >> 1]) * Dgrb[0][(indx + v1) >> 1])
1374 - ((hvwt[(indx - v1) >> 1]) * Dgrb[1][(indx - v1) >> 1]
1375 + (1.0f - hvwt[(indx + 1) >> 1]) * Dgrb[1][(indx + 1) >> 1]
1376 + (1.0f - hvwt[(indx - 1) >> 1]) * Dgrb[1][(indx - 1) >> 1]
1377 + (hvwt[(indx + v1) >> 1]) * Dgrb[1][(indx + v1) >> 1])
1388 =
clampnan(rgbgreen[indx] - Dgrb[0][indx >> 1], 0.0f, 1.0f);
1390 =
clampnan(rgbgreen[indx] - Dgrb[1][indx >> 1], 0.0f, 1.0f);
1397 for(
int rr = 16; rr < rr1 - 16; rr++)
1401 for(; cc < cc1 - 16; cc++)
1403 int col = cc + left;
1404 int indx = rr * ts + cc;