29#define MAX(a, b) ((a) < (b) ? (b) : (a))
32#define MIN(a, b) ((a) > (b) ? (b) : (a))
51 union {
float f; uint32_t
i; } vx = {
x };
53 y *= 8.2629582881927490e-8f;
54 return y - 87.989971088f;
61 = sqrt((
x[0] - y[0]) * (
x[0] - y[0]) + (
x[1] - y[1]) * (
x[1] - y[1]) + (
x[2] - y[2]) * (
x[2] - y[2]));
62 return r *
r * logf(
MAX(1e-8f,
r));
70 const double **target,
71 const double *residual_L,
72 const double *residual_a,
73 const double *residual_b,
80 for(
int i = 0;
i < wd;
i++)
83 const double Lt = target[0][
i];
91 const double localerr = sqrt(residual_L[
i] * residual_L[
i] + residual_a[
i] * residual_a[
i] + residual_b[
i] * residual_b[
i]);
94 merr =
MAX(merr, localerr);
101 err += (residual_L[
i]*residual_L[
i] +
102 residual_a[
i]*residual_a[
i] + residual_b[
i]*residual_b[
i])/wd;
104 const double Lt = target[0][
i];
111 const double Lt = target[0][
i];
114 err =
MAX(err, dL*dL +
115 residual_a[
i]*residual_a[
i] + residual_b[
i]*residual_b[
i]);
119 if(maxerr) *maxerr = merr;
123static inline int solve(
double *As,
double *w,
double *
v,
const double *b,
double *
coeff,
int wd,
int s,
int S)
130 dsvd(As, wd, s + 1,
S, w,
v);
134 double *tmp = malloc(
sizeof(
double) *
S);
137 for(
int i = 0;
i <= s;
i++)
140 for(
int j = 0; j < wd; j++) tmp[
i] += As[j *
S +
i] * b[j];
142 for(
int i = 0;
i <= s;
i++)
144 for(
int j = 0; j <= s; j++)
147 for(
int i = 0;
i <= s;
i++)
coeff[j] +=
v[j * (s + 1) +
i] * tmp[
i];
153#pragma GCC diagnostic push
154#pragma GCC diagnostic ignored "-Wvla"
161 const double **target,
169 if(avgerr) *avgerr = 0.0;
170 if(maxerr) *maxerr = 0.0;
172 const int wd =
N + 4;
173 double *
A = malloc(
sizeof(
double) * wd * wd);
184 for(
int j = 0; j <
N; j++)
188 for(
int i = 0;
i <
N;
i++)
A[
i * wd +
N + 0] =
A[(
N + 0) * wd +
i] = 1.0f;
189 for(
int i = 0;
i <
N;
i++)
A[
i * wd +
N + 1] =
A[(
N + 1) * wd +
i] =
point[3 *
i + 0];
190 for(
int i = 0;
i <
N;
i++)
A[
i * wd +
N + 2] =
A[(
N + 2) * wd +
i] =
point[3 *
i + 1];
191 for(
int i = 0;
i <
N;
i++)
A[
i * wd +
N + 3] =
A[(
N + 3) * wd +
i] =
point[3 *
i + 2];
193 for(
int j =
N; j < wd; j++)
194 for(
int i =
N;
i < wd;
i++)
A[j * wd +
i] = 0.0f;
197 double *norm = malloc(
sizeof(
double) * wd);
198 for(
int i = 0;
i < wd;
i++)
201 for(
int j = 0; j < wd; j++) norm[
i] +=
A[j * wd +
i] *
A[j * wd +
i];
202 norm[
i] = 1.0 / sqrt(norm[
i]);
207 double(*
r)[wd] = malloc(
sizeof(
double) * dim * wd);
208 const double **b = malloc(
sizeof(
double *) * dim);
209 for(
int k = 0;
k < dim;
k++) b[
k] = target[
k];
210 for(
int k = 0;
k < dim;
k++) memcpy(
r[
k], b[
k],
sizeof(
double) * wd);
212 double *w = malloc(
sizeof(
double) *
S);
213 double *
v = malloc(
sizeof(
double) *
S *
S);
214 double *As = calloc((
size_t)wd *
S,
sizeof(
double));
217 int s = 0, patches = 0;
218 double olderr = FLT_MAX;
222 const int sparsity =
MIN(s,
S);
235 assert(sparsity <
S + 4);
241 for(
int t = 0;
t < wd;
t++)
248 for(
int ch = 0;
ch < dim;
ch++)
252 for(
int i = 0;
i <= sparsity;
i++)
253 for(
int j = 0; j < wd; j++) As[j *
S +
i] =
A[j * wd +
permutation[
i]];
269 for(
int j = 0; j < wd; j++)
280 for(
int ch = 0;
ch < dim;
ch++)
283 for(
int j = 0; j < wd; j++) chdot +=
A[j * wd +
t] *
r[
ch][j];
301 if(maxcol <
N) patches++;
308 double minerr = FLT_MAX;
309 for(
int t = 0;
t < sparsity;
t++)
316 for(
int ch = 0;
ch < dim;
ch++)
319 for(
int i = 0;
i < sparsity;
i++)
320 for(
int j = 0; j < wd; j++) As[j *
S +
i] =
A[j * wd +
permutation[
i]];
336 for(
int j = 0; j < wd; j++)
347 for(
int ch = 0;
ch < dim;
ch++)
350 for(
int j = 0; j < wd; j++) chdot +=
A[j * wd +
t] *
r[
ch][j];
354 for(
int j = 0; j < wd; j++)
n +=
A[j * wd + mincol] *
A[j * wd + mincol];
356 double err = 1. / dot;
368 if(minerr < 1. / maxdot)
370 fprintf(stderr,
"replacing %d <- %d\n", mincol, maxcol);
378 for(
int j = 0; j < wd; j++) norm[mincol] +=
A[j * wd + mincol] *
A[j * wd + mincol];
379 norm[mincol] = 1.0 / sqrt(norm[mincol]);
386 double err = 1. / maxdot;
388 const int sp =
MIN(sparsity,
S-1);
390 for(
int ch = 0;
ch < dim;
ch++)
394 for(
int i = 0;
i <= sp;
i++)
395 for(
int j = 0; j < wd; j++) As[j *
S +
i] =
A[j * wd +
permutation[
i]];
412 for(
int j = 0; j < wd; j++)
420 const double err =
compute_error(curve, target,
r[0],
r[1],
r[2], wd, &merr);
426 if(avgerr) *avgerr = err;
427 if(maxerr) *maxerr = merr;
428 fprintf(stderr,
"rank %d/%d avg DE %g max DE %g\n", sp + 1, patches, err, merr);
430 if(s >=
S && err >= olderr)
431 fprintf(stderr,
"error increased!\n");
446#pragma GCC diagnostic pop
450 const float pi = 3.14153f;
451 const float h = atan2f(b, a) + pi;
453 const int sector = 4.0f * h / (2.0f * pi);
454 return 256.0 * sector +
L;
double tonecurve_apply(const tonecurve_t *c, const double L)
const dt_aligned_pixel_t f
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
#define IS_NULL_PTR(p)
C is way too permissive with !=, == and if(var) checks, which can mean too many things depending on w...
float dt_colorspaces_deltaE_2000(dt_aligned_pixel_t Lab0, dt_aligned_pixel_t Lab1)
const float *const const float coeff[3]
float *const restrict const size_t k
float *const restrict const size_t const size_t ch
float dt_aligned_pixel_t[4]
Singular value decomposition helper shared by chart and color-math code.
static int dsvd(double *a, int m, int n, int str, double *w, double *v)
Compute the singular value decomposition of a dense matrix.
typedef double((*spd)(unsigned long int wavelength, double TempK))
float thinplate_color_pos(float L, float a, float b)
static double thinplate_kernel(const double *x, const double *y)
static int solve(double *As, double *w, double *v, const double *b, double *coeff, int wd, int s, int S)
static double compute_error(const tonecurve_t *c, const double **target, const double *residual_L, const double *residual_a, const double *residual_b, const int wd, double *maxerr)
int thinplate_match(const tonecurve_t *curve, int dim, int N, const double *point, const double **target, int S, int *permutation, double **coeff, double *avgerr, double *maxerr)