85 float *
const restrict L,
size_t n)
91 if(
A[0] <= 0.0f)
return 0;
93 for(
size_t i = 0; i < n; i++)
94 for(
size_t j = 0; j < (i + 1); j++)
98 for(
size_t k = 0; k < j; k++)
99 sum += L[i * n + k] * L[j * n + k];
101 L[i * n + j] = (i == j) ?
102 sqrtf(
A[i * n + i] - sum) :
103 (
A[i * n + j] - sum) / L[j * n + j];
112 float *
const restrict L,
size_t n)
117 if(
A[0] <= 0.0f)
return 0;
121 for(
size_t i = 0; i < n; i++)
122 for(
size_t j = 0; j < (i + 1); j++)
126 for(
size_t k = 0; k < j; k++)
127 sum += L[i * n + k] * L[j * n + k];
131 const float temp =
A[i * n + i] - sum;
139 L[i * n + j] = sqrtf(
A[i * n + i] - sum);
143 const float temp = L[j * n + j];
151 L[i * n + j] = (
A[i * n + j] - sum) / temp;
161 const float *
const restrict y,
float *
const restrict b,
167 for(
size_t i = 0; i < n; ++i)
170 for(
size_t j = 0; j < i; ++j)
171 sum -= L[i * n + j] * b[j];
173 b[i] = sum / L[i * n + i];
182 const float *
const restrict y,
float *
const restrict b,
190 for(
size_t i = 0; i < n; ++i)
193 for(
size_t j = 0; j < i; ++j)
194 sum -= L[i * n + j] * b[j];
196 const float temp = L[i * n + i];
213 const float *
const restrict b,
float *
const restrict x,
219 for(
int i = (n - 1); i > -1 ; --i)
222 for(
int j = (n - 1); j > i; --j)
223 sum -= L[j * n + i] * x[j];
225 x[i] = sum / L[i * n + i];
234 const float *
const restrict b,
float *
const restrict x,
242 for(
int i = (n - 1); i > -1 ; --i)
245 for(
int j = (n - 1); j > i; --j)
246 sum -= L[j * n + i] * x[j];
248 const float temp = L[i * n + i];
264 float *
const restrict y,
265 const size_t n,
const int checks)
288 dt_control_log(_(
"Choleski decomposition failed to allocate memory, check your RAM settings"));
289 fprintf(stdout,
"Choleski decomposition failed to allocate memory, check your RAM settings\n");
296 if(!valid) fprintf(stdout,
"Cholesky decomposition returned NaNs\n");
302 if(!valid) fprintf(stdout,
"Cholesky LU triangular descent returned NaNs\n");
308 if(!valid) fprintf(stdout,
"Cholesky LU triangular ascent returned NaNs\n");
322 float *
const restrict A_square,
323 const size_t m,
const size_t n)
328 for(
size_t i = 0; i < n; ++i)
329 for(
size_t j = 0; j < (i + 1); ++j)
332 for(
size_t k = 0; k <
m; ++k)
333 sum +=
A[k * n + i] *
A[k * n + j];
335 A_square[i * n + j] = sum;
344 float *
const restrict y,
345 float *
const restrict y_square,
346 const size_t m,
const size_t n)
350 for(
size_t i = 0; i < n; ++i)
353 for(
size_t k = 0; k <
m; ++k)
354 sum +=
A[k * n + i] * y[k];
365 float *
const restrict y,
366 const size_t m,
const size_t n,
const int checks)
377 fprintf(stdout,
"Pseudo solve: cannot cast %zu \303\227 %zu matrice\n",
m, n);
384 if(!A_square || !y_square)
386 dt_control_log(_(
"Choleski decomposition failed to allocate memory, check your RAM settings"));
391 #pragma omp parallel sections
#define m
Definition basecurve.c:231
static __DT_CLONE_TARGETS__ int choleski_decompose_fast(const float *const restrict A, float *const restrict L, size_t n)
Definition choleski.h:84
static __DT_CLONE_TARGETS__ int triangular_descent_safe(const float *const restrict L, const float *const restrict y, float *const restrict b, const size_t n)
Definition choleski.h:181
static __DT_CLONE_TARGETS__ int transpose_dot_vector(float *const restrict A, float *const restrict y, float *const restrict y_square, const size_t m, const size_t n)
Definition choleski.h:343
static __DT_CLONE_TARGETS__ int triangular_descent_fast(const float *const restrict L, const float *const restrict y, float *const restrict b, const size_t n)
Definition choleski.h:160
static __DT_CLONE_TARGETS__ int solve_hermitian(const float *const restrict A, float *const restrict y, const size_t n, const int checks)
Definition choleski.h:263
static __DT_CLONE_TARGETS__ int triangular_ascent_fast(const float *const restrict L, const float *const restrict b, float *const restrict x, const size_t n)
Definition choleski.h:212
static __DT_CLONE_TARGETS__ int choleski_decompose_safe(const float *const restrict A, float *const restrict L, size_t n)
Definition choleski.h:111
static __DT_CLONE_TARGETS__ int transpose_dot_matrix(float *const restrict A, float *const restrict A_square, const size_t m, const size_t n)
Definition choleski.h:321
static __DT_CLONE_TARGETS__ int triangular_ascent_safe(const float *const restrict L, const float *const restrict b, float *const restrict x, const size_t n)
Definition choleski.h:233
static __DT_CLONE_TARGETS__ int pseudo_solve(float *const restrict A, float *const restrict y, const size_t m, const size_t n, const int checks)
Definition choleski.h:364
#define A(y, x)
Definition colorspaces.c:148
void dt_control_log(const char *msg,...)
Definition control.c:424
#define __DT_CLONE_TARGETS__
Definition darktable.h:249
#define dt_free_align(A)
Definition darktable.h:334
static void * dt_alloc_sse_ps(size_t pixels)
Definition darktable.h:356
static __DT_CLONE_TARGETS__ void dt_simd_memcpy(const float *const __restrict__ in, float *const __restrict__ out, const size_t num_elem)
Definition imagebuf.h:64