Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
math.h
Go to the documentation of this file.
1/*
2 * This file is part of darktable,
3 * Copyright (C) 2016 johannes hanika.
4 * Copyright (C) 2016, 2018 Tobias Ellinghaus.
5 * Copyright (C) 2018-2019 Heiko Bauke.
6 * Copyright (C) 2020-2021 Pascal Obry.
7 * Copyright (C) 2020-2021 Ralf Brown.
8 * Copyright (C) 2021 Andreas Schneider.
9 * Copyright (C) 2021, 2023-2025 Aurélien PIERRE.
10 * Copyright (C) 2022 Martin Bařinka.
11 * Copyright (C) 2023 Luca Zulberti.
12 *
13 * darktable is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * darktable is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with darktable. If not, see <http://www.gnu.org/licenses/>.
25 */
26
27#pragma once
28
29#include <stddef.h>
30#include <math.h>
31#include <stdint.h>
32
33#include "common/darktable.h"
34
35#define NORM_MIN 1.52587890625e-05f // norm can't be < to 2^(-16)
36
37// work around missing standard math.h symbols
39#ifndef M_LN10
40#define M_LN10 2.30258509299404568402
41#endif /* !M_LN10 */
42
44#ifndef M_PI
45#define M_PI 3.14159265358979323846
46#endif /* !M_PI */
47#ifndef M_PI_F
48#define M_PI_F 3.14159265358979324f
49#endif /* !M_PI_F */
50
51
52#define DT_M_PI_F (3.14159265358979324f)
53#define DT_M_PI (3.14159265358979324)
54
55#define DT_M_LN2f (0.6931471805599453f)
56
57// If platform supports hardware-accelerated fused-multiply-add
58// This is not only faster but more accurate because rounding happens at the right place
59#ifdef FP_FAST_FMAF
60 #define DT_FMA(x, y, z) fmaf(x, y, z)
61#else
62 #define DT_FMA(x, y, z) ((x) * (y) + (z))
63#endif
64
65// Golden number (1+sqrt(5))/2
66#ifndef PHI
67#define PHI 1.61803398874989479F
68#endif
69
70// 1/PHI
71#ifndef INVPHI
72#define INVPHI 0.61803398874989479F
73#endif
74
75// NaN-safe clamping (NaN compares false, and will thus result in H)
76#define CLAMPS(A, L, H) ((A) > (L) ? ((A) < (H) ? (A) : (H)) : (L))
77
78// clip channel value to be between 0 and 1
79// NaN-safe: NaN compares false and will result in 0.0
80// also does not force promotion of floats to doubles, but will use the type of its argument
81#define CLIP(x) (((x) >= 0) ? ((x) <= 1 ? (x) : 1) : 0)
82#define MM_CLIP_PS(X) (_mm_min_ps(_mm_max_ps((X), _mm_setzero_ps()), _mm_set1_ps(1.0)))
83
84// clip luminance values to be between 0 and 100
85#define LCLIP(x) ((x < 0) ? 0.0 : (x > 100.0) ? 100.0 : x)
86
87// clamp value to lie between mn and mx
88// Nan-safe: NaN compares false and will result in mn
89#define CLAMPF(a, mn, mx) ((a) >= (mn) ? ((a) <= (mx) ? (a) : (mx)) : (mn))
90//#define CLAMPF(a, mn, mx) ((a) < (mn) ? (mn) : ((a) > (mx) ? (mx) : (a)))
91
92#if defined(__x86_64__) || defined(__i386__)
93#define MMCLAMPPS(a, mn, mx) (_mm_min_ps((mx), _mm_max_ps((a), (mn))))
94#endif
95
96static inline float clamp_range_f(const float x, const float low, const float high)
97{
98 return x > high ? high : (x < low ? low : x);
99}
100
101// Kahan summation algorithm
102__OMP_DECLARE_SIMD__(aligned(c))
103static inline float Kahan_sum(const float m, float *const __restrict__ c, const float add)
104{
105 const float t1 = add - (*c);
106 const float t2 = m + t1;
107 *c = (t2 - m) - t1;
108 return t2;
109}
110
111static inline float Log2(float x)
112{
113 return (x > 0.0f) ? (logf(x) / DT_M_LN2f) : x;
114}
115
116static inline float Log2Thres(float x, float Thres)
117{
118 return logf(x > Thres ? x : Thres) / DT_M_LN2f;
119}
120
121// ensure that any changes here are synchronized with data/kernels/extended.cl
122static inline float fastlog2(float x)
123{
124 union { float f; uint32_t i; } vx = { x };
125 union { uint32_t i; float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 };
126
127 float y = vx.i;
128
129 y *= 1.1920928955078125e-7f;
130
131 return y - 124.22551499f
132 - 1.498030302f * mx.f
133 - 1.72587999f / (0.3520887068f + mx.f);
134}
135
136// ensure that any changes here are synchronized with data/kernels/extended.cl
137static inline float
138fastlog (float x)
139{
140 return DT_M_LN2f * fastlog2(x);
141}
142
143// multiply 3x3 matrix with 3x1 vector
144// dest needs to be different from v
146static inline void mat3mulv(float *const __restrict__ dest, const float *const mat, const float *const __restrict__ v)
147{
148 for(int k = 0; k < 3; k++)
149 {
150 float x = 0.0f;
151 for(int i = 0; i < 3; i++)
152 x += mat[3 * k + i] * v[i];
153 dest[k] = x;
154 }
155}
156
157// multiply two 3x3 matrices
158// dest needs to be different from m1 and m2
159// dest = m1 * m2 in this order
161static inline void mat3mul(float *const __restrict__ dest, const float *const __restrict__ m1, const float *const __restrict__ m2)
162{
163 for(int k = 0; k < 3; k++)
164 {
165 for(int i = 0; i < 3; i++)
166 {
167 float x = 0.0f;
168 for(int j = 0; j < 3; j++)
169 x += m1[3 * k + j] * m2[3 * j + i];
170 dest[3 * k + i] = x;
171 }
172 }
173}
174
176static inline void mul_mat_vec_2(const float *m, const float *p, float *o)
177{
178 o[0] = p[0] * m[0] + p[1] * m[1];
179 o[1] = p[0] * m[2] + p[1] * m[3];
180}
181
182__OMP_DECLARE_SIMD__(uniform(v_2) aligned(v_1, v_2:16))
183static inline float scalar_product(const dt_aligned_pixel_t v_1, const dt_aligned_pixel_t v_2)
184{
185 // specialized 3x1 dot products 2 4x1 RGB-alpha pixels.
186 // v_2 needs to be uniform along loop increments, e.g. independent from current pixel values
187 // we force an order of computation similar to SSE4 _mm_dp_ps() hoping the compiler will get the clue
188 float acc = 0.f;
189 __OMP_SIMD__(aligned(v_1, v_2:16) reduction(+:acc))
190 for(size_t c = 0; c < 3; c++) acc += v_1[c] * v_2[c];
191
192 return acc;
193}
194
195
197static inline float sqf(const float x)
198{
199 return x * x;
200}
201
202
203__OMP_DECLARE_SIMD__(aligned(vector:16))
204static inline float euclidean_norm(const dt_aligned_pixel_t vector)
205{
206 return fmaxf(sqrtf(sqf(vector[0]) + sqf(vector[1]) + sqf(vector[2])), NORM_MIN);
207}
208
209
210__OMP_DECLARE_SIMD__(aligned(vector:16))
211static inline void downscale_vector(dt_aligned_pixel_t vector, const float scaling)
212{
213 // check zero or NaN
214 const int valid = (scaling > NORM_MIN) && !isnan(scaling);
215 for(size_t c = 0; c < 3; c++) vector[c] = (valid) ? vector[c] / (scaling + NORM_MIN) : vector[c] / NORM_MIN;
216}
217
218
219__OMP_DECLARE_SIMD__(aligned(vector:16))
220static inline void upscale_vector(dt_aligned_pixel_t vector, const float scaling)
221{
222 const int valid = (scaling > NORM_MIN) && !isnan(scaling);
223 for(size_t c = 0; c < 3; c++) vector[c] = (valid) ? vector[c] * (scaling + NORM_MIN) : vector[c] * NORM_MIN;
224}
225
226
228static inline float dt_log2f(const float f)
229{
230#ifdef __GLIBC__
231 return log2f(f);
232#else
233 return logf(f) / logf(2.0f);
234#endif
235}
236
237union float_int {
238 float f;
239 int k;
240};
241
242// a faster, vectorizable version of hypotf() when we know that there won't be overflow, NaNs, or infinities
244static inline float dt_fast_hypotf(const float x, const float y)
245{
246 return sqrtf(x * x + y * y);
247}
248
249// fast approximation of expf()
250/****** if you change this function, you need to make the same change in data/kernels/{basecurve,basic}.cl ***/
252static inline float dt_fast_expf(const float x)
253{
254 // meant for the range [-100.0f, 0.0f]. largest error ~ -0.06 at 0.0f.
255 // will get _a_lot_ worse for x > 0.0f (9000 at 10.0f)..
256 const int i1 = 0x3f800000u;
257 // e^x, the comment would be 2^x
258 const int i2 = 0x402DF854u; // 0x40000000u;
259 // const int k = CLAMPS(i1 + x * (i2 - i1), 0x0u, 0x7fffffffu);
260 // without max clamping (doesn't work for large x, but is faster):
261 const int k0 = i1 + x * (i2 - i1);
262 union float_int u;
263 u.k = k0 > 0 ? k0 : 0;
264 return u.f;
265}
266
267static inline void dt_fast_expf_4wide(const float x[4], float result[4])
268{
269 // meant for the range [-100.0f, 0.0f]. largest error ~ -0.06 at 0.0f.
270 // will get _a_lot_ worse for x > 0.0f (9000 at 10.0f)..
271 const int i1 = 0x3f800000u;
272 // e^x, the comment would be 2^x
273 const int i2 = 0x402DF854u; // 0x40000000u;
274 // const int k = CLAMPS(i1 + x * (i2 - i1), 0x0u, 0x7fffffffu);
275 // without max clamping (doesn't work for large x, but is faster):
276 union float_int u[4];
277 __OMP_SIMD__(aligned(x, result))
278 for(size_t c = 0; c < 4; c++)
279 {
280 const int k0 = i1 + (int)(x[c] * (i2 - i1));
281 u[c].k = k0 > 0 ? k0 : 0;
282 result[c] = u[c].f;
283 }
284}
285
286// fast approximation of 2^-x for 0<x<126
287/****** if you change this function, you need to make the same change in data/kernels/{denoiseprofile,nlmeans}.cl ***/
288static inline float dt_fast_mexp2f(const float x)
289{
290 const int i1 = 0x3f800000; // bit representation of 2^0
291 const int i2 = 0x3f000000; // bit representation of 2^-1
292 const int k0 = i1 + (int)(x * (i2 - i1));
293 union {
294 float f;
295 int i;
296 } k;
297 k.i = k0 >= 0x800000 ? k0 : 0;
298 return k.f;
299}
300
301// The below version is incorrect, suffering from reduced precision.
302// It is used by the non-local means code in both nlmeans.c and
303// denoiseprofile.c, and fixing it would cause a change in output.
304static inline float fast_mexp2f(const float x)
305{
306 const float i1 = (float)0x3f800000u; // 2^0
307 const float i2 = (float)0x3f000000u; // 2^-1
308 const float k0 = i1 + x * (i2 - i1);
309 union {
310 float f;
311 int i;
312 } k;
313 k.i = k0 >= (float)0x800000u ? k0 : 0;
314 return k.f;
315}
316
322static inline float ceil_fast(float x)
323{
324 if(x <= 0.f)
325 {
326 return (float)(int)x;
327 }
328 else
329 {
330 return -((float)(int)-x) + 1.f;
331 }
332}
333
334#if defined(__x86_64__) || defined(__i386__)
static inline __m128 _mm_abs_ps(__m128 t)
339{
340 static const uint32_t signmask[4] __attribute__((aligned(64)))
341 = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
342 return _mm_and_ps(*(__m128 *)signmask, t);
343}
344#endif
345
358static inline float sinf_fast(float t)
359{
360 /***** if you change this function, you must also change the copy in data/kernels/basic.cl *****/
361 static const float a = 4 / (M_PI * M_PI);
362 static const float p = 0.225f;
363
364 t = a * t * (M_PI_F - fabsf(t));
365
366 return t * (p * (fabsf(t) - 1) + 1);
367}
368
369#if defined(__x86_64__) || defined(__i386__)
382static inline __m128 sinf_fast_sse(__m128 t)
383{
384 static const __m128 a
385 = { 4.f / (M_PI * M_PI), 4.f / (M_PI * M_PI), 4.f / (M_PI * M_PI), 4.f / (M_PI * M_PI) };
386 static const __m128 p = { 0.225f, 0.225f, 0.225f, 0.225f };
387 static const __m128 pi = { M_PI, M_PI, M_PI, M_PI };
388
389 // m4 = a*t*(M_PI - fabsf(t));
390 const __m128 m1 = _mm_abs_ps(t);
391 const __m128 m2 = _mm_sub_ps(pi, m1);
392 const __m128 m3 = _mm_mul_ps(t, m2);
393 const __m128 m4 = _mm_mul_ps(a, m3);
394
395 // p*(m4*fabsf(m4) - m4) + m4;
396 const __m128 n1 = _mm_abs_ps(m4);
397 const __m128 n2 = _mm_mul_ps(m4, n1);
398 const __m128 n3 = _mm_sub_ps(n2, m4);
399 const __m128 n4 = _mm_mul_ps(p, n3);
400
401 return _mm_add_ps(n4, m4);
402}
403#endif
404
405
413static inline int ipow(int base, int exp)
414{
415 int result = 1;
416 for(;;)
417 {
418 if (exp & 1)
419 result *= base;
420 exp >>= 1;
421 if (!exp)
422 break;
423 base *= base;
424 }
425 return result;
426}
427
440static inline void dt_vector_sin(const dt_aligned_pixel_t arg,
442{
443 static const dt_aligned_pixel_t pi = { M_PI_F, M_PI_F, M_PI_F, M_PI_F };
444 static const dt_aligned_pixel_t a
445 = { 4 / (M_PI_F * M_PI_F),
446 4 / (M_PI_F * M_PI_F),
447 4 / (M_PI_F * M_PI_F),
448 4 / (M_PI_F * M_PI_F) };
449 static const dt_aligned_pixel_t p = { 0.225f, 0.225f, 0.225f, 0.225f };
450 static const dt_aligned_pixel_t one = { 1.0f, 1.0f, 1.0f, 1.0f };
451
452 dt_aligned_pixel_t abs_arg;
454 abs_arg[c] = (arg[c] < 0.0f) ? -arg[c] : arg[c];
455 dt_aligned_pixel_t scaled;
457 scaled[c] = a[c] * arg[c] * (pi[c] - abs_arg[c]);
458 dt_aligned_pixel_t abs_scaled;
460 abs_scaled[c] = (scaled[c] < 0.0f) ? -scaled[c] : scaled[c];
462 sine[c] = scaled[c] * (p[c] * (abs_scaled[c] - one[c]) + one[c]);
463}
464
469static inline float f_inv_sqrtf(const float x)
470{
471 if(x <= 1e-16f) return 0.0f;
472
473 union
474 {
475 float f;
476 uint32_t i;
477 } conv = { x };
478
479 conv.i = 0x5f3759dfu - (conv.i >> 1);
480 float y = conv.f;
481 // One Newton-Raphson iteration is accurate enough for geometry offsets.
482 y = y * (1.5f - 0.5f * x * y * y);
483 return y;
484}
485
486// clang-format off
487// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
488// vim: shiftwidth=2 expandtab tabstop=2 cindent
489// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
490// clang-format on
#define m
Definition basecurve.c:277
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
Definition chromatic_adaptation.h:309
static const float scaling
Definition chromatic_adaptation.h:293
const float i
Definition colorspaces_inline_conversions.h:440
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:102
for(size_t c=0;c< 3;c++) sRGB[c]
typedef void((*dt_cache_allocate_t)(void *userdata, dt_cache_entry_t *entry))
#define __OMP_SIMD__(...)
Definition darktable.h:262
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
Definition darktable.h:524
#define __OMP_DECLARE_SIMD__(...)
Definition darktable.h:263
#define for_four_channels(_var,...)
Definition darktable.h:664
static const float x
Definition iop_profile.h:235
const int t
Definition iop_profile.h:225
const float v
Definition iop_profile.h:221
float *const restrict const size_t k
Definition luminance_mask.h:78
static float Kahan_sum(const float m, float *const __restrict__ c, const float add)
Definition math.h:103
static float ceil_fast(float x)
Definition math.h:322
static float clamp_range_f(const float x, const float low, const float high)
Definition math.h:96
#define DT_M_LN2f
Definition math.h:55
static int ipow(int base, int exp)
Fast integer power, computing base^exp.
Definition math.h:413
static void mul_mat_vec_2(const float *m, const float *p, float *o)
Definition math.h:176
#define NORM_MIN
Definition math.h:35
static float f_inv_sqrtf(const float x)
Definition math.h:469
static float fast_mexp2f(const float x)
Definition math.h:304
static float sinf_fast(float t)
Definition math.h:358
c< 3;c++) acc+=v_1[c] *v_2[c];return acc;}static inline float sqf(const float x){ return x *x;}static inline float euclidean_norm(const dt_aligned_pixel_t vector){ return fmaxf(sqrtf(sqf(vector[0])+sqf(vector[1])+sqf(vector[2])), 1.52587890625e-05f);}static inline void downscale_vector(dt_aligned_pixel_t vector, const float scaling){ const int valid=(scaling > 1.52587890625e-05f) &&!isnan(scaling);for(size_t c=0;c< 3;c++) vector[c]=(valid) ? vector[c]/(scaling+1.52587890625e-05f) :vector[c]/1.52587890625e-05f ;}static inline void upscale_vector(dt_aligned_pixel_t vector, const float scaling){ const int valid=(scaling > 1.52587890625e-05f) &&!isnan(scaling);for(size_t c=0;c< 3;c++) vector[c]=(valid) ? vector[c] *(scaling+1.52587890625e-05f) :vector[c] *1.52587890625e-05f ;}static inline float dt_log2f(const float f){ return logf(f)/logf(2.0f);}union float_int { float f;int k;};static inline float dt_fast_hypotf(const float x, const float y){ return sqrtf(x *x+y *y);}static inline float dt_fast_expf(const float x){ const int i1=0x3f800000u;const int i2=0x402DF854u;const int k0=i1+x *(i2 - i1);union float_int u;u.k=k0 > k0
Definition math.h:263
return u f
Definition math.h:264
static float dt_fast_mexp2f(const float x)
Definition math.h:288
static void mat3mul(float *const __restrict__ dest, const float *const __restrict__ m1, const float *const __restrict__ m2)
Definition math.h:161
const dt_aligned_pixel_t v_2
Definition math.h:184
static void dt_vector_sin(const dt_aligned_pixel_t arg, dt_aligned_pixel_t sine)
Definition math.h:440
#define M_PI_F
Definition math.h:48
static float fastlog2(float x)
Definition math.h:122
static float fastlog(float x)
Definition math.h:138
static float Log2(float x)
Definition math.h:111
static float Log2Thres(float x, float Thres)
Definition math.h:116
static void dt_fast_expf_4wide(const float x[4], float result[4])
Definition math.h:267
#define M_PI
Definition math.h:45
static void mat3mulv(float *const __restrict__ dest, const float *const mat, const float *const __restrict__ v)
Definition math.h:146
c
Definition derive_filmic_v6_gamut_mapping.py:35
float dt_aligned_pixel_t[4]
Definition noiseprofile.c:28