/** Small positive threshold, equal to 2^-16 (1 / 65536). The vector scaling
 *  code further down compares its scale factor against this value and adds
 *  it to the factor before dividing or multiplying. */
27#define NORM_MIN 1.52587890625e-05f
/** Natural logarithm of 10 as a double literal.
 *  NOTE(review): the listing numbers skip lines around this define and the
 *  next one, so a preprocessor guard may surround them; confirm against the
 *  real header. */
32#define M_LN10 2.30258509299404568402
/** Pi as a double literal. */
37#define M_PI 3.14159265358979323846
/** Pi as a float literal. */
40#define M_PI_F 3.14159265358979324f
/** Pi as a parenthesized float literal; same digits as M_PI_F. */
44#define DT_M_PI_F (3.14159265358979324f)
/** Pi as a parenthesized double literal. */
45#define DT_M_PI (3.14159265358979324)
/** Natural logarithm of 2 as a parenthesized float literal. */
47#define DT_M_LN2f (0.6931471805599453f)
/* DT_FMA(x, y, z) evaluates x * y + z. Two alternative definitions follow:
 * the first forwards to fmaf(), the second expands to a plain multiply and
 * add with every argument parenthesized.
 * NOTE(review): the preprocessor conditional that selects one of the two is
 * not visible in this listing; confirm which condition enables fmaf(). */
52 #define DT_FMA(x, y, z) fmaf(x, y, z)
54 #define DT_FMA(x, y, z) ((x) * (y) + (z))
/** Golden ratio, (1 + sqrt(5)) / 2, as a float literal. */
59#define PHI 1.61803398874989479F
/** Reciprocal of the golden ratio (equal to PHI - 1), as a float literal. */
64#define INVPHI 0.61803398874989479F
/** Clamp A to the range [L, H].
 *  Yields L whenever (A) > (L) is false, which includes a NaN A for
 *  floating-point arguments. Arguments are expanded more than once, so do
 *  not pass expressions with side effects. */
68#define CLAMPS(A, L, H) ((A) > (L) ? ((A) < (H) ? (A) : (H)) : (L))
/** Clamp x to the range [0, 1].
 *  Yields 0 whenever (x) >= 0 is false, which includes a NaN x for
 *  floating-point arguments. x is expanded more than once, so do not pass
 *  an expression with side effects. */
73#define CLIP(x) (((x) >= 0) ? ((x) <= 1 ? (x) : 1) : 0)
/** SSE counterpart of a [0, 1] clamp: takes the per-lane maximum of X and
 *  zero, then the per-lane minimum of that result and 1.0.
 *  The operand order of the min/max intrinsics decides which operand is
 *  returned for NaN lanes, so keep it as written. */
74#define MM_CLIP_PS(X) (_mm_min_ps(_mm_max_ps((X), _mm_setzero_ps()), _mm_set1_ps(1.0)))
/** Clamp x to the range [0, 100]; the 0.0 / 100.0 literals promote the
 *  result to double for int, float and double arguments.
 *  Every use of x is parenthesized so that an argument containing a
 *  low-precedence operator (for example `c ? a : b`) is compared and
 *  returned as a whole instead of being split by the surrounding operators.
 *  x is still expanded more than once, so do not pass an expression with
 *  side effects. */
#define LCLIP(x) (((x) < 0) ? 0.0 : ((x) > 100.0) ? 100.0 : (x))
/** Clamp a to the range [mn, mx].
 *  Yields mn whenever (a) >= (mn) is false, which includes a NaN a for
 *  floating-point arguments. Arguments are expanded more than once, so do
 *  not pass expressions with side effects. */
81#define CLAMPF(a, mn, mx) ((a) >= (mn) ? ((a) <= (mx) ? (a) : (mx)) : (mn))
/** SSE counterpart of a [mn, mx] clamp: takes the per-lane maximum of a and
 *  mn, then the per-lane minimum of mx and that result.
 *  The operand order of the min/max intrinsics decides which operand is
 *  returned for NaN lanes, so keep it as written. */
85#define MMCLAMPPS(a, mn, mx) (_mm_min_ps((mx), _mm_max_ps((a), (mn))))
88static inline float clamp_range_f(
const float x,
const float low,
const float high)
90 return x > high ? high : (x < low ? low : x);
95#pragma omp declare simd aligned(c)
97static inline float Kahan_sum(
const float m,
float *
const __restrict__ c,
const float add)
99 const float t1 = add - (*c);
100 const float t2 =
m + t1;
107static inline __m128 Kahan_sum_sse(
const __m128
m, __m128 *
const __restrict__ c,
const __m128 add)
109 const __m128 t1 = add - (*c);
110 const __m128 t2 =
m + t1;
116static inline float Log2(
float x)
118 return (x > 0.0f) ? (logf(x) /
DT_M_LN2f) : x;
123 return logf(x > Thres ? x : Thres) /
DT_M_LN2f;
129 union {
float f; uint32_t i; } vx = { x };
130 union { uint32_t i;
float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 };
134 y *= 1.1920928955078125e-7f;
136 return y - 124.22551499f
137 - 1.498030302f * mx.f
138 - 1.72587999f / (0.3520887068f + mx.f);
151#pragma omp declare simd
153static inline void mat3mulv(
float *
const __restrict__ dest,
const float *
const mat,
const float *
const __restrict__ v)
155 for(
int k = 0; k < 3; k++)
158 for(
int i = 0; i < 3; i++)
159 x += mat[3 * k + i] * v[i];
168#pragma omp declare simd
170static inline void mat3mul(
float *
const __restrict__ dest,
const float *
const __restrict__ m1,
const float *
const __restrict__ m2)
172 for(
int k = 0; k < 3; k++)
174 for(
int i = 0; i < 3; i++)
177 for(
int j = 0; j < 3; j++)
178 x += m1[3 * k + j] * m2[3 * j + i];
185#pragma omp declare simd
189 o[0] = p[0] *
m[0] + p[1] *
m[1];
190 o[1] = p[0] *
m[2] + p[1] *
m[3];
194#pragma omp declare simd uniform(v_2) aligned(v_1, v_2:16)
196static inline float scalar_product(
const dt_aligned_pixel_t v_1,
const dt_aligned_pixel_t v_2)
204#pragma omp simd aligned(v_1, v_2:16) reduction(+:acc)
206 for(
size_t c = 0; c < 3; c++) acc += v_1[c] * v_2[c];
213#pragma omp declare simd
215static inline float sqf(
const float x)
222#pragma omp declare simd aligned(vector:16)
231#pragma omp declare simd aligned(vector:16)
236 const int valid = (scaling >
NORM_MIN) && !isnan(scaling);
237 for(
size_t c = 0; c < 3; c++) vector[c] = (valid) ? vector[c] / (scaling +
NORM_MIN) : vector[c] /
NORM_MIN;
242#pragma omp declare simd aligned(vector:16)
246 const int valid = (scaling >
NORM_MIN) && !isnan(scaling);
247 for(
size_t c = 0; c < 3; c++) vector[c] = (valid) ? vector[c] * (scaling +
NORM_MIN) : vector[c] *
NORM_MIN;
252#pragma omp declare simd
259 return logf(
f) / logf(2.0f);
270#pragma omp declare simd
274 return sqrtf(x * x + y * y);
280#pragma omp declare simd
286 const int i1 = 0x3f800000u;
288 const int i2 = 0x402DF854u;
291 const int k0 = i1 + x * (i2 - i1);
293 u.
k = k0 > 0 ? k0 : 0;
301 const int i1 = 0x3f800000u;
303 const int i2 = 0x402DF854u;
308#pragma omp simd aligned(x, result)
310 for(
size_t c = 0; c < 4; c++)
312 const int k0 = i1 + (int)(x[c] * (i2 - i1));
313 u[c].
k = k0 > 0 ? k0 : 0;
/** Shorthand for the GCC/Clang `aligned` attribute: requests an alignment of
 *  `a` bytes for the declaration it is attached to. */
319#define ALIGNED(a) __attribute__((aligned(a)))
326static const __m128 dt__fone ALIGNED(64) = VEC4(0x3f800000u);
327static const __m128 femo ALIGNED(64) = VEC4(0x00adf880u);
328static inline __m128 dt_fast_expf_sse2(
const __m128 x)
330 __m128
f = dt__fone + (x * femo);
331 __m128i i = _mm_cvtps_epi32(
f);
332 __m128i
mask = _mm_srai_epi32(i, 31);
333 i = _mm_andnot_si128(mask, i);
334 return _mm_castsi128_ps(i);
345 const int i1 = 0x3f800000;
346 const int i2 = 0x3f000000;
347 const int k0 = i1 + (int)(x * (i2 - i1));
352 k.i = k0 >= 0x800000 ? k0 : 0;
361 const float i1 = (float)0x3f800000u;
362 const float i2 = (float)0x3f000000u;
363 const float k0 = i1 + x * (i2 - i1);
368 k.i = k0 >= (float)0x800000u ? k0 : 0;
381 return (
float)(int)x;
385 return -((float)(
int)-x) + 1.f;
static inline __m128 _mm_abs_ps(__m128 t)
395 static const uint32_t signmask[4]
__attribute__((aligned(64)))
396 = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
397 return _mm_and_ps(*(__m128 *)signmask, t);
416 static const float a = 4 / (
M_PI *
M_PI);
417 static const float p = 0.225f;
419 t = a * t * (
M_PI_F - fabsf(t));
421 return t * (p * (fabsf(t) - 1) + 1);
437static inline __m128 sinf_fast_sse(__m128 t)
439 static const __m128 a
441 static const __m128
p = { 0.225f, 0.225f, 0.225f, 0.225f };
445 const __m128 m1 = _mm_abs_ps(t);
446 const __m128 m2 = _mm_sub_ps(pi, m1);
447 const __m128 m3 = _mm_mul_ps(t, m2);
448 const __m128 m4 = _mm_mul_ps(a, m3);
451 const __m128 n1 = _mm_abs_ps(m4);
452 const __m128 n2 = _mm_mul_ps(m4, n1);
453 const __m128 n3 = _mm_sub_ps(n2, m4);
454 const __m128 n4 = _mm_mul_ps(p, n3);
456 return _mm_add_ps(n4, m4);
468static inline int ipow(
int base,
int exp)
496 dt_aligned_pixel_t sine)
499 static const dt_aligned_pixel_t a
504 static const dt_aligned_pixel_t p = { 0.225f, 0.225f, 0.225f, 0.225f };
505 static const dt_aligned_pixel_t one = { 1.0f, 1.0f, 1.0f, 1.0f };
507 dt_aligned_pixel_t abs_arg;
509 abs_arg[c] = (arg[c] < 0.0f) ? -arg[c] : arg[c];
510 dt_aligned_pixel_t scaled;
512 scaled[c] = a[c] * arg[c] * (pi[c] - abs_arg[c]);
513 dt_aligned_pixel_t abs_scaled;
515 abs_scaled[c] = (scaled[c] < 0.0f) ? -scaled[c] : scaled[c];
517 sine[c] = scaled[c] * (p[c] * (abs_scaled[c] - one[c]) + one[c]);
#define m
Definition basecurve.c:231
#define for_four_channels(_var,...)
Definition darktable.h:413
static float f(const float t, const float c, const float x)
Definition graduatednd.c:173
static float Kahan_sum(const float m, float *const __restrict__ c, const float add)
Definition math.h:97
static float scalar_product(const dt_aligned_pixel_t v_1, const dt_aligned_pixel_t v_2)
Definition math.h:196
static float ceil_fast(float x)
Definition math.h:377
static float clamp_range_f(const float x, const float low, const float high)
Definition math.h:88
static float sqf(const float x)
Definition math.h:215
static float dt_log2f(const float f)
Definition math.h:254
#define DT_M_LN2f
Definition math.h:47
static int ipow(int base, int exp)
Fast integer power, computing base^exp.
Definition math.h:468
static void mul_mat_vec_2(const float *m, const float *p, float *o)
Definition math.h:187
#define NORM_MIN
Definition math.h:27
static void upscale_vector(dt_aligned_pixel_t vector, const float scaling)
Definition math.h:244
static float fast_mexp2f(const float x)
Definition math.h:359
static float sinf_fast(float t)
Definition math.h:413
static float dt_fast_expf(const float x)
Definition math.h:282
static float dt_fast_hypotf(const float x, const float y)
Definition math.h:272
static float dt_fast_mexp2f(const float x)
Definition math.h:343
static void mat3mul(float *const __restrict__ dest, const float *const __restrict__ m1, const float *const __restrict__ m2)
Definition math.h:170
static float euclidean_norm(const dt_aligned_pixel_t vector)
Definition math.h:224
static void dt_vector_sin(const dt_aligned_pixel_t arg, dt_aligned_pixel_t sine)
Definition math.h:495
#define M_PI_F
Definition math.h:40
static float fastlog2(float x)
Definition math.h:127
static float fastlog(float x)
Definition math.h:143
static float Log2(float x)
Definition math.h:116
static float Log2Thres(float x, float Thres)
Definition math.h:121
static void dt_fast_expf_4wide(const float x[4], float result[4])
Definition math.h:297
#define M_PI
Definition math.h:37
static void mat3mulv(float *const __restrict__ dest, const float *const mat, const float *const __restrict__ v)
Definition math.h:153
static void downscale_vector(dt_aligned_pixel_t vector, const float scaling)
Definition math.h:233
c
Definition derive_filmic_v6_gamut_mapping.py:11
mask
Definition dtstyle_to_xmp.py:54
static float __attribute__((__unused__))
Definition thinplate.c:39
float f
Definition math.h:264
int k
Definition math.h:265