35#define NORM_MIN 1.52587890625e-05f
40#define M_LN10 2.30258509299404568402
45#define M_PI 3.14159265358979323846
48#define M_PI_F 3.14159265358979324f
52#define DT_M_PI_F (3.14159265358979324f)
53#define DT_M_PI (3.14159265358979324)
55#define DT_M_LN2f (0.6931471805599453f)
60 #define DT_FMA(x, y, z) fmaf(x, y, z)
62 #define DT_FMA(x, y, z) ((x) * (y) + (z))
67#define PHI 1.61803398874989479F
72#define INVPHI 0.61803398874989479F
/* Clamp A to the range [L, H]: yields L whenever (A) > (L) is false, otherwise
 * H whenever (A) < (H) is false, otherwise A itself.
 * Function-like macro: A can be evaluated up to three times and L/H up to
 * twice, so do not pass expressions with side effects. */
76#define CLAMPS(A, L, H) ((A) > (L) ? ((A) < (H) ? (A) : (H)) : (L))
/* Clamp x to the range [0, 1]: yields 0 whenever (x) >= 0 is false, 1 whenever
 * (x) <= 1 is false, otherwise x itself.
 * Function-like macro: x can be evaluated up to three times, so do not pass
 * expressions with side effects. */
81#define CLIP(x) (((x) >= 0) ? ((x) <= 1 ? (x) : 1) : 0)
/* SSE counterpart of CLIP: takes the packed maximum of X and an all-zero
 * vector, then the packed minimum of that result and a vector with every lane
 * set to 1.0. X is expanded exactly once. */
82#define MM_CLIP_PS(X) (_mm_min_ps(_mm_max_ps((X), _mm_setzero_ps()), _mm_set1_ps(1.0)))
/* Clamp x to the range [0, 100]: yields 0.0 when x < 0, 100.0 when x > 100.0,
 * otherwise x itself.
 * Every use of the argument is parenthesized so that low-precedence argument
 * expressions (e.g. a conditional expression) expand as a single operand,
 * matching the other clamp macros in this header.
 * Function-like macro: x can be evaluated up to three times, so do not pass
 * expressions with side effects. */
#define LCLIP(x) (((x) < 0) ? 0.0 : ((x) > 100.0) ? 100.0 : (x))
/* Clamp a to the range [mn, mx]: yields mn whenever (a) >= (mn) is false,
 * otherwise mx whenever (a) <= (mx) is false, otherwise a itself.
 * Function-like macro: a can be evaluated up to three times and mn/mx up to
 * twice, so do not pass expressions with side effects. */
89#define CLAMPF(a, mn, mx) ((a) >= (mn) ? ((a) <= (mx) ? (a) : (mx)) : (mn))
/* SSE clamp: takes the packed maximum of a and mn, then the packed minimum of
 * mx and that result. Each argument is expanded exactly once. */
93#define MMCLAMPPS(a, mn, mx) (_mm_min_ps((mx), _mm_max_ps((a), (mn))))
96static inline float clamp_range_f(
const float x,
const float low,
const float high)
98 return x > high ? high : (
x < low ? low :
x);
103#pragma omp declare simd aligned(c)
105static inline float Kahan_sum(
const float m,
float *
const __restrict__
c,
const float add)
107 const float t1 = add - (*c);
108 const float t2 =
m + t1;
115static inline __m128 Kahan_sum_sse(
const __m128
m, __m128 *
const __restrict__
c,
const __m128 add)
117 const __m128 t1 = add - (*c);
118 const __m128 t2 =
m + t1;
137 union {
float f; uint32_t
i; } vx = {
x };
138 union { uint32_t
i;
float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 };
142 y *= 1.1920928955078125e-7f;
144 return y - 124.22551499f
145 - 1.498030302f * mx.f
146 - 1.72587999f / (0.3520887068f + mx.f);
159#pragma omp declare simd
161static inline void mat3mulv(
float *
const __restrict__ dest,
const float *
const mat,
const float *
const __restrict__
v)
163 for(
int k = 0; k < 3; k++)
166 for(
int i = 0;
i < 3;
i++)
167 x += mat[3 * k +
i] *
v[
i];
176#pragma omp declare simd
178static inline void mat3mul(
float *
const __restrict__ dest,
const float *
const __restrict__ m1,
const float *
const __restrict__ m2)
180 for(
int k = 0; k < 3; k++)
182 for(
int i = 0;
i < 3;
i++)
185 for(
int j = 0; j < 3; j++)
186 x += m1[3 * k + j] * m2[3 * j +
i];
193#pragma omp declare simd
197 o[0] =
p[0] *
m[0] +
p[1] *
m[1];
198 o[1] =
p[0] *
m[2] +
p[1] *
m[3];
202#pragma omp declare simd uniform(v_2) aligned(v_1, v_2:16)
204static inline float scalar_product(
const dt_aligned_pixel_t v_1,
const dt_aligned_pixel_t v_2)
212#pragma omp simd aligned(v_1, v_2:16) reduction(+:acc)
214 for(
size_t c = 0;
c < 3;
c++) acc += v_1[
c] * v_2[
c];
221#pragma omp declare simd
223static inline float sqf(
const float x)
230#pragma omp declare simd aligned(vector:16)
239#pragma omp declare simd aligned(vector:16)
250#pragma omp declare simd aligned(vector:16)
260#pragma omp declare simd
267 return logf(
f) / logf(2.0f);
278#pragma omp declare simd
282 return sqrtf(
x *
x + y * y);
288#pragma omp declare simd
294 const int i1 = 0x3f800000u;
296 const int i2 = 0x402DF854u;
299 const int k0 = i1 +
x * (i2 - i1);
301 u.
k = k0 > 0 ? k0 : 0;
309 const int i1 = 0x3f800000u;
311 const int i2 = 0x402DF854u;
316#pragma omp simd aligned(x, result)
318 for(
size_t c = 0;
c < 4;
c++)
320 const int k0 = i1 + (int)(
x[
c] * (i2 - i1));
321 u[
c].
k = k0 > 0 ? k0 : 0;
327#define ALIGNED(a) __attribute__((aligned(a)))
334static const __m128 dt__fone ALIGNED(64) = VEC4(0x3f800000u);
335static const __m128 femo ALIGNED(64) = VEC4(0x00adf880u);
336static inline __m128 dt_fast_expf_sse2(
const __m128
x)
338 __m128
f = dt__fone + (
x * femo);
339 __m128i
i = _mm_cvtps_epi32(
f);
340 __m128i
mask = _mm_srai_epi32(
i, 31);
341 i = _mm_andnot_si128(mask,
i);
342 return _mm_castsi128_ps(
i);
353 const int i1 = 0x3f800000;
354 const int i2 = 0x3f000000;
355 const int k0 = i1 + (int)(
x * (i2 - i1));
360 k.i = k0 >= 0x800000 ? k0 : 0;
369 const float i1 = (float)0x3f800000u;
370 const float i2 = (float)0x3f000000u;
371 const float k0 = i1 +
x * (i2 - i1);
376 k.i = k0 >= (float)0x800000u ? k0 : 0;
389 return (
float)(int)
x;
393 return -((float)(
int)-
x) + 1.f;
static inline __m128 _mm_abs_ps(__m128 t)
403 static const uint32_t signmask[4]
__attribute__((aligned(64)))
404 = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
405 return _mm_and_ps(*(__m128 *)signmask,
t);
424 static const float a = 4 / (
M_PI *
M_PI);
425 static const float p = 0.225f;
429 return t * (
p * (fabsf(
t) - 1) + 1);
445static inline __m128 sinf_fast_sse(__m128
t)
447 static const __m128
a
449 static const __m128
p = { 0.225f, 0.225f, 0.225f, 0.225f };
453 const __m128 m1 = _mm_abs_ps(
t);
454 const __m128 m2 = _mm_sub_ps(pi, m1);
455 const __m128 m3 = _mm_mul_ps(
t, m2);
456 const __m128 m4 = _mm_mul_ps(
a, m3);
459 const __m128 n1 = _mm_abs_ps(m4);
460 const __m128 n2 = _mm_mul_ps(m4, n1);
461 const __m128 n3 = _mm_sub_ps(n2, m4);
462 const __m128 n4 = _mm_mul_ps(
p, n3);
464 return _mm_add_ps(n4, m4);
476static inline int ipow(
int base,
int exp)
504 dt_aligned_pixel_t sine)
507 static const dt_aligned_pixel_t
a
512 static const dt_aligned_pixel_t
p = { 0.225f, 0.225f, 0.225f, 0.225f };
513 static const dt_aligned_pixel_t one = { 1.0f, 1.0f, 1.0f, 1.0f };
515 dt_aligned_pixel_t abs_arg;
517 abs_arg[
c] = (arg[
c] < 0.0f) ? -arg[
c] : arg[
c];
518 dt_aligned_pixel_t scaled;
520 scaled[
c] =
a[
c] * arg[
c] * (pi[
c] - abs_arg[
c]);
521 dt_aligned_pixel_t abs_scaled;
523 abs_scaled[
c] = (scaled[
c] < 0.0f) ? -scaled[
c] : scaled[
c];
525 sine[
c] = scaled[
c] * (
p[
c] * (abs_scaled[
c] - one[
c]) + one[
c]);
534 if(
x <= 1e-16f)
return 0.0f;
542 conv.i = 0x5f3759dfu - (conv.i >> 1);
545 y = y * (1.5f - 0.5f *
x * y * y);
#define m
Definition basecurve.c:277
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
Definition chromatic_adaptation.h:315
static const float scaling
Definition chromatic_adaptation.h:299
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:256
const float a
Definition colorspaces_inline_conversions.h:1292
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Multi-tap smudge source sample with directional jitter.
Definition darktable.h:448
#define for_four_channels(_var,...)
Definition darktable.h:584
static const float x
Definition iop_profile.h:239
const int t
Definition iop_profile.h:227
static const float v
Definition iop_profile.h:223
static float Kahan_sum(const float m, float *const __restrict__ c, const float add)
Definition math.h:105
static float scalar_product(const dt_aligned_pixel_t v_1, const dt_aligned_pixel_t v_2)
Definition math.h:204
static float ceil_fast(float x)
Definition math.h:385
static float clamp_range_f(const float x, const float low, const float high)
Definition math.h:96
static float sqf(const float x)
Definition math.h:223
static float dt_log2f(const float f)
Definition math.h:262
#define DT_M_LN2f
Definition math.h:55
static int ipow(int base, int exp)
Fast integer power, computing base^exp.
Definition math.h:476
static void mul_mat_vec_2(const float *m, const float *p, float *o)
Definition math.h:195
#define NORM_MIN
Definition math.h:35
static void upscale_vector(dt_aligned_pixel_t vector, const float scaling)
Definition math.h:252
static float f_inv_sqrtf(const float x)
Definition math.h:532
static float fast_mexp2f(const float x)
Definition math.h:367
static float sinf_fast(float t)
Definition math.h:421
static float dt_fast_expf(const float x)
Definition math.h:290
static float dt_fast_hypotf(const float x, const float y)
Definition math.h:280
static float dt_fast_mexp2f(const float x)
Definition math.h:351
static void mat3mul(float *const __restrict__ dest, const float *const __restrict__ m1, const float *const __restrict__ m2)
Definition math.h:178
static float euclidean_norm(const dt_aligned_pixel_t vector)
Definition math.h:232
static void dt_vector_sin(const dt_aligned_pixel_t arg, dt_aligned_pixel_t sine)
Definition math.h:503
#define M_PI_F
Definition math.h:48
static float fastlog2(float x)
Definition math.h:135
static float fastlog(float x)
Definition math.h:151
static float Log2(float x)
Definition math.h:124
static float Log2Thres(float x, float Thres)
Definition math.h:129
static void dt_fast_expf_4wide(const float x[4], float result[4])
Definition math.h:305
#define M_PI
Definition math.h:45
static void mat3mulv(float *const __restrict__ dest, const float *const mat, const float *const __restrict__ v)
Definition math.h:161
static void downscale_vector(dt_aligned_pixel_t vector, const float scaling)
Definition math.h:241
mask
Definition dtstyle_to_xmp.py:79
float f
Definition math.h:272
int k
Definition math.h:273