36#if !defined _XOPEN_SOURCE && !defined(__DragonFly__) && !defined(__FreeBSD__) && !defined(__NetBSD__) \
37 && !defined(__OpenBSD__) && !defined(_WIN32)
58 for(
int k = 0;
k < num_threads;
k++)
61 p->s[
k].state1 = 2 +
k;
74 p->s[thread_num].state0 = s0;
79 p->s[thread_num].state1 = s1;
86 ((
p->s[thread_num].state0 +
p->s[thread_num].state1) >> 41);
106#warning "MEXP is not defined. I assume MEXP is 19937."
118#define N (MEXP / 128 + 1)
169#include "SFMT-params607.h"
171#include "SFMT-params1279.h"
173#include "SFMT-params2281.h"
175#include "SFMT-params4253.h"
177#include "SFMT-params11213.h"
179#include "SFMT-params19937.h"
181#include "SFMT-params44497.h"
183#include "SFMT-params86243.h"
185#include "SFMT-params132049.h"
187#include "SFMT-params216091.h"
190#error "MEXP is not valid."
201#ifndef SFMT_PARAMS19937_H
202#define SFMT_PARAMS19937_H
209#define MSK1 0xdfffffefU
210#define MSK2 0xddfecb7fU
211#define MSK3 0xbffaffffU
212#define MSK4 0xbffffff6U
213#define PARITY1 0x00000001U
214#define PARITY2 0x00000000U
215#define PARITY3 0x00000000U
216#define PARITY4 0x13c9e684U
229 MSK1, MSK2, MSK3, MSK4 \
233 MSK2, MSK1, MSK4, MSK3 \
235#define ALTI_SL2_PERM \
237 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 \
239#define ALTI_SL2_PERM64 \
241 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 \
243#define ALTI_SR2_PERM \
245 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 \
247#define ALTI_SR2_PERM64 \
249 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 \
251#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6"
262typedef struct sfmt_state_t
268#if !defined(BIG_ENDIAN64) || defined(ONLY64)
316#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
318#elif defined(_MSC_VER) || defined(__BORLANDC__)
319typedef unsigned int uint32_t;
321#define inline __inline
325#define inline __inline__
330#if defined(_MSC_VER) || defined(__BORLANDC__)
340#define ALWAYSINLINE __attribute__((always_inline))
347#define PRE_ALWAYS __forceinline
349#define PRE_ALWAYS inline
352#define PRE_ALWAYS inline
355static inline uint32_t gen_rand32(
struct sfmt_state_t *s);
356static inline uint64_t gen_rand64(
struct sfmt_state_t *s);
357static inline void init_gen_rand(
struct sfmt_state_t *s, uint32_t seed)
__attribute__((unused));
358static inline void init_by_array(
struct sfmt_state_t *s, uint32_t *init_key,
int key_length)
360static inline const char *get_idstring(
void)
__attribute__((unused));
362inline static float to_real2f(uint32_t
v)
368 x.u = 0x3f800000 | (
v >> 9);
372inline static float genrand_real2f(
struct sfmt_state_t *s)
374 return to_real2f(gen_rand32(s));
396PRE_ALWAYS
static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i
d,
397 __m128i mask) ALWAYSINLINE;
408PRE_ALWAYS
static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i
d, __m128i mask)
412 x = _mm_load_si128(a);
413 y = _mm_srli_epi32(*b, SR1);
414 z = _mm_srli_si128(c, SR2);
415 v = _mm_slli_epi32(
d, SL1);
416 z = _mm_xor_si128(z,
x);
417 z = _mm_xor_si128(z,
v);
418 x = _mm_slli_si128(
x, SL2);
419 y = _mm_and_si128(y, mask);
420 z = _mm_xor_si128(z,
x);
421 z = _mm_xor_si128(z, y);
429inline static void gen_rand_all(
struct sfmt_state_t *s)
432 __m128i
r, r1, r2,
mask;
433 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
435 r1 = _mm_load_si128(&(s->sfmt[
N - 2].si));
436 r2 = _mm_load_si128(&(s->sfmt[
N - 1].si));
437 for(
i = 0;
i <
N - POS1;
i++)
439 r = mm_recursion(&(s->sfmt[
i].si), &(s->sfmt[
i + POS1].si), r1, r2, mask);
440 _mm_store_si128(&(s->sfmt[
i].si),
r);
446 r = mm_recursion(&(s->sfmt[
i].si), &(s->sfmt[
i + POS1 -
N].si), r1, r2, mask);
447 _mm_store_si128(&(s->sfmt[
i].si),
r);
460inline static void gen_rand_array(
struct sfmt_state_t *s, w128_t *array,
int size)
463 __m128i
r, r1, r2,
mask;
464 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
466 r1 = _mm_load_si128(&(s->sfmt[
N - 2].si));
467 r2 = _mm_load_si128(&(s->sfmt[
N - 1].si));
468 for(
i = 0;
i <
N - POS1;
i++)
470 r = mm_recursion(&(s->sfmt[
i].si), &(s->sfmt[
i + POS1].si), r1, r2, mask);
471 _mm_store_si128(&array[
i].si,
r);
477 r = mm_recursion(&(s->sfmt[
i].si), &array[
i + POS1 -
N].si, r1, r2, mask);
478 _mm_store_si128(&array[
i].si,
r);
485 r = mm_recursion(&array[
i -
N].si, &array[
i + POS1 -
N].si, r1, r2, mask);
486 _mm_store_si128(&array[
i].si,
r);
490 for(j = 0; j < 2 *
N -
size; j++)
492 r = _mm_load_si128(&array[j +
size -
N].si);
493 _mm_store_si128(&(s->sfmt[j].si),
r);
497 r = mm_recursion(&array[
i -
N].si, &array[
i + POS1 -
N].si, r1, r2, mask);
498 _mm_store_si128(&array[
i].si,
r);
499 _mm_store_si128(&(s->sfmt[j++].si),
r);
523#if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
524#define BIG_ENDIAN64 1
526#if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
527#define BIG_ENDIAN64 1
529#if defined(ONLY64) && !defined(BIG_ENDIAN64)
531#error "-DONLY64 must be specified with -DBIG_ENDIAN64"
549static w128_t sfmt[
N];
551static uint32_t *psfmt32 = &sfmt[0].u[0];
552#if !defined(BIG_ENDIAN64) || defined(ONLY64)
560static int initialized = 0;
562static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4};
568inline static int idxof(
int i);
569inline static void rshift128(w128_t *
out, w128_t
const *in,
int shift);
570inline static void lshift128(w128_t *
out, w128_t
const *in,
int shift);
571inline static void gen_rand_all(sfmt_state_t *s);
572inline static void gen_rand_array(sfmt_state_t *s, w128_t *array,
int size);
573inline static uint32_t func1(uint32_t
x);
574inline static uint32_t func2(uint32_t
x);
575static void period_certification(sfmt_state_t *s);
576#if defined(BIG_ENDIAN64) && !defined(ONLY64)
577inline static void swap(w128_t *array,
int size);
591inline static int idxof(
int i)
596inline static int idxof(
int i)
610inline static void rshift128(w128_t *
out, w128_t
const *in,
int shift)
617 oh = th >> (shift * 8);
618 ol = tl >> (shift * 8);
619 ol |= th << (64 - shift * 8);
620 out->u[0] = (uint32_t)(ol >> 32);
621 out->u[1] = (uint32_t)ol;
622 out->u[2] = (uint32_t)(oh >> 32);
623 out->u[3] = (uint32_t)oh;
626inline static void rshift128(w128_t *
out, w128_t
const *in,
int shift)
633 oh = th >> (shift * 8);
634 ol = tl >> (shift * 8);
635 ol |= th << (64 - shift * 8);
636 out->u[1] = (uint32_t)(ol >> 32);
637 out->u[0] = (uint32_t)ol;
638 out->u[3] = (uint32_t)(oh >> 32);
639 out->u[2] = (uint32_t)oh;
651inline static void lshift128(w128_t *
out, w128_t
const *in,
int shift)
658 oh = th << (shift * 8);
659 ol = tl << (shift * 8);
660 oh |= tl >> (64 - shift * 8);
661 out->u[0] = (uint32_t)(ol >> 32);
662 out->u[1] = (uint32_t)ol;
663 out->u[2] = (uint32_t)(oh >> 32);
664 out->u[3] = (uint32_t)oh;
667inline static void lshift128(w128_t *
out, w128_t
const *in,
int shift)
674 oh = th << (shift * 8);
675 ol = tl << (shift * 8);
676 oh |= tl >> (64 - shift * 8);
677 out->u[1] = (uint32_t)(ol >> 32);
678 out->u[0] = (uint32_t)ol;
679 out->u[3] = (uint32_t)(oh >> 32);
680 out->u[2] = (uint32_t)oh;
692#if(!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
694inline static void do_recursion(w128_t *
r, w128_t *a, w128_t *b, w128_t *c, w128_t *
d)
699 lshift128(&
x, a, SL2);
700 rshift128(&y, c, SR2);
701 r->u[0] = a->u[0] ^
x.u[0] ^ ((
b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (
d->u[0] << SL1);
702 r->u[1] = a->u[1] ^
x.u[1] ^ ((
b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (
d->u[1] << SL1);
703 r->u[2] = a->u[2] ^
x.u[2] ^ ((
b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (
d->u[2] << SL1);
704 r->u[3] = a->u[3] ^
x.u[3] ^ ((
b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (
d->u[3] << SL1);
707inline static void do_recursion(w128_t *
r, w128_t *a, w128_t *b, w128_t *c, w128_t *
d)
712 lshift128(&
x, a, SL2);
713 rshift128(&y, c, SR2);
714 r->u[0] = a->u[0] ^
x.u[0] ^ ((
b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (
d->u[0] << SL1);
715 r->u[1] = a->u[1] ^
x.u[1] ^ ((
b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (
d->u[1] << SL1);
716 r->u[2] = a->u[2] ^
x.u[2] ^ ((
b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (
d->u[2] << SL1);
717 r->u[3] = a->u[3] ^
x.u[3] ^ ((
b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (
d->u[3] << SL1);
722#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
723inline static void swap(w128_t *array,
int size)
727 uint32_t
x = array[
i].u[0];
728 uint32_t y = array[
i].u[2];
729 array[
i].u[0] = array[
i].u[1];
730 array[
i].u[2] = array[
i].u[3];
742static uint32_t func1(uint32_t
x)
744 return (
x ^ (
x >> 27)) * (uint32_t)1664525UL;
753static uint32_t func2(uint32_t
x)
755 return (
x ^ (
x >> 27)) * (uint32_t)1566083941UL;
761static void period_certification(sfmt_state_t *s)
765 for(
int i = 0;
i < 4;
i++) inner ^= s->psfmt32[idxof(
i)] & s->parity[
i];
766 for(
int i = 16;
i > 0;
i >>= 1) inner ^= inner >>
i;
774 for(
int i = 0;
i < 4;
i++)
777 for(
int j = 0; j < 32; j++)
779 if((work & s->parity[
i]) != 0)
781 s->psfmt32[idxof(
i)] ^= work;
797const char *get_idstring(
void)
808uint32_t gen_rand32(sfmt_state_t *s)
818 r = s->psfmt32[s->idx++];
831#if defined(BIG_ENDIAN64) && !defined(ONLY64)
845#if defined(BIG_ENDIAN64) && !defined(ONLY64)
846 r1 = s->psfmt32[s->idx];
847 r2 = s->psfmt32[s->idx + 1];
851 r = s->psfmt64[s->idx / 2];
864void init_gen_rand(sfmt_state_t *s, uint32_t seed)
868 s->psfmt32[idxof(0)] = seed;
869 for(
i = 1;
i < N32;
i++)
871 s->psfmt32[idxof(
i)] = 1812433253UL * (s->psfmt32[idxof(
i - 1)] ^ (s->psfmt32[idxof(
i - 1)] >> 30)) +
i;
874 period_certification(s);
884void init_by_array(sfmt_state_t *s, uint32_t *init_key,
int key_length)
908 mid = (
size - lag) / 2;
910 memset(s->sfmt, 0x8b,
sizeof(s->sfmt));
911 if(key_length + 1 > N32)
913 count = key_length + 1;
919 r = func1(s->psfmt32[idxof(0)] ^ s->psfmt32[idxof(mid)] ^ s->psfmt32[idxof(N32 - 1)]);
920 s->psfmt32[idxof(mid)] +=
r;
922 s->psfmt32[idxof(mid + lag)] +=
r;
923 s->psfmt32[idxof(0)] =
r;
926 for(
i = 1, j = 0; (j < count) && (j < key_length); j++)
928 r = func1(s->psfmt32[idxof(
i)] ^ s->psfmt32[idxof((
i + mid) % N32)]
929 ^ s->psfmt32[idxof((
i + N32 - 1) % N32)]);
930 s->psfmt32[idxof((
i + mid) % N32)] +=
r;
931 r += init_key[j] +
i;
932 s->psfmt32[idxof((
i + mid + lag) % N32)] +=
r;
933 s->psfmt32[idxof(
i)] =
r;
936 for(; j < count; j++)
938 r = func1(s->psfmt32[idxof(
i)] ^ s->psfmt32[idxof((
i + mid) % N32)]
939 ^ s->psfmt32[idxof((
i + N32 - 1) % N32)]);
940 s->psfmt32[idxof((
i + mid) % N32)] +=
r;
942 s->psfmt32[idxof((
i + mid + lag) % N32)] +=
r;
943 s->psfmt32[idxof(
i)] =
r;
946 for(j = 0; j < N32; j++)
948 r = func2(s->psfmt32[idxof(
i)] + s->psfmt32[idxof((
i + mid) % N32)]
949 + s->psfmt32[idxof((
i + N32 - 1) % N32)]);
950 s->psfmt32[idxof((
i + mid) % N32)] ^=
r;
952 s->psfmt32[idxof((
i + mid + lag) % N32)] ^=
r;
953 s->psfmt32[idxof(
i)] =
r;
958 period_certification(s);
966 sizeof(sfmt_state_t) * num_threads, 0);
967 p->s = (sfmt_state_t **)calloc(num_threads,
sizeof(sfmt_state_t *));
968 p->num = num_threads;
971 for(
int i = 0;
i < (int)num_threads;
i++)
973 p->s[
i] = states +
i;
974#if !defined(BIG_ENDIAN64) || defined(ONLY64)
975 p->s[
i]->psfmt64 = (
uint64_t *)&(
p->s[
i]->sfmt[0].u[0]);
977 p->s[
i]->psfmt32 = &(
p->s[
i]->sfmt[0].u[0]);
978 p->s[
i]->initialized = 0;
979 p->s[
i]->parity[0] = PARITY1;
980 p->s[
i]->parity[1] = PARITY2;
981 p->s[
i]->parity[2] = PARITY3;
982 p->s[
i]->parity[3] = PARITY4;
983 init_gen_rand(
p->s[
i], seed);
996 return genrand_real2f(
p->s[thread_num]);
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
Definition chromatic_adaptation.h:309
const float i
Definition colorspaces_inline_conversions.h:440
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:102
const float d
Definition colorspaces_inline_conversions.h:680
const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:42
darktable_t darktable
Definition darktable.c:173
#define dt_pixelpipe_cache_alloc_align_cache(size, id)
Definition darktable.h:433
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Enable aggressive floating-point arithmetic optimizations, in denormals handling. Set through user pr...
Definition darktable.h:524
static int dt_get_thread_num()
Definition darktable.h:291
#define dt_free(ptr)
Definition darktable.h:456
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:453
static const float x
Definition iop_profile.h:235
const float v
Definition iop_profile.h:221
static void swap(float *x, float *y)
Definition lightroom.c:1022
float *const restrict const size_t k
Definition luminance_mask.h:78
size_t size
Definition mipmap_cache.c:3
mask
Definition dtstyle_to_xmp.py:79
#define N
Definition noiseprofile.c:158
static float dt_points_get()
Definition points.h:90
static void dt_points_cleanup(dt_points_t *p)
Definition points.h:65
static void dt_points_init(dt_points_t *p, const unsigned int num_threads)
Definition points.h:55
static float dt_points_get_for(dt_points_t *p, const unsigned int thread_num)
Definition points.h:70
const float r
Definition src/develop/noise_generator.h:101
unsigned __int64 uint64_t
Definition strptime.c:75
struct dt_points_t * points
Definition darktable.h:782
uint64_t state1
Definition points.h:47
uint64_t state0
Definition points.h:46
dt_points_state_t * s
Definition points.h:52