23#if !defined _XOPEN_SOURCE && !defined(__DragonFly__) && !defined(__FreeBSD__) && !defined(__NetBSD__) \
24 && !defined(__OpenBSD__) && !defined(_WIN32)
45 for(
int k = 0; k < num_threads; k++)
47 p->s[k].state0 = 1 + k;
48 p->s[k].state1 = 2 + k;
59 uint64_t s1 = p->s[thread_num].state0;
60 uint64_t s0 = p->s[thread_num].state1;
61 p->s[thread_num].state0 = s0;
66 p->s[thread_num].state1 = s1;
73 ((p->s[thread_num].state0 + p->s[thread_num].state1) >> 41);
96#warning "MEXP is not defined. I assume MEXP is 19937."
108#define N (MEXP / 128 + 1)
159#include "SFMT-params607.h"
161#include "SFMT-params1279.h"
163#include "SFMT-params2281.h"
165#include "SFMT-params4253.h"
167#include "SFMT-params11213.h"
169#include "SFMT-params19937.h"
171#include "SFMT-params44497.h"
173#include "SFMT-params86243.h"
175#include "SFMT-params132049.h"
177#include "SFMT-params216091.h"
180#error "MEXP is not valid."
191#ifndef SFMT_PARAMS19937_H
192#define SFMT_PARAMS19937_H
199#define MSK1 0xdfffffefU
200#define MSK2 0xddfecb7fU
201#define MSK3 0xbffaffffU
202#define MSK4 0xbffffff6U
203#define PARITY1 0x00000001U
204#define PARITY2 0x00000000U
205#define PARITY3 0x00000000U
206#define PARITY4 0x13c9e684U
219 MSK1, MSK2, MSK3, MSK4 \
223 MSK2, MSK1, MSK4, MSK3 \
225#define ALTI_SL2_PERM \
227 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 \
229#define ALTI_SL2_PERM64 \
231 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 \
233#define ALTI_SR2_PERM \
235 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 \
237#define ALTI_SR2_PERM64 \
239 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 \
241#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6"
252typedef struct sfmt_state_t
258#if !defined(BIG_ENDIAN64) || defined(ONLY64)
306#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
308#elif defined(_MSC_VER) || defined(__BORLANDC__)
309typedef unsigned int uint32_t;
311#define inline __inline
315#define inline __inline__
320#if defined(_MSC_VER) || defined(__BORLANDC__)
330#define ALWAYSINLINE __attribute__((always_inline))
337#define PRE_ALWAYS __forceinline
339#define PRE_ALWAYS inline
342#define PRE_ALWAYS inline
345static inline uint32_t gen_rand32(
struct sfmt_state_t *s);
346static inline uint64_t gen_rand64(
struct sfmt_state_t *s);
347static inline void init_gen_rand(
struct sfmt_state_t *s, uint32_t seed)
__attribute__((unused));
348static inline void init_by_array(
struct sfmt_state_t *s, uint32_t *init_key,
int key_length)
350static inline const char *get_idstring(
void)
__attribute__((unused));
352inline static float to_real2f(uint32_t v)
358 x.u = 0x3f800000 | (v >> 9);
362inline static float genrand_real2f(
struct sfmt_state_t *s)
364 return to_real2f(gen_rand32(s));
386PRE_ALWAYS
static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i d,
387 __m128i mask) ALWAYSINLINE;
398PRE_ALWAYS
static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask)
402 x = _mm_load_si128(a);
403 y = _mm_srli_epi32(*b, SR1);
404 z = _mm_srli_si128(c, SR2);
405 v = _mm_slli_epi32(d, SL1);
406 z = _mm_xor_si128(z, x);
407 z = _mm_xor_si128(z, v);
408 x = _mm_slli_si128(x, SL2);
409 y = _mm_and_si128(y, mask);
410 z = _mm_xor_si128(z, x);
411 z = _mm_xor_si128(z, y);
419inline static void gen_rand_all(
struct sfmt_state_t *s)
422 __m128i
r, r1, r2,
mask;
423 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
425 r1 = _mm_load_si128(&(s->sfmt[
N - 2].si));
426 r2 = _mm_load_si128(&(s->sfmt[
N - 1].si));
427 for(i = 0; i <
N - POS1; i++)
429 r = mm_recursion(&(s->sfmt[i].si), &(s->sfmt[i + POS1].si), r1, r2, mask);
430 _mm_store_si128(&(s->sfmt[i].si), r);
436 r = mm_recursion(&(s->sfmt[i].si), &(s->sfmt[i + POS1 -
N].si), r1, r2, mask);
437 _mm_store_si128(&(s->sfmt[i].si), r);
450inline static void gen_rand_array(
struct sfmt_state_t *s, w128_t *array,
int size)
453 __m128i
r, r1, r2,
mask;
454 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
456 r1 = _mm_load_si128(&(s->sfmt[
N - 2].si));
457 r2 = _mm_load_si128(&(s->sfmt[
N - 1].si));
458 for(i = 0; i <
N - POS1; i++)
460 r = mm_recursion(&(s->sfmt[i].si), &(s->sfmt[i + POS1].si), r1, r2, mask);
461 _mm_store_si128(&array[i].si, r);
467 r = mm_recursion(&(s->sfmt[i].si), &array[i + POS1 -
N].si, r1, r2, mask);
468 _mm_store_si128(&array[i].si, r);
473 for(; i <
size -
N; i++)
475 r = mm_recursion(&array[i -
N].si, &array[i + POS1 -
N].si, r1, r2, mask);
476 _mm_store_si128(&array[i].si, r);
480 for(j = 0; j < 2 *
N -
size; j++)
482 r = _mm_load_si128(&array[j +
size -
N].si);
483 _mm_store_si128(&(s->sfmt[j].si), r);
487 r = mm_recursion(&array[i -
N].si, &array[i + POS1 -
N].si, r1, r2, mask);
488 _mm_store_si128(&array[i].si, r);
489 _mm_store_si128(&(s->sfmt[j++].si), r);
513#if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
514#define BIG_ENDIAN64 1
516#if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
517#define BIG_ENDIAN64 1
519#if defined(ONLY64) && !defined(BIG_ENDIAN64)
521#error "-DONLY64 must be specified with -DBIG_ENDIAN64"
539static w128_t sfmt[
N];
541static uint32_t *psfmt32 = &sfmt[0].u[0];
542#if !defined(BIG_ENDIAN64) || defined(ONLY64)
550static int initialized = 0;
552static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4};
558inline static int idxof(
int i);
559inline static void rshift128(w128_t *out, w128_t
const *in,
int shift);
560inline static void lshift128(w128_t *out, w128_t
const *in,
int shift);
561inline static void gen_rand_all(sfmt_state_t *s);
562inline static void gen_rand_array(sfmt_state_t *s, w128_t *array,
int size);
563inline static uint32_t func1(uint32_t x);
564inline static uint32_t func2(uint32_t x);
565static void period_certification(sfmt_state_t *s);
566#if defined(BIG_ENDIAN64) && !defined(ONLY64)
567inline static void swap(w128_t *array,
int size);
581inline static int idxof(
int i)
586inline static int idxof(
int i)
600inline static void rshift128(w128_t *out, w128_t
const *in,
int shift)
607 oh = th >> (shift * 8);
608 ol = tl >> (shift * 8);
609 ol |= th << (64 - shift * 8);
610 out->u[0] = (uint32_t)(ol >> 32);
611 out->u[1] = (uint32_t)ol;
612 out->u[2] = (uint32_t)(oh >> 32);
613 out->u[3] = (uint32_t)oh;
616inline static void rshift128(w128_t *out, w128_t
const *in,
int shift)
623 oh = th >> (shift * 8);
624 ol = tl >> (shift * 8);
625 ol |= th << (64 - shift * 8);
626 out->u[1] = (uint32_t)(ol >> 32);
627 out->u[0] = (uint32_t)ol;
628 out->u[3] = (uint32_t)(oh >> 32);
629 out->u[2] = (uint32_t)oh;
641inline static void lshift128(w128_t *out, w128_t
const *in,
int shift)
648 oh = th << (shift * 8);
649 ol = tl << (shift * 8);
650 oh |= tl >> (64 - shift * 8);
651 out->u[0] = (uint32_t)(ol >> 32);
652 out->u[1] = (uint32_t)ol;
653 out->u[2] = (uint32_t)(oh >> 32);
654 out->u[3] = (uint32_t)oh;
657inline static void lshift128(w128_t *out, w128_t
const *in,
int shift)
664 oh = th << (shift * 8);
665 ol = tl << (shift * 8);
666 oh |= tl >> (64 - shift * 8);
667 out->u[1] = (uint32_t)(ol >> 32);
668 out->u[0] = (uint32_t)ol;
669 out->u[3] = (uint32_t)(oh >> 32);
670 out->u[2] = (uint32_t)oh;
682#if(!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
684inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
689 lshift128(&x, a, SL2);
690 rshift128(&y, c, SR2);
691 r->u[0] = a->u[0] ^ x.u[0] ^ ((
b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (d->u[0] << SL1);
692 r->u[1] = a->u[1] ^ x.u[1] ^ ((
b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (d->u[1] << SL1);
693 r->u[2] = a->u[2] ^ x.u[2] ^ ((
b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (d->u[2] << SL1);
694 r->u[3] = a->u[3] ^ x.u[3] ^ ((
b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (d->u[3] << SL1);
697inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
702 lshift128(&x, a, SL2);
703 rshift128(&y, c, SR2);
704 r->u[0] = a->u[0] ^ x.u[0] ^ ((
b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (d->u[0] << SL1);
705 r->u[1] = a->u[1] ^ x.u[1] ^ ((
b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (d->u[1] << SL1);
706 r->u[2] = a->u[2] ^ x.u[2] ^ ((
b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (d->u[2] << SL1);
707 r->u[3] = a->u[3] ^ x.u[3] ^ ((
b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (d->u[3] << SL1);
712#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
713inline static void swap(w128_t *array,
int size)
715 for(
int i = 0; i <
size; i++)
717 uint32_t x = array[i].u[0];
718 uint32_t y = array[i].u[2];
719 array[i].u[0] = array[i].u[1];
720 array[i].u[2] = array[i].u[3];
732static uint32_t func1(uint32_t x)
734 return (x ^ (x >> 27)) * (uint32_t)1664525UL;
743static uint32_t func2(uint32_t x)
745 return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
751static void period_certification(sfmt_state_t *s)
755 for(
int i = 0; i < 4; i++) inner ^= s->psfmt32[idxof(i)] & s->parity[i];
756 for(
int i = 16; i > 0; i >>= 1) inner ^= inner >> i;
764 for(
int i = 0; i < 4; i++)
767 for(
int j = 0; j < 32; j++)
769 if((work & s->parity[i]) != 0)
771 s->psfmt32[idxof(i)] ^= work;
787const char *get_idstring(
void)
798uint32_t gen_rand32(sfmt_state_t *s)
808 r = s->psfmt32[s->idx++];
821#if defined(BIG_ENDIAN64) && !defined(ONLY64)
835#if defined(BIG_ENDIAN64) && !defined(ONLY64)
836 r1 = s->psfmt32[s->idx];
837 r2 = s->psfmt32[s->idx + 1];
841 r = s->psfmt64[s->idx / 2];
854void init_gen_rand(sfmt_state_t *s, uint32_t seed)
858 s->psfmt32[idxof(0)] = seed;
859 for(i = 1; i < N32; i++)
861 s->psfmt32[idxof(i)] = 1812433253UL * (s->psfmt32[idxof(i - 1)] ^ (s->psfmt32[idxof(i - 1)] >> 30)) + i;
864 period_certification(s);
874void init_by_array(sfmt_state_t *s, uint32_t *init_key,
int key_length)
898 mid = (
size - lag) / 2;
900 memset(s->sfmt, 0x8b,
sizeof(s->sfmt));
901 if(key_length + 1 > N32)
903 count = key_length + 1;
909 r = func1(s->psfmt32[idxof(0)] ^ s->psfmt32[idxof(mid)] ^ s->psfmt32[idxof(N32 - 1)]);
910 s->psfmt32[idxof(mid)] +=
r;
912 s->psfmt32[idxof(mid + lag)] +=
r;
913 s->psfmt32[idxof(0)] =
r;
916 for(i = 1, j = 0; (j < count) && (j < key_length); j++)
918 r = func1(s->psfmt32[idxof(i)] ^ s->psfmt32[idxof((i + mid) % N32)]
919 ^ s->psfmt32[idxof((i + N32 - 1) % N32)]);
920 s->psfmt32[idxof((i + mid) % N32)] +=
r;
921 r += init_key[j] + i;
922 s->psfmt32[idxof((i + mid + lag) % N32)] +=
r;
923 s->psfmt32[idxof(i)] =
r;
926 for(; j < count; j++)
928 r = func1(s->psfmt32[idxof(i)] ^ s->psfmt32[idxof((i + mid) % N32)]
929 ^ s->psfmt32[idxof((i + N32 - 1) % N32)]);
930 s->psfmt32[idxof((i + mid) % N32)] +=
r;
932 s->psfmt32[idxof((i + mid + lag) % N32)] +=
r;
933 s->psfmt32[idxof(i)] =
r;
936 for(j = 0; j < N32; j++)
938 r = func2(s->psfmt32[idxof(i)] + s->psfmt32[idxof((i + mid) % N32)]
939 + s->psfmt32[idxof((i + N32 - 1) % N32)]);
940 s->psfmt32[idxof((i + mid) % N32)] ^=
r;
942 s->psfmt32[idxof((i + mid + lag) % N32)] ^=
r;
943 s->psfmt32[idxof(i)] =
r;
948 period_certification(s);
955 sfmt_state_t *states = (sfmt_state_t *)
dt_alloc_align(
sizeof(sfmt_state_t) * num_threads);
956 p->s = (sfmt_state_t **)calloc(num_threads,
sizeof(sfmt_state_t *));
957 p->num = num_threads;
960 for(
int i = 0; i < (int)num_threads; i++)
962 p->s[i] = states + i;
963#if !defined(BIG_ENDIAN64) || defined(ONLY64)
964 p->s[i]->psfmt64 = (
uint64_t *)&(
p->s[i]->sfmt[0].u[0]);
966 p->s[i]->psfmt32 = &(
p->s[i]->sfmt[0].u[0]);
967 p->s[i]->initialized = 0;
968 p->s[i]->parity[0] = PARITY1;
969 p->s[i]->parity[1] = PARITY2;
970 p->s[i]->parity[2] = PARITY3;
971 p->s[i]->parity[3] = PARITY4;
972 init_gen_rand(
p->s[i], seed);
985 return genrand_real2f(
p->s[thread_num]);
darktable_t darktable
Definition darktable.c:111
static int dt_get_thread_num()
Definition darktable.h:227
#define dt_free_align(A)
Definition darktable.h:334
static float f(const float t, const float c, const float x)
Definition graduatednd.c:173
static void swap(float *x, float *y)
Definition lightroom.c:1042
size_t size
Definition mipmap_cache.c:3
r
Definition derive_filmic_v6_gamut_mapping.py:17
mask
Definition dtstyle_to_xmp.py:54
#define N
Definition noiseprofile.c:139
static float dt_points_get()
Definition points.h:77
static void dt_points_cleanup(dt_points_t *p)
Definition points.h:52
static void dt_points_init(dt_points_t *p, const unsigned int num_threads)
Definition points.h:42
static float dt_points_get_for(dt_points_t *p, const unsigned int thread_num)
Definition points.h:57
unsigned __int64 uint64_t
Definition strptime.c:71
struct dt_points_t * points
Definition darktable.h:549
uint64_t state1
Definition points.h:34
uint64_t state0
Definition points.h:33
dt_points_state_t * s
Definition points.h:39
#define dt_alloc_align(B)
Definition tests/cache.c:22
static float __attribute__((__unused__))
Definition thinplate.c:39