36#if !defined _XOPEN_SOURCE && !defined(__DragonFly__) && !defined(__FreeBSD__) && !defined(__NetBSD__) \
37 && !defined(__OpenBSD__) && !defined(_WIN32)
58 for(
int k = 0; k < num_threads; k++)
61 p->s[k].state1 = 2 + k;
74 p->s[thread_num].state0 = s0;
79 p->s[thread_num].state1 = s1;
86 ((
p->s[thread_num].state0 +
p->s[thread_num].state1) >> 41);
109#warning "MEXP is not defined. I assume MEXP is 19937."
121#define N (MEXP / 128 + 1)
172#include "SFMT-params607.h"
174#include "SFMT-params1279.h"
176#include "SFMT-params2281.h"
178#include "SFMT-params4253.h"
180#include "SFMT-params11213.h"
182#include "SFMT-params19937.h"
184#include "SFMT-params44497.h"
186#include "SFMT-params86243.h"
188#include "SFMT-params132049.h"
190#include "SFMT-params216091.h"
193#error "MEXP is not valid."
204#ifndef SFMT_PARAMS19937_H
205#define SFMT_PARAMS19937_H
212#define MSK1 0xdfffffefU
213#define MSK2 0xddfecb7fU
214#define MSK3 0xbffaffffU
215#define MSK4 0xbffffff6U
216#define PARITY1 0x00000001U
217#define PARITY2 0x00000000U
218#define PARITY3 0x00000000U
219#define PARITY4 0x13c9e684U
232 MSK1, MSK2, MSK3, MSK4 \
236 MSK2, MSK1, MSK4, MSK3 \
238#define ALTI_SL2_PERM \
240 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 \
242#define ALTI_SL2_PERM64 \
244 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 \
246#define ALTI_SR2_PERM \
248 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 \
250#define ALTI_SR2_PERM64 \
252 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 \
254#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6"
265typedef struct sfmt_state_t
271#if !defined(BIG_ENDIAN64) || defined(ONLY64)
319#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
321#elif defined(_MSC_VER) || defined(__BORLANDC__)
322typedef unsigned int uint32_t;
324#define inline __inline
328#define inline __inline__
333#if defined(_MSC_VER) || defined(__BORLANDC__)
343#define ALWAYSINLINE __attribute__((always_inline))
350#define PRE_ALWAYS __forceinline
352#define PRE_ALWAYS inline
355#define PRE_ALWAYS inline
358static inline uint32_t gen_rand32(
struct sfmt_state_t *s);
359static inline uint64_t gen_rand64(
struct sfmt_state_t *s);
360static inline void init_gen_rand(
struct sfmt_state_t *s, uint32_t seed)
__attribute__((unused));
361static inline void init_by_array(
struct sfmt_state_t *s, uint32_t *init_key,
int key_length)
363static inline const char *get_idstring(
void)
__attribute__((unused));
365inline static float to_real2f(uint32_t
v)
371 x.u = 0x3f800000 | (
v >> 9);
375inline static float genrand_real2f(
struct sfmt_state_t *s)
377 return to_real2f(gen_rand32(s));
399PRE_ALWAYS
static __m128i mm_recursion(__m128i *
a, __m128i *
b, __m128i
c, __m128i
d,
400 __m128i mask) ALWAYSINLINE;
411PRE_ALWAYS
static __m128i mm_recursion(__m128i *
a, __m128i *
b, __m128i
c, __m128i
d, __m128i mask)
415 x = _mm_load_si128(
a);
416 y = _mm_srli_epi32(*
b, SR1);
417 z = _mm_srli_si128(
c, SR2);
418 v = _mm_slli_epi32(
d, SL1);
419 z = _mm_xor_si128(z,
x);
420 z = _mm_xor_si128(z,
v);
421 x = _mm_slli_si128(
x, SL2);
422 y = _mm_and_si128(y, mask);
423 z = _mm_xor_si128(z,
x);
424 z = _mm_xor_si128(z, y);
432inline static void gen_rand_all(
struct sfmt_state_t *s)
435 __m128i
r, r1, r2,
mask;
436 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
438 r1 = _mm_load_si128(&(s->sfmt[
N - 2].si));
439 r2 = _mm_load_si128(&(s->sfmt[
N - 1].si));
440 for(
i = 0;
i <
N - POS1;
i++)
442 r = mm_recursion(&(s->sfmt[
i].si), &(s->sfmt[
i + POS1].si), r1, r2, mask);
443 _mm_store_si128(&(s->sfmt[
i].si),
r);
449 r = mm_recursion(&(s->sfmt[
i].si), &(s->sfmt[
i + POS1 -
N].si), r1, r2, mask);
450 _mm_store_si128(&(s->sfmt[
i].si),
r);
463inline static void gen_rand_array(
struct sfmt_state_t *s, w128_t *array,
int size)
466 __m128i
r, r1, r2,
mask;
467 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
469 r1 = _mm_load_si128(&(s->sfmt[
N - 2].si));
470 r2 = _mm_load_si128(&(s->sfmt[
N - 1].si));
471 for(
i = 0;
i <
N - POS1;
i++)
473 r = mm_recursion(&(s->sfmt[
i].si), &(s->sfmt[
i + POS1].si), r1, r2, mask);
474 _mm_store_si128(&array[
i].si,
r);
480 r = mm_recursion(&(s->sfmt[
i].si), &array[
i + POS1 -
N].si, r1, r2, mask);
481 _mm_store_si128(&array[
i].si,
r);
488 r = mm_recursion(&array[
i -
N].si, &array[
i + POS1 -
N].si, r1, r2, mask);
489 _mm_store_si128(&array[
i].si,
r);
493 for(j = 0; j < 2 *
N -
size; j++)
495 r = _mm_load_si128(&array[j +
size -
N].si);
496 _mm_store_si128(&(s->sfmt[j].si),
r);
500 r = mm_recursion(&array[
i -
N].si, &array[
i + POS1 -
N].si, r1, r2, mask);
501 _mm_store_si128(&array[
i].si,
r);
502 _mm_store_si128(&(s->sfmt[j++].si),
r);
526#if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
527#define BIG_ENDIAN64 1
529#if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
530#define BIG_ENDIAN64 1
532#if defined(ONLY64) && !defined(BIG_ENDIAN64)
534#error "-DONLY64 must be specified with -DBIG_ENDIAN64"
552static w128_t sfmt[
N];
554static uint32_t *psfmt32 = &sfmt[0].u[0];
555#if !defined(BIG_ENDIAN64) || defined(ONLY64)
563static int initialized = 0;
565static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4};
571inline static int idxof(
int i);
572inline static void rshift128(w128_t *
out, w128_t
const *in,
int shift);
573inline static void lshift128(w128_t *
out, w128_t
const *in,
int shift);
574inline static void gen_rand_all(sfmt_state_t *s);
575inline static void gen_rand_array(sfmt_state_t *s, w128_t *array,
int size);
576inline static uint32_t func1(uint32_t
x);
577inline static uint32_t func2(uint32_t
x);
578static void period_certification(sfmt_state_t *s);
579#if defined(BIG_ENDIAN64) && !defined(ONLY64)
580inline static void swap(w128_t *array,
int size);
594inline static int idxof(
int i)
599inline static int idxof(
int i)
613inline static void rshift128(w128_t *
out, w128_t
const *in,
int shift)
620 oh = th >> (shift * 8);
621 ol = tl >> (shift * 8);
622 ol |= th << (64 - shift * 8);
623 out->u[0] = (uint32_t)(ol >> 32);
624 out->u[1] = (uint32_t)ol;
625 out->u[2] = (uint32_t)(oh >> 32);
626 out->u[3] = (uint32_t)oh;
629inline static void rshift128(w128_t *
out, w128_t
const *in,
int shift)
636 oh = th >> (shift * 8);
637 ol = tl >> (shift * 8);
638 ol |= th << (64 - shift * 8);
639 out->u[1] = (uint32_t)(ol >> 32);
640 out->u[0] = (uint32_t)ol;
641 out->u[3] = (uint32_t)(oh >> 32);
642 out->u[2] = (uint32_t)oh;
654inline static void lshift128(w128_t *
out, w128_t
const *in,
int shift)
661 oh = th << (shift * 8);
662 ol = tl << (shift * 8);
663 oh |= tl >> (64 - shift * 8);
664 out->u[0] = (uint32_t)(ol >> 32);
665 out->u[1] = (uint32_t)ol;
666 out->u[2] = (uint32_t)(oh >> 32);
667 out->u[3] = (uint32_t)oh;
670inline static void lshift128(w128_t *
out, w128_t
const *in,
int shift)
677 oh = th << (shift * 8);
678 ol = tl << (shift * 8);
679 oh |= tl >> (64 - shift * 8);
680 out->u[1] = (uint32_t)(ol >> 32);
681 out->u[0] = (uint32_t)ol;
682 out->u[3] = (uint32_t)(oh >> 32);
683 out->u[2] = (uint32_t)oh;
695#if(!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
697inline static void do_recursion(w128_t *
r, w128_t *
a, w128_t *
b, w128_t *
c, w128_t *
d)
702 lshift128(&
x,
a, SL2);
703 rshift128(&y,
c, SR2);
704 r->u[0] =
a->u[0] ^
x.u[0] ^ ((
b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (
d->u[0] << SL1);
705 r->u[1] =
a->u[1] ^
x.u[1] ^ ((
b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (
d->u[1] << SL1);
706 r->u[2] =
a->u[2] ^
x.u[2] ^ ((
b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (
d->u[2] << SL1);
707 r->u[3] =
a->u[3] ^
x.u[3] ^ ((
b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (
d->u[3] << SL1);
710inline static void do_recursion(w128_t *
r, w128_t *
a, w128_t *
b, w128_t *
c, w128_t *
d)
715 lshift128(&
x,
a, SL2);
716 rshift128(&y,
c, SR2);
717 r->u[0] =
a->u[0] ^
x.u[0] ^ ((
b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (
d->u[0] << SL1);
718 r->u[1] =
a->u[1] ^
x.u[1] ^ ((
b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (
d->u[1] << SL1);
719 r->u[2] =
a->u[2] ^
x.u[2] ^ ((
b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (
d->u[2] << SL1);
720 r->u[3] =
a->u[3] ^
x.u[3] ^ ((
b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (
d->u[3] << SL1);
725#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
726inline static void swap(w128_t *array,
int size)
730 uint32_t
x = array[
i].u[0];
731 uint32_t y = array[
i].u[2];
732 array[
i].u[0] = array[
i].u[1];
733 array[
i].u[2] = array[
i].u[3];
745static uint32_t func1(uint32_t
x)
747 return (
x ^ (
x >> 27)) * (uint32_t)1664525UL;
756static uint32_t func2(uint32_t
x)
758 return (
x ^ (
x >> 27)) * (uint32_t)1566083941UL;
764static void period_certification(sfmt_state_t *s)
768 for(
int i = 0;
i < 4;
i++) inner ^= s->psfmt32[idxof(
i)] & s->parity[
i];
769 for(
int i = 16;
i > 0;
i >>= 1) inner ^= inner >>
i;
777 for(
int i = 0;
i < 4;
i++)
780 for(
int j = 0; j < 32; j++)
782 if((work & s->parity[
i]) != 0)
784 s->psfmt32[idxof(
i)] ^= work;
800const char *get_idstring(
void)
811uint32_t gen_rand32(sfmt_state_t *s)
821 r = s->psfmt32[s->idx++];
834#if defined(BIG_ENDIAN64) && !defined(ONLY64)
848#if defined(BIG_ENDIAN64) && !defined(ONLY64)
849 r1 = s->psfmt32[s->idx];
850 r2 = s->psfmt32[s->idx + 1];
854 r = s->psfmt64[s->idx / 2];
867void init_gen_rand(sfmt_state_t *s, uint32_t seed)
871 s->psfmt32[idxof(0)] = seed;
872 for(
i = 1;
i < N32;
i++)
874 s->psfmt32[idxof(
i)] = 1812433253UL * (s->psfmt32[idxof(
i - 1)] ^ (s->psfmt32[idxof(
i - 1)] >> 30)) +
i;
877 period_certification(s);
887void init_by_array(sfmt_state_t *s, uint32_t *init_key,
int key_length)
911 mid = (
size - lag) / 2;
913 memset(s->sfmt, 0x8b,
sizeof(s->sfmt));
914 if(key_length + 1 > N32)
916 count = key_length + 1;
922 r = func1(s->psfmt32[idxof(0)] ^ s->psfmt32[idxof(mid)] ^ s->psfmt32[idxof(N32 - 1)]);
923 s->psfmt32[idxof(mid)] +=
r;
925 s->psfmt32[idxof(mid + lag)] +=
r;
926 s->psfmt32[idxof(0)] =
r;
929 for(
i = 1, j = 0; (j < count) && (j < key_length); j++)
931 r = func1(s->psfmt32[idxof(
i)] ^ s->psfmt32[idxof((
i + mid) % N32)]
932 ^ s->psfmt32[idxof((
i + N32 - 1) % N32)]);
933 s->psfmt32[idxof((
i + mid) % N32)] +=
r;
934 r += init_key[j] +
i;
935 s->psfmt32[idxof((
i + mid + lag) % N32)] +=
r;
936 s->psfmt32[idxof(
i)] =
r;
939 for(; j < count; j++)
941 r = func1(s->psfmt32[idxof(
i)] ^ s->psfmt32[idxof((
i + mid) % N32)]
942 ^ s->psfmt32[idxof((
i + N32 - 1) % N32)]);
943 s->psfmt32[idxof((
i + mid) % N32)] +=
r;
945 s->psfmt32[idxof((
i + mid + lag) % N32)] +=
r;
946 s->psfmt32[idxof(
i)] =
r;
949 for(j = 0; j < N32; j++)
951 r = func2(s->psfmt32[idxof(
i)] + s->psfmt32[idxof((
i + mid) % N32)]
952 + s->psfmt32[idxof((
i + N32 - 1) % N32)]);
953 s->psfmt32[idxof((
i + mid) % N32)] ^=
r;
955 s->psfmt32[idxof((
i + mid + lag) % N32)] ^=
r;
956 s->psfmt32[idxof(
i)] =
r;
961 period_certification(s);
969 sizeof(sfmt_state_t) * num_threads, 0);
970 p->s = (sfmt_state_t **)calloc(num_threads,
sizeof(sfmt_state_t *));
971 p->num = num_threads;
974 for(
int i = 0;
i < (int)num_threads;
i++)
976 p->s[
i] = states +
i;
977#if !defined(BIG_ENDIAN64) || defined(ONLY64)
978 p->s[
i]->psfmt64 = (
uint64_t *)&(
p->s[
i]->sfmt[0].u[0]);
980 p->s[
i]->psfmt32 = &(
p->s[
i]->sfmt[0].u[0]);
981 p->s[
i]->initialized = 0;
982 p->s[
i]->parity[0] = PARITY1;
983 p->s[
i]->parity[1] = PARITY2;
984 p->s[
i]->parity[2] = PARITY3;
985 p->s[
i]->parity[3] = PARITY4;
986 init_gen_rand(
p->s[
i], seed);
999 return genrand_real2f(
p->s[thread_num]);
static const dt_aligned_pixel_simd_t const dt_adaptation_t const float p
Definition chromatic_adaptation.h:315
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
const dt_aligned_pixel_t f
Definition colorspaces_inline_conversions.h:256
const float d
Definition colorspaces_inline_conversions.h:931
const float r
Definition colorspaces_inline_conversions.h:1324
const float b
Definition colorspaces_inline_conversions.h:1326
const float a
Definition colorspaces_inline_conversions.h:1292
static const dt_colormatrix_t dt_aligned_pixel_t out
Definition colorspaces_inline_conversions.h:184
darktable_t darktable
Definition darktable.c:178
#define dt_pixelpipe_cache_alloc_align_cache(size, id)
Definition darktable.h:357
float dt_aligned_pixel_simd_t __attribute__((vector_size(16), aligned(16)))
Multi-tap smudge source sample with directional jitter.
Definition darktable.h:448
static int dt_get_thread_num()
Definition darktable.h:269
#define dt_free(ptr)
Definition darktable.h:380
#define dt_pixelpipe_cache_free_align(mem)
Definition darktable.h:377
static const float x
Definition iop_profile.h:239
static const float v
Definition iop_profile.h:223
static void swap(float *x, float *y)
Definition lightroom.c:1022
size_t size
Definition mipmap_cache.c:3
mask
Definition dtstyle_to_xmp.py:79
#define N
Definition noiseprofile.c:159
static float dt_points_get()
Definition points.h:90
static void dt_points_cleanup(dt_points_t *p)
Definition points.h:65
static void dt_points_init(dt_points_t *p, const unsigned int num_threads)
Definition points.h:55
static float dt_points_get_for(dt_points_t *p, const unsigned int thread_num)
Definition points.h:70
unsigned __int64 uint64_t
Definition strptime.c:74
struct dt_points_t * points
Definition darktable.h:711
uint64_t state1
Definition points.h:47
uint64_t state0
Definition points.h:46
dt_points_state_t * s
Definition points.h:52