X-Git-Url: https://git.draconx.ca/gitweb/rrace.git/blobdiff_plain/e9ab90141032c23a97fa82a723a7a751658bce66..30f49b9152dcf5ce7baefa9287b0ec627155fba9:/t/xos256ss.c diff --git a/t/xos256ss.c b/t/xos256ss.c new file mode 100644 index 0000000..d0b83a2 --- /dev/null +++ b/t/xos256ss.c @@ -0,0 +1,104 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See . */ + +#include + +/* This is xoshiro256** 1.0, one of our all-purpose, rock-solid + generators. It has excellent (sub-ns) speed, a state (256 bits) that is + large enough for any parallel application, and it passes all tests we + are aware of. + + For generating just floating-point numbers, xoshiro256+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +static uint64_t s[4]; + +uint64_t next(void) { + const uint64_t result = rotl(s[1] * 5, 7) * 9; + + const uint64_t t = s[1] << 17; + + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; + + s[2] ^= t; + + s[3] = rotl(s[3], 45); + + return result; +} + + +/* This is the jump function for the generator. It is equivalent + to 2^128 calls to next(); it can be used to generate 2^128 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + uint64_t s2 = 0; + uint64_t s3 = 0; + for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + s2 ^= s[2]; + s3 ^= s[3]; + } + next(); + } + + s[0] = s0; + s[1] = s1; + s[2] = s2; + s[3] = s3; +} + + + +/* This is the long-jump function for the generator. It is equivalent to + 2^192 calls to next(); it can be used to generate 2^64 starting points, + from each of which jump() will generate 2^64 non-overlapping + subsequences for parallel distributed computations. */ + +void long_jump(void) { + static const uint64_t LONG_JUMP[] = { 0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635 }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + uint64_t s2 = 0; + uint64_t s3 = 0; + for(int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for(int b = 0; b < 64; b++) { + if (LONG_JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + s2 ^= s[2]; + s3 ^= s[3]; + } + next(); + } + + s[0] = s0; + s[1] = s1; + s[2] = s2; + s[3] = s3; +}