Logo ROOT   6.13/01
Reference Guide
mixmax.icc
Go to the documentation of this file.
1 /*
2  * mixmax.c
3  * A Pseudo-Random Number Generator
4  *
5  * Created by Konstantin Savvidy.
6  *
7  * The code is released under GNU Lesser General Public License v3
8  *
9  * G.K.Savvidy and N.G.Ter-Arutyunian,
10  * On the Monte Carlo simulation of physical systems,
11  * J.Comput.Phys. 97, 566 (1991);
12  * Preprint EPI-865-16-86, Yerevan, Jan. 1986
13  *
14  * K.Savvidy
15  * The MIXMAX random number generator
16  * Comp. Phys. Commun. 196 (2015), pp 161–165
17  * http://dx.doi.org/10.1016/j.cpc.2015.06.003
18  *
19  * K.Savvidy and G.Savvidy
20  * Spectrum and Entropy of C-systems. MIXMAX random number generator
21  * Chaos, Solitons & Fractals, Volume 91, (2016) pp. 33–38
22  * http://dx.doi.org/10.1016/j.chaos.2016.05.003
23  *
24  */
25 
26 #ifdef __MIXMAX_C
27  #error "You should not define __MIXMAX_C. Please #include mixmax.h"
28 #endif
29 
30 #define __MIXMAX_C
31 
32 #include "mixmax.h"
33 
35  X->sumtot = iterate_raw_vec(X->V, X->sumtot);
36  return 0;
37 }
38 
39 #if (SPECIALMUL!=0)
40 inline uint64_t MULWU (uint64_t k){ return (( (k)<<(SPECIALMUL) & M61) | ( (k) >> (BITS-SPECIALMUL)) ) ;}
41 #elif (SPECIALMUL==0)
42 inline uint64_t MULWU (uint64_t /* k */ ){ return 0;}
43 #else
44 #error SPECIALMUL not defined
45 #endif
46 
48  // operates with a raw vector, uses known sum of elements of Y
49  int i;
50 #if (SPECIAL!=0)
51  myuint temp2 = Y[1];
52 #endif
53  myuint tempP, tempV;
54  Y[0] = ( tempV = sumtotOld);
55  myuint sumtot = Y[0], ovflow = 0; // will keep a running sum of all new elements
56  tempP = 0; // will keep a partial sum of all old elements
57  for (i=1; i<N; i++){
58 #if (SPECIALMUL!=0)
59  myuint tempPO = MULWU(tempP);
60  tempP = modadd(tempP,Y[i]);
61  tempV = MOD_MERSENNE(tempV + tempP + tempPO); // edge cases ?
62 #else
63  tempP = modadd(tempP , Y[i]);
64  tempV = modadd(tempV , tempP);
65 #endif
66  Y[i] = tempV;
67  sumtot += tempV; if (sumtot < tempV) {ovflow++;}
68  }
69 #if (SPECIAL!=0)
70  temp2 = MOD_MULSPEC(temp2);
71  Y[2] = modadd( Y[2] , temp2 );
72  sumtot += temp2; if (sumtot < temp2) {ovflow++;}
73 #endif
74  return MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
75 }
76 
78  return GET_BY_MACRO(X);
79 }
80 
82  return get_next_float_BY_MACRO(X);
83 }
84 
85 void fill_array(rng_state_t* X, unsigned int n, double *array)
86 {
87  // Return an array of n random numbers uniformly distributed in (0,1]
88  unsigned int i,j;
89  const int M=N-1;
90  for (i=0; i<(n/M); i++){
91  iterate_and_fill_array(X, array+i*M);
92  }
93  unsigned int rem=(n % M);
94  if (rem) {
95  iterate(X);
96  for (j=0; j< (rem); j++){
97  array[M*i+j] = (int64_t)X->V[j] * (double)(INV_MERSBASE);
98  }
99  X->counter = j; // needed to continue with single fetches from the exact spot, but if you only use fill_array to get numbers then it is not necessary
100  }else{
101  X->counter = N;
102  }
103 }
104 
105 void iterate_and_fill_array(rng_state_t* X, double *array){
106  myuint* Y=X->V;
107  int i;
108  myuint tempP, tempV;
109 #if (SPECIAL != 0)
110  myuint temp2 = Y[1];
111 #endif
112  Y[0] = (tempV = X->sumtot);
113  //array[0] = (double)tempV * (double)(INV_MERSBASE);
114  myuint sumtot = Y[0], ovflow = 0; // will keep a running sum of all new elements
115  tempP = 0; // will keep a partial sum of all old elements
116  for (i=1; i<N; i++){
117 #if (SPECIALMUL!=0)
118  myuint tempPO = MULWU(tempP);
119  tempP = modadd(tempP,Y[i]);
120  tempV = MOD_MERSENNE(tempV + tempP + tempPO); // edge cases ?
121 #else
122  tempP = MOD_MERSENNE(tempP + Y[i]);
123  tempV = MOD_MERSENNE(tempV + tempP);
124 #endif
125  Y[i] = tempV;
126  sumtot += tempV; if (sumtot < tempV) {ovflow++;}
127  array[i-1] = (int64_t)tempV * (double)(INV_MERSBASE);
128  }
129 #if (SPECIAL!=0)
130  temp2 = MOD_MULSPEC(temp2);
131  Y[2] = modadd( Y[2] , temp2 );
132  sumtot += temp2; if (sumtot < temp2) {ovflow++;}
133 #endif
134  X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
135 }
136 
138 #if (defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && defined(USE_INLINE_ASM)
139 //#warning Using assembler routine in modadd
140  myuint out;
141  /* Assembler trick suggested by Andrzej Görlich */
142  __asm__ ("addq %2, %0; "
143  "btrq $61, %0; "
144  "adcq $0, %0; "
145  :"=r"(out)
146  :"0"(foo), "r"(bar)
147  );
148  return out;
149 #else
150  return MOD_MERSENNE(foo+bar);
151 #endif
152 }
153 
155 {
156 /* allocate the state */
157  rng_state_t *p = (rng_state_t*)malloc(sizeof(rng_state_t));
158  p->fh=NULL; // by default, set the output file handle to stdout
159  return p;
160 }
161 
162 int rng_free(rng_state_t* X) /* free the memory occupied by the state */
163 {
164  free(X);
165  return 0;
166 }
167 
169 {
170  /* copy the vector stored at Y, and return pointer to the newly allocated and initialized state.
171  It is the user's responsibility to make sure that Y is properly allocated with rng_alloc,
172  then pass Y->V or it can also be an array -- such as myuint Y[N+1] and Y[1]...Y[N] have been set to legal values [0 .. MERSBASE-1]
173  Partial sums on this new state are recalculated, and counter set to zero, so that when get_next is called,
174  it will output the initial vector before any new numbers are produced, call iterate(X) if you want to advance right away */
175  rng_state_t* X = rng_alloc();
176  myuint sumtot=0,ovflow=0;
177  X->counter = 2;
178  int i;
179  for ( i=0; i < N; i++){
180  X->V[i] = Y[i];
181  sumtot += X->V[(i)]; if (sumtot < X->V[(i)]) {ovflow++;}
182 
183  }
184  X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
185  return X;
186 }
187 
188 void seed_vielbein(rng_state_t* X, unsigned int index)
189 {
190 int i;
191  if (index<N){
192  for (i=0; i < N; i++){
193  X->V[i] = 0;
194  }
195  X->V[index] = 1;
196  }else{
197  fprintf(stderr, "Out of bounds index, is not ( 0 <= index < N )\n"); exit(ARRAY_INDEX_OUT_OF_BOUNDS);
198  }
199  X->counter = N; // set the counter to N if iteration should happen right away
200  //precalc(X);
201  X->sumtot = 1; //(index ? 1:0);
202  if (X->fh==NULL){X->fh=stdout;}
203 }
204 
206 { // a 64-bit LCG from Knuth line 26, in combination with a bit swap is used to seed
207  const myuint MULT64=6364136223846793005ULL;
208  int i;
209  myuint sumtot=0,ovflow=0;
210  if (seed == 0){
211  fprintf(stderr, " try seeding with nonzero seed next time!\n");
212  exit(SEED_WAS_ZERO);
213  }
214 
215  myuint l = seed;
216 
217  //X->V[0] = l & MERSBASE;
218  if (X->fh==NULL){X->fh=stdout;} // if the filehandle is not yet set, make it stdout
219  for (i=0; i < N; i++){
220  l*=MULT64; l = (l << 32) ^ (l>>32);
221  X->V[i] = l & MERSBASE;
222  sumtot += X->V[(i)]; if (sumtot < X->V[(i)]) {ovflow++;}
223  }
224  X->counter = N; // set the counter to N if iteration should happen right away
225  X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
226 }
227 
229  int i;
230  myuint temp;
231  temp = 0;
232  for (i=0; i < N; i++){
233  temp = MOD_MERSENNE(temp + X->V[i]);
234  }
235  X->sumtot = temp;
236  return temp;
237 }
238 
239 
240 int rng_get_N(void){return N;}
241 
242 #if defined(__x86_64__)
243 inline myuint mod128(__uint128_t s){
244  myuint s1;
245  s1 = ( ( ((myuint)s)&MERSBASE ) + ( ((myuint)(s>>64)) * 8 ) + ( ((myuint)s) >>BITS) );
246  return MOD_MERSENNE(s1);
247 }
248 
249 inline myuint fmodmulM61(myuint cum, myuint a, myuint b){
250  __uint128_t temp;
251  temp = (__uint128_t)a*(__uint128_t)b + cum;
252  return mod128(temp);
253 }
254 
255 #else // on all other platforms, including 32-bit linux, PPC and PPC64 and all Windows
256 #define MASK32 0xFFFFFFFFULL
257 
259 {
260  register myuint o,ph,pl,ah,al;
261  o=(s)*a;
262  ph = ((s)>>32);
263  pl = (s) & MASK32;
264  ah = a>>32;
265  al = a & MASK32;
266  o = (o & M61) + ((ph*ah)<<3) + ((ah*pl+al*ph + ((al*pl)>>32))>>29) ;
267  o += cum;
268  o = (o & M61) + ((o>>61));
269  return o;
270 }
271 #endif
272 
274  int j;
275  fprintf(X->fh, "mixmax state, file version 1.0\n" );
276  fprintf(X->fh, "N=%u; V[N]={", rng_get_N() );
277  for (j=0; (j< (rng_get_N()-1) ); j++) {
278  fprintf(X->fh, "%llu, ", X->V[j] );
279  }
280  fprintf(X->fh, "%llu", X->V[rng_get_N()-1] );
281  fprintf(X->fh, "}; " );
282  fprintf(X->fh, "counter=%u; ", X->counter );
283  fprintf(X->fh, "sumtot=%llu;\n", X->sumtot );
284 }
285 
286 void read_state(rng_state_t* X, const char filename[] ){
287  // a function for reading the state from a file, after J. Apostolakis
288  FILE* fin;
289  if( ( fin = fopen(filename, "r") ) ){
290  int l=0;
291  while ( l != '{' ) { // 0x7B = "{"
292  l=fgetc(fin); // proceed until hitting opening bracket
293  }
294  ungetc(' ', fin);
295  }else{
296  fprintf(stderr, "mixmax -> read_state: error reading file %s\n", filename);
298  }
299 
300  myuint vecVal;
301  //printf("mixmax -> read_state: starting to read state from file\n");
302  if (!fscanf(fin, "%llu", &X->V[0]) ) {fprintf(stderr, "mixmax -> read_state: error reading file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
303  //printf("V[%d] = %llu\n",0, X->V[0]);
304  int i;
305  for( i = 1; i < rng_get_N(); i++){
306  if (!fscanf(fin, ", %llu", &vecVal) ) {fprintf(stderr, "mixmax -> read_state: error reading vector component i=%d from file %s\n", i, filename); exit(ERROR_READING_STATE_FILE);}
307  //printf("V[%d] = %llu\n",i, vecVal);
308  if( vecVal <= MERSBASE ){
309  X->V[i] = vecVal;
310  }else{
311  fprintf(stderr, "mixmax -> read_state: Invalid state vector value= %llu"
312  " ( must be less than %llu ) "
313  " obtained from reading file %s\n"
314  , vecVal, MERSBASE, filename);
315 
316  }
317  }
318 
319  unsigned int counter;
320  if (!fscanf( fin, "}; counter=%u; ", &counter)){fprintf(stderr, "mixmax -> read_state: error reading counter from file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
321  if( counter <= N ) {
322  X->counter= counter;
323  }else{
324  fprintf(stderr, "mixmax -> read_state: Invalid counter = %d"
325  " Must be 0 <= counter < %u\n" , counter, N);
326  print_state(X);
328  }
329  precalc(X);
330  myuint sumtot;
331  if (!fscanf( fin, "sumtot=%llu\n", &sumtot)){fprintf(stderr, "mixmax -> read_state: error reading checksum from file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
332 
333  if (X->sumtot != sumtot) {
334  fprintf(stderr, "mixmax -> checksum error while reading state from file %s - corrupted?\n", filename);
336  }
337  else{fprintf(stderr, "mixmax -> read_state: checksum ok: %llu == %llu\n",X->sumtot, sumtot);}
338  fclose(fin);
339 }
340 
341 
342 #define FUSEDMODMULVEC \
343 { for (i =0; i<N; i++){ \
344 cum[i] = fmodmulM61( cum[i], coeff , Y[i] ) ; \
345 } }
346 
347 
348 #define SKIPISON 1
349 #define OLDSKIP 0
350 
351 #if ( ( (N==8) || (N==17) || (N==240) ||(N==120) || (N==256) ) && BITS==61 && SKIPISON!=0)
352 #if (OLDSKIP==1)
353 #warning Compiling with normal skip
354 void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID ){
355  seed_vielbein(Xin,0);
356  Xin->sumtot = apply_bigskip(Xin->V, Xin->V, clusterID, machineID, runID, streamID );
357  if (Xin->fh==NULL){Xin->fh=stdout;} // if the filehandle is not yet set, make it stdout
358  Xin->counter = 1;
359 }
360 #else
361 void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID ){
362 //#warning Compiling with cached skip
363 
364  /* Kostas Savvidis - 2016-04-27
365  A caching implementation of seed_uniquestream which
366  will keep the previous vector and update it in accordance with increment of the seed,
367  so that if the user increments the seed vector by 1, the whole
368  vector can be reused next time and skip just by 1 unit, see below.
369  This is believed to be thread safe due to the __thread declaration of the cached variables.
370  */
371  static __thread myuint Vcache[N]={1};
372  static __thread myuint sumtmp=0;
373  static __thread myID_t ID1cached=0, ID2cached=0, ID3cached=0, ID4cached=0;
374 
375  if ( ID1cached <= clusterID && ID2cached <= machineID && ID3cached <= runID && ID4cached <= streamID){ // proceed only if the the cached seed value is less than the new seed value
376 
377  sumtmp = apply_bigskip(Vcache, Vcache, clusterID - ID1cached, machineID - ID2cached, runID - ID3cached, streamID - ID4cached );
378 
379  ID1cached = clusterID; ID2cached = machineID ; ID3cached = runID ; ID4cached = streamID; // increment the cached seed value
380  for (int i=0; i<N; i++) { Xin->V[i] = Vcache[i] ; } // copy to destination
381  Xin->sumtot = sumtmp;
382  }else{
383  seed_vielbein(Xin,0);
384  Xin->sumtot = apply_bigskip(Xin->V, Xin->V, clusterID, machineID, runID, streamID );
385  for (int i=0; i<N; i++) { Vcache[i]=Xin->V[i]; }
386  ID1cached = clusterID; ID2cached = machineID ; ID3cached = runID ; ID4cached = streamID;
387  }
388  Xin->counter = 1;
389 }
390 
391 
392 #endif
393 
394 void branch_inplace( rng_state_t* Xin, myID_t* IDvec ){
395  Xin->sumtot = apply_bigskip(Xin->V, Xin->V, IDvec[3], IDvec[2], IDvec[1], IDvec[0] );
396 }
397 
398 myuint apply_bigskip(myuint* Vout, myuint* Vin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID ){
399  /*
400  makes a derived state vector, Vout, from the mother state vector Vin
401  by skipping a large number of steps, determined by the given seeding ID's
402 
403  it is mathematically guaranteed that the substreams derived in this way from the SAME (!!!) Vin will not collide provided
404  1) at least one bit of ID is different
405  2) less than 10^100 numbers are drawn from the stream
406  (this is good enough : a single CPU will not exceed this in the lifetime of the universe, 10^19 sec,
407  even if it had a clock cycle of Planch time, 10^44 Hz )
408 
409  Caution: never apply this to a derived vector, just choose some mother vector Vin, for example the unit vector by seed_vielbein(X,0),
410  and use it in all your runs, just change runID to get completely nonoverlapping streams of random numbers on a different day.
411 
412  clusterID and machineID are provided for the benefit of large organizations who wish to ensure that a simulation
413  which is running in parallel on a large number of clusters and machines will have non-colliding source of random numbers.
414 
415  did i repeat it enough times? the non-collision guarantee is absolute, not probabilistic
416 
417  */
418 
419 
420  const myuint skipMat[128][N] = // to make this file, delete all except some chosen 128 rows of the coefficients table
421 #if (N==240)
422 #include "mixmax_skip_N240.icc"
423 #elif (N==120)
424 #include "mixmax_skip_N120.icc"
425 
426 #elif (N==256)
427 #if (SPECIAL==-1)
428 #include "mixmax_skip_N256.oldS.icc"
429 #else
430 #include "mixmax_skip_N256.icc"
431 #endif
432 
433 #elif (N==8)
434 #include "mixmax_skip_N8.icc"
435 #elif (N==17)
436 #include "mixmax_skip_N17.icc"
437 #endif
438  ;
439 
440  myID_t IDvec[4] = {streamID, runID, machineID, clusterID};
441  int r,i,j, IDindex;
442  myID_t id;
443  myuint Y[N], cum[N];
444  myuint coeff;
445  myuint* rowPtr;
446  myuint sumtot=0;
447 
448 
449  for (i=0; i<N; i++) { Y[i] = Vin[i]; sumtot = modadd( sumtot, Vin[i]); } ;
450  for (IDindex=0; IDindex<4; IDindex++) { // go from lower order to higher order ID
451  id=IDvec[IDindex];
452  //printf("now doing ID at level %d, with ID = %d\n", IDindex, id);
453  r = 0;
454  while (id){
455  if (id & 1) {
456  rowPtr = (myuint*)skipMat[r + IDindex*8*sizeof(myID_t)];
457  //printf("free coeff for row %d is %llu\n", r, rowPtr[0]);
458  for (i=0; i<N; i++){ cum[i] = 0; }
459  for (j=0; j<N; j++){ // j is lag, enumerates terms of the poly
460  // for zero lag Y is already given
461  coeff = rowPtr[j]; // same coeff for all i
462  for (i =0; i<N; i++){
463  cum[i] = fmodmulM61( cum[i], coeff , Y[i] ) ;
464  }
465  sumtot = iterate_raw_vec(Y, sumtot);
466  }
467  sumtot=0;
468  for (i=0; i<N; i++){ Y[i] = cum[i]; sumtot = modadd( sumtot, cum[i]); } ;
469  }
470  id = (id >> 1); r++; // bring up the r-th bit in the ID
471  }
472  }
473  sumtot=0;
474  for (i=0; i<N; i++){ Vout[i] = Y[i]; sumtot = modadd( sumtot, Y[i]); } ; // returns sumtot, and copy the vector over to Vout
475  return (sumtot) ;
476 }
477 #else
478 #warning For this N, we dont have the skipping coefficients yet, using alternative method to seed
479 
480 void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID ){
481  Xin->V[0] = (myuint)clusterID;
482  Xin->V[1] = (myuint)machineID;
483  Xin->V[2] = (myuint)runID;
484  Xin->V[3] = (myuint)streamID;
485  Xin->V[4] = (myuint)clusterID << 5;
486  Xin->V[5] = (myuint)machineID << 7;
487  Xin->V[6] = (myuint)runID << 11;
488  Xin->V[7] = (myuint)streamID << 13;
489  precalc(Xin);
490  Xin->sumtot = iterate_raw_vec(Xin->V, Xin->sumtot);
491  Xin->sumtot = iterate_raw_vec(Xin->V, Xin->sumtot);
492 }
493 #endif // SKIPISON
494 
495 
void seed_spbox(rng_state_t *X, myuint seed)
Definition: mixmax.icc:205
int iterate(rng_state_t *X)
Definition: mixmax.icc:34
myuint fmodmulM61(myuint cum, myuint s, myuint a)
Definition: mixmax.icc:258
myuint V[N]
Definition: mixmax.h:54
#define SPECIALMUL
#define ERROR_READING_STATE_FILE
#define MOD_MULSPEC(k)
#define SEED_WAS_ZERO
rng_state_t * rng_copy(myuint *Y)
Definition: mixmax.icc:168
int rng_free(rng_state_t *X)
Definition: mixmax.icc:162
int rng_get_N(void)
Definition: mixmax.icc:240
myuint get_next(rng_state_t *X)
Definition: mixmax.icc:77
#define MOD_MERSENNE(k)
double get_next_float(rng_state_t *X)
Definition: mixmax.icc:81
#define BITS
#define ARRAY_INDEX_OUT_OF_BOUNDS
uint64_t myuint
Definition: mixmax.h:45
#define N
void branch_inplace(rng_state_t *Xin, myID_t *IDvec)
Definition: mixmax.icc:394
#define MASK32
myuint iterate_raw_vec(myuint *Y, myuint sumtotOld)
Definition: mixmax.icc:47
FILE * fh
Definition: mixmax.h:57
uint32_t myID_t
Definition: mixmax.h:76
int counter
Definition: mixmax.h:56
uint64_t MULWU(uint64_t k)
Definition: mixmax.icc:40
void iterate_and_fill_array(rng_state_t *X, double *array)
Definition: mixmax.icc:105
rng_state_t * rng_alloc()
Definition: mixmax.icc:154
myuint sumtot
Definition: mixmax.h:55
#define M61
#define MERSBASE
myuint GET_BY_MACRO(rng_state_t *X)
Definition: mixmax.h:195
void read_state(rng_state_t *X, const char filename[])
Definition: mixmax.icc:286
#define ERROR_READING_STATE_CHECKSUM
void seed_vielbein(rng_state_t *X, unsigned int index)
Definition: mixmax.icc:188
myuint modadd(myuint foo, myuint bar)
Definition: mixmax.icc:137
#define INV_MERSBASE
void seed_uniquestream(rng_state_t *Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID)
Definition: mixmax.icc:361
void print_state(rng_state_t *X)
Definition: mixmax.icc:273
myuint apply_bigskip(myuint *Vout, myuint *Vin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID)
Definition: mixmax.icc:398
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
myuint precalc(rng_state_t *X)
Definition: mixmax.icc:228
#define ERROR_READING_STATE_COUNTER
double get_next_float_BY_MACRO(rng_state_t *X)
Definition: mixmax.h:209
void fill_array(rng_state_t *X, unsigned int n, double *array)
Definition: mixmax.icc:85