From 9249e2d7e45846c34ce6fc70552f800539a1a94d Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Mon, 17 Nov 2014 19:05:15 +0100 Subject: [PATCH] hmc_tm: add real reproducible random numbers mode which will keep the RNG state consistent across executions of different numbers of trajectories (say you have a serial run doing 1000 trajectories at a time in 10 goes and a parallel run doing 10000 trajectories in one run, with this mode, the two runs will still have exactly the same random numbers because the RNG state is saved to file between executions of hmc_tm) --- default_input_values.h | 6 ++-- doc/input.tex | 36 +++++++++++++++++++++++- hmc_tm.c | 10 ++++++- read_input.h | 2 ++ read_input.l | 19 +++++++++++++ start.c | 64 ++++++++++++++++++++++++++++++++++++++++++ start.h | 12 ++++++++ update_tm.c | 6 +++- 8 files changed, 150 insertions(+), 5 deletions(-) diff --git a/default_input_values.h b/default_input_values.h index c57d4fa38..324f7fabc 100644 --- a/default_input_values.h +++ b/default_input_values.h @@ -54,8 +54,9 @@ #define _default_g_beta 6.0 #define _default_g_N_s 20 #define _default_g_dflgcr_flag 0 +#define _default_save_ranlux_state 0 #define _default_random_seed 123456 -#define _default_rlxd_level 1 +#define _default_rlxd_level 2 #define _default_solver_flag 1 #define _default_startoption 0 #define _default_Ntherm 0 @@ -64,7 +65,8 @@ #define _default_write_cp_flag 1 #define _default_cp_interval 5 #define _default_nstore 0 -#define _default_rlxd_input_filename "last_state" +#define _default_rlxd_input_filename "rlxd_state" +#define _default_rlxs_input_filename "rlxs_state" #define _default_gauge_input_filename "conf" #define _default_read_source_flag 0 #define _default_source_filename "source" diff --git a/doc/input.tex b/doc/input.tex index e46ac605f..78cdbab15 100644 --- a/doc/input.tex +++ b/doc/input.tex @@ -79,7 +79,41 @@ \subsection{Input parameter for main program} See {\ttfamily NrXProcs}. 
\item {\ttfamily seed}:\\ - The seed for the random number generator. Default value is $123456$. + The seed for the random number generator. Possible values: Integer + $i \in \left[ 1, 2^{31}-1 \right] $ or {\ttfamily statefile}. Default value is $123456$. + The seeds for different MPI processes are computed from this number XOR'ed + with the store counter and a regular pattern according to the process number. + When {\ttfamily statefile} is specified here, ranlux will be initialised + from the state stored in the files {\ttfamily rlxd\_state} and {\ttfamily rlxs\_state}. + Also, {\ttfamily hmc\_tm} will save the ranlux state at the end of each + trajectory. Note that when using MPI, {\ttfamily seed = statefile} should only ever be used + when in {\ttfamily ReproduceRandomNumbers} mode because all processes + will be initialised with the same random number generator state. Serial or OpenMP + executions of {\ttfamily hmc\_tm} have only one random number generator and + so this option allows running a simulation with a single chain of + random numbers fully specified by the initial seed set for the first + execution. + +\item {\ttfamily SaveRanluxState}:\\ + Store the state of the random number generator of MPI process 0 + to the files {\ttfamily rlxd\_state} and {\ttfamily rlxs\_state} + at the end of each trajectory. This is useful to make a fully reproducible + chain of random numbers even across multiple executions of {\ttfamily hmc\_tm}. + When using MPI, this only makes sense if {\ttfamily ReproduceRandomNumbers} is + used. (see {\ttfamily seed}) + +\item {\ttfamily ReproduceRandomNumbers}:\\ + Possible values are {\ttfamily yes} or {\ttfamily no}, default is {\ttfamily yes}. + When set to {\ttfamily yes}, when random numbers are requested the random + number generators of all processes are set to the same state. Then each + process generates random numbers for the whole volume but uses only those + which belong to its local volume. 
This ensures that a parallelised run + is equivalent to a serial one as far as the random numbers are concerned and + is useful for testing purposes. + This can also be extended by setting {\ttfamily seed = statefile } + to produce a single chain of random numbers across multiple executions + of the program (such as using {\ttfamily StartCondition = continue} and + {\ttfamily InitialStoreCounter = readin}), even when using MPI. \item {\ttfamily kappa}:\\ The $\kappa$ value. Default is $0.12$. For the {\ttfamily hmc\_tm} diff --git a/hmc_tm.c b/hmc_tm.c index a15b7eaf1..bef61891f 100644 --- a/hmc_tm.c +++ b/hmc_tm.c @@ -290,7 +290,15 @@ int main(int argc,char *argv[]) { #endif /* Initialise random number generator */ - start_ranlux(rlxd_level, random_seed^trajectory_counter); + /* if running in reproducible mode we can initialize ranlux using + * a saved state */ + if(reproduce_randomnumber_flag && random_seed == -1) { + start_ranlux_from_file(rlxd_input_filename,rlxs_input_filename); + } else if (random_seed == -1) { + fatal_error("Initializing RANLUX from file only works in reproducible random numbers mode. 
Aborting!","hmc_tm"); + } else { + start_ranlux(rlxd_level, random_seed^trajectory_counter); + } /* Set up the gauge field */ /* continue and restart */ diff --git a/read_input.h b/read_input.h index bd456106f..392f646b1 100644 --- a/read_input.h +++ b/read_input.h @@ -53,6 +53,7 @@ extern "C" extern int nstore; extern int crylov_space_dim; extern char rlxd_input_filename[500]; + extern char rlxs_input_filename[500]; extern char gauge_input_filename[500]; extern int subforwilson_flag; extern int eigenvalue_method_flag; @@ -60,6 +61,7 @@ extern "C" extern double eigenvalue_precision; extern int index_start; extern int index_end; + extern int save_ranlux_state; extern int random_seed; extern int rlxd_level; extern double X0, X1, X2, X3; diff --git a/read_input.l b/read_input.l index 7626089e3..f47fb2d3a 100644 --- a/read_input.l +++ b/read_input.l @@ -116,8 +116,10 @@ inline void rmQuotes(char *str){ int nstore; int index_start, index_end; int random_seed; + int save_ranlux_state; int rlxd_level; char rlxd_input_filename[500]; + char rlxs_input_filename[500]; char gauge_input_filename[500]; int read_source_flag; int return_check_flag, return_check_interval; @@ -191,6 +193,7 @@ inline void rmQuotes(char *str){ %x MU %x CSW %x SEED +%x SAVERANLUXSTATE %x RLXDLEVEL %x NSAVE %x RLXDINPUTFILE @@ -336,6 +339,7 @@ inline void rmQuotes(char *str){ ^NoEigenvalues{EQL} BEGIN(NOEV); ^EigenvaluePrecision{EQL} BEGIN(PRECEV); ^seed{EQL} BEGIN(SEED); +^SaveRanluxState{EQL} BEGIN(SAVERANLUXSTATE); ^StartCondition{EQL} BEGIN(STARTCOND); ^ThermalisationSweeps{EQL} BEGIN(THERMSWEEPS); ^Measurements{EQL} BEGIN(NMEAS); @@ -1622,10 +1626,23 @@ inline void rmQuotes(char *str){ dfl_poly_iter=atoi(yytext); if(myverbose!=0) printf("dfl_poly_iter = %s \n", yytext); } +yes { + save_ranlux_state = 1; + if(myverbose!=0) printf("Save RANLUX state at end of trajectory.\n"); +} +no { + save_ranlux_state = 0; + if(myverbose!=0) printf("Don't save RANLUX state at end of trajectory.\n"); +} {DIGIT}+ { 
random_seed=atoi(yytext); if(myverbose!=0) printf("seed=%s \n", yytext); } +statefile { + random_seed=-1; + save_ranlux_state=1; + if(myverbose!=0) printf("seed=%s; Trying to read ranlux state from file! Saving RANLUX state at end of trajectory!\n", yytext); +} [12] { rlxd_level = atoi(yytext); if(myverbose!=0) printf("RanluxdLevel set to %d \n", rlxd_level); @@ -2113,6 +2130,7 @@ int read_input(char * conf_file){ g_N_s = _default_g_N_s; g_dflgcr_flag = _default_g_dflgcr_flag; random_seed = _default_random_seed; + save_ranlux_state = _default_save_ranlux_state; rlxd_level = _default_rlxd_level; startoption = _default_startoption; Ntherm = _default_Ntherm; @@ -2122,6 +2140,7 @@ int read_input(char * conf_file){ cp_interval = _default_cp_interval; nstore = _default_nstore; strcpy(rlxd_input_filename, _default_rlxd_input_filename); + strcpy(rlxs_input_filename, _default_rlxs_input_filename); strcpy(gauge_input_filename, _default_gauge_input_filename); g_stdio_proc = _default_g_stdio_proc; index_start = _default_index_start; diff --git a/start.c b/start.c index cdd7fa77a..bef19a680 100644 --- a/start.c +++ b/start.c @@ -77,6 +77,7 @@ #include "ranlxd.h" #include "ranlxs.h" #include "start.h" +#include "fatal_error.h" static void gauss_vector(double v[],int n) { @@ -856,6 +857,69 @@ void start_ranlux(int level, int seed) rlxd_init(level, loc_seed); } +/* read warning in start.h before using this function! 
*/
+/* Reads one saved state array from each of the two files and passes them to
+ * rlxd_reset() and rlxs_reset() respectively. If a file cannot be opened or
+ * does not contain a complete state array, fatal_error() is called and the
+ * corresponding generator is not reset.
+ *   rlxd_state_filename: file holding a state as written by store_ranlux_state via rlxd_get()
+ *   rlxs_state_filename: file holding a state as written by store_ranlux_state via rlxs_get() */
+void start_ranlux_from_file(char * const rlxd_state_filename, char * const rlxs_state_filename) {
+  FILE * rlxd_state_file;
+  FILE * rlxs_state_file;
+
+  char error_message[1000];
+
+  /* the state files contain raw int arrays, hence binary mode */
+  rlxd_state_file = fopen(rlxd_state_filename,"rb");
+  rlxs_state_file = fopen(rlxs_state_filename,"rb");
+
+  if(rlxd_state_file != NULL) {
+    /* NOTE(review): 105 ints is assumed to be the state size expected by rlxd_reset() - confirm against ranlxd.h */
+    int rlxd_state[105];
+    size_t items_read = fread(rlxd_state, sizeof(rlxd_state), 1, rlxd_state_file);
+    fclose(rlxd_state_file);
+    /* only reset the generator from a completely read state, never from a partially filled buffer */
+    if(items_read == 1) {
+      rlxd_reset(rlxd_state);
+    } else {
+      snprintf(error_message,sizeof(error_message),"Problem reading RLXD state file \"%s\", aborting!",rlxd_state_filename);
+      fatal_error(error_message,"start_ranlux_from_file");
+    }
+  } else {
+    snprintf(error_message,sizeof(error_message),"Problem reading RLXD state file \"%s\", aborting!",rlxd_state_filename);
+    fatal_error(error_message,"start_ranlux_from_file");
+  }
+
+  if(rlxs_state_file != NULL) {
+    /* NOTE(review): 105 ints is assumed to be the state size expected by rlxs_reset() - confirm against ranlxs.h */
+    int rlxs_state[105];
+    size_t items_read = fread(rlxs_state, sizeof(rlxs_state), 1, rlxs_state_file);
+    fclose(rlxs_state_file);
+    if(items_read == 1) {
+      rlxs_reset(rlxs_state);
+    } else {
+      snprintf(error_message,sizeof(error_message),"Problem reading RLXS state file \"%s\", aborting!",rlxs_state_filename);
+      fatal_error(error_message,"start_ranlux_from_file");
+    }
+  } else {
+    snprintf(error_message,sizeof(error_message),"Problem reading RLXS state file \"%s\", aborting!",rlxs_state_filename);
+    fatal_error(error_message,"start_ranlux_from_file");
+  }
+
+}
+
+/* Writes the states obtained from rlxd_get() and rlxs_get() to the two given
+ * files, overwriting any previous content. fatal_error() is called if a file
+ * cannot be opened or if the state could not be written and flushed completely,
+ * so that a truncated state file does not go unnoticed.
+ *   rlxd_state_filename: destination for the rlxd_get() state
+ *   rlxs_state_filename: destination for the rlxs_get() state */
+void store_ranlux_state(char * const rlxd_state_filename, char * const rlxs_state_filename) {
+  FILE * rlxd_state_file;
+  FILE * rlxs_state_file;
+
+  char error_message[1000];
+
+  /* raw int arrays are written, hence binary mode */
+  rlxd_state_file = fopen(rlxd_state_filename,"wb");
+  rlxs_state_file = fopen(rlxs_state_filename,"wb");
+
+  if(rlxd_state_file != NULL) {
+    int rlxd_state[105];
+    size_t items_written;
+    int close_status;
+    rlxd_get(rlxd_state);
+    items_written = fwrite(rlxd_state, sizeof(rlxd_state), 1, rlxd_state_file);
+    /* fclose flushes buffered data, so its result is part of the write check */
+    close_status = fclose(rlxd_state_file);
+    if(items_written != 1 || close_status != 0) {
+      snprintf(error_message,sizeof(error_message),"Problem writing RLXD state file \"%s\", aborting!",rlxd_state_filename);
+      fatal_error(error_message,"store_ranlux_state");
+    }
+  } else {
+    snprintf(error_message,sizeof(error_message),"Problem opening RLXD state file \"%s\" for writing, aborting!",rlxd_state_filename);
+    fatal_error(error_message,"store_ranlux_state");
+  }
+
+  if(rlxs_state_file != NULL) {
+    int rlxs_state[105];
+    size_t items_written;
+    int close_status;
+    rlxs_get(rlxs_state);
+    items_written = fwrite(rlxs_state, sizeof(rlxs_state), 1, rlxs_state_file);
+    close_status = fclose(rlxs_state_file);
+    if(items_written != 1 || close_status != 0) {
+      snprintf(error_message,sizeof(error_message),"Problem writing RLXS state file \"%s\", aborting!",rlxs_state_filename);
+      fatal_error(error_message,"store_ranlux_state");
+    }
+  } else {
+    snprintf(error_message,sizeof(error_message),"Problem opening RLXS state file \"%s\" for writing, aborting!",rlxs_state_filename);
+    fatal_error(error_message,"store_ranlux_state");
+  }
+
+}
+
 void
gen_test_spinor_field(spinor * const k, const int eoflag) { int ix,iy,effvol; diff --git a/start.h b/start.h index 8874e25c1..ed312a84f 100644 --- a/start.h +++ b/start.h @@ -68,6 +68,18 @@ void source_spinor_field_point_from_file(spinor * const P, spinor * const Q, int void start_ranlux(int level,int seed); +/* This function allows initializing RANLUX from a saved state. IMPORTANT NOTE BELOW: + * Because of the way that random numbers are used in tmLQCD, this function should only ever be + * used in "reproducible random numbers" mode with the full understanding that all processes + * will have exactly the same random number generators!! + * The main routines in start.c all accommodate this by making every process generate random + * numbers for the whole volume and only using those relevant for the local volume while throwing + * all others away. + * If some function requests random numbers via any of the utility functions or ranlx[d,s] directly + * without taking this fact into account, all processes will generate the same ones! You have been warned. 
*/ +void start_ranlux_from_file(char * const rlxd_state_filename, char * const rlxs_state_filename); +void store_ranlux_state(char * const rlxd_state_filename, char * const rlxs_state_filename); + void gen_test_spinor_field(spinor * const k , const int eoflag); void write_test_spinor_field(spinor * const k , const int eoflag, char * postfix); #endif diff --git a/update_tm.c b/update_tm.c index a50a6ed18..5194df73c 100644 --- a/update_tm.c +++ b/update_tm.c @@ -334,7 +334,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, #endif etime=gettime(); - /* printing data in the .data file */ + /* printing data in the .data file and save ranlux state if the option is set */ if(g_proc_id==0) { datafile = fopen(filename, "a"); if (!bc_flag) { /* if Periodic Boundary Conditions */ @@ -361,6 +361,10 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, fprintf(datafile, "\n"); fflush(datafile); fclose(datafile); + + if(save_ranlux_state) { + store_ranlux_state(rlxd_input_filename, rlxs_input_filename); + } } return(accept); }