diff --git a/benchmark.c b/benchmark.c index aee29f344..a49ab6aaf 100644 --- a/benchmark.c +++ b/benchmark.c @@ -45,7 +45,9 @@ # include # include "init/init_openmp.h" #endif +#include "git_hash.h" #include "gettime.h" +#include "io/utils.h" #include "su3.h" #include "su3adj.h" #include "ranlxd.h" @@ -55,6 +57,8 @@ #include "boundary.h" #include "operator/Hopping_Matrix.h" #include "operator/Hopping_Matrix_nocom.h" +#include "operator/Hopping_Matrix_32.h" +#include "operator/Hopping_Matrix_32_nocom.h" #include "operator/tm_operators.h" #include "global.h" #include "xchange/xchange.h" @@ -63,6 +67,7 @@ #include "operator/D_psi.h" #include "phmc.h" #include "mpi_init.h" +#include "linalg/assign_to_32.h" #ifdef PARALLELT # define SLICE (LX*LY*LZ/2) @@ -82,6 +87,22 @@ int check_xchange(); +double benchmark_hopping(const int j_max, const int k_max, double* antioptaway); +double benchmark_hopping_nocom(const int j_max, const int k_max, double* antioptaway); + +double benchmark_hopping_32(const int j_max, const int k_max, float* antioptaway_32); +double benchmark_hopping_32_nocom(const int j_max, const int k_max, float* antioptaway_32); + +void average_and_print( + const double dt, const int j_max, const int k_max, const double antioptaway, + const unsigned int precision); + +void average_and_compute_bandwidth( + const double dt, const double dt_nocom, const int j_max, const int k_max, + const double antioptaway, const unsigned int precision); + +double benchmark_Dpsi(const int k_max, const int j_max); + int main(int argc,char *argv[]) { int j,j_max,k,k_max = 1; @@ -90,16 +111,16 @@ int main(int argc,char *argv[]) #endif int status = 0; - static double t1,t2,dt,sdt,dts,qdt,sqdt; + static double t1,t2,dt,dt2; double antioptaway=0.0; - -#ifdef TM_USE_MPI - static double dt2; + float antioptaway_32=0.0; DUM_DERI = 6; DUM_MATRIX = DUM_DERI+8; NO_OF_SPINORFIELDS = DUM_MATRIX+2; - + NO_OF_SPINORFIELDS_32 = 6; + +#ifdef TM_USE_MPI # ifdef TM_USE_OMP int mpi_thread_provided; 
MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); @@ -107,7 +128,6 @@ int main(int argc,char *argv[]) MPI_Init(&argc, &argv); # endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); - #else g_proc_id = 0; #endif @@ -126,64 +146,24 @@ int main(int argc,char *argv[]) tmlqcd_mpi_init(argc, argv); - - - if(g_proc_id==0) { -#ifdef SSE - printf("# The code was compiled with SSE instructions\n"); -#endif -#ifdef SSE2 - printf("# The code was compiled with SSE2 instructions\n"); -#endif -#ifdef SSE3 - printf("# The code was compiled with SSE3 instructions\n"); -#endif -#ifdef P4 - printf("# The code was compiled for Pentium4\n"); -#endif -#ifdef OPTERON - printf("# The code was compiled for AMD Opteron\n"); -#endif -#ifdef _GAUGE_COPY - printf("# The code was compiled with -D_GAUGE_COPY\n"); -#endif -#ifdef BGL - printf("# The code was compiled for Blue Gene/L\n"); -#endif -#ifdef BGP - printf("# The code was compiled for Blue Gene/P\n"); -#endif -#ifdef _USE_HALFSPINOR - printf("# The code was compiled with -D_USE_HALFSPINOR\n"); -#endif -#ifdef _USE_SHMEM - printf("# The code was compiled with -D_USE_SHMEM\n"); -# ifdef _PERSISTENT - printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); -# endif -#endif -#ifdef TM_USE_MPI -# ifdef _NON_BLOCKING - printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); -# endif -#endif - printf("\n"); - fflush(stdout); - } - + write_first_messages(NULL,"benchmark",git_hash); #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); + j += init_gauge_field_32(VOLUMEPLUSRAND, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); + j += init_gauge_field_32(VOLUMEPLUSRAND, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); + j += init_spinor_field_32(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS_32); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); + j += 
init_spinor_field_32(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS_32); } if ( j!= 0) { @@ -222,13 +202,11 @@ int main(int argc,char *argv[]) fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } - if(g_sloppy_precision_flag == 1) { - g_sloppy_precision = 1; - j = init_dirac_halfspinor32(); - if ( j!= 0) { - fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); - exit(0); - } + j = init_dirac_halfspinor32(); + if (j != 0) + { + fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n"); + exit(-1); } # if (defined _PERSISTENT) init_xchange_halffield(); @@ -251,131 +229,52 @@ int main(int argc,char *argv[]) /*For parallelization: exchange the gaugefield */ xchange_gauge(g_gauge_field); #endif + convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND); if(even_odd_flag) { - sdt=0.; sqdt=0.0; /*initialize the pseudo-fermion fields*/ for (k = 0; k < k_max; k++) { random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS); + assign_to_32(g_spinor_field32[k], g_spinor_field[k], VOLUME/2); } - j_max=512; - antioptaway=0.0; - /* compute approximately how many applications we need to do to get a reliable measurement */ -#ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - t1 = gettime(); - for (j=0;j +/* Print msg verbatim to stdout and, when parameterfile is non-NULL, also to parameterfile; only the process with g_proc_id == 0 prints. msg is emitted through "%s" so that a '%' inside it is never interpreted as a conversion. */ +void print_fprint(FILE* parameterfile, const char * const msg){ + if(g_proc_id == 0){ + printf("%s", msg); + if( parameterfile != NULL ) fprintf(parameterfile, "%s", msg); + } +} + int write_first_messages(FILE * parameterfile, char const * const executable, char const * const git_hash) { char message[1024]; - snprintf(message, 1024, "This is the %s code for twisted mass Wilson QCD\n\nVersion %s, commit %s\n",executable,PACKAGE_VERSION,git_hash); - printf("%s",message); - fprintf(parameterfile,"%s",message); - + snprintf(message, 1023, "This is the %s code for twisted mass Wilson QCD\n\nVersion %s, commit %s\n",executable,PACKAGE_VERSION,git_hash); + print_fprint(parameterfile, 
message); + #ifdef SSE - printf("# The code is compiled with SSE instructions\n"); - fprintf(parameterfile, - "# The code is compiled with SSE instructions\n"); + snprintf(message, 1023, "# The code is compiled with SSE instructions\n"); + print_fprint(parameterfile, message); #endif #ifdef SSE2 - printf("# The code is compiled with SSE2 instructions\n"); - fprintf(parameterfile, - "# The code is compiled with SSE2 instructions\n"); + snprintf(message, 1023, "# The code is compiled with SSE2 instructions\n"); + print_fprint(parameterfile, message); #endif #ifdef SSE3 - printf("# The code is compiled with SSE3 instructions\n"); - fprintf(parameterfile, - "# The code is compiled with SSE3 instructions\n"); + snprintf(message, 1023, "# The code is compiled with SSE3 instructions\n"); + print_fprint(parameterfile, message); #endif #ifdef P4 - printf("# The code is compiled for Pentium4\n"); - fprintf(parameterfile, - "# The code is compiled for Pentium4\n"); + snprintf(message, 1023, "# The code is compiled for Pentium4\n"); + print_fprint(parameterfile, message); #endif #if (defined BGL && !defined BGP) - printf("# The code is compiled for Blue Gene/L\n"); - fprintf(parameterfile, - "# The code is compiled for Blue Gene/L\n"); + snprintf(message, 1023, "# The code is compiled for Blue Gene/L\n"); + print_fprint(parameterfile, message); #endif #ifdef BGP - printf("# The code is compiled for Blue Gene/P\n"); - fprintf(parameterfile, - "# The code is compiled for Blue Gene/P\n"); + snprintf(message, 1023, "# The code is compiled for Blue Gene/P\n"); + print_fprint(parameterfile, message); #endif #if (defined BGQ && defined XLC) - printf("# The code is compiled with QPX intrinsics for Blue Gene/Q\n"); - fprintf(parameterfile, - "# The code is compiled with QPX intrinsics for Blue Gene/Q\n"); + snprintf(message, 1023, "# The code is compiled for Blue Gene/Q\n"); + print_fprint(parameterfile, message); #endif #ifdef SPI - printf("# Compiled with BG/Q SPI communication\n"); 
- fprintf(parameterfile, - "# Compiled with IBM Blue Gene/Q SPI communication\n"); + snprintf(message, 1023, "# The code is compiled with Blue Gene/Q SPI communication\n"); + print_fprint(parameterfile, message); #endif #ifdef OPTERON - printf("# The code is compiled for AMD Opteron\n"); - fprintf(parameterfile, - "# The code is compiled for AMD Opteron\n"); + snprintf(message, 1023, "# The code is compiled for AMD Opteron\n"); + print_fprint(parameterfile, message); #endif #ifdef _GAUGE_COPY - printf("# The code is compiled with -D_GAUGE_COPY\n"); - fprintf(parameterfile, - "# The code is compiled with -D_GAUGE_COPY\n"); + snprintf(message, 1023, "# The code is compiled with -D_GAUGE_COPY\n"); + print_fprint(parameterfile, message); #endif #ifdef _USE_HALFSPINOR - printf("# The code is compiled with -D_USE_HALFSPINOR\n"); - fprintf(parameterfile, - "# The code is compiled with -D_USE_HALFSPINOR\n"); + snprintf(message, 1023, "# the code is compiled with -D_USE_HALFSPINOR\n"); + print_fprint(parameterfile, message); #endif #ifdef _USE_SHMEM - printf("# the code is compiled with -D_USE_SHMEM\n"); - fprintf(parameterfile, - "# the code is compiled with -D_USE_SHMEM\n"); + snprintf(message, 1023, "# the code is compiled with -D_USE_SHMEM\n"); + print_fprint(parameterfile, message); # ifdef _PERSISTENT - printf("# the code is compiled for persistent MPI calls (halfspinor only)\n"); - fprintf(parameterfile, - "# the code is compiled for persistent MPI calls (halfspinor only)\n"); + snprintf(message, 1023, "# the code is compiled for persistent MPI calls (halfspinor only)\n"); + print_fprint(parameterfile, message); # endif #endif #ifdef TM_USE_MPI # ifdef _NON_BLOCKING - printf("# the code is compiled for non-blocking MPI calls (spinor and gauge)\n"); - fprintf(parameterfile, - "# the code is compiled for non-blocking MPI calls (spinor and gauge)\n"); + snprintf(message, 1023, "# the code is compiled for non-blocking MPI calls (spinor and gauge)\n"); + 
print_fprint(parameterfile, message); # endif # ifdef HAVE_LIBLEMON - printf("# the code is compiled with MPI IO / Lemon\n"); - fprintf(parameterfile, - "# the code is compiled with MPI IO / Lemon\n"); + snprintf(message, 1023, "# the code is compiled with MPI IO / Lemon\n"); + print_fprint(parameterfile, message); # endif #endif #ifdef TM_USE_OMP - printf("# the code is compiled with openMP support\n"); - fprintf(parameterfile, - "# the code is compiled with openMP support\n"); + snprintf(message, 1023, "# the code is compiled with OpenMP support\n"); + print_fprint(parameterfile, message); #endif if( bc_flag == 0 ) { - printf("# Periodic boundary conditions are used\n"); - fprintf(parameterfile, "# Periodic boundary conditions are used\n"); + snprintf(message, 1023, "# Periodic boundary conditions are used\n"); + print_fprint(parameterfile, message); } if( bc_flag == 1 ) { - printf("# Schroedinger Functional boundary conditions are used\n"); - fprintf(parameterfile, "# Schroedinger Functional boundary conditions are used\n"); + snprintf(message, 1023, "# Schroedinger Functional boundary conditions are used\n"); + print_fprint(parameterfile, message); } - printf("# The lattice size is %d x %d x %d x %d\n", + snprintf(message, 1023, "# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(LZ*g_nproc_z)); - printf("# The local lattice size is %d x %d x %d x %d\n", + print_fprint(parameterfile, message); + + snprintf(message, 1023, "# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); + print_fprint(parameterfile, message); + + if(even_odd_flag) { - printf("# Even/odd preconditioning is used\n"); - fprintf(parameterfile, "# Even/odd preconditioning is used\n"); + snprintf(message, 1023, "# Even/odd preconditioning is used\n"); + print_fprint(parameterfile, message); } else { - printf("# Even/odd preconditioning is not used\n"); - fprintf(parameterfile, "# Even/odd 
preconditioning is not used\n"); - } - printf("# beta = %.12f , kappa= %.12f\n", g_beta, g_kappa); - printf("# boundary conditions for fermion fields (t,x,y,z) * pi: %f %f %f %f \n",X0,X1,X2,X3); - if( strcmp(executable,"hmc") == 0 ) { - printf("# mu = %.12f\n", g_mu/2./g_kappa); - printf("# g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1); - printf("# Using %s precision for the inversions!\n", - g_relative_precision_flag ? "relative" : "absolute"); + snprintf(message, 1023, "# Even/odd preconditioning is not used\n"); + print_fprint(parameterfile, message); } - fprintf(parameterfile, "# The lattice size is %d x %d x %d x %d\n", (int)(g_nproc_t*T), (int)(g_nproc_x*LX), - (int)(g_nproc_y*LY), (int)(g_nproc_z*LZ)); - fprintf(parameterfile, "# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY), (int)(LZ)); - fprintf(parameterfile, "# g_beta = %.12f , g_kappa= %.12f, c_sw = %.12f \n",g_beta,g_kappa,g_c_sw); - fprintf(parameterfile, "# boundary conditions for fermion fields (t,x,y,z) * pi: %f %f %f %f \n",X0,X1,X2,X3); + snprintf(message, 1023, "# Using %s precision for the inversions!\n", + g_relative_precision_flag ? "relative" : "absolute"); + print_fprint(parameterfile, message); + + snprintf(message, 1023, "# beta = %.12f , kappa= %.12f, mu= %.12f\n", g_beta, g_kappa, g_mu/2/g_kappa); + print_fprint(parameterfile, message); + + snprintf(message, 1023, "# boundary conditions for fermion fields (t,x,y,z) * pi: %f %f %f %f \n",X0,X1,X2,X3); + print_fprint(parameterfile, message); + if( strcmp(executable,"hmc") == 0 ) { - fprintf(parameterfile, "# Nmeas=%d, Nsave=%d \n", - Nmeas,Nsave); - fprintf(parameterfile, "# mu = %.12f\n", g_mu/2./g_kappa); - fprintf(parameterfile, "# g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1); - fprintf(parameterfile, "# Using %s precision for the inversions!\n", - g_relative_precision_flag ? 
"relative" : "absolute"); - } - if( strcmp(executable,"invert") == 0 ) { - printf("# beta = %.12f, mu = %.12f, kappa = %.12f\n", g_beta, g_mu/2./g_kappa, g_kappa); - fprintf(parameterfile, - "# beta = %.12f, mu = %.12f, kappa = %.12f\n", g_beta, g_mu/2./g_kappa, g_kappa); + snprintf(message, 1023, "# g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1); + print_fprint(parameterfile, message); + snprintf(message, 1023, "# Nmeas=%d, Nsave=%d \n", Nmeas,Nsave); + print_fprint(parameterfile, message); } fflush(stdout); fflush(parameterfile); return(0); diff --git a/operator/Hopping_Matrix_32_nocom.h b/operator/Hopping_Matrix_32_nocom.h new file mode 100644 index 000000000..1642f2df4 --- /dev/null +++ b/operator/Hopping_Matrix_32_nocom.h @@ -0,0 +1,27 @@ +/*********************************************************************** + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. + ***********************************************************************/ + +#ifndef _HOPPING_MATRIX_32_NOCOM_H +#define _HOPPING_MATRIX_32_NOCOM_H + +#include "su3.h" + +void Hopping_Matrix_32_nocom(const int ieo, spinor32* const l, spinor32* const k); + +#endif