From afd8eef0eb49f21b4c8fd5e1da84646f986acf07 Mon Sep 17 00:00:00 2001 From: Tarun Malviya Date: Sat, 20 May 2023 12:30:59 -0700 Subject: [PATCH] Debug libreproducible added. --- .../dbg_reproducible/fortran_constants.f90 | 57 ++++++++ .../dbg_reproducible/get_fortran_constants.c | 90 +++++++++++++ .../dbg_reproducible/mpi_reproducible.cpp | 126 ++++++++++++++++++ .../dbg_reproducible/mpi_reproducible.h | 29 ++++ 4 files changed, 302 insertions(+) create mode 100644 mpi-proxy-split/dbg_reproducible/fortran_constants.f90 create mode 100644 mpi-proxy-split/dbg_reproducible/get_fortran_constants.c create mode 100644 mpi-proxy-split/dbg_reproducible/mpi_reproducible.cpp create mode 100644 mpi-proxy-split/dbg_reproducible/mpi_reproducible.h diff --git a/mpi-proxy-split/dbg_reproducible/fortran_constants.f90 b/mpi-proxy-split/dbg_reproducible/fortran_constants.f90 new file mode 100644 index 000000000..5ffd1f7d1 --- /dev/null +++ b/mpi-proxy-split/dbg_reproducible/fortran_constants.f90 @@ -0,0 +1,57 @@ +! Tested on Cori with: ftn -c THIS_FILE.f90 +! C binding: https://gcc.gnu.org/onlinedocs/gfortran/Interoperable-Subroutines-and-Functions.html +! MPICH: integer(c_int),bind(C, name=" MPIR_F08_MPI_IN_PLACE ") & :: MPI_IN_PLACE +! FIXME: Make this cleaner, either use bind() command, or use an out parameter. + +! NOTE: In MPI, the FORTRAN constants can have different values from +! one process to the next. So, we need to discover the +! FORTRAN constants in each new MPI process, to recognize +! them at C level. +! For example, note that the MPI_Allreduce wrapper contains: +! if (sendbuf == FORTRAN_MPI_IN_PLACE) { +! retval = NEXT_FUNC(Allreduce)(MPI_IN_PLACE, recvbuf, count, +! realType, realOp, realComm); +! MPI 3.1 standard: +! The constants that cannot be used in initialization expressions or +! assignments in Fortran are as follows: +! MPI_BOTTOM +! MPI_STATUS_IGNORE +! MPI_STATUSES_IGNORE +! MPI_ERRCODES_IGNORE +! MPI_IN_PLACE +! MPI_ARGV_NULL +! MPI_ARGVS_NULL +! MPI_UNWEIGHTED +! MPI_WEIGHTS_EMPTY + + + subroutine get_fortran_constants() + implicit none + include 'mpif.h' + + ! explicit interfaces + interface + subroutine get_fortran_constants_helper(t) + implicit none + integer,intent(in) :: t + end subroutine get_fortran_constants_helper + subroutine get_fortran_arrays_ignore(t) + implicit none + integer :: t(*) + end subroutine get_fortran_arrays_ignore + end interface + ! These must match the list in get_fortran_constants.c + call get_fortran_constants_helper(MPI_BOTTOM) + ! MPI_STATUS_IGNORE is a struct, similar to a length-1 array + call get_fortran_arrays_helper(MPI_STATUS_IGNORE) + call get_fortran_arrays_helper(MPI_STATUSES_IGNORE) + call get_fortran_arrays_helper(MPI_ERRCODES_IGNORE) + call get_fortran_constants_helper(MPI_IN_PLACE) + ! FIXME: MPI_ARGV_NULL is a CHARACTER(1), not supported in MANA + ! call get_fortran_constants_helper(MPI_ARGV_NULL) + ! FIXME: MPI_ARGV_NULL is a CHARACTER(1) in mpich-gnu, not supported + ! in MANA + ! call get_fortran_arrays_helper(MPI_ARGVS_NULL) + call get_fortran_constants_helper(MPI_UNWEIGHTED) + call get_fortran_constants_helper(MPI_WEIGHTS_EMPTY) + end subroutine get_fortran_constants diff --git a/mpi-proxy-split/dbg_reproducible/get_fortran_constants.c b/mpi-proxy-split/dbg_reproducible/get_fortran_constants.c new file mode 100644 index 000000000..c0766335e --- /dev/null +++ b/mpi-proxy-split/dbg_reproducible/get_fortran_constants.c @@ -0,0 +1,90 @@ +#include +#include + +// In C, we call get_fortran_constants(), which eventually calls +// get_fortran_constants_helper(FORTRAN_CONSTANT) which calls +// get_fortran_constants_helper-(int *t) in this file, which allows +// us to capture in C the value of FORTRAN_CONSTANT. +// In FORTRAN, a constant is implemented as a variable whose value +// is a pointer to the address containing the constant. +// See the wrapper for MPI_Allreduce, using FORTRAN_MPI_STATUSES_IGNORE +// for an example of how these constants are used in MANA. + +// Quoting from the MPI standard: +// In Fortran the implementation of these special constants may +// require the use of language constructs that are outside the Fortran +// standard. Using special values for the constants (e.g., by defining them +// through parameter statements) is not possible because an implementation +// cannot distinguish these values from legal data. Typically, these +// constants are implemented as predefined static variables (e.g., a +// variable in an MPI-declared COMMON block), relying on the fact that +// the target compiler passes data by address. Inside the subroutine, this +// address can be extracted by some mechanism outside the Fortran standard +// (e.g., by Fortran extensions or by implementing the function in C). + +// MPI 3.1 standard: +// The constants that cannot be used in initialization expressions or +// assignments in Fortran are as follows: +void *FORTRAN_MPI_BOTTOM = NULL; +void *FORTRAN_MPI_STATUS_IGNORE = NULL; +void *FORTRAN_MPI_STATUSES_IGNORE = NULL; +void *FORTRAN_MPI_ERRCODES_IGNORE = NULL; +void *FORTRAN_MPI_IN_PLACE = NULL; +void *FORTRAN_MPI_ARGV_NULL = NULL; +void *FORTRAN_MPI_ARGVS_NULL = NULL; +void *FORTRAN_MPI_UNWEIGHTED = NULL; +void *FORTRAN_MPI_WEIGHTS_EMPTY = NULL; +void *FORTRAN_CONSTANTS_END = NULL; + +// These must match the list in fortran_constants.f90 +void **fortran_constants[] = { + &FORTRAN_MPI_BOTTOM, + &FORTRAN_MPI_STATUS_IGNORE, + &FORTRAN_MPI_STATUSES_IGNORE, + &FORTRAN_MPI_ERRCODES_IGNORE, + &FORTRAN_MPI_IN_PLACE, + // FIXME: MPI_ARGV_NULL is a CHARACTER(1), not supported in MANA + // &FORTRAN_MPI_ARGV_NULL, + &FORTRAN_MPI_ARGVS_NULL, + &FORTRAN_MPI_UNWEIGHTED, + &FORTRAN_MPI_WEIGHTS_EMPTY, + &FORTRAN_CONSTANTS_END +}; + +// This is called as fortran_constants.f90:get_fortran_constants_helper() +void get_fortran_constants_helper_(int *t) { + static int iter = 0; + if (iter == -1) { + fprintf(stderr, "MANA: get_fortran_constants_helper_: Internal error\n"); + exit(1); + } + *(fortran_constants[iter++]) = t; + if (fortran_constants[iter] == &FORTRAN_CONSTANTS_END) { + iter = -1; // no more FOFTRAN constants to initialize. + } +} + +// This is called as fortran_constants.f90:get_fortran_arrays_helper() +void get_fortran_arrays_helper_(int *t) { + get_fortran_constants_helper_(t); +} + +void get_fortran_constants_(void); + +void get_fortran_constants() { + static int initialized = 0; + if (!initialized) { + // This was defined in fortran_constants.f90:get_fortran_constants() + get_fortran_constants_(); + initialized = 1; + } +} + +#ifdef STANDALONE +int main() { + get_fortran_constants(); + printf("Fortran MPI_IN_PLACE = %p\n", FORTRAN_MPI_IN_PLACE); + printf("Fortran MPI_STATUSES_IGNORE = %p\n", FORTRAN_MPI_STATUSES_IGNORE); + return 0; +} +#endif diff --git a/mpi-proxy-split/dbg_reproducible/mpi_reproducible.cpp b/mpi-proxy-split/dbg_reproducible/mpi_reproducible.cpp new file mode 100644 index 000000000..7a9fa12fd --- /dev/null +++ b/mpi-proxy-split/dbg_reproducible/mpi_reproducible.cpp @@ -0,0 +1,126 @@ +#include "mpi_reproducible.h" + +#include +#include +#include +#include + +#include +#include +#include + + +/* We use 'static' becuase we don't want the overhead of the compiler +initializing these to zero each time the function is called. */ +#define MAX_ALLREDUCE_SENDBUF_SIZE (1024 * 1024 * 16) /* 15 MB */ + +int +MPI_Init_thread(int *argc, char ***argv, int required, int *provided) +{ + + int retval = NEXT_FNC(MPI_Init_thread)(argc, argv, required, provided); + return retval; +} + +void touch(const char *filename) { + struct stat st; + // Check if the file already exists + int result = stat(filename, &st); + if (result == 0) { + + } else { + // If the file does not exist, create an empty file + FILE *file = fopen(filename, "w"); + if (file != NULL) { + fclose(file); + result = 0; + } else { + result = -1; + } + } + +} + +int +MPI_Allreduce(const void *sendbuf, + void *recvbuf, + int count, + MPI_Datatype datatype, + MPI_Op op, + MPI_Comm comm) +{ + static unsigned char tmpbuf[MAX_ALLREDUCE_SENDBUF_SIZE]; + +touch("/global/cfs/cdirs/cr/malviyat/exp-setup/PdO4-test-native-reproducible/reproducible.txt"); + + int retval; + int root = 0; + int comm_rank; + int comm_size; + int type_size; + + retval = NEXT_FNC(MPI_Comm_rank)(comm, &comm_rank); + retval = NEXT_FNC(MPI_Comm_size)(comm, &comm_size); + retval = NEXT_FNC(MPI_Type_size)(datatype, &type_size); + + fprintf(stdout, + "\nMPI_Allreduce_reproducible: using "); + fflush(stdout); + if (count * comm_size * type_size > MAX_ALLREDUCE_SENDBUF_SIZE) { + fprintf(stderr, + "\nMPI_Allreduce_reproducible: Insufficient tmp send buffer."); + fflush(stderr); + exit(-1); + } + + if (sendbuf != FORTRAN_MPI_IN_PLACE || sendbuf == MPI_IN_PLACE) { + fprintf(stderr, + "\nMPI_Allreduce_reproducible: MPI_IN_PLACE not yet supported."); + fflush(stderr); + exit(-1); + } + + // Gather the operands from all ranks in the comm + retval = NEXT_FNC(MPI_Gather)(sendbuf, count, datatype, tmpbuf, count, + datatype, 0, comm); + + // Perform the local reduction operation on the root rank + if (comm_rank == root) { + memset(recvbuf, 0, count * type_size); + memcpy(recvbuf, tmpbuf + (count * type_size * 0), count * type_size); + for (int i = 1; i < comm_size; i++) { + retval = NEXT_FNC(MPI_Reduce_local)(tmpbuf + (count * type_size * i), + recvbuf, count, datatype, op); + } + } + + // Broadcat the local reduction operation result in the comm + retval = NEXT_FNC(MPI_Bcast)(recvbuf, count, datatype, 0, comm); + return retval; +} + +EXTERNC int +mpi_init_thread_(int *required, int *provided, int *ierr) +{ + int argc = 0; + char **argv; + *ierr = MPI_Init_thread(&argc, &argv, *required, provided); + return *ierr; +} + +EXTERNC int +mpi_allreduce_(const void *sendbuf, + void *recvbuf, + int *count, + MPI_Datatype *datatype, + MPI_Op *op, + MPI_Comm *comm, + int *ierr) +{ + fprintf(stdout, + "\nMPI_Allreduce_reproducible__: using "); + fflush(stdout); + + *ierr = MPI_Allreduce(sendbuf, recvbuf, *count, *datatype, *op, *comm); + return *ierr; +} diff --git a/mpi-proxy-split/dbg_reproducible/mpi_reproducible.h b/mpi-proxy-split/dbg_reproducible/mpi_reproducible.h new file mode 100644 index 000000000..d8f37b409 --- /dev/null +++ b/mpi-proxy-split/dbg_reproducible/mpi_reproducible.h @@ -0,0 +1,29 @@ +#ifndef _MPI_REPRODUCIBLE_H +#define _MPI_REPRODUCIBLE_H + +#include + +#define EXTERNC extern "C" + +#define NEXT_FNC(func) \ + ({ \ + static __typeof__(&func) _real_##func = (__typeof__(&func))-1; \ + if (_real_##func == (__typeof__(&func))-1) { \ + _real_##func = (__typeof__(&func))dlsym(RTLD_NEXT, #func); \ + } \ + _real_##func; \ + }) + +EXTERNC void get_fortran_constants(); +extern void *FORTRAN_MPI_BOTTOM; +extern void *FORTRAN_MPI_STATUS_IGNORE; +extern void *FORTRAN_MPI_STATUSES_IGNORE; +extern void *FORTRAN_MPI_ERRCODES_IGNORE; +extern void *FORTRAN_MPI_IN_PLACE; +extern void *FORTRAN_MPI_ARGV_NULL; +extern void *FORTRAN_MPI_ARGVS_NULL; +extern void *FORTRAN_MPI_UNWEIGHTED; +extern void *FORTRAN_MPI_WEIGHTS_EMPTY; +extern void *FORTRAN_CONSTANTS_END; + +#endif