-
Notifications
You must be signed in to change notification settings - Fork 26
[WIP] Correctness - Feature - Debugging Library (libreproducible.so) for Enabling Reproducibility in Native Runs. #317
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
tarunsmalviya
wants to merge
1
commit into
mpickpt:main
Choose a base branch
from
tarunsmalviya:feature/native_allreduce_reproducible
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| ! Tested on Cori with: ftn -c THIS_FILE.f90 | ||
| ! C binding: https://gcc.gnu.org/onlinedocs/gfortran/Interoperable-Subroutines-and-Functions.html | ||
| ! MPICH: integer(c_int),bind(C, name=" MPIR_F08_MPI_IN_PLACE ") & :: MPI_IN_PLACE | ||
| ! FIXME: Make this cleaner, either use bind() command, or use an out parameter. | ||
|
|
||
| ! NOTE: In MPI, the FORTRAN constants can have different values from | ||
| ! one process to the next. So, we need to discover the | ||
| ! FORTRAN constants in each new MPI process, to recognize | ||
| ! them at C level. | ||
| ! For example, note that the MPI_Allreduce wrapper contains: | ||
| ! if (sendbuf == FORTRAN_MPI_IN_PLACE) { | ||
| ! retval = NEXT_FUNC(Allreduce)(MPI_IN_PLACE, recvbuf, count, | ||
| ! realType, realOp, realComm); | ||
| ! MPI 3.1 standard: | ||
| ! The constants that cannot be used in initialization expressions or | ||
| ! assignments in Fortran are as follows: | ||
| ! MPI_BOTTOM | ||
| ! MPI_STATUS_IGNORE | ||
| ! MPI_STATUSES_IGNORE | ||
| ! MPI_ERRCODES_IGNORE | ||
| ! MPI_IN_PLACE | ||
| ! MPI_ARGV_NULL | ||
| ! MPI_ARGVS_NULL | ||
| ! MPI_UNWEIGHTED | ||
| ! MPI_WEIGHTS_EMPTY | ||
|
|
||
|
|
||
| subroutine get_fortran_constants() | ||
| implicit none | ||
| include 'mpif.h' | ||
|
|
||
| ! explicit interfaces | ||
| interface | ||
| subroutine get_fortran_constants_helper(t) | ||
| implicit none | ||
| integer,intent(in) :: t | ||
| end subroutine get_fortran_constants_helper | ||
| subroutine get_fortran_arrays_ignore(t) | ||
| implicit none | ||
| integer :: t(*) | ||
| end subroutine get_fortran_arrays_ignore | ||
| end interface | ||
| ! These must match the list in get_fortran_constants.c | ||
| call get_fortran_constants_helper(MPI_BOTTOM) | ||
| ! MPI_STATUS_IGNORE is a struct, similar to a length-1 array | ||
| call get_fortran_arrays_helper(MPI_STATUS_IGNORE) | ||
| call get_fortran_arrays_helper(MPI_STATUSES_IGNORE) | ||
| call get_fortran_arrays_helper(MPI_ERRCODES_IGNORE) | ||
| call get_fortran_constants_helper(MPI_IN_PLACE) | ||
| ! FIXME: MPI_ARGV_NULL is a CHARACTER(1), not supported in MANA | ||
| ! call get_fortran_constants_helper(MPI_ARGV_NULL) | ||
| ! FIXME: MPI_ARGV_NULL is a CHARACTER(1) in mpich-gnu, not supported | ||
| ! in MANA | ||
| ! call get_fortran_arrays_helper(MPI_ARGVS_NULL) | ||
| call get_fortran_constants_helper(MPI_UNWEIGHTED) | ||
| call get_fortran_constants_helper(MPI_WEIGHTS_EMPTY) | ||
| end subroutine get_fortran_constants |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
|
|
||
| // In C, we call get_fortran_constants(), which eventually calls | ||
| // get_fortran_constants_helper(FORTRAN_CONSTANT) which calls | ||
| // get_fortran_constants_helper-(int *t) in this file, which allows | ||
| // us to capture in C the value of FORTRAN_CONSTANT. | ||
| // In FORTRAN, a constant is implemented as a variable whose value | ||
| // is a pointer to the address containing the constant. | ||
| // See the wrapper for MPI_Allreduce, using FORTRAN_MPI_STATUSES_IGNORE | ||
| // for an example of how these constants are used in MANA. | ||
|
|
||
| // Quoting from the MPI standard: | ||
| // In Fortran the implementation of these special constants may | ||
| // require the use of language constructs that are outside the Fortran | ||
| // standard. Using special values for the constants (e.g., by defining them | ||
| // through parameter statements) is not possible because an implementation | ||
| // cannot distinguish these values from legal data. Typically, these | ||
| // constants are implemented as predefined static variables (e.g., a | ||
| // variable in an MPI-declared COMMON block), relying on the fact that | ||
| // the target compiler passes data by address. Inside the subroutine, this | ||
| // address can be extracted by some mechanism outside the Fortran standard | ||
| // (e.g., by Fortran extensions or by implementing the function in C). | ||
|
|
||
| // MPI 3.1 standard: | ||
| // The constants that cannot be used in initialization expressions or | ||
| // assignments in Fortran are as follows: | ||
| void *FORTRAN_MPI_BOTTOM = NULL; | ||
| void *FORTRAN_MPI_STATUS_IGNORE = NULL; | ||
| void *FORTRAN_MPI_STATUSES_IGNORE = NULL; | ||
| void *FORTRAN_MPI_ERRCODES_IGNORE = NULL; | ||
| void *FORTRAN_MPI_IN_PLACE = NULL; | ||
| void *FORTRAN_MPI_ARGV_NULL = NULL; | ||
| void *FORTRAN_MPI_ARGVS_NULL = NULL; | ||
| void *FORTRAN_MPI_UNWEIGHTED = NULL; | ||
| void *FORTRAN_MPI_WEIGHTS_EMPTY = NULL; | ||
| void *FORTRAN_CONSTANTS_END = NULL; | ||
|
|
||
| // These must match the list in fortran_constants.f90 | ||
| void **fortran_constants[] = { | ||
| &FORTRAN_MPI_BOTTOM, | ||
| &FORTRAN_MPI_STATUS_IGNORE, | ||
| &FORTRAN_MPI_STATUSES_IGNORE, | ||
| &FORTRAN_MPI_ERRCODES_IGNORE, | ||
| &FORTRAN_MPI_IN_PLACE, | ||
| // FIXME: MPI_ARGV_NULL is a CHARACTER(1), not supported in MANA | ||
| // &FORTRAN_MPI_ARGV_NULL, | ||
| &FORTRAN_MPI_ARGVS_NULL, | ||
| &FORTRAN_MPI_UNWEIGHTED, | ||
| &FORTRAN_MPI_WEIGHTS_EMPTY, | ||
| &FORTRAN_CONSTANTS_END | ||
| }; | ||
|
|
||
| // This is called as fortran_constants.f90:get_fortran_constants_helper() | ||
| void get_fortran_constants_helper_(int *t) { | ||
| static int iter = 0; | ||
| if (iter == -1) { | ||
| fprintf(stderr, "MANA: get_fortran_constants_helper_: Internal error\n"); | ||
| exit(1); | ||
| } | ||
| *(fortran_constants[iter++]) = t; | ||
| if (fortran_constants[iter] == &FORTRAN_CONSTANTS_END) { | ||
| iter = -1; // no more FOFTRAN constants to initialize. | ||
| } | ||
| } | ||
|
|
||
| // This is called as fortran_constants.f90:get_fortran_arrays_helper() | ||
| void get_fortran_arrays_helper_(int *t) { | ||
| get_fortran_constants_helper_(t); | ||
| } | ||
|
|
||
| void get_fortran_constants_(void); | ||
|
|
||
| void get_fortran_constants() { | ||
| static int initialized = 0; | ||
| if (!initialized) { | ||
| // This was defined in fortran_constants.f90:get_fortran_constants() | ||
| get_fortran_constants_(); | ||
| initialized = 1; | ||
| } | ||
| } | ||
|
|
||
| #ifdef STANDALONE | ||
| int main() { | ||
| get_fortran_constants(); | ||
| printf("Fortran MPI_IN_PLACE = %p\n", FORTRAN_MPI_IN_PLACE); | ||
| printf("Fortran MPI_STATUSES_IGNORE = %p\n", FORTRAN_MPI_STATUSES_IGNORE); | ||
| return 0; | ||
| } | ||
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| #include "mpi_reproducible.h" | ||
|
|
||
| #include <dlfcn.h> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
|
|
||
| #include <sys/types.h> | ||
| #include <sys/stat.h> | ||
| #include <utime.h> | ||
|
|
||
|
|
||
| /* We use 'static' becuase we don't want the overhead of the compiler | ||
| initializing these to zero each time the function is called. */ | ||
| #define MAX_ALLREDUCE_SENDBUF_SIZE (1024 * 1024 * 16) /* 15 MB */ | ||
|
|
||
| int | ||
| MPI_Init_thread(int *argc, char ***argv, int required, int *provided) | ||
| { | ||
|
|
||
| int retval = NEXT_FNC(MPI_Init_thread)(argc, argv, required, provided); | ||
| return retval; | ||
| } | ||
|
|
||
| void touch(const char *filename) { | ||
| struct stat st; | ||
| // Check if the file already exists | ||
| int result = stat(filename, &st); | ||
| if (result == 0) { | ||
|
|
||
| } else { | ||
| // If the file does not exist, create an empty file | ||
| FILE *file = fopen(filename, "w"); | ||
| if (file != NULL) { | ||
| fclose(file); | ||
| result = 0; | ||
| } else { | ||
| result = -1; | ||
| } | ||
| } | ||
|
|
||
| } | ||
|
|
||
| int | ||
| MPI_Allreduce(const void *sendbuf, | ||
| void *recvbuf, | ||
| int count, | ||
| MPI_Datatype datatype, | ||
| MPI_Op op, | ||
| MPI_Comm comm) | ||
| { | ||
| static unsigned char tmpbuf[MAX_ALLREDUCE_SENDBUF_SIZE]; | ||
|
|
||
| touch("/global/cfs/cdirs/cr/malviyat/exp-setup/PdO4-test-native-reproducible/reproducible.txt"); | ||
|
|
||
| int retval; | ||
| int root = 0; | ||
| int comm_rank; | ||
| int comm_size; | ||
| int type_size; | ||
|
|
||
| retval = NEXT_FNC(MPI_Comm_rank)(comm, &comm_rank); | ||
| retval = NEXT_FNC(MPI_Comm_size)(comm, &comm_size); | ||
| retval = NEXT_FNC(MPI_Type_size)(datatype, &type_size); | ||
|
|
||
| fprintf(stdout, | ||
| "\nMPI_Allreduce_reproducible: using "); | ||
| fflush(stdout); | ||
| if (count * comm_size * type_size > MAX_ALLREDUCE_SENDBUF_SIZE) { | ||
| fprintf(stderr, | ||
| "\nMPI_Allreduce_reproducible: Insufficient tmp send buffer."); | ||
| fflush(stderr); | ||
| exit(-1); | ||
| } | ||
|
|
||
| if (sendbuf != FORTRAN_MPI_IN_PLACE || sendbuf == MPI_IN_PLACE) { | ||
| fprintf(stderr, | ||
| "\nMPI_Allreduce_reproducible: MPI_IN_PLACE not yet supported."); | ||
| fflush(stderr); | ||
| exit(-1); | ||
| } | ||
|
|
||
| // Gather the operands from all ranks in the comm | ||
| retval = NEXT_FNC(MPI_Gather)(sendbuf, count, datatype, tmpbuf, count, | ||
| datatype, 0, comm); | ||
|
|
||
| // Perform the local reduction operation on the root rank | ||
| if (comm_rank == root) { | ||
| memset(recvbuf, 0, count * type_size); | ||
| memcpy(recvbuf, tmpbuf + (count * type_size * 0), count * type_size); | ||
| for (int i = 1; i < comm_size; i++) { | ||
| retval = NEXT_FNC(MPI_Reduce_local)(tmpbuf + (count * type_size * i), | ||
| recvbuf, count, datatype, op); | ||
| } | ||
| } | ||
|
|
||
| // Broadcat the local reduction operation result in the comm | ||
| retval = NEXT_FNC(MPI_Bcast)(recvbuf, count, datatype, 0, comm); | ||
| return retval; | ||
| } | ||
|
|
||
| EXTERNC int | ||
| mpi_init_thread_(int *required, int *provided, int *ierr) | ||
| { | ||
| int argc = 0; | ||
| char **argv; | ||
| *ierr = MPI_Init_thread(&argc, &argv, *required, provided); | ||
| return *ierr; | ||
| } | ||
|
|
||
| EXTERNC int | ||
| mpi_allreduce_(const void *sendbuf, | ||
| void *recvbuf, | ||
| int *count, | ||
| MPI_Datatype *datatype, | ||
| MPI_Op *op, | ||
| MPI_Comm *comm, | ||
| int *ierr) | ||
| { | ||
| fprintf(stdout, | ||
| "\nMPI_Allreduce_reproducible__: using "); | ||
| fflush(stdout); | ||
|
|
||
| *ierr = MPI_Allreduce(sendbuf, recvbuf, *count, *datatype, *op, *comm); | ||
| return *ierr; | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| #ifndef _MPI_REPRODUCIBLE_H | ||
| #define _MPI_REPRODUCIBLE_H | ||
|
|
||
| #include <mpi.h> | ||
|
|
||
| #define EXTERNC extern "C" | ||
|
|
||
| #define NEXT_FNC(func) \ | ||
| ({ \ | ||
| static __typeof__(&func) _real_##func = (__typeof__(&func))-1; \ | ||
| if (_real_##func == (__typeof__(&func))-1) { \ | ||
| _real_##func = (__typeof__(&func))dlsym(RTLD_NEXT, #func); \ | ||
| } \ | ||
| _real_##func; \ | ||
| }) | ||
|
|
||
| EXTERNC void get_fortran_constants(); | ||
| extern void *FORTRAN_MPI_BOTTOM; | ||
| extern void *FORTRAN_MPI_STATUS_IGNORE; | ||
| extern void *FORTRAN_MPI_STATUSES_IGNORE; | ||
| extern void *FORTRAN_MPI_ERRCODES_IGNORE; | ||
| extern void *FORTRAN_MPI_IN_PLACE; | ||
| extern void *FORTRAN_MPI_ARGV_NULL; | ||
| extern void *FORTRAN_MPI_ARGVS_NULL; | ||
| extern void *FORTRAN_MPI_UNWEIGHTED; | ||
| extern void *FORTRAN_MPI_WEIGHTS_EMPTY; | ||
| extern void *FORTRAN_CONSTANTS_END; | ||
|
|
||
| #endif |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In order to use Fortran constants, you need to call something like
get_fortran_constants()on initialization method like this one.