Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
7005fbb
Add GDB.
ooreilly Jul 14, 2020
d6f7061
Set CUDA compile flags.
ooreilly Jul 14, 2020
258c5e5
Fix segmentation fault when using point forces on multiple GPUs.
ooreilly Jul 14, 2020
501f464
Fix force parallel bug.
ooreilly Jul 14, 2020
b839697
Fix ngsl.
ooreilly Jul 14, 2020
1a5eadb
Fix repeating source.
ooreilly Jul 14, 2020
67a9df5
Add old CUDA print functions.
ooreilly Jul 14, 2020
bfc52a6
Update kernel.cu to fix DM sync issues.
ooreilly Jul 17, 2020
3b07f4b
Fix three types of parallel bugs in the source and receiver implement…
ooreilly Oct 1, 2020
33bc3c2
add print statements for outputting grid numbers.
ooreilly Oct 5, 2020
a5df04f
rewrite distribute function so that it is compatible with multiple bl…
ooreilly Oct 5, 2020
8cd4dba
remove print statements and add some comments to explain certain step…
ooreilly Oct 7, 2020
6f8f7ae
fix broken test (update client call).
ooreilly Oct 7, 2020
1d097a9
fix source interpolation in the curvilinear kernel.
ooreilly Oct 12, 2020
e8d1081
fix block size for original AWP.
ooreilly Nov 20, 2020
af2d0b4
fix a bug in PR-13 to assign receiver/source indices in each partition
hzfmer Nov 21, 2020
a7763b9
add zhat arg to source functions.
ooreilly Dec 7, 2020
b4c6353
fix source amplitude errors for sources placed near the boundary at t…
ooreilly Dec 7, 2020
fdebaf0
Fix wrong quadrature weights.
ooreilly Dec 8, 2020
be130ec
change step to size_t in buffer.
ooreilly Dec 13, 2020
e4a5aea
fix integer overflow in mpi_io_indexed.
ooreilly Dec 14, 2020
8749550
enable debugging statements and fix force boundary issue for a single…
ooreilly Jan 29, 2021
a977b5e
fix source in the x-direction.
ooreilly Jan 29, 2021
4142ba3
fix force in the y direction.
ooreilly Jan 29, 2021
10aa7d5
add partitioning interface, but this change breaks the gaussian hill …
ooreilly Jan 30, 2021
ce414a6
remove unused macro.
ooreilly Jan 31, 2021
cafe12d
fix output of density.
ooreilly Jan 31, 2021
0eed1b4
remove unused grids.
ooreilly Jan 31, 2021
016f8c0
rename partitioning functions and update partitioning bounds to minim…
ooreilly Jan 31, 2021
5e93b1f
remove debug statements.
ooreilly Jan 31, 2021
4c6de6f
Fix SGT output.
ooreilly Feb 5, 2021
92dda2c
add optimized stress kernel to interior region.
ooreilly Feb 5, 2021
418f714
add optimized kernel to left boundary.
ooreilly Feb 5, 2021
e8eccce
add optimized stress kernel to right boundary.
ooreilly Feb 5, 2021
87d57c2
forgot to add kernel :)
ooreilly Feb 5, 2021
3c69c30
add missing kernel :)
ooreilly Feb 5, 2021
3245f73
Fix source placement (#16)
ooreilly Jun 25, 2021
d93d303
Fix mapping (#17)
ooreilly Jun 27, 2021
0df29b7
Grid stretch (#18)
ooreilly Jul 1, 2021
2ae3108
Update grid writing tool (#19)
ooreilly Jul 6, 2021
ddb6ab8
Fix write material property info (#20)
ooreilly Jul 6, 2021
e5f38d9
Fix unit tests (#21)
ooreilly Jul 7, 2021
6b2f579
rename project. (#22)
ooreilly Jul 8, 2021
b0b55c5
Update write_grid docs. (#23)
ooreilly Jul 8, 2021
a326b02
Energy rate (#24)
ooreilly Jul 19, 2021
55449c2
disable energy module when not in use. (#25)
ooreilly Sep 1, 2021
7553165
fix incorrect placement of source/receivers in the y-direction and po…
ooreilly Oct 9, 2021
bf83acb
Fix dm overlap placement (#30)
ooreilly Oct 12, 2021
c91561c
Redesign cerjan function so that ND is no longer bounded by min(nxt,nyt)
deanrockit Dec 8, 2021
f2c1d73
Fix write_grid tool so that it works with multiple processors. Fix an…
deanrockit Oct 11, 2022
0f658d6
* Adding new user input flags specifying Q model parameters and caps …
deanrockit Apr 5, 2024
cadd990
Delete release folder
deanrockit Apr 5, 2024
ab9aa3e
Modify velbuffer function to avoid zero values in Vz at the surface w…
deanrockit Apr 5, 2024
cc201e4
Merge pull request #31 from SCECcode/new_cerjan
deanrockit Apr 5, 2024
01f49f1
Remove textures, add compile scripts for summit,ls6,vista
Oct 24, 2024
8e7d34a
Change param type of Alloc1D from int to long
akashpalla Feb 13, 2025
18a4b26
Remove mesh_clean.c from compile scripts
akashpalla Feb 18, 2025
c2c495f
Merge pull request #34 from akashpalla/cuda12+
deanrockit Feb 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
build/*
profile/*.[0-9]*
profile/*/
.*/*
release*
pmcl3d*
!pmcl3d*h
32 changes: 19 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
cmake_minimum_required(VERSION 3.10)
cmake_policy(SET CMP0074 NEW)
project(AWP_MINI VERSION 1.0 LANGUAGES C CUDA)
project(AWP VERSION 1.0 LANGUAGES C CUDA)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 90)
endif()
include(CMakePrintHelpers)
#include(FindMPI.cmake)

include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
set(GCC_COMPILE_FLAGS "-std=c99 -Wall -Werror\
set(GCC_COMPILE_FLAGS "-std=c99 -Wall\
-Wextra -Wmissing-prototypes -Wstrict-prototypes \
-Wold-style-definition -Wno-unused-parameter")
if (DEFINED ENV{ARCH})
set(ARCH $ENV{ARCH})
else()
set(ARCH sm_70)
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -arch=${ARCH} -Xptxas=-v -lineinfo -use_fast_math")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS} -D${ARCH}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu11 -D${ARCH}")
-Wno-unused-parameter")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O4 -Xcompiler -std=c++17 -use_fast_math -Xptxas=-v -g -lineinfo --allow-unsupported-compiler")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
#add_compile_definitions(MPICH_SKIP_MPICXX=1)

#### MPI
find_package(MPI REQUIRED)
if (MPI_FOUND)
cmake_print_variables(CMAKE_INCLUDE_PATH)
include_directories(${MY_INCLUDE_PATH})
else (MPI_FOUND)
message(SEND_ERROR "This application cannot compile without MPI")
endif (MPI_FOUND)


include(CTest)
Expand Down
19 changes: 19 additions & 0 deletions compile_ls6_impi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Configure and build the project in ./release using the cmake, gcc, impi and
# cuda modules, with the MPI GCC wrappers (mpigcc/mpigxx) as compilers.

module unload intel
module load cmake gcc impi cuda

# Always start from an empty build directory. -f keeps the very first run
# (when release/ does not exist yet) from reporting an error.
rm -rf release
mkdir -p release

# Abort if the build directory cannot be entered; otherwise `cmake ..` would
# be run from the source directory against its parent.
cd release || exit 1

export CC=$(which mpigcc)
export CXX=$(which mpigxx)
export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/lib:$LD_LIBRARY_PATH
export CPATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/include:$CPATH
#export PATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/bin:$PATH

# Log the loaded modules and every *PATH* variable for troubleshooting.
module list
env | grep "PATH"

# Only build when configuration succeeded.
cmake -DCMAKE_VERBOSE_MAKEFILE=ON .. && make
18 changes: 18 additions & 0 deletions compile_ls6_mv2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
# Configure and build the project in ./release using the cmake, gcc, mvapich2
# and cuda modules.

module unload intel impi
module load cmake gcc mvapich2 cuda
module list

# Always start from an empty build directory. -f keeps the very first run
# (when release/ does not exist yet) from reporting an error.
rm -rf release
mkdir -p release

# Abort if the build directory cannot be entered; otherwise `cmake ..` would
# be run from the source directory against its parent.
cd release || exit 1

#export CC=$(which mpicc)
#export CXX=$(which mpicxx)
# Make the MVAPICH2 libraries and headers visible to the compiler and loader.
export LD_LIBRARY_PATH=/opt/apps/gcc11_2/mvapich2/2.3.7/lib:$LD_LIBRARY_PATH
export CPATH=/opt/apps/gcc11_2/mvapich2/2.3.7/include:$CPATH
#export LD_LIBRARY_PATH=/.../mvapich2/lib:$LD_LIBRARY_PATH
#export CPATH=/.../mvapich2/include:$CPATH

# Only build when configuration succeeded.
cmake -DCMAKE_VERBOSE_MAKEFILE=ON .. && make
9 changes: 9 additions & 0 deletions compile_summit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/tcsh
# Configure and build the project in ./release using the cmake, gcc and cuda
# modules.

# Always start from an empty build directory. -f keeps the very first run
# (when release/ does not exist yet) from reporting an error.
rm -rf release
mkdir -p release
module load cmake gcc cuda

cd release
# Only build when configuration succeeded.
cmake .. && make
72 changes: 72 additions & 0 deletions compile_vista_mv2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
# Configure and build the project in ./release against the MVAPICH
# installation under MPI_HOME, using the e4s, mvapich, tau and cuda/12.4
# modules. Prints the resulting environment before invoking cmake so that a
# failing build can be diagnosed from the log.


module unload cmake
#module unload gcc
#ml reset
#ml nvhpc-hpcx-cuda11/23.7
#module load intel cmake impi cuda
module unload openmpi
module use /scratch/00494/tg457572/packages/modulefiles

module load e4s
module load mvapich
module load tau
ml cuda/12.4

# Always start from an empty build directory. -f keeps the very first run
# (when release/ does not exist yet) from reporting an error.
rm -rf release
mkdir -p release

# Abort if the build directory cannot be entered; otherwise `cmake ..` would
# be run from the source directory against its parent.
cd release || exit 1
#export CXX

#export MPI_HOME="/home1/07936/tg872351/mvp-pre-rc-ofi-cuda12.5"
export MPI_HOME="/home1/07936/tg872351/mvp-pre-rc-zfp-cuda12.4"

# Put the MPI installation first on the executable, library and header paths.
export PATH=${MPI_HOME}/bin:$PATH
export LD_LIBRARY_PATH=${MPI_HOME}/lib:$LD_LIBRARY_PATH
export CPATH=${MPI_HOME}/include:$CPATH
export C_INCLUDE_PATH=${MPI_HOME}/include:$C_INCLUDE_PATH

# NOTE(review): this adds the CUDA 12.5 library directory while the module
# loaded above is cuda/12.4 and MPI_HOME names a cuda12.4 build -- confirm the
# version mix is intentional.
export LD_LIBRARY_PATH=/home1/apps/nvidia/Linux_aarch64/24.7/cuda/12.5/lib64:$LD_LIBRARY_PATH

# Resolve the MPI compiler wrappers now that MPI_HOME/bin is on PATH.
export CC=$(which mpicc)
export CXX=$(which mpicxx)
export FC=$(which mpifort)
export MPI_C_COMPILER=$(which mpicc)
export MPI_CXX_COMPILER=$(which mpicxx)
export MPI_INCLUDE_PATH=${MPI_HOME}/include


#export CXX
#export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/lib:$LD_LIBRARY_PATH
#export CPATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/include:$CPATH
#export C_INCLUDE_PATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/include:$C_INCLUDE_PATH
#export PATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/bin:$PATH

# Dump the search paths one entry per line.
echo -e "\n"
echo "======== PATH=============="
echo "$PATH" | tr : '\n'

echo -e "\n"
echo "======== INCLUDE =============="
echo "$INCLUDE" | tr : '\n'

echo -e "\n"
echo "======== LD_LIBRARY_PATH=============="
echo "$LD_LIBRARY_PATH" | tr : '\n'

module list
echo -e "mpicc = $(which mpicc)"
echo ""

echo "LD_PRELOAD=$LD_PRELOAD"

#export MPI_HOME=${TACC_MPI_DIR}

echo "TACC_IMPI_INC=$TACC_IMPI_INC"
echo "MPI_HOME=$MPI_HOME"


# Quote the substituted values so an empty or space-containing path cannot
# split or drop a cmake argument, and only build when configuration succeeded.
cmake -DCMAKE_VERBOSE_MAKEFILE=ON \
      -DCMAKE_C_COMPILER="$(which mpicc)" \
      -DCMAKE_CXX_COMPILER="$(which mpicxx)" \
      -DMY_INCLUDE_PATH="$MPI_INCLUDE_PATH" \
      .. && make
50 changes: 50 additions & 0 deletions compile_vista_openmpi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
# Configure and build the project in ./release with whichever MPI compiler
# wrappers (mpicc/mpicxx/mpifort) are on PATH, taking the MPI headers from
# TACC_MPI_DIR. Prints the resulting environment before invoking cmake so that
# a failing build can be diagnosed from the log.


module unload cmake
#module unload gcc
#ml reset
#ml nvhpc-hpcx-cuda11/23.7
#module load intel cmake impi cuda

# Always start from an empty build directory. -f keeps the very first run
# (when release/ does not exist yet) from reporting an error.
rm -rf release
mkdir -p release

# Abort if the build directory cannot be entered; otherwise `cmake ..` would
# be run from the source directory against its parent.
cd release || exit 1
export CC=$(which mpicc)
export CXX=$(which mpicxx)
export FC=$(which mpifort)
export MPI_C_COMPILER=$(which mpicc)
export MPI_INCLUDE_PATH=${TACC_MPI_DIR}/include
#export CXX
#export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/lib:$LD_LIBRARY_PATH
#export CPATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/include:$CPATH
#export C_INCLUDE_PATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/include:$C_INCLUDE_PATH
#export PATH=/opt/intel/compilers_and_libraries_2020.4.304/linux/mpi/intel64/bin:$PATH

# Dump the search paths one entry per line.
echo -e "\n"
echo "======== PATH=============="
echo "$PATH" | tr : '\n'

echo -e "\n"
echo "======== INCLUDE =============="
echo "$INCLUDE" | tr : '\n'

echo -e "\n"
echo "======== LD_LIBRARY_PATH=============="
echo "$LD_LIBRARY_PATH" | tr : '\n'

module list
echo -e "mpicc = $(which mpicc)"
echo ""

echo "LD_PRELOAD=$LD_PRELOAD"

export MPI_HOME=${TACC_MPI_DIR}

echo "TACC_IMPI_INC=$TACC_IMPI_INC"
echo "MPI_HOME=$MPI_HOME"


# Quote the substituted values so an empty or space-containing path cannot
# split or drop a cmake argument, and only build when configuration succeeded.
cmake -DCMAKE_VERBOSE_MAKEFILE=ON \
      -DCMAKE_C_COMPILER="$(which mpicc)" \
      -DCMAKE_CXX_COMPILER="$(which mpicxx)" \
      -DMY_INCLUDE_PATH="$MPI_INCLUDE_PATH" \
      .. && make
9 changes: 3 additions & 6 deletions include/awp/definitions.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
#ifndef DEFINITIONS_H
#define DEFINITIONS_H

#define BLOCK_SIZE_X 2
#define BLOCK_SIZE_Y 2
#define BLOCK_SIZE_Z 4

#include <mpi.h>

#ifndef _prec
Expand All @@ -29,11 +25,11 @@ typedef float prec;
#endif

#ifndef ngsl
#define ngsl 8
#define ngsl 4
#endif

#ifndef ngsl2
#define ngsl2 16
#define ngsl2 8
#endif

#ifndef align
Expand All @@ -47,6 +43,7 @@ typedef float prec;
#define STR_LEN 2048



typedef struct
{
_prec x, y, z;
Expand Down
2 changes: 0 additions & 2 deletions include/awp/error.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#ifndef ERROR_H
#define ERROR_H

int _last_error;

enum error_codes {SUCCESS,
ERR_FILE_OPEN = 100,
ERR_FILE_READ = 101,
Expand Down
7 changes: 5 additions & 2 deletions include/awp/pmcl3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ void command(int argc, char **argv, _prec *TMAX, _prec *DH, _prec *DT,
int *USETOPO, char *SOURCEFILE,
int *USESOURCEFILE, char *RECVFILE, int *USERECVFILE,
char *FORCEFILE, int *USEFORCEFILE,
char *SGTFILE, int *USESGTFILE);
char *SGTFILE, int *USESGTFILE, char *MMSFILE, int *USEMMSFILE, float *DHB, float *DHT,
char *ENERGYFILE, int *USEENERGYFILE,
_prec *QSI, _prec *QPQSR, _prec *MAXVPVSR, _prec *VMIN, _prec *VMAX, _prec *DMIN);

int read_src_ifault_2(int rank, int READ_STEP,
char *INSRC, char *INSRC_I2,
Expand Down Expand Up @@ -72,6 +74,7 @@ void inimesh(int rank, int MEDIASTART, Grid3D d1, Grid3D mu, Grid3D lam, Grid3D
Grid3D tau, Grid3D weights,Grid1D coeff,
int nvar, _prec FP, _prec FAC, _prec Q0, _prec EX, int nxt, int nyt, int nzt, int PX, int PY, int NX, int NY,
int NZ, int *coords, MPI_Comm MCW, int IDYNA, int NVE, int SoCalQ, char *INVEL,
_prec qsi, _prec qpqsr, _prec maxvpvsr, _prec vmin, _prec vmax, _prec dmin,
_prec *vse, _prec *vpe, _prec *dde);

int checkmesh(int nxtl, int nytl, int nztl, int nxth, int nyth, int nzth, Grid3D varl, Grid3D varh,
Expand Down Expand Up @@ -109,7 +112,7 @@ void init_texture(int nxt, int nyt, int nzt, Grid3D tau1, Grid3D tau2, Grid

Grid3D Alloc3D(int nx, int ny, int nz);
Grid3Dww Alloc3Dww(int nx, int ny, int nz);
Grid1D Alloc1D(int nx);
Grid1D Alloc1D(long nx);
PosInf Alloc1P(int nx);

void Delloc3D(Grid3D U);
Expand Down
11 changes: 4 additions & 7 deletions include/awp/pmcl3d_cons.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef DEFINITIONS_H
#define BLOCK_SIZE_X 2
#define BLOCK_SIZE_Y 2
#define BLOCK_SIZE_Z 4
#define BLOCK_SIZE_Z 32
#endif
// Set floating-point precision. Make sure to configure both `_prec` and
// `_mpi_prec`.
Expand All @@ -15,12 +15,9 @@
#endif
#define align 32
#define loop 1
// Number of ghost cells is increased from 4 to 8 for topography kernels.
// In the future, it should be possible to keep this number at four, but modify
// the vertical velocity exchange so that 6 points is exchanged instead of 4.
// No modifications necessary to the other velocity components.
#define ngsl 8 /* number of ghost cells x loop */
#define ngsl2 16 /* ngsl * 2 */
// Do not change the number of ghost cells.
#define ngsl 4 /* number of ghost cells x loop */
#define ngsl2 8 /* ngsl * 2 */

#define Both 0
#define Left 1
Expand Down
2 changes: 1 addition & 1 deletion include/buffers/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ void buffer_copy_to_device(buffer_t *buffer, size_t step);
* buffer: Buffer data structure.
* step: Time step to query buffer at.
*/
void buffer_copy_to_host(buffer_t *buffer, int step);
void buffer_copy_to_host(buffer_t *buffer, size_t step);

#ifdef __cplusplus
}
Expand Down
36 changes: 33 additions & 3 deletions include/grid/grid_3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ grid3_t grid_init_metric_grid(const int3_t size, const int3_t shift,
const int3_t boundary2,
const _prec gridspacing);

grid3_t grid_init_full_grid(const int3_t size, const int3_t shift,
const int3_t coordinate, const int3_t boundary1,
const int3_t boundary2,
const _prec gridspacing);

/* Initialize grid
*
* Input arguments:
Expand Down Expand Up @@ -184,11 +189,29 @@ grid1_t grid_grid1_z(const grid3_t grid);
* out: Array to fill
* n: Array size. Must be greater than the grid size.
* grid: 1D grid data structure.
* isxdir: Specify to `1` if the grid should be filled in the x-direction.
* Adjusts for the particular internal coordinate system used by AWP, i.e., fields stored
* in the (-,+,+) octant
*
* Return value:
* Number of elements written.
*/
int grid_fill1(prec *out, const grid1_t grid, const int isxdir);
/*
*
* Fill the array `out` with the grid point values in the y-direction for a given DM block
* ('blocknum') in one dimension.
*
* Arguments:
* out: Array to fill
* n: Array size. Must be greater than the grid size.
* grid: 1D grid data structure.
* blocknum: Block number. Must be a non-negative integer.
*
* Return value:
* Number of elements written.
*/
int grid_fill1(prec *out, const grid1_t grid);
int grid_fill_y_dm(prec *out, const grid1_t grid, const int blocknum);

/*
* Check if a query point is in bounds or not. The query point is in bounds if
Expand All @@ -204,8 +227,8 @@ int grid_fill1(prec *out, const grid1_t grid);
*/
int grid_in_bounds1(const _prec *x, const _prec q, const grid1_t grid);

int grid_in_bounds_ext1(const _prec *x, const _prec q, const grid1_t grid);

int grid_in_bounds_sgt(const _prec *x, const _prec q, const grid1_t grid);
int grid_in_bounds_moment_tensor(const _prec *x, const _prec q, const grid1_t grid);

/*
* Fill the array `out` with the grid point values in the x-direction of a grid
Expand Down Expand Up @@ -308,6 +331,13 @@ int grid_pow3(_prec *out, const _prec p, const grid3_t grid);
*/
double grid_reduce3(const _prec *in, const grid3_t grid);

_prec grid_overlap(const _prec h);
_prec grid_height(const int nz, const _prec h, const int istopo);

void global_to_local(_prec *zloc, int *block_index, const _prec z,
const _prec h, const int *nz, const int num_grids,
const int istopo);


#ifdef __cplusplus
}
Expand Down
Loading