diff --git a/.github/workflows/trilinos-kokkosparallel.yml b/.github/workflows/trilinos-kokkosparallel.yml new file mode 100644 index 00000000000..b088258c4dd --- /dev/null +++ b/.github/workflows/trilinos-kokkosparallel.yml @@ -0,0 +1,141 @@ +name: Trilinos with shared memory parallelism enabled through OpenMP and CUDA backends (through Kokkos) + +on: + workflow_dispatch: + schedule: + - cron: '0 8 * * 6' + +env: + IMAGE_NAME: ghcr.io/4c-multiphysics/4c-dependencies-trilinos + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-and-push-trilinos-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + attestations: write + id-token: write + steps: + - name: Checkout repository + uses: actions/checkout@v6 + - name: Log in to the Container registry + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0 + with: + images: ${{ env.IMAGE_NAME }} + labels: | + org.opencontainers.image.description=Image containing all the dependencies required for building and testing 4C based on the specified Trilinos commit ref + - name: Build and push Docker image + id: push + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0 + with: + context: . 
+ file: docker/trilinos_kokkosparallel/Dockerfile + push: true + tags: ${{ env.IMAGE_NAME }}:kokkosparallel + labels: ${{ steps.meta.outputs.labels }} + + build_kokkoscuda: + needs: build-and-push-trilinos-image + runs-on: ubuntu-latest + container: + image: ghcr.io/4c-multiphysics/4c-dependencies-trilinos:kokkosparallel + options: --user root --env OMPI_ALLOW_RUN_AS_ROOT=1 --env OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 + defaults: + run: + shell: bash + env: + OMPI_CXX: ${{ github.workspace }}/utilities/clangcuda++ + OMPI_CC: /usr/bin/clang + CLANGCUDA_CLANG_PATH: /usr/bin/clang++ + CLANGCUDA_CUDA_PATH: /usr/local/cuda + CLANGCUDA_ARCH: sm_90 + steps: + - uses: actions/checkout@v6 + - uses: ./.github/actions/build_4C + with: + cmake-preset: docker_kokkoscuda_clangcuda + build-targets: full + build-directory: ${{ github.workspace }}/build + use-ccache: "false" + + build_kokkosopenmp: + needs: build-and-push-trilinos-image + runs-on: ubuntu-latest + container: + image: ghcr.io/4c-multiphysics/4c-dependencies-trilinos:kokkosparallel + options: --user root --env OMPI_ALLOW_RUN_AS_ROOT=1 --env OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 + defaults: + run: + shell: bash + outputs: + test-chunks: ${{ steps.set-matrix.outputs.chunk-array }} + steps: + - uses: actions/checkout@v6 + - uses: ./.github/actions/build_4C + with: + cmake-preset: docker_kokkosopenmp + build-targets: full + build-directory: ${{ github.workspace }}/build + use-ccache: "false" + - uses: ./.github/actions/upload_directory + with: + directory: ${{ github.workspace }}/build + retention-days: 1 + name: trilinos_kokkosopenmp_build + - uses: ./.github/actions/chunk_test_suite + id: set-matrix + with: + build-directory: ${{ github.workspace }}/build + source-directory: ${{ github.workspace }} + number-of-chunks: 15 + junit-report-artifact-name: trilinos_test_report.xml + + test_openmp: + needs: build_kokkosopenmp + runs-on: ubuntu-latest + container: + image: ghcr.io/4c-multiphysics/4c-dependencies-trilinos:kokkosparallel + 
options: --user root --env OMPI_ALLOW_RUN_AS_ROOT=1 --env OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 + strategy: + fail-fast: false + matrix: + test-chunk: ${{fromJson(needs.build_kokkosopenmp.outputs.test-chunks)}} + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@v6 + - name: Setup developer environment for testing + run: | + cd $GITHUB_WORKSPACE + git config --global --add safe.directory $GITHUB_WORKSPACE + - uses: ./.github/actions/download_directory + with: + name: trilinos_kokkosopenmp_build + destination: ${{ github.workspace }}/build + - name: Test + run: | + cd $GITHUB_WORKSPACE/build + ctest -I $TEST_CHUNK -j `nproc` --output-on-failure --output-junit $GITHUB_WORKSPACE/trilinos_test_report-$TEST_CHUNK.xml + env: + TEST_CHUNK: ${{ matrix.test-chunk }} + - name: Upload test report + if: success() || failure() + uses: actions/upload-artifact@v7 + with: + name: trilinos_test_report-${{ matrix.test-chunk }}.xml + path: | + ${{ github.workspace }}/trilinos_test_report-${{ matrix.test-chunk }}.xml + retention-days: 1 diff --git a/apps/global_full/CMakeLists.txt b/apps/global_full/CMakeLists.txt index c417bfee89b..e3f843ed71b 100644 --- a/apps/global_full/CMakeLists.txt +++ b/apps/global_full/CMakeLists.txt @@ -17,7 +17,17 @@ set_target_properties( ${FOUR_C_EXECUTABLE_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR} ) four_c_set_up_executable(${FOUR_C_EXECUTABLE_NAME}) - +if(FOUR_C_CLANGCUDA) + set_target_properties( + ${FOUR_C_EXECUTABLE_NAME} + PROPERTIES CXX_COMPILER_LAUNCHER "" + C_COMPILER_LAUNCHER "" + CUDA_COMPILER_LAUNCHER "" + RULE_LAUNCH_COMPILE "" + RULE_LAUNCH_LINK "" + ) + target_compile_definitions(${FOUR_C_EXECUTABLE_NAME} PRIVATE CLANGCUDA_MODE_HOST) +endif() if(FOUR_C_ENABLE_METADATA_GENERATION) if(FOUR_C_WITH_PYTHON) add_custom_command( diff --git a/cmake/configure/configure_MIRCO.cmake b/cmake/configure/configure_MIRCO.cmake index 684ee039fd7..b722deaf2c0 100644 --- a/cmake/configure/configure_MIRCO.cmake +++ 
b/cmake/configure/configure_MIRCO.cmake @@ -13,7 +13,7 @@ four_c_process_global_option( OFF ) if(FOUR_C_MIRCO_FIND_INSTALLED) - + # Note that MIRCO and 4C must point to the same Kokkos and Kokkos-Kernels installation. Otherwise, there will be errors. message(STATUS "FOUR_C_MIRCO_FIND_INSTALLED is enabled") # MIRCO provides a package configuration file if installed. @@ -27,13 +27,24 @@ if(FOUR_C_MIRCO_FIND_INSTALLED) endif() else() # Fetch MIRCO from GIT repository - # Turn off googletest and Trilinos in MIRCO so that they don't interfere with 4C + # Turn off googletest in MIRCO so that it does not interfere with 4C. set(GTEST_IN_MIRCO "OFF") - set(TRILINOS_IN_MIRCO "OFF") + # Explicitly turn off `*_IN_MIRCO`, so that MIRCO uses upstream targets + set(RYML_IN_MIRCO "OFF") + set(KOKKOS_IN_MIRCO "OFF") + set(KOKKOS_KERNELS_IN_MIRCO "OFF") + + # Propagate FOUR_C_CLANGCUDA to MIRCO through the MIRCO_CLANGCUDA variable + if(FOUR_C_CLANGCUDA) + set(MIRCO_CLANGCUDA "ON") + else() + set(MIRCO_CLANGCUDA "OFF") + endif() set(MIRCO_GIT_REPO "https://github.com/imcs-compsim/MIRCO.git") - set(MIRCO_GIT_TAG "b9d0c4ba27ff8463a3d2b17163fead8800b2650c") # latest hash 03.04.2026 + set(MIRCO_GIT_TAG "8b049a6462eba5809d7cffe039a77f3bc5593767") # latest hash 02.06.2026 + set(FETCHCONTENT_TRY_FIND_PACKAGE_MODE NEVER) fetchcontent_declare( mirco GIT_REPOSITORY ${MIRCO_GIT_REPO} @@ -42,8 +53,8 @@ else() # Fetch MIRCO from GIT repository fetchcontent_makeavailable(mirco) # MIRCO requires a specific path, possibly due to inconsistent naming "mirco" vs "mirco_lib". 
set(FOUR_C_MIRCO_ROOT "${CMAKE_INSTALL_PREFIX}/lib/cmake/mirco") - - four_c_add_external_dependency(four_c_all_enabled_external_dependencies mirco::mirco_lib) endif() +four_c_add_external_dependency(four_c_all_enabled_external_dependencies mirco::mirco_lib) + four_c_remember_variable_for_install(FOUR_C_MIRCO_ROOT) diff --git a/cmake/configure/configure_Trilinos.cmake b/cmake/configure/configure_Trilinos.cmake index d6081f877f3..128dd6659ff 100644 --- a/cmake/configure/configure_Trilinos.cmake +++ b/cmake/configure/configure_Trilinos.cmake @@ -71,6 +71,24 @@ find_package(Trilinos REQUIRED) message(STATUS "Trilinos version: ${Trilinos_VERSION}") message(STATUS "Trilinos packages: ${Trilinos_PACKAGE_LIST}") +if(FOUR_C_CLANGCUDA) + set(CMAKE_CXX_COMPILER_LAUNCHER + "" + CACHE STRING "" FORCE + ) + set(CMAKE_C_COMPILER_LAUNCHER + "" + CACHE STRING "" FORCE + ) + set(CMAKE_CUDA_COMPILER_LAUNCHER + "" + CACHE STRING "" FORCE + ) + + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "") + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "") +endif() + # Figure out the version. if(EXISTS "${Trilinos_DIR}/../../../TrilinosRepoVersion.txt") file(STRINGS "${Trilinos_DIR}/../../../TrilinosRepoVersion.txt" TrilinosRepoVersionFile) diff --git a/cmake/functions/four_c_auto_define_module.cmake b/cmake/functions/four_c_auto_define_module.cmake index 17cc65325e1..ab9c49c547d 100644 --- a/cmake/functions/four_c_auto_define_module.cmake +++ b/cmake/functions/four_c_auto_define_module.cmake @@ -38,6 +38,18 @@ function(four_c_auto_define_module) # them on other users of the library. 
target_link_libraries(${_target}_objs PRIVATE four_c_private_compile_interface) + if(FOUR_C_CLANGCUDA) + set_target_properties( + ${_target}_objs + PROPERTIES CXX_COMPILER_LAUNCHER "" + C_COMPILER_LAUNCHER "" + CUDA_COMPILER_LAUNCHER "" + RULE_LAUNCH_COMPILE "" + RULE_LAUNCH_LINK "" + ) + target_compile_definitions(${_target}_objs PRIVATE CLANGCUDA_MODE_HOST) + endif() + if(FOUR_C_ENABLE_IWYU) set_target_properties( ${_target}_objs PROPERTIES CXX_INCLUDE_WHAT_YOU_USE ${FOUR_C_IWYU_EXECUTABLE} diff --git a/cmake/setup_global_options.cmake b/cmake/setup_global_options.cmake index f8f2e9dac30..8bba1e48842 100644 --- a/cmake/setup_global_options.cmake +++ b/cmake/setup_global_options.cmake @@ -235,6 +235,20 @@ four_c_process_global_option( OFF ) +four_c_process_global_option( + FOUR_C_CLANGCUDA + DESCRIPTION + "Enable the relevant CMake compile definitions needed to use utilities/clangcuda++ as the compiler. This is currently necessary to use the CUDA backend of Kokkos in 4C, e.g. along with MIRCO." + DEFAULT + OFF + ) +if(FOUR_C_CLANGCUDA AND FOUR_C_WITH_ARBORX) + message( + WARNING + "Enabling both FOUR_C_CLANGCUDA and FOUR_C_WITH_ARBORX is not advised. This requires using an external CUDA-enabled ArborX installation and has not been tested." + ) +endif() + ## # Optimization flags # These flags are reasonable defaults. Users may amend them by setting FOUR_C_CXX_FLAGS and/or FOUR_C_CXX_FLAGS_. diff --git a/dependencies/trilinos_kokkosparallel/trilinos/install_cuda.sh b/dependencies/trilinos_kokkosparallel/trilinos/install_cuda.sh new file mode 100755 index 00000000000..94554a1a98b --- /dev/null +++ b/dependencies/trilinos_kokkosparallel/trilinos/install_cuda.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# This file is part of 4C multiphysics licensed under the +# GNU Lesser General Public License v3.0 or later. +# +# See the LICENSE.md file in the top-level for license information. 
+# +# SPDX-License-Identifier: LGPL-3.0-or-later + +# Install trilinos with the CUDA backend enabled +# Call with +# ./install_cuda.sh /path/to/install/dir (the install directory argument is mandatory) + +# Exit the script at the first failure +set -e + +INSTALL_DIR="${1:?usage: install_cuda.sh /path/to/install/dir}" +# Number of procs for building (default 4) +NPROCS=${NPROCS:=4} +# git sha from Trilinos repository: +VERSION="f4d642715185dca1b94c91f434a2cf6db9f82014" +#CHECKSUM="" + + +# Location of script to apply patches later +SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +CMAKE_COMMAND=cmake + +git clone https://github.com/trilinos/Trilinos.git +cd Trilinos +git checkout "$VERSION" +cd .. && mkdir trilinos_build && cd trilinos_build + +MPI_DIR=/usr +MPI_BIN_DIR=$MPI_DIR/bin + +$CMAKE_COMMAND \ + -D CMAKE_BUILD_TYPE:STRING="RELEASE" \ + -D CMAKE_CXX_STANDARD:STRING="17" \ + -D CMAKE_CXX_COMPILER:FILEPATH="$MPI_BIN_DIR/mpic++" \ + -D CMAKE_C_COMPILER:FILEPATH="$MPI_BIN_DIR/mpicc" \ + -D CMAKE_Fortran_COMPILER:FILEPATH="$MPI_BIN_DIR/mpif90" \ + -D CMAKE_INSTALL_PREFIX:STRING="$INSTALL_DIR" \ + -D BUILD_SHARED_LIBS:BOOL=ON \ + \ + -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \ + -D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON \ + -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \ + -D Trilinos_ENABLE_TESTS:BOOL=OFF \ + -D Trilinos_ENABLE_EXAMPLES:BOOL=OFF \ + \ + -D Trilinos_ASSERT_MISSING_PACKAGES=OFF \ + -D Trilinos_ENABLE_Gtest:BOOL=OFF \ + -D Trilinos_ENABLE_Amesos:BOOL=ON \ + -D Amesos_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Amesos2:BOOL=ON \ + -D Trilinos_ENABLE_AztecOO:BOOL=ON \ + -D AztecOO_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Belos:BOOL=ON \ + -D Trilinos_ENABLE_Epetra:BOOL=ON \ + -D Epetra_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_EpetraExt:BOOL=ON \ + -D EpetraExt_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Intrepid2:BOOL=ON \ + -D Trilinos_ENABLE_Ifpack:BOOL=ON \ + -D Ifpack_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Ifpack2:BOOL=ON \ 
+ -D Trilinos_ENABLE_Kokkos:BOOL=ON \ + -D Trilinos_ENABLE_ML:BOOL=ON \ + -D ML_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_MueLu:BOOL=ON \ + -D Trilinos_ENABLE_NOX:BOOL=ON \ + -D NOX_ENABLE_ABSTRACT_IMPLEMENTATION_EPETRA:BOOL=OFF \ + -D NOX_ENABLE_STRATIMIKOS_EPETRA_STACK:BOOL=OFF \ + -D Trilinos_ENABLE_Sacado:BOOL=ON \ + -D Trilinos_ENABLE_SEACASExodus:BOOL=ON \ + -D Trilinos_ENABLE_SEACASNemesis:BOOL=OFF \ + -D Trilinos_ENABLE_Shards:BOOL=ON \ + -D Trilinos_ENABLE_Stratimikos:BOOL=ON \ + -D Trilinos_ENABLE_Teko:BOOL=ON \ + -D Trilinos_ENABLE_Teuchos:BOOL=ON \ + -D Trilinos_ENABLE_Thyra:BOOL=ON \ + -D Thyra_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_ThyraEpetraAdapters:BOOL=ON \ + -D Trilinos_ENABLE_ThyraEpetraExtAdapters:BOOL=ON \ + -D Trilinos_ENABLE_Tpetra:BOOL=ON \ + -D Tpetra_INST_INT_INT:BOOL=ON \ + -D Trilinos_ENABLE_Xpetra:BOOL=ON \ + -D Xpetra_ENABLE_Epetra:BOOL=ON \ + -D Xpetra_ENABLE_EpetraExt:BOOL=ON \ + -D Xpetra_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Zoltan:BOOL=ON \ + -D Trilinos_ENABLE_Zoltan2:BOOL=ON \ + \ + -D Trilinos_MUST_FIND_ALL_TPL_LIBS=TRUE \ + -D TPL_ENABLE_DLlib:BOOL=OFF \ + -D TPL_ENABLE_Netcdf:BOOL=ON \ + -D TPL_ENABLE_MPI:BOOL=ON \ + -D TPL_ENABLE_MUMPS:BOOL=ON \ + -D TPL_ENABLE_ScaLAPACK:BOOL=ON \ + -D TPL_ENABLE_LAPACK:BOOL=ON \ + -D TPL_ENABLE_BLAS:BOOL=ON \ + -D TPL_ENABLE_ParMETIS:BOOL=ON \ + -D ParMETIS_INCLUDE_DIRS:PATH="/usr/include" \ + -D TPL_ENABLE_UMFPACK:BOOL=ON \ + -D UMFPACK_INCLUDE_DIRS:FILEPATH="/usr/include/suitesparse" \ + -D TPL_ENABLE_SuperLUDist:BOOL=ON \ + -D SuperLUDist_INCLUDE_DIRS:PATH="$INSTALL_DIR/../include" \ + -D SuperLUDist_LIBRARY_DIRS:PATH="$INSTALL_DIR/../lib" \ + \ + -D Trilinos_ENABLE_KokkosKernels=TRUE \ + -D KokkosKernels_ENABLE_TPL_BLAS=TRUE \ + -D KokkosKernels_ENABLE_TPL_LAPACK=TRUE \ + \ + -D Kokkos_ENABLE_SERIAL=TRUE \ + -D Tpetra_INST_SERIAL=TRUE \ + \ + -D Trilinos_ENABLE_CUDA=TRUE \ + -D TPL_ENABLE_CUDA=TRUE \ + -D Kokkos_ENABLE_CUDA=TRUE \ + 
-D Kokkos_ARCH_HOPPER90=TRUE \ + -D Kokkos_ENABLE_CUDA_CONSTEXPR=TRUE \ + -D Tpetra_INST_CUDA=FALSE \ + -D KokkosKernels_ENABLE_TPL_CUSOLVER=TRUE \ + \ + ../Trilinos + +make -j${NPROCS} install +cd .. +rm -rf Trilinos trilinos_build diff --git a/dependencies/trilinos_kokkosparallel/trilinos/install_openmp.sh b/dependencies/trilinos_kokkosparallel/trilinos/install_openmp.sh new file mode 100755 index 00000000000..fdf38127d0c --- /dev/null +++ b/dependencies/trilinos_kokkosparallel/trilinos/install_openmp.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# This file is part of 4C multiphysics licensed under the +# GNU Lesser General Public License v3.0 or later. +# +# See the LICENSE.md file in the top-level for license information. +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +# Install trilinos with the OpenMP backend enabled +# Call with +# ./install_openmp.sh /path/to/install/dir (the install directory argument is mandatory) + +# Exit the script at the first failure +set -e + +INSTALL_DIR="${1:?usage: install_openmp.sh /path/to/install/dir}" +# Number of procs for building (default 4) +NPROCS=${NPROCS:=4} +# git sha from Trilinos repository: +VERSION="f4d642715185dca1b94c91f434a2cf6db9f82014" +#CHECKSUM="" + + +# Location of script to apply patches later +SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +CMAKE_COMMAND=cmake + +git clone https://github.com/trilinos/Trilinos.git +cd Trilinos +git checkout "$VERSION" +cd .. 
&& mkdir trilinos_build && cd trilinos_build + +MPI_DIR=/usr +MPI_BIN_DIR=$MPI_DIR/bin + +$CMAKE_COMMAND \ + -D CMAKE_BUILD_TYPE:STRING="RELEASE" \ + -D CMAKE_CXX_STANDARD:STRING="17" \ + -D CMAKE_CXX_COMPILER:FILEPATH="$MPI_BIN_DIR/mpic++" \ + -D CMAKE_C_COMPILER:FILEPATH="$MPI_BIN_DIR/mpicc" \ + -D CMAKE_Fortran_COMPILER:FILEPATH="$MPI_BIN_DIR/mpif90" \ + -D CMAKE_INSTALL_PREFIX:STRING="$INSTALL_DIR" \ + -D BUILD_SHARED_LIBS:BOOL=ON \ + \ + -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \ + -D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON \ + -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \ + -D Trilinos_ENABLE_TESTS:BOOL=OFF \ + -D Trilinos_ENABLE_EXAMPLES:BOOL=OFF \ + \ + -D Trilinos_ASSERT_MISSING_PACKAGES=OFF \ + -D Trilinos_ENABLE_Gtest:BOOL=OFF \ + -D Trilinos_ENABLE_Amesos:BOOL=ON \ + -D Amesos_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Amesos2:BOOL=ON \ + -D Trilinos_ENABLE_AztecOO:BOOL=ON \ + -D AztecOO_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Belos:BOOL=ON \ + -D Trilinos_ENABLE_Epetra:BOOL=ON \ + -D Epetra_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_EpetraExt:BOOL=ON \ + -D EpetraExt_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Intrepid2:BOOL=ON \ + -D Trilinos_ENABLE_Ifpack:BOOL=ON \ + -D Ifpack_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Ifpack2:BOOL=ON \ + -D Trilinos_ENABLE_Kokkos:BOOL=ON \ + -D Trilinos_ENABLE_ML:BOOL=ON \ + -D ML_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_MueLu:BOOL=ON \ + -D Trilinos_ENABLE_NOX:BOOL=ON \ + -D NOX_ENABLE_ABSTRACT_IMPLEMENTATION_EPETRA:BOOL=OFF \ + -D NOX_ENABLE_STRATIMIKOS_EPETRA_STACK:BOOL=OFF \ + -D Trilinos_ENABLE_Sacado:BOOL=ON \ + -D Trilinos_ENABLE_SEACASExodus:BOOL=ON \ + -D Trilinos_ENABLE_SEACASNemesis:BOOL=OFF \ + -D Trilinos_ENABLE_Shards:BOOL=ON \ + -D Trilinos_ENABLE_Stratimikos:BOOL=ON \ + -D Trilinos_ENABLE_Teko:BOOL=ON \ + -D Trilinos_ENABLE_Teuchos:BOOL=ON \ + -D Trilinos_ENABLE_Thyra:BOOL=ON \ + -D 
Thyra_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_ThyraEpetraAdapters:BOOL=ON \ + -D Trilinos_ENABLE_ThyraEpetraExtAdapters:BOOL=ON \ + -D Trilinos_ENABLE_Tpetra:BOOL=ON \ + -D Tpetra_INST_INT_INT:BOOL=ON \ + -D Trilinos_ENABLE_Xpetra:BOOL=ON \ + -D Xpetra_ENABLE_Epetra:BOOL=ON \ + -D Xpetra_ENABLE_EpetraExt:BOOL=ON \ + -D Xpetra_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \ + -D Trilinos_ENABLE_Zoltan:BOOL=ON \ + -D Trilinos_ENABLE_Zoltan2:BOOL=ON \ + \ + -D Trilinos_MUST_FIND_ALL_TPL_LIBS=TRUE \ + -D TPL_ENABLE_DLlib:BOOL=OFF \ + -D TPL_ENABLE_Netcdf:BOOL=ON \ + -D TPL_ENABLE_MPI:BOOL=ON \ + -D TPL_ENABLE_MUMPS:BOOL=ON \ + -D TPL_ENABLE_ScaLAPACK:BOOL=ON \ + -D TPL_ENABLE_LAPACK:BOOL=ON \ + -D TPL_ENABLE_BLAS:BOOL=ON \ + -D TPL_ENABLE_ParMETIS:BOOL=ON \ + -D ParMETIS_INCLUDE_DIRS:PATH="/usr/include" \ + -D TPL_ENABLE_UMFPACK:BOOL=ON \ + -D UMFPACK_INCLUDE_DIRS:FILEPATH="/usr/include/suitesparse" \ + -D TPL_ENABLE_SuperLUDist:BOOL=ON \ + -D SuperLUDist_INCLUDE_DIRS:PATH="$INSTALL_DIR/../include" \ + -D SuperLUDist_LIBRARY_DIRS:PATH="$INSTALL_DIR/../lib" \ + \ + -D Trilinos_ENABLE_KokkosKernels=TRUE \ + -D KokkosKernels_ENABLE_TPL_BLAS=TRUE \ + -D KokkosKernels_ENABLE_TPL_LAPACK=TRUE \ + \ + -D Kokkos_ENABLE_SERIAL=TRUE \ + -D Tpetra_INST_SERIAL=TRUE \ + \ + -D Trilinos_ENABLE_OpenMP=TRUE \ + -D Kokkos_ENABLE_OPENMP=TRUE \ + -D Tpetra_INST_OPENMP=TRUE \ + \ + ../Trilinos + +make -j${NPROCS} install +cd .. 
+rm -rf Trilinos trilinos_build diff --git a/doc/documentation/src/installation/installation.rst b/doc/documentation/src/installation/installation.rst index 52c9b7a17f5..84fc7917823 100644 --- a/doc/documentation/src/installation/installation.rst +++ b/doc/documentation/src/installation/installation.rst @@ -110,7 +110,7 @@ Currently supported versions are listed in ``<4C_sourceDir>/dependencies/support MIRCO can be used as optional dependency inside |FOURC| to be used for linear elastic frictionless normal contact between a rigid rough indentor and an elastic half-space. See the `MIRCO repository `_ for details and downloads. -Building |FOURC| with MIRCO enabled automatically fetches the repository during the configure stage and later builds the library as dependency. +Building |FOURC| with MIRCO enabled automatically fetches the repository during the configure stage and later builds the library as a dependency. Alternatively, one can specify an external MIRCO installation. In either case, MIRCO can make use of shared memory parallelism through Kokkos :ref:`when enabled <build4Cwithkokkoscuda>` in |FOURC|. Note that |FOURC| and MIRCO must depend on the same Kokkos installation. When Kokkos is used with CUDA enabled, MIRCO must be built with ``CMAKE_POSITION_INDEPENDENT_CODE=ON``. .. _qhull: @@ -605,3 +605,15 @@ This will install |FOURC| in the specified location. You can then use the instal # This pulls in all the necessary dependencies and headers. target_link_libraries( PRIVATE 4C::lib4C) +.. _build4Cwithkokkoscuda: + +Building |FOURC| with OpenMP and CUDA support through Kokkos +------------------------------------------------------------ + +|FOURC| is primarily developed around MPI parallelism, but also offers the ability to use shared memory parallelism through `Kokkos <https://kokkos.org>`_, enabling hybrid parallelism on the CPU through OpenMP and GPU acceleration through CUDA. 
+ +Kokkos (and Kokkos-Kernels) can be built within Trilinos or specified as an external TPL in Trilinos, and its configuration follows the usual procedure for the desired backend (see the `Kokkos configuration guide <https://kokkos.org/kokkos-core-wiki/get-started/configuration-guide.html>`_). Trilinos then requires ``Trilinos_ENABLE_<BACKEND>=ON`` and, specifically for CUDA, ``TPL_ENABLE_CUDA=ON``. To prevent oversubscription and unwanted shared memory parallelism in |FOURC|, one should disable these backends for Tpetra with ``Tpetra_INST_<BACKEND>=OFF`` and explicitly set ``Tpetra_INST_SERIAL=ON``. + +To build |FOURC| against a CUDA-enabled Kokkos, the compiler wrapper ``utilities/clangcuda++`` must be used as the ``CMAKE_CXX_COMPILER``, while clang should be used as the ``CMAKE_C_COMPILER``. When compiling through the Open MPI compiler wrappers, these should instead be set via the ``OMPI_CXX`` and ``OMPI_CC`` environment variables, respectively. To change the GPU architecture or the default clang++ and CUDA paths, set the corresponding environment variables listed at the start of the ``utilities/clangcuda++`` compiler wrapper. Additionally, the ``FOUR_C_CLANGCUDA`` CMake option must be enabled in |FOURC|. Due to an incompatibility with the serial version of ArborX, it is recommended to disable ``FOUR_C_WITH_ARBORX``. + +For developers, it is important to know that any target in |FOURC| which contains Kokkos device code (e.g. ``Kokkos::parallel_for()`` or ``KOKKOS_LAMBDA``) must be marked with the ``CLANGCUDA_MODE_DEVICE`` compile definition for CUDA compilation to be possible. diff --git a/docker/trilinos_kokkosparallel/Dockerfile b/docker/trilinos_kokkosparallel/Dockerfile new file mode 100644 index 00000000000..5b5641247b6 --- /dev/null +++ b/docker/trilinos_kokkosparallel/Dockerfile @@ -0,0 +1,130 @@ +# This file is part of 4C multiphysics licensed under the +# GNU Lesser General Public License v3.0 or later. +# +# See the LICENSE.md file in the top-level for license information. 
+# +# SPDX-License-Identifier: LGPL-3.0-or-later + +ARG BASE_IMAGE=nvidia/cuda:12.8.1-devel-ubuntu24.04 +FROM ${BASE_IMAGE} +LABEL org.opencontainers.image.description="Image containing all the dependencies required for building and testing 4C" +LABEL org.4c-multiphysics.project=4C + +# Prevents tzdata asking for user feedback +ENV DEBIAN_FRONTEND=noninteractive + +USER root + +# Set locale information (en_US.UTF-8) +RUN apt-get update && apt-get install -y --no-install-recommends \ + locales \ + && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 \ + && locale-gen en_US.UTF-8 \ + && rm -rf /var/lib/apt/lists/* + +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US:en +ENV LC_ALL=en_US.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ffmpeg \ + git \ + libglu1-mesa \ + python3 \ + sudo \ + unzip \ + vim \ + wget \ + && \ + apt-get update && apt-get install -y \ + doxygen \ + graphviz \ + texinfo \ + lcov \ + libblas-dev \ + libboost-all-dev \ + libcln-dev \ + libhdf5-dev \ + libhdf5-openmpi-dev \ + libnetcdf-dev \ + libfftw3-dev \ + lld \ + python3-venv \ + python-is-python3 \ + liblapack-dev \ + libopenmpi-dev \ + libparmetis-dev \ + libmetis-dev \ + libsuitesparse-dev \ + libmumps-dev \ + libscalapack-mpi-dev \ + libqhull-dev \ + mpi-default-dev \ + ninja-build \ + libyaml-dev \ + clang \ + clang-tidy \ + clang-tools \ + libomp-dev \ + libvtk9-dev \ + && rm -rf /var/lib/apt/lists/* + +# Create directory for dependencies +ARG NPROCS=12 +ENV NPROCS=$NPROCS \ + INSTALL_DIR="/opt/4C-dependencies" +RUN mkdir -p ${INSTALL_DIR} + +COPY dependencies /dependencies + +# Put the CUDA toolkit binaries (e.g. `nvcc`) on the PATH +ENV PATH=/usr/local/cuda/bin:${PATH} + +# Install cmake +RUN /dependencies/current/cmake/install.sh /usr/local + +# Install superLU_dist 7.2.0 +RUN /dependencies/current/superlu_dist/install.sh ${INSTALL_DIR} + +# Install Trilinos 2025.6 with Kokkos' CUDA backend enabled +RUN 
/dependencies/trilinos_kokkosparallel/trilinos/install_cuda.sh ${INSTALL_DIR}/tk_cuda + +# Install Trilinos 2025.6 with Kokkos' OpenMP backend enabled +RUN /dependencies/trilinos_kokkosparallel/trilinos/install_openmp.sh ${INSTALL_DIR}/tk_openmp +# Install deal.II (needs to happen after Trilinos and within the same installation directory) +RUN /dependencies/current/dealii/install.sh ${INSTALL_DIR}/tk_openmp + +# Install (optional) backtrace library +RUN /dependencies/current/backtrace/install.sh ${INSTALL_DIR} + +# Install (optional) gmsh library +RUN /dependencies/current/gmsh/install.sh ${INSTALL_DIR} + +# Packages for testing +# Installation directory for dependencies concerning testing +ENV FOUR_C_TESTING_DEPENDENCIES_DIR="/opt/4C-dependencies-testing/" +RUN mkdir -p ${FOUR_C_TESTING_DEPENDENCIES_DIR} + +# Install Mathjax +RUN /dependencies/testing/mathjax/install.sh ${FOUR_C_TESTING_DEPENDENCIES_DIR} + +# Add dependencies hash +# The label is added at the end because the label causes a cache miss +ARG DEPENDENCIES_HASH +LABEL org.4c-multiphysics.dependencies_hash="${DEPENDENCIES_HASH}" +ENV DEPENDENCIES_HASH=${DEPENDENCIES_HASH} + +# Add and enable the default user; chain with && so a failing adduser fails the build +ENV USER=user +RUN adduser --disabled-password --shell '/usr/bin/bash' --gecos '' $USER +RUN adduser $USER sudo && echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +# Make sure the default user owns its home directory +RUN chown -R $USER:$USER /home/$USER +USER $USER +ENV HOME=/home/$USER +ENV USER=$USER +# see https://github.com/open-mpi/ompi/issues/4948 +ENV OMPI_MCA_btl_vader_single_copy_mechanism=none +WORKDIR $HOME diff --git a/presets/docker/CMakePresets.json b/presets/docker/CMakePresets.json index e2380727571..5ee3398d84e 100644 --- a/presets/docker/CMakePresets.json +++ b/presets/docker/CMakePresets.json @@ -152,6 +152,33 @@ "FOUR_C_WITH_QHULL": "OFF", "FOUR_C_ENABLE_PYTHON_BINDINGS": "OFF" } + }, + { + "name": "docker_kokkoscuda_clangcuda", + "displayName": "Release build for CUDA-enabled Kokkos", + 
"description": "Release build using clangcuda++ compiler wrapper for CUDA-enabled Kokkos", + "inherits": [ + ".docker_base" + ], + "cacheVariables": { + "FOUR_C_CLANGCUDA": "ON", + "FOUR_C_TRILINOS_ROOT": "/opt/4C-dependencies/tk_cuda", + "FOUR_C_WITH_ARBORX": "OFF", + "FOUR_C_WITH_DEAL_II": "OFF", + "FOUR_C_ENABLE_METADATA_GENERATION": "OFF" + } + }, + { + "name": "docker_kokkosopenmp", + "displayName": "Release build for OpenMP-enabled Kokkos", + "description": "Release build for OpenMP-enabled Kokkos", + "inherits": [ + ".docker_base" + ], + "cacheVariables": { + "FOUR_C_TRILINOS_ROOT": "/opt/4C-dependencies/tk_openmp", + "FOUR_C_DEAL_II_ROOT": "/opt/4C-dependencies/tk_openmp" + } } ] } diff --git a/src/cut/4C_cut_pointgraph.cpp b/src/cut/4C_cut_pointgraph.cpp index ae87b9bff6a..35e38f78386 100644 --- a/src/cut/4C_cut_pointgraph.cpp +++ b/src/cut/4C_cut_pointgraph.cpp @@ -12,8 +12,21 @@ #include "4C_cut_pointgraph_simple.hpp" #include "4C_cut_side.hpp" +#if defined(CLANGCUDA_MODE_HOST) || defined(CLANGCUDA_MODE_DEVICE) +#ifdef __noinline__ +#pragma push_macro("__noinline__") +#undef __noinline__ +#define FOUR_C_RESTORE_NOINLINE_MACRO +#endif +#endif + #include +#ifdef FOUR_C_RESTORE_NOINLINE_MACRO +#pragma pop_macro("__noinline__") +#undef FOUR_C_RESTORE_NOINLINE_MACRO +#endif + #include #include #include diff --git a/utilities/clangcuda++ b/utilities/clangcuda++ new file mode 100755 index 00000000000..70c0deafdc9 --- /dev/null +++ b/utilities/clangcuda++ @@ -0,0 +1,125 @@ +#!/bin/bash + +# This file is part of 4C multiphysics licensed under the +# GNU Lesser General Public License v3.0 or later. +# +# See the LICENSE.md file in the top-level for license information. +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +# This is a compiler wrapper around clang++ for building a project which uses Kokkos with the Cuda backend enabled, due to issues with nvcc and the nvcc_wrapper provided by Kokkos. 
+# +# When relevant, this wrapper should be used as the CXX_COMPILER or OMPI_CXX backend in combination with CMake flags to signal which targets (or objects) should be compiled for CUDA device (CLANGCUDA_MODE_DEVICE), CUDA host-only (CLANGCUDA_MODE_HOST), or normal (non-CUDA) C++. +# +# The following environment variables can be used to override the defaults: +# - CLANGCUDA_CLANG_PATH: path to clang++ (default is /bin/clang++) +# - CLANGCUDA_CUDA_PATH: path to the CUDA toolkit (default is $CUDA_HOME, falling back to /usr/local/cuda) +# - CLANGCUDA_ARCH: GPU architecture to target (default is sm_90) +# - CLANGCUDA_LOG: optional path to a log file to which each final command is appended + +clang_path="${CLANGCUDA_CLANG_PATH:-/bin/clang++}" +cuda_path="${CLANGCUDA_CUDA_PATH:-${CUDA_HOME:-/usr/local/cuda}}" + +compile=0 +mode_cuda_host=0 +mode_cuda_device=0 +arch="${CLANGCUDA_ARCH:-sm_90}" + +args=() +skip_next_x=0 + +for arg in "$@"; do + if [[ "$skip_next_x" == "1" ]]; then + skip_next_x=0 + continue + fi + + case "$arg" in + -c) + compile=1 + args+=("$arg") + ;; + + -DCLANGCUDA_MODE_HOST) + mode_cuda_host=1 + args+=("$arg") + ;; + + -DCLANGCUDA_MODE_DEVICE) + mode_cuda_device=1 + args+=("$arg") + ;; + + -extended-lambda|--extended-lambda|--expt-extended-lambda|-expt-extended-lambda) + ;; + + -expt-relaxed-constexpr|--expt-relaxed-constexpr) + ;; + + -arch=sm_*|--cuda-gpu-arch=sm_*) + # Ignore incoming arch flags. Wrapper decides from CLANGCUDA_ARCH or default. + ;; + + -ccbin=*|--compiler-bindir=*) + ;; + + -x) + # Drop incoming language override and the following language token. 
+ skip_next_x=1 + ;; + + *) + args+=("$arg") + ;; + esac +done + +# Sanity check: reject an invocation that requests both the host-only and the device CUDA mode +if [[ "$mode_cuda_host" == "1" && "$mode_cuda_device" == "1" ]]; then + has_explicit_device=0 + for arg in "$@"; do + if [[ "$arg" == "-DCLANGCUDA_MODE_DEVICE" ]]; then + has_explicit_device=1 + break + fi + done + + if [[ "$has_explicit_device" == "1" ]]; then + echo "clangcuda++ wrapper error: both CLANGCUDA_MODE_HOST and CLANGCUDA_MODE_DEVICE were set" >&2 + exit 1 + fi +fi + +if [[ "$compile" == "1" && "$mode_cuda_host" == "1" ]]; then + final=( + "$clang_path" + -x cuda + --cuda-host-only + --cuda-path="$cuda_path" + --cuda-gpu-arch="$arch" + -Wno-unknown-cuda-version + "${args[@]}" + ) +elif [[ "$compile" == "1" && "$mode_cuda_device" == "1" ]]; then + final=( + "$clang_path" + -x cuda + --cuda-path="$cuda_path" + --cuda-gpu-arch="$arch" + -Wno-unknown-cuda-version + "${args[@]}" + ) +else + final=( + "$clang_path" + -Wno-unknown-cuda-version + "${args[@]}" + ) +fi + +if [[ -n "$CLANGCUDA_LOG" ]]; then + printf '%q ' "${final[@]}" >> "$CLANGCUDA_LOG" + printf '\n' >> "$CLANGCUDA_LOG" +fi + +exec "${final[@]}"