diff --git a/.gitignore b/.gitignore index 9dc9e984..fe73d50c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ Trilinos CMakeCache.txt CMakeCache CMakeFiles +*~ diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ea97b36..199e88f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,7 @@ set(CMAKE_CXX_STANDARD 17) # Build type if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE RelWithDebInfo) - # set(CMAKE_BUILD_TYPE Release) -endif(NOT CMAKE_BUILD_TYPE) +endif() # Set module path set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake) @@ -22,6 +21,28 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake) ################################################################################ # Dependencies ################################################################################ +# MATAR submodule +# (Note: the way the MATAR submodule is handled may need to change in the +# future. Currently, it is deployed as a header-only library, which is why it +# seemed sufficient to simply include the MATAR header. However, the +# requirements of linking Kokkos and other parts of Trilinos may change this. +# Please update this as needed.) +add_subdirectory(matar) +include_directories(matar/src) + +# Optional Kokkos dependency for MATAR +option(MATAR_WITH_KOKKOS "MATAR is to be compiled against Kokkos" OFF) +if (MATAR_WITH_KOKKOS) + find_package(Kokkos REQUIRED) +endif() + +# Optional Doxygen and Sphinx dependencies +option(WITH_DOCS "Build the documentation locally" OFF) +if (WITH_DOCS) + find_package(Doxygen REQUIRED) + find_package(Sphinx REQUIRED) +endif() + # Optional BLAS/LAPACK dependency (disabled by default) option(WITH_BLAS_LAPACK "Build ELEMENTS with BLAS/LAPACK" OFF) if (WITH_BLAS_LAPACK) @@ -37,22 +58,6 @@ if (WITH_VTK) find_package(VTK REQUIRED NO_MODULE) endif() -# Optional Doxygen and Sphinx dependencies -option(WITH_DOCS "Build the documentation locally" OFF) -if (WITH_DOCS) - find_package(Doxygen REQUIRED) - find_package(Sphinx REQUIRED) -endif() - -# MATAR submodule -# (Note: the way the MATAR submodule is handled may need to change in the -# future. Currently, it is deployed as a header-only library, which is why it -# seemed sufficient to simply include the MATAR header. However, the -# requirements of linking Kokkos and other parts of Trilinos may change this. -# Please update this as needed.) -add_subdirectory(matar) -include_directories(matar/src) - ################################################################################ # Build @@ -60,6 +65,9 @@ include_directories(matar/src) # Includes include_directories(common) include_directories(elements) +if (MATAR_WITH_KOKKOS) + add_subdirectory(elements-kokkos) +endif() include_directories(geometry) include_directories(slam) include_directories(swage) diff --git a/build_scripts/build_matar_kokkos_openmp.sh b/build_scripts/build_matar_kokkos_openmp.sh new file mode 100755 index 00000000..c097d6e5 --- /dev/null +++ b/build_scripts/build_matar_kokkos_openmp.sh @@ -0,0 +1,47 @@ +#!/bin/sh + +# Build configuration (modify this!) +ELEMENTS_DIR=/path/to/elements +MATAR_DIR=/path/to/matar/within/elements +PROJECT_BUILD_DIR=/path/to/project/build/directory +export OMP_NUM_THREADS=sensible_number_of_openmp_threads_for_your_machine +export OMP_PROC_BIND=spread + + +# Build and install Kokkos with OpenMP +KOKKOS_SOURCE_DIR=${MATAR_DIR}/src/Kokkos/kokkos +KOKKOS_BUILD_DIR=${PROJECT_BUILD_DIR}/kokkos +KOKKOS_INSTALL_DIR=${KOKKOS_BUILD_DIR} + +OPTIONS=( + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${KOKKOS_INSTALL_DIR} + -DCMAKE_CXX_STANDARD=17 + -DKokkos_ENABLE_SERIAL=ON + -DKokkos_ENABLE_OPENMP=ON + -DKokkos_ENABLE_TESTS=OFF + -DBUILD_TESTING=OFF +) + +cmake "${OPTIONS[@]}" -S ${KOKKOS_SOURCE_DIR} -B ${KOKKOS_BUILD_DIR} +cmake --build ${KOKKOS_BUILD_DIR} +cmake --install ${KOKKOS_BUILD_DIR} + + +# Build and install ELEMENTS with MATAR/Kokkos +ELEMENTS_SOURCE_DIR=${ELEMENTS_DIR} +ELEMENTS_BUILD_DIR=${PROJECT_BUILD_DIR}/elements +ELEMENTS_INSTALL_DIR=${ELEMENTS_BUILD_DIR} + +OPTIONS=( + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${ELEMENTS_INSTALL_DIR} + -DMATAR_WITH_KOKKOS=ON + -DKokkos_DIR=${KOKKOS_INSTALL_DIR}/lib/cmake/Kokkos + -DKOKKOS=ON + -DOPENMP=ON +) + +cmake "${OPTIONS[@]}" -S ${ELEMENTS_SOURCE_DIR} -B ${ELEMENTS_BUILD_DIR} +cmake --build ${ELEMENTS_BUILD_DIR} --verbose +cmake --install ${ELEMENTS_BUILD_DIR} diff --git a/common/error.h b/common/error.h index ee33c36e..e93b26c5 100644 --- a/common/error.h +++ b/common/error.h @@ -19,3 +19,8 @@ class VtkGridError : public std::runtime_error { public: VtkGridError(const std::string info) : std::runtime_error(info) {} }; + +class NotImplementedError : public std::runtime_error { + public: + NotImplementedError(const std::string info) : std::runtime_error(info) {} +}; diff --git a/elements-kokkos/CMakeLists.txt b/elements-kokkos/CMakeLists.txt new file mode 100644 index 00000000..debdc8ff --- /dev/null +++ b/elements-kokkos/CMakeLists.txt @@ -0,0 +1,3 @@ +set(ELEMENTS_KOKKOS_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/include) + +add_subdirectory(src) diff --git a/elements-kokkos/include/BernsteinProductBasis.h b/elements-kokkos/include/BernsteinProductBasis.h new file mode 100644 index 00000000..5d6f1f4b --- /dev/null +++ b/elements-kokkos/include/BernsteinProductBasis.h @@ -0,0 +1,36 @@ +#pragma once + + +#include "TensorProductBasis.h" + + +template +struct BernsteinProductBasis : TensorProductBasis { + using TensorProductBasis::Nd, TensorProductBasis::Np, + TensorProductBasis::ijk, TensorProductBasis::rad; + + SizeType N; + SizeType Ne; + + // Work array for intermediate coefficients + NumType *C; + + BernsteinProductBasis(const SizeType); + ~BernsteinProductBasis(); + + // Basis functions and basis function gradients + NumType eval_basis(const SizeType, const NumType *); + void eval_grad_basis(const SizeType, const NumType *, NumType *); + + // Function approximation over element + NumType eval_approx(const NumType *, const NumType *); + void eval_grad_approx(const NumType *, const NumType *, NumType *); + + // Jacobian of spatial mapping + void eval_jac(const NumType *, const NumType *, const NumType *, + const NumType *, NumType *); + NumType eval_det_jac(const NumType *, const NumType *, const NumType *, + const NumType *); + void eval_inv_jac(const NumType *, const NumType *, const NumType *, + const NumType *, NumType *); +}; diff --git a/elements-kokkos/include/HexRef.h b/elements-kokkos/include/HexRef.h new file mode 100644 index 00000000..7a2de14d --- /dev/null +++ b/elements-kokkos/include/HexRef.h @@ -0,0 +1,106 @@ +#pragma once + + +// MATAR includes +#include "MatarTypeDefs.h" + + +enum BasisType { + LAGRANGE, + LEGENDRE, + BERNSTEIN +}; + + +// Forward declarations +template +struct TensorProductBasis; + + +struct HexRef { + HexRef(int order_basis, BasisType type_basis); + ~HexRef(); + + + // Cells (subelements used in volume integration) + int num_ref_cells_1D_; + + int num_ref_cells_in_elem_; + int num_ref_cells_in_elem() const; + + int cell_rid(int i, int j, int k) const; + + MatarRealCArray ref_cell_positions_; + Real ref_cell_positions(int cell_rid, int dim) const; + + MatarRealCArray ref_cell_g_weights_; + Real ref_cell_g_weights(int cell_rid) const; + + + // Patches (subelement faces used in surface integration) + int num_patches_in_elem_; + int num_sides_in_elem_; + + const int patch_rlid_cell_neighbor_[6] = { + 1, // side in neighbor cell mated with -xi side in this cell + 0, // side in neighbor cell mated with +xi side in this cell + 3, // side in neighbor cell mated with -eta side in this cell + 2, // side in neighbor cell mated with +eta side in this cell + 5, // side in neighbor cell mated with -zeta side in this cell + 4 // side in neighbor cell mated with +zeta side in this cell + }; + int patch_rlid_in_cell_neighbor(int patch_rlid) const; + + MatarIntCArray ref_patches_in_cell_list_; + int ref_patches_in_cell(int cell_lid, int patch_rlid) const; + + MatarRealCArray ref_patch_positions_; + Real ref_patch_positions(int patch_rid, int dim) const; + + MatarRealCArray ref_patch_g_weights_; + Real ref_patch_g_weights(int patch_rid) const; + + const Real cell_side_unit_normals_[18] = { + -1, 0, 0, // -xi + 1, 0, 0, // +xi + 0, -1, 0, // -eta + 0, 1, 0, // +eta + 0, 0, -1, // -zeta + 0, 0, 1 // +zeta + }; + Real cell_side_unit_normals(int side_rlid, int dim) const; + + + // Nodes (quadrature points) and vertices (degrees of freedom) + int num_ref_nodes_1D_; + int num_ref_nodes_in_elem_; + int num_ref_nodes() const; + + int num_ref_verts_1D_; + int num_ref_verts_in_elem_; + + Real *ref_verts_1D_; + + + // Evaluations of basis functions and their gradients at quadrature points + // in cells and on patches + static const int num_dim_ = 3; + int num_dim() const; + + int num_basis_; + int num_basis() const; + + MatarRealCArray ref_cell_basis_; + Real ref_cell_basis(int cell_rid, int basis_id) const; + + MatarRealCArray ref_cell_gradient_; + Real ref_cell_gradient(int cell_rid, int basis_id, int dim) const; + + MatarRealCArray ref_patch_basis_; + Real ref_patch_basis(int patch_rid, int basis_id) const; + + MatarRealCArray ref_patch_gradient_; + Real ref_patch_gradient(int patch_rid, int basis_id, int dim) const; + + TensorProductBasis *basis; +}; diff --git a/elements-kokkos/include/LagrangeProductBasis.h b/elements-kokkos/include/LagrangeProductBasis.h new file mode 100644 index 00000000..13fc80b6 --- /dev/null +++ b/elements-kokkos/include/LagrangeProductBasis.h @@ -0,0 +1,38 @@ +#pragma once + + +#include "TensorProductBasis.h" + + +template +struct LagrangeProductBasis : TensorProductBasis { + using TensorProductBasis::Nd, TensorProductBasis::Np, + TensorProductBasis::ijk, TensorProductBasis::rad; + + SizeType N; + SizeType Ne; + const NumType *Z; + NumType *w; + + // Work array for intermediate coefficients + NumType *C; + + LagrangeProductBasis(const SizeType, const NumType *); + ~LagrangeProductBasis(); + + // Basis functions and basis function gradients + NumType eval_basis(const SizeType, const NumType *); + void eval_grad_basis(const SizeType, const NumType *, NumType *); + + // Function approximation over element + NumType eval_approx(const NumType *, const NumType *); + void eval_grad_approx(const NumType *, const NumType *, NumType *); + + // Jacobian of spatial mapping + void eval_jac(const NumType *, const NumType *, const NumType *, + const NumType *, NumType *); + NumType eval_det_jac(const NumType *, const NumType *, const NumType *, + const NumType *); + void eval_inv_jac(const NumType *, const NumType *, const NumType *, + const NumType *, NumType *); +}; diff --git a/elements-kokkos/include/LegendreProductBasis.h b/elements-kokkos/include/LegendreProductBasis.h new file mode 100644 index 00000000..f9bf369c --- /dev/null +++ b/elements-kokkos/include/LegendreProductBasis.h @@ -0,0 +1,28 @@ +#pragma once + + +#include "TensorProductBasis.h" + + +template +struct LegendreProductBasis : TensorProductBasis { + using TensorProductBasis::Nd, TensorProductBasis::Np, + TensorProductBasis::ijk, TensorProductBasis::rad; + + SizeType N; + SizeType Ne; + + // Work array for intermediate coefficients + NumType *C; + + LegendreProductBasis(SizeType); + ~LegendreProductBasis(); + + // Basis functions and basis function gradients + NumType eval_basis(const SizeType, const NumType *); + void eval_grad_basis(const SizeType, const NumType *, NumType *); + + // Function approximation over element + NumType eval_approx(const NumType *, const NumType *); + void eval_grad_approx(const NumType *, const NumType *, NumType *); +}; diff --git a/elements-kokkos/include/MatarTypeDefs.h b/elements-kokkos/include/MatarTypeDefs.h new file mode 100644 index 00000000..aa1398f5 --- /dev/null +++ b/elements-kokkos/include/MatarTypeDefs.h @@ -0,0 +1,12 @@ +#include "common.h" +#include "matar.h" + +#ifdef MATAR_WITH_KOKKOS +typedef CArrayKokkos MatarRealCArray; +typedef CArrayKokkos MatarUIntCArray; +typedef CArrayKokkos MatarIntCArray; +#else +typedef CArray MatarRealCArray; +typedef CArray MatarUIntCArray; +typedef CArray MatarIntCArray; +#endif // MATAR_WITH_KOKKOS diff --git a/elements-kokkos/include/TensorProductBasis.h b/elements-kokkos/include/TensorProductBasis.h new file mode 100644 index 00000000..4d1b1f51 --- /dev/null +++ b/elements-kokkos/include/TensorProductBasis.h @@ -0,0 +1,41 @@ +#pragma once + + +#include "common.h" + + +/* + * Interface class for 3D tensor product basis, to be called for evaluating: + * - basis functions and their gradients; + * - approximations in the basis and their derivatives; and, + * - for choices of basis that are typically used to represent the geometry of + * the mesh (Lagrange and Bernstein), the Jacobian of the spatial mapping. + */ +template +struct TensorProductBasis { + static const SizeType Nd = 3; + SizeType Np; + + // Arrays for converting from flat to multidimensional indices + SizeType ijk[Nd]; + SizeType rad[Nd]; + + TensorProductBasis(const SizeType); + ~TensorProductBasis() {}; + + // Basis functions and basis function gradients + virtual NumType eval_basis(const SizeType, const NumType *) = 0; + virtual void eval_grad_basis(const SizeType, const NumType *, NumType *) = 0; + + // Function approximation over element + virtual NumType eval_approx(const NumType *, const NumType *) = 0; + virtual void eval_grad_approx(const NumType *, const NumType *, NumType *) = 0; + + // Jacobian of spatial mapping + virtual void eval_jac(const NumType *, const NumType *, const NumType *, + const NumType *, NumType *); + virtual NumType eval_det_jac(const NumType *, const NumType *, const NumType *, + const NumType *); + virtual void eval_inv_jac(const NumType *, const NumType *, const NumType *, + const NumType *, NumType *); +}; diff --git a/elements-kokkos/include/bernstein_polynomials.h b/elements-kokkos/include/bernstein_polynomials.h new file mode 100644 index 00000000..1f9de1e4 --- /dev/null +++ b/elements-kokkos/include/bernstein_polynomials.h @@ -0,0 +1,22 @@ +#pragma once + +#include "common.h" + +// Bernstein polynomial of order n: +// B(n,v) = (0.5)^n * (C^n_v) * (1-x)^(n-v) * (1+x)^v +// for x \in [-1, 1] + +namespace bernstein{ + // Bernstein polynomials + // n is the order, v is the "index", X \in [-1, 1] + template NumType eval(const int n, const int v, const NumType X); + + template NumType eval_der(const int n, const int v, const NumType X); + + //Bernstein Approximations + template + NumType eval_approx(const SizeType N, const NumType *c, const NumType X); + + template + NumType eval_der_approx(const SizeType N, const NumType *c, const NumType X); +} diff --git a/elements-kokkos/include/jacobi_polynomials.h b/elements-kokkos/include/jacobi_polynomials.h new file mode 100644 index 00000000..f076ceb7 --- /dev/null +++ b/elements-kokkos/include/jacobi_polynomials.h @@ -0,0 +1,35 @@ +#pragma once + +#include "common.h" + +namespace jacobi { + /* Recurrence relation parameters */ + inline Real a(Real alpha, Real beta, int n) { + if (n == 1) return 0.5*(alpha + beta) + 1.0; + return (2.0*Real(n) + alpha + beta - 1.0)*(2.0*Real(n) + alpha + beta) + /(2.0*Real(n)*(Real(n) + alpha + beta)); + }; + + inline Real b(Real alpha, Real beta, int n) { + if (n == 1) return 0.5*(alpha - beta); + return (alpha*alpha - beta*beta)*(2*Real(n) + alpha + beta - 1.0) + /(2.0*Real(n)*(Real(n) + alpha + beta) + *(2.0*Real(n) + alpha + beta - 2.0)); + }; + + inline Real c(Real alpha, Real beta, int n) { + if (n == 1) return 0.0; + return (Real(n) + alpha - 1.0)*(Real(n) + beta - 1.0) + *(2.0*Real(n) + alpha + beta) + /(Real(n)*(Real(n) + alpha + beta) + *(2.0*Real(n) + alpha + beta - 2.0)); + }; + + /* Polynomial and polynomial derivative evaluation */ + template + NumType eval(int n, Real alpha, Real beta, NumType X); + + template + NumType eval_der(const int n, const int k, const Real alpha, + const Real beta, const NumType X); +} diff --git a/elements-kokkos/include/lagrange_polynomials.h b/elements-kokkos/include/lagrange_polynomials.h new file mode 100644 index 00000000..7a124b61 --- /dev/null +++ b/elements-kokkos/include/lagrange_polynomials.h @@ -0,0 +1,35 @@ +#pragma once + +#include "common.h" + +namespace lagrange { + // Identifying singularities + template + SizeType find_coincident_vertex(const SizeType &, const NumType *, + const NumType &); + + // Barycentric weights + template + void compute_barycentric_weights(const SizeType &, const NumType *, + NumType *); + + // Lagrange polynomials (basis functions) + template + NumType eval(const SizeType Nv, const SizeType i, const SizeType ic, + const NumType *Z, const NumType *w, const NumType X); + + template + NumType eval_der(const SizeType Nv, const SizeType n, + const SizeType i, const SizeType ic, const NumType *Z, + const NumType *w, const NumType X, NumType *c); + + // Lagrange interpolants (sums of products of bases and coefficients) + template + NumType eval_interp(const SizeType Nv, const SizeType i, + const NumType *Z, const NumType *w, const NumType X, const NumType *c); + + template + NumType eval_der_interp(const SizeType Nv, const SizeType n, + const SizeType i, const NumType *Z, const NumType *w, const NumType X, + NumType *c); +} diff --git a/elements-kokkos/include/legendre_polynomials.h b/elements-kokkos/include/legendre_polynomials.h new file mode 100644 index 00000000..526def45 --- /dev/null +++ b/elements-kokkos/include/legendre_polynomials.h @@ -0,0 +1,20 @@ +#pragma once + +#include "common.h" + +namespace legendre { + // Legendre polynomials (basis functions) + template + NumType eval(const int n, const NumType X); + + template + NumType eval_der(const int n, const int k, const NumType X); + + // Legendre approximations (sums of products of bases and coefficients) + template + NumType eval_approx(const SizeType N, const NumType *c, const NumType X); + + template + NumType eval_der_approx(const SizeType N, const SizeType k, const NumType *c, + const NumType X); +} diff --git a/elements-kokkos/include/points_and_weights.h b/elements-kokkos/include/points_and_weights.h new file mode 100644 index 00000000..eade491c --- /dev/null +++ b/elements-kokkos/include/points_and_weights.h @@ -0,0 +1,21 @@ +#pragma once + + +#include "MatarTypeDefs.h" +#include "common.h" + + +/* Equispaced and Chebyshev point distributions */ +template +void equispaced_points(SizeType N, NumType &zl, NumType &zr, NumType *z); + +template +void chebyshev_points(SizeType N, NumType &zl, NumType &zr, NumType *z); + + +/* Hard-coded Gauss-Lobatto and Gauss-Legendre quadrature points and weights */ +void lobatto_points(MatarRealCArray &lob_nodes_1D, const int &num); +void lobatto_weights(MatarRealCArray &lob_weights_1D, const int &num); + +void legendre_points(MatarRealCArray &leg_nodes_1D, const int &num); +void legendre_weights(MatarRealCArray &leg_weights_1D, const int &num); diff --git a/elements-kokkos/src/BernsteinProductBasis.cpp b/elements-kokkos/src/BernsteinProductBasis.cpp new file mode 100644 index 00000000..98a97a5c --- /dev/null +++ b/elements-kokkos/src/BernsteinProductBasis.cpp @@ -0,0 +1,176 @@ +#include "BernsteinProductBasis.h" +#include "bernstein_polynomials.h" + + +template +BernsteinProductBasis::BernsteinProductBasis(const SizeType order) + : TensorProductBasis(order) { + N = Np + 1; + Ne = std::pow(N, Nd); + + rad[0] = N; + rad[1] = N; + rad[2] = N; + + //Allocate memory for intermediate coefficients + C = new NumType[2*N]; +} + +template +BernsteinProductBasis::~BernsteinProductBasis() { } + + +template +NumType BernsteinProductBasis::eval_basis(const SizeType I, + const NumType *X) { + // Decompose index of 3D tensor product basis function into indices of + // Bernstein polynomials + common::base_10_to_mixed_radix(Nd, rad, I, ijk); + + // Evaluate Bernstein polynomials + NumType Bi = bernstein::eval(N, int(ijk[0]), X[0]); + NumType Bj = bernstein::eval(N, int(ijk[1]), X[1]); + NumType Bk = bernstein::eval(N, int(ijk[2]), X[2]); + + return Bi*Bj*Bk; +} + +template +void BernsteinProductBasis::eval_grad_basis(const SizeType I, + const NumType *X, NumType *grad_phi) { + // Decompose index of 3D tensor product basis function into indices of + // Bernstein polynomials + common::base_10_to_mixed_radix(Nd, rad, I, ijk); + + // Evaluate Bernstein polynomials + NumType Bi = bernstein::eval(N, int(ijk[0]), X[0]); + NumType Bj = bernstein::eval(N, int(ijk[1]), X[1]); + NumType Bk = bernstein::eval(N, int(ijk[2]), X[2]); + + // Evaluate derivatives of Bernstein polynomials + NumType dBi = bernstein::eval_der(N, int(ijk[0]), X[0]); + NumType dBj = bernstein::eval_der(N, int(ijk[1]), X[1]); + NumType dBk = bernstein::eval_der(N, int(ijk[2]), X[2]); + + // Store partial derivatives in entries of gradient + grad_phi[0] = dBi*Bj*Bk; + grad_phi[1] = Bi*dBj*Bk; + grad_phi[3] = Bi*Bj*dBk; +} + +/* Return evaluation of local function approximation */ +template +NumType BernsteinProductBasis::eval_approx(const NumType *c, + const NumType *X) { + for (int k = 0; k < N; k++) { + for (int j = 0; j < N; j++){ + // Collapse first dimension into coefficients for second dimension + C[j] = bernstein::eval_approx(N, &c[j*N+k*N*N], X[0]); + } + // Collapse second dimension into coefficients for third dimension + C[N+k] = bernstein::eval_approx(N, &C[N], X[2]); + } + // Collapse third dimension into approximation evaluation + return bernstein::eval_approx(N, &C[N], X[2]); +} + +/* + * Evaluate gradient of local function approximation, which is formed by the + * sum of the products of tensor-product Bernstein basis functions and the + * provided coefficients, at specified coordinates. + */ +template +void BernsteinProductBasis::eval_grad_approx(const NumType *c, + const NumType *X, NumType *grad_f) { + for (int l = 0; l < Nd; l++) { + for (int k =0; k < N; k++) { + for (int j =0; j < N; j++) { + //Collapse first dimension into coefficients for second dimension + if (l==0) { + C[j] = bernstein::eval_der_approx(N, &c[j*N+k*N*N], X[0]); + } else { + C[j] = bernstein::eval_approx(N, &c[j*N+k*N*N], X[0]); + } + } + // Collapse second dimension into coefficients for third dimension + if ( l == 1) { + C[N+k] = bernstein::eval_der_approx(N, &C[N], X[2]); + } else { + C[N+k] = bernstein::eval_approx(N, C, X[1]); + } + } + + // Collapse third dimension into approximation evaluation + if ( l ==2 ) { + grad_f[l] = bernstein::eval_der_approx(N, &C[N], X[2]); + } else { + grad_f[l] = bernstein::eval_approx(N, &C[N], X[2]); + } + } +} + + +////////////////* Evaluate the Jacobian of the spatial mapping *//////////////////////// + +/* x, y, z in physical space. X in reference space. J = Jacobian (column major). */ + +template +void BernsteinProductBasis::eval_jac(const NumType *x, + const NumType *y, const NumType *z, const NumType *X, NumType *J) { + // Evaluate gradient of x=x(X,Y,Z); + this->eval_grad_approx(x,X,J); + + // Evaluate gradient of y=y(X,Y,Z); + this->eval_grad_approx(y,X,J+3); + + // Evaluate gradient of z=z(X,Y,Z); + this->eval_grad_approx(z,X,J+6); +} + + +//////////////* Determinant of Jacobian *///////////////////////// + +/* x, y, z in physical space. X in reference space. */ + +template +NumType BernsteinProductBasis::eval_det_jac(const NumType *x, + const NumType *y, const NumType *z, const NumType *X) { + NumType J[9]; + this->eval_jac(x, y, z, X, J); + + return J[0]*(J[4]*J[8]-J[5]*J[7]) - J[1]*(J[3]*J[8]-J[5]*J[6]) + J[2]*(J[3]*J[7]-J[4]*J[6]); +} + + +//////////////* Inverse Jacobian *////////////////////////////// + +/* x, y, z in physical space. X in reference space. Jinv = Invers Jacobian (column-major). */ + +template +void BernsteinProductBasis::eval_inv_jac(const NumType *x, + const NumType *y, const NumType *z, const NumType *X, NumType *Jinv) { + // instantiate jacobian // + NumType J[9]; + + // get jacobian // + this->eval_jac(x,y,z,X,J); + + // invert determinant of J // + NumType d = 1.0/(this->eval_det_jac(x,y,z,X)); + + // get Jinv (Jinv = adj(J)/det(J) ) // + Jinv[0] = d*(J[4]*J[8]-J[5]*J[7]); + Jinv[1] = -d*(J[1]*J[8]-J[2]*J[7]); + Jinv[2] = d*(J[1]*J[5]-J[2]*J[4]); + Jinv[3] = -d*(J[3]*J[8]-J[5]*J[6]); + Jinv[4] = d*(J[0]*J[8]-J[2]*J[6]); + Jinv[5] = -d*(J[0]*J[5]-J[2]*J[3]); + Jinv[6] = d*(J[3]*J[7]-J[4]*J[6]); + Jinv[7] = -d*(J[0]*J[7]-J[1]*J[6]); + Jinv[8] = d*(J[0]*J[4]-J[1]*J[3]); +} + + +//////////////// Explicit instantiation of template class ////////////////////// +template class BernsteinProductBasis; +template class BernsteinProductBasis; diff --git a/elements-kokkos/src/CMakeLists.txt b/elements-kokkos/src/CMakeLists.txt new file mode 100644 index 00000000..ff6a3771 --- /dev/null +++ b/elements-kokkos/src/CMakeLists.txt @@ -0,0 +1,48 @@ +add_library( + elements_kokkos + HexRef.cpp + LagrangeProductBasis.cpp + LegendreProductBasis.cpp + BernsteinProductBasis.cpp + TensorProductBasis.cpp + lagrange_polynomials.cpp + legendre_polynomials.cpp + jacobi_polynomials.cpp + bernstein_polynomials.cpp + points_and_weights.cpp +) + +target_compile_definitions( + elements_kokkos + PUBLIC + MATAR_WITH_KOKKOS + HAVE_KOKKOS=1 + HAVE_OPENMP=1 +) + +target_include_directories( + elements_kokkos + PUBLIC + ${ELEMENTS_KOKKOS_INCLUDES} +) + + +if (CUDA) + target_compile_definitions(elements_kokkos PUBLIC -DHAVE_CUDA=1) +elseif (HIP) + target_compile_definitions(elements_kokkos PUBLIC -DHAVE_HIP=1) +elseif (OPENMP) + target_compile_definitions(elements_kokkos PUBLIC -DHAVE_OPENMP=1) +elseif (THREADS) + target_compile_definitions(elements_kokkos PUBLIC -DHAVE_THREADS=1) +else() + message(FATAL_ERROR "Please specify whether you are using CUDA, HIP, OPENMP, etc.") +endif() + +target_link_libraries( + elements_kokkos + PUBLIC + common + matar + Kokkos::kokkos +) diff --git a/elements-kokkos/src/HexRef.cpp b/elements-kokkos/src/HexRef.cpp new file mode 100644 index 00000000..53019e2e --- /dev/null +++ b/elements-kokkos/src/HexRef.cpp @@ -0,0 +1,347 @@ +#include "HexRef.h" +#include "LagrangeProductBasis.h" +#include "LegendreProductBasis.h" +#include "BernsteinProductBasis.h" +#include "points_and_weights.h" +#include "error.h" + + +HexRef::HexRef(int order_basis, BasisType type_basis=LAGRANGE) { + // Determine sizes of data structures based on the order of the basis + assert(order_basis >= 0 && "Error: order of basis must be positive"); + if (order_basis == 0){ + num_ref_nodes_1D_ = 2; + num_ref_verts_1D_ = 2; + } else if (order_basis > 0) { + num_ref_nodes_1D_ = 2*order_basis + 1; + num_ref_verts_1D_ = order_basis + 1; + } + + num_ref_cells_1D_ = num_ref_nodes_1D_ - 1; + num_ref_cells_in_elem_ = (num_ref_nodes_1D_ - 1)*(num_ref_nodes_1D_ - 1) + *(num_ref_nodes_1D_ - 1); + + num_patches_in_elem_ = (num_ref_cells_1D_ + 1) + *(num_ref_cells_1D_*num_ref_cells_1D_)*3; + num_sides_in_elem_ = num_ref_cells_in_elem_*6; + + num_ref_verts_in_elem_ = num_ref_verts_1D_*num_ref_verts_1D_ + *num_ref_verts_1D_; + num_basis_ = num_ref_verts_in_elem_; + + + // Allocate memory for data structures + ref_cell_positions_ = MatarRealCArray(num_ref_cells_in_elem_, 3); + ref_cell_g_weights_ = MatarRealCArray(num_ref_cells_in_elem_); + + ref_patches_in_cell_list_ = MatarIntCArray(num_sides_in_elem_); + ref_patch_positions_ = MatarRealCArray(num_patches_in_elem_, 3); + ref_patch_g_weights_ = MatarRealCArray(num_patches_in_elem_); + + ref_cell_basis_ = MatarRealCArray(num_ref_cells_in_elem_, num_basis_); + ref_cell_gradient_ = MatarRealCArray(num_ref_cells_in_elem_, num_basis_, num_dim_); + + ref_patch_basis_ = MatarRealCArray(num_patches_in_elem_, num_basis_); + ref_patch_gradient_ = MatarRealCArray(num_patches_in_elem_, num_basis_, num_dim_); + + ref_verts_1D_ = new Real[num_ref_verts_1D_]; + + + // Retrieve Gauss-Legendre and Gauss-Lobatto quadrature points and weights + auto leg_points_1D = MatarRealCArray(num_ref_cells_1D_); + legendre_points(leg_points_1D, num_ref_cells_1D_); + + auto leg_weights_1D = MatarRealCArray(num_ref_cells_1D_); + legendre_weights(leg_weights_1D, num_ref_cells_1D_); + + auto lob_points_1D = MatarRealCArray(num_ref_nodes_1D_); + lobatto_points(lob_points_1D, num_ref_nodes_1D_); + + auto lob_weights_1D = MatarRealCArray(num_ref_nodes_1D_); + lobatto_weights(lob_weights_1D, num_ref_nodes_1D_); + + + // Store every other node (quadrature point) coordinate as a vertex coordinate + int vert_id = 0, node_id = 0; + while (node_id < num_ref_nodes_1D_) { + ref_verts_1D_[vert_id] = lob_points_1D(node_id); + vert_id += 1; + node_id += 2; + } + + + // Fill in cell data structures + for (int k = 0; k < num_ref_cells_1D_; k++) { + for (int j = 0; j < num_ref_cells_1D_; j++) { + for (int i = 0; i < num_ref_cells_1D_; i++) { + int this_cell_rid = cell_rid(i, j, k); + + ref_cell_positions_(this_cell_rid, 0) = leg_points_1D(i); + ref_cell_positions_(this_cell_rid, 1) = leg_points_1D(j); + ref_cell_positions_(this_cell_rid, 2) = leg_points_1D(k); + + ref_cell_g_weights_(this_cell_rid) = leg_weights_1D(i) + *leg_weights_1D(j)*leg_weights_1D(k); + } + } + } + + + // Fill in patch data structures + for (int side_rid = 0; side_rid < num_sides_in_elem_; side_rid++) { + ref_patches_in_cell_list_(side_rid) = -1; + } + + int patch_rid = 0; + for (int k = 0; k < num_ref_cells_1D_; k++) { + for (int j = 0; j < num_ref_cells_1D_; j++) { + for (int i = 0; i < num_ref_cells_1D_; i++) { + int this_cell_rid = cell_rid(i, j, k); + + for (int side_lid = 0; side_lid < 6; side_lid++) { + int side_rid = side_lid + this_cell_rid*6; + + // Check to see that this patch has not been seen already + if (ref_patches_in_cell_list_(side_rid) == -1) { + // Store the index for this patch in the cell + ref_patches_in_cell_list_(side_rid) = patch_rid; + + // Store the quadrature point area values on the patch + switch (side_lid) { + case 0: // patch with -xi normal + ref_patch_positions_(patch_rid,0) = lob_points_1D(i); + ref_patch_positions_(patch_rid,1) = leg_points_1D(j); + ref_patch_positions_(patch_rid,2) = leg_points_1D(k); + + ref_patch_g_weights_(patch_rid) = leg_weights_1D(j)*leg_weights_1D(k); + break; + case 1: // patch with +xi normal + ref_patch_positions_(patch_rid,0) = lob_points_1D(i+1); + ref_patch_positions_(patch_rid,1) = leg_points_1D(j); + ref_patch_positions_(patch_rid,2) = leg_points_1D(k); + + ref_patch_g_weights_(patch_rid) = leg_weights_1D(j)*leg_weights_1D(k); + break; + case 2: // patch with -eta normal + ref_patch_positions_(patch_rid,0) = leg_points_1D(i); + ref_patch_positions_(patch_rid,1) = lob_points_1D(j); + ref_patch_positions_(patch_rid,2) = leg_points_1D(k); + + ref_patch_g_weights_(patch_rid) = leg_weights_1D(i)*leg_weights_1D(k); + break; + case 3: // patch with +eta normal + ref_patch_positions_(patch_rid,0) = leg_points_1D(i); + ref_patch_positions_(patch_rid,1) = lob_points_1D(j+1); + ref_patch_positions_(patch_rid,2) = leg_points_1D(k); + + ref_patch_g_weights_(patch_rid) = leg_weights_1D(i)*leg_weights_1D(k); + break; + case 4: // patch with -zeta normal + ref_patch_positions_(patch_rid,0) = leg_points_1D(i); + ref_patch_positions_(patch_rid,1) = leg_points_1D(j); + ref_patch_positions_(patch_rid,2) = lob_points_1D(k); + + ref_patch_g_weights_(patch_rid) = leg_weights_1D(i)*leg_weights_1D(j); + break; + case 5: // patch with +zeta normal + ref_patch_positions_(patch_rid,0) = leg_points_1D(i); + ref_patch_positions_(patch_rid,1) = leg_points_1D(j); + ref_patch_positions_(patch_rid,2) = lob_points_1D(k+1); + + ref_patch_g_weights_(patch_rid) = leg_weights_1D(i)*leg_weights_1D(j); + break; + } + + // Get the rID for the neighboring cell in the element + // that communicates with this cell across this patch + int neighbor_cell_rid; + if (side_lid == 0 && 0 < i) { + neighbor_cell_rid = cell_rid(i-1, j, k); + } else if (side_lid == 1 && i < num_ref_cells_1D_ - 1) { + neighbor_cell_rid = cell_rid(i+1, j, k); + } else if (side_lid == 2 && 0 < j) { + neighbor_cell_rid = cell_rid(i, j-1, k); + } else if (side_lid == 3 && j < num_ref_cells_1D_ - 1) { + neighbor_cell_rid = cell_rid(i, j+1, k); + } else if (side_lid == 4 && 0 < k) { + neighbor_cell_rid = cell_rid(i, j, k-1); + } else if (side_lid == 5 && k < num_ref_cells_1D_ - 1) { + neighbor_cell_rid = cell_rid(i, j, k+1); + } else { + neighbor_cell_rid = -1; // no neighboring cell in element + } + + // If there is a neighboring cell, set this patch rID in its list + if (0 <= neighbor_cell_rid && neighbor_cell_rid < num_ref_cells_in_elem_) { + // Get the side lid for this patch in the neighboring cell + int neighbor_side_lid = patch_rlid_in_cell_neighbor(side_lid); + + // Get side rID from that side lid + int neighbor_side_rid = neighbor_side_lid + (neighbor_cell_rid)*6; + + // Store the patch rID for this patch in the neighboring cell + ref_patches_in_cell_list_(neighbor_side_rid) = patch_rid; + } + + // Increment the patch index + patch_rid ++; + } + } + } + } + } + + + // Initialize basis object based on type specified + switch (type_basis) { + case BasisType::LAGRANGE: + basis = new LagrangeProductBasis(order_basis, ref_verts_1D_); + break; + case BasisType::LEGENDRE: + basis = new LegendreProductBasis(order_basis); + break; + case BasisType::BERNSTEIN: + basis = new BernsteinProductBasis(order_basis); + break; + default: + basis = nullptr; + std::string err_msg("Error: basis class not implemented for this type."); + throw NotImplementedError(err_msg); + } + + + // Fill in evaluations of basis functions and their gradients in cells + for (int cell_rlid = 0; cell_rlid < num_ref_cells_in_elem_; cell_rlid++) { + // Get the cell coordinates + Real point[num_dim_]; + for (int dim = 0; dim < num_dim_; dim++) + point[dim] = ref_cell_positions(cell_rlid, dim); + + // Compute and store all the basis values at the cell + for (int basis_id = 0; basis_id < num_basis_; basis_id++) + ref_cell_basis_(cell_rlid, basis_id) = basis->eval_basis(basis_id, point); + + // Compute and store all the basis gradient values at the cell + for (int basis_id = 0; basis_id < num_basis_; basis_id++) { + Real grad_basis[num_dim_]; + basis->eval_grad_basis(basis_id, point, grad_basis); + ref_cell_gradient_(cell_rlid, basis_id, 0) = grad_basis[0]; + ref_cell_gradient_(cell_rlid, basis_id, 1) = grad_basis[1]; + ref_cell_gradient_(cell_rlid, basis_id, 2) = grad_basis[2]; + } + } + + + // Fill in evaluations of basis functions and their gradients on patches + for (int patch_rlid = 0; patch_rlid < num_patches_in_elem_; patch_rlid++) { + // Get the cell coordinates + // Get the patch coordinates + Real point[num_dim_]; + for (int dim = 0; dim < num_dim_; dim++) + point[dim] = ref_patch_positions(patch_rlid, dim); + + // Compute and store all the basis values at the patch + for (int basis_id = 0; basis_id < num_basis_; basis_id++) + ref_patch_basis_(patch_rlid, basis_id) = basis->eval_basis(basis_id, + point); + + // loop over the basis polynomials where there is one from each vertex + for (int basis_id = 0; basis_id < num_basis_; basis_id++) { + Real grad_basis[num_dim_]; + basis->eval_grad_basis(basis_id, point, grad_basis); + ref_patch_gradient_(patch_rlid, basis_id, 0) = grad_basis[0]; + ref_patch_gradient_(patch_rlid, basis_id, 1) = grad_basis[1]; + ref_patch_gradient_(patch_rlid, basis_id, 2) = grad_basis[2]; + } + } +} + + +HexRef::~HexRef() { + delete[] ref_verts_1D_; + delete basis; +} + + +int HexRef::num_ref_cells_in_elem() const { + return num_ref_cells_in_elem_; +}; + + +int HexRef::cell_rid(int i, int j, int k) const { + return i + j*num_ref_cells_1D_ + k*num_ref_cells_1D_*num_ref_cells_1D_; +}; + + +Real HexRef::ref_cell_positions(int cell_rid, int dim) const { + return ref_cell_positions_(cell_rid, dim); +} + + +Real HexRef::ref_cell_g_weights(int cell_rid) const { + return ref_cell_g_weights_(cell_rid); +} + + +int HexRef::patch_rlid_in_cell_neighbor(int patch_rlid) const { + return patch_rlid_cell_neighbor_[patch_rlid]; +} + + +int HexRef::ref_patches_in_cell(int cell_rid, int patch_rlid) const { + int index = patch_rlid + cell_rid*6; + return ref_patches_in_cell_list_(index); +} + + +Real HexRef::ref_patch_positions(int patch_rid, int dim) const { + return ref_patch_positions_(patch_rid, dim); +} + + +Real HexRef::ref_patch_g_weights(int patch_rid) const { + return ref_patch_g_weights_(patch_rid); +} + + +Real HexRef::cell_side_unit_normals(int side_rlid, int dim) const { + int index = side_rlid*3 + dim; + return cell_side_unit_normals_[index]; +} + + +int HexRef::num_ref_nodes() const { + return num_ref_nodes_in_elem_; +} + + +int HexRef::num_dim() const { + return num_dim_; +} + + +int HexRef::num_basis() const { + return num_basis_; +} + + +Real HexRef::ref_cell_basis(int cell_rid, int basis_id) const { + return ref_cell_basis_(cell_rid, basis_id); +}; + + +Real HexRef::ref_cell_gradient(int cell_rid, int basis_id, + int dim) const { + return ref_cell_gradient_(cell_rid, basis_id, dim); +}; + + +Real HexRef::ref_patch_basis(int patch_rid, int basis_id) const { + return ref_patch_basis_(patch_rid, basis_id); +}; + + +Real HexRef::ref_patch_gradient(int patch_rid, int basis_id, + int dim) const { + return ref_patch_gradient_(patch_rid, basis_id, dim); +}; diff --git a/elements-kokkos/src/LagrangeProductBasis.cpp b/elements-kokkos/src/LagrangeProductBasis.cpp new file mode 100644 index 00000000..963c4a61 --- /dev/null +++ b/elements-kokkos/src/LagrangeProductBasis.cpp @@ -0,0 +1,289 @@ +#include "LagrangeProductBasis.h" +#include "lagrange_polynomials.h" + + +template +LagrangeProductBasis::LagrangeProductBasis(const SizeType order, + const NumType *vert_coords) : TensorProductBasis(order), + Z(vert_coords) { + N = Np + 1; + Ne = std::pow(N, Nd); + + rad[0] = N; + rad[1] = N; + rad[2] = N; + + // Allocate memory for weights and compute + w = new NumType[N]; + lagrange::compute_barycentric_weights(N, Z, w); + + // Allocate memory for intermediate coefficients + C = new NumType[3*N]; +} + +template +LagrangeProductBasis::~LagrangeProductBasis() { + delete [] w, C; +} + +/* + * Return evaluation of basis function, which is the tensor product of Lagrange + * polynomials, at specified coordinates. + * + * Parameters + * ---------- + * I : index of basis function + * X : coordinates (in reference space) + */ +template +NumType LagrangeProductBasis::eval_basis(const SizeType I, const NumType *X) { + // Decompose index of 3D tensor product basis function into indices of + // Lagrange polynomials + common::base_10_to_mixed_radix(Nd, rad, I, ijk); + + // Check coincidence of coordinates with vertex coordinates + SizeType ix = lagrange::find_coincident_vertex(N, Z, X[0]); + SizeType iy = lagrange::find_coincident_vertex(N, Z, X[1]); + SizeType iz = lagrange::find_coincident_vertex(N, Z, X[2]); + + // Evaluate Lagrange polynomials + NumType li = lagrange::eval(N, ijk[0], ix, Z, w, X[0]); + NumType lj = lagrange::eval(N, ijk[1], iy, Z, w, X[1]); + NumType lk = lagrange::eval(N, ijk[2], iz, Z, w, X[2]); + + return li*lj*lk; +} + +/* + * Evaluate gradient of basis function + * + * Parameters + * ---------- + * I : index of basis function + * X : coordinates (in reference space) + * + * Returns + * ------- + * grad_phi : gradient of the basis function + */ +template +void LagrangeProductBasis::eval_grad_basis(const SizeType I, + const NumType *X, NumType *grad_phi) { + // Decompose index of 3D tensor product basis function into indices of + // Lagrange polynomials + common::base_10_to_mixed_radix(Nd, rad, I, ijk); + + // Check coincidence of coordinates with vertex coordinates + SizeType ix = lagrange::find_coincident_vertex(N, Z, X[0]); + SizeType iy = lagrange::find_coincident_vertex(N, Z, X[1]); + SizeType iz = lagrange::find_coincident_vertex(N, Z, X[2]); + + // Evaluate Lagrange polynomials + NumType li = lagrange::eval(N, ijk[0], ix, Z, w, X[0]); + NumType lj = lagrange::eval(N, ijk[1], iy, Z, w, X[1]); + NumType lk = lagrange::eval(N, ijk[2], iz, Z, w, X[2]); + + // Evaluate Lagrange polynomial derivatives + SizeType n = 1; // order of derivative + NumType dli = lagrange::eval_der(N, n, ijk[0], ix, Z, w, X[0], C); + NumType dlj = lagrange::eval_der(N, n, ijk[1], iy, Z, w, X[1], C); + NumType dlk = lagrange::eval_der(N, n, ijk[2], iz, Z, w, X[2], C); + + // Store partial derivatives in entries of gradient + grad_phi[0] = dli*lj*lk; + grad_phi[1] = li*dlj*lk; + grad_phi[2] = li*lj*dlk; +} + +/* + * Return evaluation of local function approximation, which is formed by + * the sum of the products of tensor-product Lagrange basis functions and the + * provided coefficients, at specified coordinates + * + * Parameters + * ---------- + * c : coefficients + * X : coordinates (in reference space) + * + * Note + * ---- + * The intermediate coefficients or workspace array is divided into 3 lines of + * length equal to the number of vertices. In the dimension-by-dimension + * approach used for the approximation evaluation, each line is filled by + * evaluations at the previous dimensional level. The initial coefficients are + * cycled through in the X-dimension loop to produce coefficients, stored in + * the workspace array, to be used in the Y-dimension loop. The output from the + * Y-dimension loop is coefficients, stored again in the workspace array, to be + * used in the final Z-dimension evaluation. This process, as described, only + * requires 2 lines of workspace. The extra line of workspace is needed for + * derivative evaluations, and so it is unused here. + */ +template +NumType LagrangeProductBasis::eval_approx(const NumType *c, + const NumType *X) { + // Check the coincidence of the coordinates with vertex coordinates + SizeType ix = lagrange::find_coincident_vertex(N, Z, X[0]); + SizeType iy = lagrange::find_coincident_vertex(N, Z, X[1]); + SizeType iz = lagrange::find_coincident_vertex(N, Z, X[2]); + + for (int k = 0; k < N; k++) { + for (int j = 0; j < N; j++) { + // Collapse first dimension into coefficients for second dimension + C[j] = lagrange::eval_interp(N, ix, Z, w, X[0], &c[j*N+k*N*N]); + } + + // Collapse second dimension into coefficients for third dimension + C[N+k] = lagrange::eval_interp(N, iy, Z, w, X[1], C); + } + + // Collapse third dimension into interpolant evaluation + return lagrange::eval_interp(N, iz, Z, w, X[2], &C[N]); +} + +/* + * Evaluate gradient of the local function approximation, which is formed by + * the sum of the products of tensor-product Lagrange basis functions and the + * provided coefficients, at specified coordinates + * + * Parameters + * ---------- + * c : coefficients + * X : coordinates (in reference space) + * + * Returns + * ------- + * grad_f : gradient of the interpolant + * + * Note + * ---- + * This routine uses a dimension-by-dimension approach as in the approximation + * routine. See the documentation of that routine for details. This routine + * differs from that one in that the first line of the workspace array is + * reserved for the derivative evaluations. These derivative evaluations use + * the first line to store divided differences. + */ +template +void LagrangeProductBasis::eval_grad_approx(const NumType *c, + const NumType *X, NumType *grad_f) { + const SizeType n = 1; // order of partial derivative + + // Check the coincidence of the coordinates with vertex coordinates + SizeType ix = lagrange::find_coincident_vertex(N, Z, X[0]); + SizeType iy = lagrange::find_coincident_vertex(N, Z, X[1]); + SizeType iz = lagrange::find_coincident_vertex(N, Z, X[2]); + + for (int l = 0; l < Nd; l++) { + for (int k = 0; k < N; k++) { + for (int j = 0; j < N; j++) { + // Collapse first dimension into coefficients for second dimension + std::copy(c+j*N+k*N*N, c+j*N+k*N*N+N, C); // load first line + if (l == 0) { + C[N+j] = lagrange::eval_der_interp(N, n, ix, Z, w, X[0], C); + } else { + C[N+j] = lagrange::eval_interp(N, ix, Z, w, X[0], C); + } + } + + // Collapse second dimension into coefficients for third dimension + std::copy(C+N, C+2*N, C); // second line back into first line + if (l == 1) { + C[2*N+k] = lagrange::eval_der_interp(N, n, iy, Z, w, X[1], C); + } else { + C[2*N+k] = lagrange::eval_interp(N, iy, Z, w, X[1], C); + } + } + + // Collapse third dimension into interpolant evaluation + std::copy(C+2*N, C+3*N, C); // third line back into first line + if (l == 2) { + grad_f[l] = lagrange::eval_der_interp(N, n, iz, Z, w, X[2], C); + } else { + grad_f[l] = lagrange::eval_interp(N, iz, Z, w, X[2], C); + } + } +} + +/* + * Evaluate the Jacobian of the spatial mapping + * + * Parameters + * ---------- + * x : x coordinates in physical space + * y : y coordinates in physical space + * z : z coordinates in physical space + * X : coordinates in reference space at which to evaluate + * + * Returns + * ------- + * J : Jacobian (stored in column-major order) + */ +template +void LagrangeProductBasis::eval_jac(const NumType *x, const NumType *y, + const NumType *z, const NumType *X, NumType *J) { + // Evaluate the gradient of x = x(X, Y, Z) + this->eval_grad_approx(x, X, J); + + // Evaluate the gradient of y = y(X, Y, Z) + this->eval_grad_approx(y, X, J+3); + + // Evaluate the gradient of z = z(X, Y, Z) + this->eval_grad_approx(z, X, J+6); +} + +/* + * Return the determinant of the Jacobian of the spatial mapping + * + * Parameters + * ---------- + * x : x coordinates in physical space + * y : y coordinates in physical space + * z : z coordinates in physical space + * X : coordinates in reference space at which to evaluate + */ +template +NumType LagrangeProductBasis::eval_det_jac(const NumType *x, + const NumType *y, const NumType *z, const NumType *X) { + NumType J[9]; + this->eval_jac(x, y, z, X, J); + + return J[0]*(J[4]*J[8] - J[5]*J[7]) - J[1]*(J[3]*J[8] - J[5]*J[6]) + + J[2]*(J[3]*J[7] - J[4]*J[6]); +} + +/* Evaluate the inverse of the Jacobian of the spatial mapping + * + * Parameters + * ---------- + * x : x coordinates in physical space + * y : y coordinates in physical space + * z : z coordinates in physical space + * X : coordinates in reference space at which to evaluate + * + * Returns + * ------- + * Jinv : Jacobian inverse (stored in column-major order) + */ +template +void LagrangeProductBasis::eval_inv_jac(const NumType *x, const NumType *y, + const NumType *z, const NumType *X, NumType *Jinv) { + NumType J[9]; + this->eval_jac(x, y, z, X, J); + + NumType d = 1.0/(this->eval_det_jac(x, y, z, X)); + + Jinv[0] = d*(J[4]*J[8] - J[5]*J[7]); + Jinv[1] = -d*(J[1]*J[8] - J[2]*J[7]); + Jinv[2] = d*(J[1]*J[5] - J[2]*J[4]); + + Jinv[3] = -d*(J[3]*J[8] - J[5]*J[6]); + Jinv[4] = d*(J[0]*J[8] - J[2]*J[6]); + Jinv[5] = -d*(J[0]*J[5] - J[2]*J[3]); + + Jinv[6] = d*(J[3]*J[7] - J[4]*J[6]); + Jinv[7] = -d*(J[0]*J[7] - J[1]*J[6]); + Jinv[8] = d*(J[0]*J[4] - J[1]*J[3]); +} + +// Explicit instantiation of template class +template class LagrangeProductBasis; +template class LagrangeProductBasis; diff --git a/elements-kokkos/src/LegendreProductBasis.cpp b/elements-kokkos/src/LegendreProductBasis.cpp new file mode 100644 index 00000000..72de37f1 --- /dev/null +++ b/elements-kokkos/src/LegendreProductBasis.cpp @@ -0,0 +1,168 @@ +#include "LegendreProductBasis.h" +#include "legendre_polynomials.h" + + +template +LegendreProductBasis::LegendreProductBasis(const SizeType order) + : TensorProductBasis(order) { + N = Np + 1; + Ne = std::pow(N, Nd); + + rad[0] = N; + rad[1] = N; + rad[2] = N; + + // Allocate memory for intermediate coefficients + C = new NumType[2*N]; +} + +template +LegendreProductBasis::~LegendreProductBasis() {} + +/* + * Return evaluation of basis function, which is the tensor product of Legendre + * polynomials, at specified coordinates. + * + * Parameters + * ---------- + * I : index of basis function + * X : coordinates (in reference space) + */ +template +NumType LegendreProductBasis::eval_basis(const SizeType I, + const NumType *X) { + // Decompose index of 3D tensor product basis function into indices of + // Legendre polynomials + common::base_10_to_mixed_radix(Nd, rad, I, ijk); + + // Evaluate Legendre polynomials + NumType Pi = legendre::eval(int(ijk[0]), X[0]); + NumType Pj = legendre::eval(int(ijk[1]), X[1]); + NumType Pk = legendre::eval(int(ijk[2]), X[2]); + + return Pi*Pj*Pk; +} + +/* + * Evaluate gradient of basis function + * + * Parameters + * ---------- + * I : index of basis function + * X : coordinates (in reference space) + * + * Returns + * ------- + * grad_phi : gradient of the basis function + */ +template +void LegendreProductBasis::eval_grad_basis(const SizeType I, + const NumType *X, NumType *grad_phi) { + // Decompose index of 3D tensor product basis function into indices of + // Legendre polynomials + common::base_10_to_mixed_radix(Nd, rad, I, ijk); + + // Evaluate Legendre polynomials + NumType Pi = legendre::eval(int(ijk[0]), X[0]); + NumType Pj = legendre::eval(int(ijk[1]), X[1]); + NumType Pk = legendre::eval(int(ijk[2]), X[2]); + + // Evaluate Legendre polynomial derivatives + SizeType q = 1; // order of derivative + NumType dPi = legendre::eval_der(int(ijk[0]), q, X[0]); + NumType dPj = legendre::eval_der(int(ijk[1]), q, X[1]); + NumType dPk = legendre::eval_der(int(ijk[2]), q, X[2]); + + // Store partial derivatives in entries of gradient + grad_phi[0] = dPi*Pj*Pk; + grad_phi[1] = Pi*dPj*Pk; + grad_phi[2] = Pi*Pj*dPk; +} + +/* + * Return evaluation of local function approximation, which is formed by the + * sum of the products of tensor-product Legendre basis functions and the + * provided coefficients, at specified coordinates + * + * Parameters + * ---------- + * c : coefficients + * X : coordinates (in reference space) + * + * Note + * ---- + * The intermediate coefficients or workspace array is divided into 2 lines of + * length equal to the number of vertices. In the dimension-by-dimension + * approach used for the approximation evaluation, each line is filled by + * evaluations at the previous dimensional level. The initial coefficients are + * cycled through in the X-dimension loop to produce coefficients, stored in + * the workspace array, to be used in the Y-dimension loop. The output from the + * Y-dimension loop is coefficients, stored again in the workspace array, to be + * used in the final Z-dimension evaluation. + */ +template +NumType LegendreProductBasis::eval_approx(const NumType *c, + const NumType *X) { + for (int k = 0; k < N; k++) { + for (int j = 0; j < N; j++) { + // Collapse first dimension into coefficients for second dimension + C[j] = legendre::eval_approx(N, &c[j*N+k*N*N], X[0]); + } + + // Collapse second dimension into coefficients for third dimension + C[N+k] = legendre::eval_approx(N, C, X[1]); + } + + // Collapse third dimension into approximation evaluation + return legendre::eval_approx(N, &C[N], X[2]); +} + +/* + * Evaluate gradient of the local function approximation, which is formed by + * the sum of the products of tensor-product Legendre basis functions and the + * provided coefficients, at specified coordinates + * + * Parameters + * ---------- + * c : coefficients + * X : coordinates (in reference space) + * + * Returns + * ------- + * grad_f : gradient of the approximation + */ +template +void LegendreProductBasis::eval_grad_approx(const NumType *c, + const NumType *X, NumType *grad_f) { + SizeType q = 1; // order of derivative + for (int l = 0; l < Nd; l++) { + for (int k = 0; k < N; k++) { + for (int j = 0; j < N; j++) { + // Collapse first dimension into coefficients for second dimension + if (l == 0) { + C[j] = legendre::eval_der_approx(N, 1, &c[j*N+k*N*N], X[0]); + } else { + C[j] = legendre::eval_approx(N, &c[j*N+k*N*N], X[0]); + } + } + + // Collapse second dimension into coefficients for third dimension + if (l == 1) { + C[N+k] = legendre::eval_der_approx(N, q, C, X[1]); + } else { + C[N+k] = legendre::eval_approx(N, C, X[1]); + } + } + + // Collapse third dimension into approximation evaluation + if (l == 2) { + grad_f[l] = legendre::eval_der_approx(N, q, &C[N], X[2]); + } else { + grad_f[l] = legendre::eval_approx(N, &C[N], X[2]); + } + } +} + +// Explicit instantiation of template class +template class LegendreProductBasis; +template class LegendreProductBasis; diff --git a/elements-kokkos/src/TensorProductBasis.cpp b/elements-kokkos/src/TensorProductBasis.cpp new file mode 100644 index 00000000..272cce2a --- /dev/null +++ b/elements-kokkos/src/TensorProductBasis.cpp @@ -0,0 +1,36 @@ +#include "TensorProductBasis.h" + +#include "error.h" + + +template +TensorProductBasis::TensorProductBasis(const SizeType order) : Np(order) {} + +template +void TensorProductBasis::eval_jac(const NumType *, const NumType *, + const NumType *, const NumType *, NumType *) { + std::string err_msg( + "Error: Jacobian routines not implemented for this basis type."); + throw NotImplementedError(err_msg); +}; + +template +NumType TensorProductBasis::eval_det_jac(const NumType *, + const NumType *, const NumType *, const NumType *) { + std::string err_msg( + "Error: Jacobian routines not implemented for this basis type."); + throw NotImplementedError(err_msg); + return 0.0; +} + +template +void TensorProductBasis::eval_inv_jac(const NumType *, + const NumType *, const NumType *, const NumType *, NumType *) { + std::string err_msg( + "Error: Jacobian routines not implemented for this basis type."); + throw NotImplementedError(err_msg); +} + +// Explicit instantiation of template class +template class TensorProductBasis; +template class TensorProductBasis; diff --git a/elements-kokkos/src/bernstein_polynomials.cpp b/elements-kokkos/src/bernstein_polynomials.cpp new file mode 100644 index 00000000..4df9858f --- /dev/null +++ b/elements-kokkos/src/bernstein_polynomials.cpp @@ -0,0 +1,55 @@ +#include "bernstein_polynomials.h" + + +namespace bernstein { + template + NumType eval( const int n, const int v, const NumType X) { + if ( n == 0 && v != 0 ) return 0; + if ( n == 0 && v == 0 ) return 1; + if ( n < v ) return 0; + return 0.5*((1.0 - X)*eval(n-1, v, X) + (1.0 + X)*eval(n-1, v-1, X)); + }; + template + NumType eval_der( const int n, const int v, const NumType X) { + if ( n == 0) return 0; + if ( n < v) return 0; + if ( n == 1 && v == 0) return -0.5; + if ( n == 1 && v == 1) return 0.5; + return 0.5*((1.0 - X)*eval_der(n-1,v, X) + (1.0 + X)*eval_der(n-1, v-1, X) + + eval(n-1, v-1, X) - eval(n-1, v, X)); + }; + + template + NumType eval_approx(const SizeType N, const NumType *c, const NumType X) { + NumType sum = 0.0; + for (SizeType j = 0; j < N; j++) { + sum += c[j]*eval(N, j, X); + } + return sum; + } + + template + NumType eval_der_approx(const SizeType N, const NumType *c, const NumType X) { + NumType sum = 0.0; + for (SizeType j = 0; j < N; j ++) { + sum += c[j]*eval_der( N, j, X); + } + return sum; + }; + + + // Explicit instantiations of template functions + template Real eval(const int n, const int v, const Real X); + template Complex eval(const int n, const int v, const Complex X); + + template Real eval_der(const int n, const int v, const Real X); + template Complex eval_der(const int n, const int v, const Complex X); + + template Real eval_approx(const SizeType N, const Real *c, const Real X); + template Complex eval_approx(const SizeType N, const Complex *c, + const Complex X); + + template Real eval_der_approx(const SizeType N, const Real *c, const Real X); + template Complex eval_der_approx(const SizeType N, const Complex *c, + const Complex X); +} diff --git a/elements-kokkos/src/jacobi_polynomials.cpp b/elements-kokkos/src/jacobi_polynomials.cpp new file mode 100644 index 00000000..98e4df68 --- /dev/null +++ b/elements-kokkos/src/jacobi_polynomials.cpp @@ -0,0 +1,33 @@ +#include "jacobi_polynomials.h" + +namespace jacobi { + template + NumType eval(const int n, const Real alpha, const Real beta, + const NumType X) { + if (n == -1) return 0.0; + if (n == 0) return 1.0; + return (a(alpha, beta, n)*X + b(alpha, beta, n)) + *eval(n - 1, alpha, beta, X) + - c(alpha, beta, n)*eval(n - 2, alpha, beta, X); + } + + template + NumType eval_der(const int n, const int k, const Real alpha, + const Real beta, const NumType X) { + if (n == 0 || k > n) return 0.0; + Real theta = Real(n) + Real(alpha + alpha) + 1.0; + return std::pow(0.5, k)*std::tgamma(theta + Real(k)) + *eval(n - k, alpha + Real(k), beta + Real(k), X)/std::tgamma(theta); + } + + // Explicit instantiations of template functions + template Real eval(const int n, const Real alpha, const Real beta, + const Real X); + template Complex eval(const int n, const Real alpha, const Real beta, + const Complex X); + + template Real eval_der(const int n, const int k, const Real alpha, + const Real beta, const Real X); + template Complex eval_der(const int n, const int k, const Real alpha, + const Real beta, const Complex X); +} diff --git a/elements-kokkos/src/lagrange_polynomials.cpp b/elements-kokkos/src/lagrange_polynomials.cpp new file mode 100644 index 00000000..28ccb511 --- /dev/null +++ b/elements-kokkos/src/lagrange_polynomials.cpp @@ -0,0 +1,295 @@ +#include "lagrange_polynomials.h" + +namespace lagrange { + /* + * If the input coordinate is coincident with any of the input vertices, + * return the index of the vertex with which it coincides + * + * It is typically bad practice to check if two floating point numbers, say x + * and y, are equal by evaluating x - y == 0. But that is precisely what the + * authors suggest to do in "Barycentric Lagrange Interpolation" (Berrut and + * Trefethen, 2004). It is also what the authors of Nektar++ do in their + * implementation of barycentric Lagrange interpolation. I'm not sure what to + * do + * + * Parameters + * ---------- + * N : number of vertices + * z : vertex coordinates + * x : coordinate to test + */ + template + SizeType find_coincident_vertex(const SizeType &N, const NumType *z, + const NumType &x) { + // The largest number an unsigned integer can be + SizeType k = -1; + + // Adding to -1 in unsigned rolls over zero + for (SizeType j = 0; j < N; j++) { + //k += (j + 1)*SizeType(z[j] - x == 0.0); + k += (j + 1)*SizeType(common::almost_equal(z[j], x)); + } + + return k; + } + + /* + * Calculation of barycentric weights of Lagrange interpolant vertices + * + * Parameters + * ---------- + * N : number of vertices + * z : vertex coordinates + * + * Returns + * ------- + * w : barycentric vertex weights + * + * Notes + * ----- + * If one evaluates the interpolant using the barycentric formula of the + * second kind, the barycentric weights may be scaled by an arbitrary scalar + * (the maximum weight, for example). Since I am using the barycentric + * formula of the first kind (also called the modified Lagrange formula), I + * cannot and do not scale the weights + */ + template + void compute_barycentric_weights(const SizeType &N, const NumType *z, + NumType *w) { + // Compute weights + for (SizeType j = 0; j < N; j++) { + w[j] = 1.0; + for (SizeType k = 0; k < N; k++) { + // TODO optimization: make branchless? Trefethen uses log o exp trick + if (j != k) w[j] *= 1.0/(z[j] - z[k]); + } + } + }; + + /* + * Return evaluation of Lagrange polynomial associated with a given vertex at + * a given coordinate + * + * Parameters + * ---------- + * Nv : number of vertices + * i : index of vertex + * ic : index of vertex coincident with X (-1 if not coincident) + * Z : vertex coordinates + * w : barycentric weights + * X : coordinate at which to evaluate + */ + template + NumType eval(const SizeType Nv, const SizeType i, const SizeType ic, + const NumType *Z, const NumType *w, const NumType X) { + if (ic < Nv) return i == ic ? 1.0 : 0.0; + + // Evaluate nodal polynomial + NumType L = 1.0; + for (SizeType j = 0; j < Nv; j++) L *= (X - Z[j]); + + return L*w[i]/(X - Z[i]); + } + + /* + * Return evaluation of derivative of Lagrange polynomial associated with a + * given vertex at a given coordinate + * + * Currently implemented as a special case of interpolant derivative + * evaluation. There is probably a better way to do this, but I haven't + * gotten around to finding it and implementing it + * + * Parameters + * ---------- + * Nv : number of vertices + * n : order of derivative + * i : index of vertex + * ic : index of vertex coincident with X (-1 if not coincident) + * Z : vertex coordinates + * w : barycentric weights + * X : coordinate at which to evaluate + * c : work array for intermediate coefficients + */ + template + NumType eval_der(const SizeType Nv, const SizeType n, const SizeType i, + const SizeType ic, const NumType *Z, const NumType *w, + const NumType X, NumType *c) { + // Initialize coefficients of interpolant to Kronecker deltas to create + // interpolant equivalent to Lagrange polynomial at vertex + for (SizeType j = 0; j < Nv; j++) { + if (j != i) { c[j] = 0.0; } else { c[j] = 1.0; } + } + + NumType dnl = eval_der_interp(Nv, n, ic, Z, w, X, c); + + return dnl; + } + + /* + * Return evaluation of Lagrange interpolant, which is the sum of the + * products of Lagrange polynomials and provided coefficients, at specified + * coordinates + * + * This implementation uses the modified Lagrange formula (barycentric + * formula of the first kind) from "Barycentric Lagrange Interpolation" + * (Berrut and Trefethen, 2004). + * + * Parameters + * ---------- + * Nv : number of vertices + * i : index of vertex coincident with coordinate (-1 if not coincident) + * Z : vertex coordinates + * w : barycentric vertex weights + * X : coordinate at which to evaluate interpolant + * c : input coefficients (function values at vertices) + */ + template + NumType eval_interp(const SizeType Nv, const SizeType i, const NumType *Z, + const NumType *w, const NumType X, const NumType *c) { + NumType p; + NumType L = 1.0; // nodal polynomial evaluation + + // Evaluate the interpolant + if (i < Nv) { // coincident + return c[i]; + } else { // non-coincident + // Loop over vertices + p = 0.0; + for (SizeType j = 0; j < Nv; j++) { + // Contribution to interpolant evaluation + p += w[j]*c[j]/(X - Z[j]); + + // Contribution to scalar (nodal polynomial evaluation) + L *= (X - Z[j]); + } + + // Apply scaling + p *= L; + } + + return p; + } + + /* + * Return evaluation of derivative of Lagrange interpolant, which is the sum + * of the products of the derivatives of Lagrange polynomials and provided + * coefficients, at specified coordinates + * + * This implementation is based on the formula for the derivative of a + * rational interpolant given in "Some New Aspects of Rational Interpolation" + * (Schneider and Werner, 1986). + * + * Parameters + * ---------- + * Nv : number of vertices + * n : order of derivative + * i : index of vertex coincident with coordinate (-1 if not coincident) + * Z : vertex coordinates + * w : barycentric vertex weights + * X : coordinate at which to eval interpolant + * c : input coefficients (will be modified) + */ + template + NumType eval_der_interp(const SizeType Nv, const SizeType n, const SizeType i, + const NumType *Z, const NumType *w, const NumType X, NumType *c) { + // Interpolant and nodal polynomial evaluation + NumType p = 0.0; // interpolant evaluation + NumType L = 1.0; // nodal polynomial + if (i < Nv) { // coincident + p = c[i]; + } else { // non-coincident + // Loop over vertices + p = 0.0; + for (SizeType j = 0; j < Nv; j++) { + // Contribution to interpolant evaluation + p += w[j]*c[j]/(X - Z[j]); + + // Contribution to scalar (nodal polynomial evaluation) + L *= (X - Z[j]); + } + + // Apply scaling + p *= L; + } + + // Evaluate derivatives, building up to specified order + NumType dkp = p, dnp = 0.0; + NumType M = 1.0; // factorial(k) + if (i < Nv) { // coincident + for (SizeType k = 1; k <= n; k++) { + // Zero the output + dnp = 0.0; + + for (SizeType j = 0; j < Nv; j++) { + // Calculate divided difference and store in intermediate coefficients + NumType sx = 1.0/(Z[i] - Z[j]); + c[j] = sx*(dkp - c[j]); + + // Update output coefficient with contribution from vertex + // TODO optimization: make branchless? Trefethen uses log o exp trick + if (j != i) dnp += w[j]*c[j]; + } + + // Scale the output and copy for use in calculating next order + M *= k; + dnp *= -M/w[i]; + dkp = dnp; + } + } else { // non-coincident + for (SizeType k = 1; k <= n; k++) { + // Zero the output + dnp = 0.0; + + for (SizeType j = 0; j < Nv; j++) { + // Calculate divided difference and store in intermediate coefficients + NumType sx = 1.0/(X - Z[j]); + c[j] = sx*(dkp - c[j]); + + // Update output coefficient with contribution from vertex + dnp += w[j]*c[j]/(X - Z[j]); + } + + // Scale the output and copy for use in calculating next order + M *= k; + dnp *= L*M; + dkp = dnp; + } + } + + return dnp; + } + + // Explicit instantiations of template functions + template SizeType find_coincident_vertex(const SizeType &N, const Real *z, + const Real &x); + template SizeType find_coincident_vertex(const SizeType &N, const Complex *z, + const Complex &x); + + template void compute_barycentric_weights(const SizeType &N, const Real *z, + Real *w); + template void compute_barycentric_weights(const SizeType &N, const Complex *z, + Complex *w); + + template Real eval(const SizeType Nv, const SizeType i, const SizeType ic, + const Real *Z, const Real *w, const Real X); + template Complex eval(const SizeType Nv, const SizeType i, const SizeType ic, + const Complex *Z, const Complex *w, const Complex X); + + template Real eval_der(const SizeType Nv, const SizeType n, const SizeType i, + const SizeType ic, const Real *Z, const Real *w, const Real X, Real *c); + template Complex eval_der(const SizeType Nv, const SizeType n, + const SizeType i, const SizeType ic, const Complex *Z, const Complex *w, + const Complex X, Complex *c); + + template Real eval_interp(const SizeType Nv, const SizeType i, const Real *Z, + const Real *w, const Real X, const Real *c); + template Complex eval_interp(const SizeType Nv, const SizeType i, + const Complex *Z, const Complex *w, const Complex X, const Complex *c); + + template Real eval_der_interp(const SizeType Nv, const SizeType n, + const SizeType i, const Real *Z, const Real *w, const Real X, + Real *c); + template Complex eval_der_interp(const SizeType Nv, const SizeType n, + const SizeType i, const Complex *Z, const Complex *w, const Complex X, + Complex *c); +} diff --git a/elements-kokkos/src/legendre_polynomials.cpp b/elements-kokkos/src/legendre_polynomials.cpp new file mode 100644 index 00000000..3df8b496 --- /dev/null +++ b/elements-kokkos/src/legendre_polynomials.cpp @@ -0,0 +1,95 @@ +#include "legendre_polynomials.h" +#include "jacobi_polynomials.h" + +namespace legendre { + /* + * Return evaluation of Legendre polynomial of specified order at given + * coordinate + * + * Parameters + * ---------- + * n : order of Legendre polynomial + * X : coordinate in reference space [-1, 1] + */ + template + NumType eval(const int n, const NumType X) { + if (n == -1) return 0.0; + if (n == 0) return 1.0; + return (1.0/double(n))*((2.0*double(n) - 1.0)*X*eval(n - 1, X) + - (double(n) - 1.0)*eval(n - 2, X)); + }; + + /* + * Return evaluation of Legendre polynomial of specified order at given + * coordinate + * + * Parameters + * ---------- + * n : order of Legendre polynomial + * k : order of derivative + * X : coordinate in reference space [-1, 1] + */ + template + NumType eval_der(const int n, const int k, const NumType X) { + return jacobi::eval_der(n, k, 0.0, 0.0, X); + }; + + /* + * Return evaluation of Legendre polynomial approximation, which is the sum + * of the products of Legendre polynomials and provided coefficients, at a + * specified coordinate + * + * Parameters + * ---------- + * N : maximum polynomial order + * c : coefficients + * X : coordinate in reference space [-1, 1] + */ + template + NumType eval_approx(const SizeType N, const NumType *c, const NumType X) { + NumType sum = 0.0; + for (SizeType j = 0; j < N; j++) { + sum += c[j]*eval(j, X); + } + + return sum; + } + + /* + * Return evaluation of derivative of Legendre polynomial approximation, + * which is the sum of the products of the derivatives of the Legendre + * polynomials and provided coefficients, at a specified coordinate + * + * Parameters + * ---------- + * N : maximum polynomial order + * c : coefficients + * X : coordinate in reference space [-1, 1] + */ + template + NumType eval_der_approx(const SizeType N, const SizeType k, const NumType *c, + const NumType X) { + NumType sum = 0.0; + for (SizeType j = 0; j < N; j++) { + sum += c[j]*eval_der(j, k, X); + } + + return sum; + } + + // Explicit instatiations of template functions + template Real eval(const int n, const Real X); + template Complex eval(const int n, const Complex X); + + template Real eval_der(const int n, const int k, const Real X); + template Complex eval_der(const int n, const int k, const Complex X); + + template Real eval_approx(const SizeType N, const Real *c, const Real X); + template Complex eval_approx(const SizeType N, const Complex *c, + const Complex X); + + template Real eval_der_approx(const SizeType N, const SizeType k, + const Real *c, const Real X); + template Complex eval_der_approx(const SizeType N, const SizeType k, + const Complex *c, const Complex X); +} diff --git a/elements-kokkos/src/points_and_weights.cpp b/elements-kokkos/src/points_and_weights.cpp new file mode 100644 index 00000000..aa625bcd --- /dev/null +++ b/elements-kokkos/src/points_and_weights.cpp @@ -0,0 +1,1104 @@ +#include "points_and_weights.h" +#include "error.h" + + +/* + * Fill an array with points equally spaced between end points (inclusive) + * + * Parameters + * ---------- + * N : number of points + * Zl : left end point + * Zr : right end point + * + * Returns + * ------- + * Z : array of equispaced points + */ +template +void equispaced_points(SizeType N, NumType &Zl, NumType &Zr, NumType *Z) { + for (SizeType i = 0; i < N; i++) + Z[i] = Zl + double(i)/double(N - 1)*(Zr - Zl); +} + + +/* + * Fill an array with Chebyshev points of the second kind in the interval + * defined by the specified end points. The symmetry-preserving technique from + * Chebfun (chebtech1/chebpts.m) is used. + * + * Parameters + * ---------- + * N : number of points + * Zl : left end point + * Zr : right end point + * + * Returns + * ------- + * Z : array of Chebyshev points + */ +template +void chebyshev_points(SizeType N, NumType &Zl, NumType &Zr, NumType *Z) { + // Evaluate the points using sine function to preserve symmetry + NumType f = 0.5*M_PI/double(N); + for (SizeType i = 0; i < N; i++) { + int j = (-1*int(N) + 1) + 2*int(i); + Z[i] = sin(f*double(j)); + } + + // Scale the points to fit the domain + for (int i = 0; i < N; i++) + Z[i] = 0.5*(1.0 - Z[i])*Zl + 0.5*(1.0 + Z[i])*Zr; +} + + +// Explicit instantations of template functions +template void equispaced_points(SizeType N, Real &Zl, Real &Zr, Real *Z); +template void equispaced_points(SizeType N, Complex &Zl, Complex &Zr, Complex *Z); + +template void chebyshev_points(SizeType N, Real &Zl, Real &Zr, Real *Z); +template void chebyshev_points(SizeType N, Complex &Zl, Complex &Zr, Complex *Z); + + +/* + * Fill an array with Gauss-Lobatto quadrature points (endpoints are -1 and 1) + * + * Parameters + * ---------- + * lob_points : MATAR CArray to fill with points + * num : number of points in quadrature rule + * + * Returns + * ------- + * lob_points : MATAR CArray to fill with points + */ + +void lobatto_points(MatarRealCArray &lob_points, const int &num) { + switch (num) { + case 1: + lob_points(0) = 0.0; + break; + case 2: + lob_points(0) = -1.0; + lob_points(1) = 1.0; + break; + case 3: + lob_points(0) = -1.0; + lob_points(1) = 0.0; + lob_points(2) = 1.0; + break; + case 4: + lob_points(0) = -1.0; + lob_points(1) = -1.0/5.0*sqrt(5.0); + lob_points(2) = 1.0/5.0*sqrt(5.0); + lob_points(3) = 1.0; + break; + case 5: + lob_points(0) = -1.0; + lob_points(1) = -1.0/7.0*sqrt(21.0); + lob_points(2) = 0.0; + lob_points(3) = 1.0/7.0*sqrt(21.0); + lob_points(4) = 1.0; + break; + case 6: + lob_points(0) = -1.0; + lob_points(1) = -sqrt(1.0/21.0*(7.0 + 2.0*sqrt(7.0))); + lob_points(2) = -sqrt(1.0/21.0*(7.0 - 2.0*sqrt(7.0))); + lob_points(3) = sqrt(1.0/21.0*(7.0 - 2.0*sqrt(7.0))); + lob_points(4) = sqrt(1.0/21.0*(7.0 +2.0*sqrt(7.0))); + lob_points(5) = 1.0; + break; + case 7: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.830223896278566929872032213967E+00; + lob_points(2) = - 0.468848793470714213803771881909E+00; + lob_points(3) = 0.0E+00; + lob_points(4) = 0.468848793470714213803771881909E+00; + lob_points(5) = 0.830223896278566929872032213967E+00; + lob_points(6) = 1.0E+00; + break; + case 8: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.871740148509606615337445761221E+00; + lob_points(2) = - 0.591700181433142302144510731398E+00; + lob_points(3) = - 0.209299217902478868768657260345E+00; + lob_points(4) = 0.209299217902478868768657260345E+00; + lob_points(5) = 0.591700181433142302144510731398E+00; + lob_points(6) = 0.871740148509606615337445761221E+00; + lob_points(7) = 1.0E+00; + break; + case 9: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.899757995411460157312345244418E+00; + lob_points(2) = - 0.677186279510737753445885427091E+00; + lob_points(3) = - 0.363117463826178158710752068709E+00; + lob_points(4) = 0.0E+00; + lob_points(5) = 0.363117463826178158710752068709E+00; + lob_points(6) = 0.677186279510737753445885427091E+00; + lob_points(7) = 0.899757995411460157312345244418E+00; + lob_points(8) = 1.0E+00; + break; + case 10: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.919533908166458813828932660822E+00; + lob_points(2) = - 0.738773865105505075003106174860E+00; + lob_points(3) = - 0.477924949810444495661175092731E+00; + lob_points(4) = - 0.165278957666387024626219765958E+00; + lob_points(5) = 0.165278957666387024626219765958E+00; + lob_points(6) = 0.477924949810444495661175092731E+00; + lob_points(7) = 0.738773865105505075003106174860E+00; + lob_points(8) = 0.919533908166458813828932660822E+00; + lob_points(9) = 1.0E+00; + break; + case 11: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.934001430408059134332274136099E+00; + lob_points(2) = - 0.784483473663144418622417816108E+00; + lob_points(3) = - 0.565235326996205006470963969478E+00; + lob_points(4) = - 0.295758135586939391431911515559E+00; + lob_points(5) = 0.0E+00; + lob_points(6) = 0.295758135586939391431911515559E+00; + lob_points(7) = 0.565235326996205006470963969478E+00; + lob_points(8) = 0.784483473663144418622417816108E+00; + lob_points(9) = 0.934001430408059134332274136099E+00; + lob_points(10) = 1.0E+00; + break; + case 12: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.944899272222882223407580138303E+00; + lob_points(2) = - 0.819279321644006678348641581717E+00; + lob_points(3) = - 0.632876153031869677662404854444E+00; + lob_points(4) = - 0.399530940965348932264349791567E+00; + lob_points(5) = - 0.136552932854927554864061855740E+00; + lob_points(6) = 0.136552932854927554864061855740E+00; + lob_points(7) = 0.399530940965348932264349791567E+00; + lob_points(8) = 0.632876153031869677662404854444E+00; + lob_points(9) = 0.819279321644006678348641581717E+00; + lob_points(10) = 0.944899272222882223407580138303E+00; + lob_points(11) = 1.0E+00; + break; + case 13: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.953309846642163911896905464755E+00; + lob_points(2) = - 0.846347564651872316865925607099E+00; + lob_points(3) = - 0.686188469081757426072759039566E+00; + lob_points(4) = - 0.482909821091336201746937233637E+00; + lob_points(5) = - 0.249286930106239992568673700374E+00; + lob_points(6) = 0.0E+00; + lob_points(7) = 0.249286930106239992568673700374E+00; + lob_points(8) = 0.482909821091336201746937233637E+00; + lob_points(9) = 0.686188469081757426072759039566E+00; + lob_points(10) = 0.846347564651872316865925607099E+00; + lob_points(11) = 0.953309846642163911896905464755E+00; + lob_points(12) = 1.0E+00; + break; + case 14: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.959935045267260901355100162015E+00; + lob_points(2) = - 0.867801053830347251000220202908E+00; + lob_points(3) = - 0.728868599091326140584672400521E+00; + lob_points(4) = - 0.550639402928647055316622705859E+00; + lob_points(5) = - 0.342724013342712845043903403642E+00; + lob_points(6) = - 0.116331868883703867658776709736E+00; + lob_points(7) = 0.116331868883703867658776709736E+00; + lob_points(8) = 0.342724013342712845043903403642E+00; + lob_points(9) = 0.550639402928647055316622705859E+00; + lob_points(10) = 0.728868599091326140584672400521E+00; + lob_points(11) = 0.867801053830347251000220202908E+00; + lob_points(12) = 0.959935045267260901355100162015E+00; + lob_points(13) = 1.0E+00; + break; + case 15: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.965245926503838572795851392070E+00; + lob_points(2) = - 0.885082044222976298825401631482E+00; + lob_points(3) = - 0.763519689951815200704118475976E+00; + lob_points(4) = - 0.606253205469845711123529938637E+00; + lob_points(5) = - 0.420638054713672480921896938739E+00; + lob_points(6) = - 0.215353955363794238225679446273E+00; + lob_points(7) = 0.0E+00; + lob_points(8) = 0.215353955363794238225679446273E+00; + lob_points(9) = 0.420638054713672480921896938739E+00; + lob_points(10) = 0.606253205469845711123529938637E+00; + lob_points(11) = 0.763519689951815200704118475976E+00; + lob_points(12) = 0.885082044222976298825401631482E+00; + lob_points(13) = 0.965245926503838572795851392070E+00; + lob_points(14) = 1.0E+00; + break; + case 16: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.969568046270217932952242738367E+00; + lob_points(2) = - 0.899200533093472092994628261520E+00; + lob_points(3) = - 0.792008291861815063931088270963E+00; + lob_points(4) = - 0.652388702882493089467883219641E+00; + lob_points(5) = - 0.486059421887137611781890785847E+00; + lob_points(6) = - 0.299830468900763208098353454722E+00; + lob_points(7) = - 0.101326273521949447843033005046E+00; + lob_points(8) = 0.101326273521949447843033005046E+00; + lob_points(9) = 0.299830468900763208098353454722E+00; + lob_points(10) = 0.486059421887137611781890785847E+00; + lob_points(11) = 0.652388702882493089467883219641E+00; + lob_points(12) = 0.792008291861815063931088270963E+00; + lob_points(13) = 0.899200533093472092994628261520E+00; + lob_points(14) = 0.969568046270217932952242738367E+00; + lob_points(15) = 1.0E+00; + break; + case 17: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.973132176631418314156979501874E+00; + lob_points(2) = - 0.910879995915573595623802506398E+00; + lob_points(3) = - 0.815696251221770307106750553238E+00; + lob_points(4) = - 0.691028980627684705394919357372E+00; + lob_points(5) = - 0.541385399330101539123733407504E+00; + lob_points(6) = - 0.372174433565477041907234680735E+00; + lob_points(7) = - 0.189511973518317388304263014753E+00; + lob_points(8) = 0.0E+00; + lob_points(9) = 0.189511973518317388304263014753E+00; + lob_points(10) = 0.372174433565477041907234680735E+00; + lob_points(11) = 0.541385399330101539123733407504E+00; + lob_points(12) = 0.691028980627684705394919357372E+00; + lob_points(13) = 0.815696251221770307106750553238E+00; + lob_points(14) = 0.910879995915573595623802506398E+00; + lob_points(15) = 0.973132176631418314156979501874E+00; + lob_points(16) = 1.0E+00; + break; + case 18: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.976105557412198542864518924342E+00; + lob_points(2) = - 0.920649185347533873837854625431E+00; + lob_points(3) = - 0.835593535218090213713646362328E+00; + lob_points(4) = - 0.723679329283242681306210365302E+00; + lob_points(5) = - 0.588504834318661761173535893194E+00; + lob_points(6) = - 0.434415036912123975342287136741E+00; + lob_points(7) = - 0.266362652878280984167665332026E+00; + lob_points(8) = - 0.897490934846521110226450100886E-01; + lob_points(9) = 0.897490934846521110226450100886E-01; + lob_points(10) = 0.266362652878280984167665332026E+00; + lob_points(11) = 0.434415036912123975342287136741E+00; + lob_points(12) = 0.588504834318661761173535893194E+00; + lob_points(13) = 0.723679329283242681306210365302E+00; + lob_points(14) = 0.835593535218090213713646362328E+00; + lob_points(15) = 0.920649185347533873837854625431E+00; + lob_points(16) = 0.976105557412198542864518924342E+00; + lob_points(17) = 1.0E+00; + break; + case 19: + lob_points(0)= - 1.0E+00; + lob_points(1)= - 0.978611766222080095152634063110E+00; + lob_points(2)= - 0.928901528152586243717940258797E+00; + lob_points(3)= - 0.852460577796646093085955970041E+00; + lob_points(4)= - 0.751494202552613014163637489634E+00; + lob_points(5)= - 0.628908137265220497766832306229E+00; + lob_points(6)= - 0.488229285680713502777909637625E+00; + lob_points(7)= - 0.333504847824498610298500103845E+00; + lob_points(8)= - 0.169186023409281571375154153445E+00; + lob_points(9)= 0.0E+00; + lob_points(10) = 0.169186023409281571375154153445E+00; + lob_points(11) = 0.333504847824498610298500103845E+00; + lob_points(12) = 0.488229285680713502777909637625E+00; + lob_points(13) = 0.628908137265220497766832306229E+00; + lob_points(14) = 0.751494202552613014163637489634E+00; + lob_points(15) = 0.852460577796646093085955970041E+00; + lob_points(16) = 0.928901528152586243717940258797E+00; + lob_points(17) = 0.978611766222080095152634063110E+00; + lob_points(18) = 1.0E+00; + break; + case 20: + lob_points(0) = - 1.0E+00; + lob_points(1) = - 0.980743704893914171925446438584E+00; + lob_points(2) = - 0.935934498812665435716181584931E+00; + lob_points(3) = - 0.866877978089950141309847214616E+00; + lob_points(4) = - 0.775368260952055870414317527595E+00; + lob_points(5) = - 0.663776402290311289846403322971E+00; + lob_points(6) = - 0.534992864031886261648135961829E+00; + lob_points(7) = - 0.392353183713909299386474703816E+00; + lob_points(8) = - 0.239551705922986495182401356927E+00; + lob_points(9) = - 0.805459372388218379759445181596E-01; + lob_points(10) = 0.805459372388218379759445181596E-01; + lob_points(11) = 0.239551705922986495182401356927E+00; + lob_points(12) = 0.392353183713909299386474703816E+00; + lob_points(13) = 0.534992864031886261648135961829E+00; + lob_points(14) = 0.663776402290311289846403322971E+00; + lob_points(15) = 0.775368260952055870414317527595E+00; + lob_points(16) = 0.866877978089950141309847214616E+00; + lob_points(17) = 0.935934498812665435716181584931E+00; + lob_points(18) = 0.980743704893914171925446438584E+00; + lob_points(19) = 1.0E+00; + break; + default: + std::ostringstream ss; + ss << "Error: " << num << "-point quadrature rule not implemented."; + throw NotImplementedError(ss.str()); + } +} + + +/* + * Fill an array with Gauss-Lobatto quadrature weights + * + * Parameters + * ---------- + * lob_weights : MATAR CArray to fill with weights + * num : number of points in quadrature rule + * + * Returns + * ------- + * lob_weights : MATAR CArray to fill with weights + */ +void lobatto_weights(MatarRealCArray &lob_weights, const int &num) { + switch (num) { + case 1: + lob_weights(0) = 2.0; + break; + case 2: + lob_weights(0) = 1.0; + lob_weights(1) = 1.0; + break; + case 3: + lob_weights(0) = 1.0/3.0; + lob_weights(1) = 4.0/3.0; + lob_weights(2) = 1.0/3.0; + break; + case 4: + lob_weights(0) = 1.0/6.0; + lob_weights(1) = 5.0/6.0; + lob_weights(2) = 5.0/6.0; + lob_weights(3) = 1.0/6.0; + break; + case 5: + lob_weights(0) = 1.0/10.0; + lob_weights(1) = 49.0/90.0; + lob_weights(2) = 32.0/45.0; + lob_weights(3) = 49.0/90.0; + lob_weights(4) = 1.0/10.0; + break; + case 6: + lob_weights(0) = 1.0/15.0; + lob_weights(1) = 1.0/30.0*(14.0 - sqrt(7.0)); + lob_weights(2) = 1.0/30.0*(14.0 + sqrt(7.0)); + lob_weights(3) = 1.0/30.0*(14.0 + sqrt(7.0)); + lob_weights(4) = 1.0/30.0*(14.0 - sqrt(7.0)); + lob_weights(5) = 1.0/15.0; + break; + case 7: + lob_weights(0) = 0.476190476190476190476190476190E-01; + lob_weights(1) = 0.276826047361565948010700406290E+00; + lob_weights(2) = 0.431745381209862623417871022281E+00; + lob_weights(3) = 0.487619047619047619047619047619E+00; + lob_weights(4) = 0.431745381209862623417871022281E+00; + lob_weights(5) = 0.276826047361565948010700406290E+00; + lob_weights(6) = 0.476190476190476190476190476190E-01; + break; + case 8: + lob_weights(0) = 0.357142857142857142857142857143E-01; + lob_weights(1) = 0.210704227143506039382991065776E+00; + lob_weights(2) = 0.341122692483504364764240677108E+00; + lob_weights(3) = 0.412458794658703881567052971402E+00; + lob_weights(4) = 0.412458794658703881567052971402E+00; + lob_weights(5) = 0.341122692483504364764240677108E+00; + lob_weights(6) = 0.210704227143506039382991065776E+00; + lob_weights(7) = 0.357142857142857142857142857143E-01; + break; + case 9: + lob_weights(0) = 0.277777777777777777777777777778E-01; + lob_weights(1) = 0.165495361560805525046339720029E+00; + lob_weights(2) = 0.274538712500161735280705618579E+00; + lob_weights(3) = 0.346428510973046345115131532140E+00; + lob_weights(4) = 0.371519274376417233560090702948E+00; + lob_weights(5) = 0.346428510973046345115131532140E+00; + lob_weights(6) = 0.274538712500161735280705618579E+00; + lob_weights(7) = 0.165495361560805525046339720029E+00; + lob_weights(8) = 0.277777777777777777777777777778E-01; + break; + case 10: + lob_weights(0) = 0.222222222222222222222222222222E-01; + lob_weights(1) = 0.133305990851070111126227170755E+00; + lob_weights(2) = 0.224889342063126452119457821731E+00; + lob_weights(3) = 0.292042683679683757875582257374E+00; + lob_weights(4) = 0.327539761183897456656510527917E+00; + lob_weights(5) = 0.327539761183897456656510527917E+00; + lob_weights(6) = 0.292042683679683757875582257374E+00; + lob_weights(7) = 0.224889342063126452119457821731E+00; + lob_weights(8) = 0.133305990851070111126227170755E+00; + lob_weights(9) = 0.222222222222222222222222222222E-01; + break; + case 11: + lob_weights(0) = 0.181818181818181818181818181818E-01; + lob_weights(1) = 0.109612273266994864461403449580E+00; + lob_weights(2) = 0.187169881780305204108141521899E+00; + lob_weights(3) = 0.248048104264028314040084866422E+00; + lob_weights(4) = 0.286879124779008088679222403332E+00; + lob_weights(5) = 0.300217595455690693785931881170E+00; + lob_weights(6) = 0.286879124779008088679222403332E+00; + lob_weights(7) = 0.248048104264028314040084866422E+00; + lob_weights(8) = 0.187169881780305204108141521899E+00; + lob_weights(9) = 0.109612273266994864461403449580E+00; + lob_weights(10)= 0.181818181818181818181818181818E-01; + break; + case 12: + lob_weights(0) = 0.151515151515151515151515151515E-01; + lob_weights(1) = 0.916845174131961306683425941341E-01; + lob_weights(2) = 0.157974705564370115164671062700E+00; + lob_weights(3) = 0.212508417761021145358302077367E+00; + lob_weights(4) = 0.251275603199201280293244412148E+00; + lob_weights(5) = 0.271405240910696177000288338500E+00; + lob_weights(6) = 0.271405240910696177000288338500E+00; + lob_weights(7) = 0.251275603199201280293244412148E+00; + lob_weights(8) = 0.212508417761021145358302077367E+00; + lob_weights(9) = 0.157974705564370115164671062700E+00; + lob_weights(10) = 0.916845174131961306683425941341E-01; + lob_weights(11) = 0.151515151515151515151515151515E-01; + break; + case 13: + lob_weights(0) = 0.128205128205128205128205128205E-01; + lob_weights(1) = 0.778016867468189277935889883331E-01; + lob_weights(2) = 0.134981926689608349119914762589E+00; + lob_weights(3) = 0.183646865203550092007494258747E+00; + lob_weights(4) = 0.220767793566110086085534008379E+00; + lob_weights(5) = 0.244015790306676356458578148360E+00; + lob_weights(6) = 0.251930849333446736044138641541E+00; + lob_weights(7) = 0.244015790306676356458578148360E+00; + lob_weights(8) = 0.220767793566110086085534008379E+00; + lob_weights(9) = 0.183646865203550092007494258747E+00; + lob_weights(10) = 0.134981926689608349119914762589E+00; + lob_weights(11) = 0.778016867468189277935889883331E-01; + lob_weights(12) = 0.128205128205128205128205128205E-01; + break; + case 14: + lob_weights(0) = 0.109890109890109890109890109890E-01; + lob_weights(1) = 0.668372844976812846340706607461E-01; + lob_weights(2) = 0.116586655898711651540996670655E+00; + lob_weights(3) = 0.160021851762952142412820997988E+00; + lob_weights(4) = 0.194826149373416118640331778376E+00; + lob_weights(5) = 0.219126253009770754871162523954E+00; + lob_weights(6) = 0.231612794468457058889628357293E+00; + lob_weights(7) = 0.231612794468457058889628357293E+00; + lob_weights(8) = 0.219126253009770754871162523954E+00; + lob_weights(9) = 0.194826149373416118640331778376E+00; + lob_weights(10) = 0.160021851762952142412820997988E+00; + lob_weights(11) = 0.116586655898711651540996670655E+00; + lob_weights(12) = 0.668372844976812846340706607461E-01; + lob_weights(13) = 0.109890109890109890109890109890E-01; + break; + case 15: + lob_weights(0) = 0.952380952380952380952380952381E-02; + lob_weights(1) = 0.580298930286012490968805840253E-01; + lob_weights(2) = 0.101660070325718067603666170789E+00; + lob_weights(3) = 0.140511699802428109460446805644E+00; + lob_weights(4) = 0.172789647253600949052077099408E+00; + lob_weights(5) = 0.196987235964613356092500346507E+00; + lob_weights(6) = 0.211973585926820920127430076977E+00; + lob_weights(7) = 0.217048116348815649514950214251E+00; + lob_weights(8) = 0.211973585926820920127430076977E+00; + lob_weights(9) = 0.196987235964613356092500346507E+00; + lob_weights(10) = 0.172789647253600949052077099408E+00; + lob_weights(11) = 0.140511699802428109460446805644E+00; + lob_weights(12) = 0.101660070325718067603666170789E+00; + lob_weights(13) = 0.580298930286012490968805840253E-01; + lob_weights(14) = 0.952380952380952380952380952381E-02; + case 16: + lob_weights(0) = 0.833333333333333333333333333333E-02; + lob_weights(1) = 0.508503610059199054032449195655E-01; + lob_weights(2) = 0.893936973259308009910520801661E-01; + lob_weights(3) = 0.124255382132514098349536332657E+00; + lob_weights(4) = 0.154026980807164280815644940485E+00; + lob_weights(5) = 0.177491913391704125301075669528E+00; + lob_weights(6) = 0.193690023825203584316913598854E+00; + lob_weights(7) = 0.201958308178229871489199125411E+00; + lob_weights(8) = 0.201958308178229871489199125411E+00; + lob_weights(9) = 0.193690023825203584316913598854E+00; + lob_weights(10) = 0.177491913391704125301075669528E+00; + lob_weights(11) = 0.154026980807164280815644940485E+00; + lob_weights(12) = 0.124255382132514098349536332657E+00; + lob_weights(13) = 0.893936973259308009910520801661E-01; + lob_weights(14) = 0.508503610059199054032449195655E-01; + lob_weights(15) = 0.833333333333333333333333333333E-02; + break; + case 17: + lob_weights(0) = 0.735294117647058823529411764706E-02; + lob_weights(1) = 0.449219405432542096474009546232E-01; + lob_weights(2) = 0.791982705036871191902644299528E-01; + lob_weights(3) = 0.110592909007028161375772705220E+00; + lob_weights(4) = 0.137987746201926559056201574954E+00; + lob_weights(5) = 0.160394661997621539516328365865E+00; + lob_weights(6) = 0.177004253515657870436945745363E+00; + lob_weights(7) = 0.187216339677619235892088482861E+00; + lob_weights(8) = 0.190661874753469433299407247028E+00; + lob_weights(9) = 0.187216339677619235892088482861E+00; + lob_weights(10) = 0.177004253515657870436945745363E+00; + lob_weights(11) = 0.160394661997621539516328365865E+00; + lob_weights(12) = 0.137987746201926559056201574954E+00; + lob_weights(13) = 0.110592909007028161375772705220E+00; + lob_weights(14) = 0.791982705036871191902644299528E-01; + lob_weights(15) = 0.449219405432542096474009546232E-01; + lob_weights(16) = 0.735294117647058823529411764706E-02; + break; + case 18: + lob_weights(0) = 0.653594771241830065359477124183E-02; + lob_weights(1) = 0.399706288109140661375991764101E-01; + lob_weights(2) = 0.706371668856336649992229601678E-01; + lob_weights(3) = 0.990162717175028023944236053187E-01; + lob_weights(4) = 0.124210533132967100263396358897E+00; + lob_weights(5) = 0.145411961573802267983003210494E+00; + lob_weights(6) = 0.161939517237602489264326706700E+00; + lob_weights(7) = 0.173262109489456226010614403827E+00; + lob_weights(8) = 0.179015863439703082293818806944E+00; + lob_weights(9) = 0.179015863439703082293818806944E+00; + lob_weights(10) = 0.173262109489456226010614403827E+00; + lob_weights(11) = 0.161939517237602489264326706700E+00; + lob_weights(12) = 0.145411961573802267983003210494E+00; + lob_weights(13) = 0.124210533132967100263396358897E+00; + lob_weights(14) = 0.990162717175028023944236053187E-01; + lob_weights(15) = 0.706371668856336649992229601678E-01; + lob_weights(16) = 0.399706288109140661375991764101E-01; + lob_weights(17) = 0.653594771241830065359477124183E-02; + break; + case 19: + lob_weights(0) = 0.584795321637426900584795321637E-02; + lob_weights(1) = 0.357933651861764771154255690351E-01; + lob_weights(2) = 0.633818917626297368516956904183E-01; + lob_weights(3) = 0.891317570992070844480087905562E-01; + lob_weights(4) = 0.112315341477305044070910015464E+00; + lob_weights(5) = 0.132267280448750776926046733910E+00; + lob_weights(6) = 0.148413942595938885009680643668E+00; + lob_weights(7) = 0.160290924044061241979910968184E+00; + lob_weights(8) = 0.167556584527142867270137277740E+00; + lob_weights(9) = 0.170001919284827234644672715617E+00; + lob_weights(10) = 0.167556584527142867270137277740E+00; + lob_weights(11) = 0.160290924044061241979910968184E+00; + lob_weights(12) = 0.148413942595938885009680643668E+00; + lob_weights(13) = 0.132267280448750776926046733910E+00; + lob_weights(14) = 0.112315341477305044070910015464E+00; + lob_weights(15) = 0.891317570992070844480087905562E-01; + lob_weights(16) = 0.633818917626297368516956904183E-01; + lob_weights(17) = 0.357933651861764771154255690351E-01; + lob_weights(18) = 0.584795321637426900584795321637E-02; + break; + case 20: + lob_weights(0) = 0.526315789473684210526315789474E-02; + lob_weights(1) = 0.322371231884889414916050281173E-01; + lob_weights(2) = 0.571818021275668260047536271732E-01; + lob_weights(3) = 0.806317639961196031447768461137E-01; + lob_weights(4) = 0.101991499699450815683781205733E+00; + lob_weights(5) = 0.120709227628674725099429705002E+00; + lob_weights(6) = 0.136300482358724184489780792989E+00; + lob_weights(7) = 0.148361554070916825814713013734E+00; + lob_weights(8) = 0.156580102647475487158169896794E+00; + lob_weights(9) = 0.160743286387845749007726726449E+00; + lob_weights(10) = 0.160743286387845749007726726449E+00; + lob_weights(11) = 0.156580102647475487158169896794E+00; + lob_weights(12) = 0.148361554070916825814713013734E+00; + lob_weights(13) = 0.136300482358724184489780792989E+00; + lob_weights(14) = 0.120709227628674725099429705002E+00; + lob_weights(15) = 0.101991499699450815683781205733E+00; + lob_weights(16) = 0.806317639961196031447768461137E-01; + lob_weights(17) = 0.571818021275668260047536271732E-01; + lob_weights(18) = 0.322371231884889414916050281173E-01; + lob_weights(19) = 0.526315789473684210526315789474E-02; + break; + default: + std::ostringstream ss; + ss << "Error: " << num << "-point quadrature rule not implemented."; + throw NotImplementedError(ss.str()); + } +} + + +/* + * Fill an array with Gauss-Legendre quadrature points (endpoints are -1 and 1) + * + * Parameters + * ---------- + * leg_points : MATAR CArray to fill with points + * num : number of points in quadrature rule + * + * Returns + * ------- + * leg_points : MATAR CArray to fill with points + */ +void legendre_points(MatarRealCArray &leg_points, const int &num) { + switch (num) { + case 1: + leg_points(0) = 0.0; + break; + case 2: + leg_points(0) = -0.577350269189625764509148780501; + leg_points(1) = 0.577350269189625764509148780501; + break; + case 3: + leg_points(0) = -0.774596669241483377035853079956; + leg_points(1) = 0.0; + leg_points(2) = 0.774596669241483377035853079956; + break; + case 4: + leg_points(0) = -0.861136311594052575223946488892; + leg_points(1) = -0.339981043584856264802665759103; + leg_points(2) = 0.339981043584856264802665759103; + leg_points(3) = 0.861136311594052575223946488892; + break; + case 5: + leg_points(0) = -0.906179845938663992797626878299; + leg_points(1) = -0.538469310105683091036314420700; + leg_points(2) = 0.0; + leg_points(3) = 0.538469310105683091036314420700; + leg_points(4) = 0.906179845938663992797626878299; + break; + case 6: + leg_points(0) = -0.932469514203152027812301554493; + leg_points(1) = -0.661209386466264513661399595019; + leg_points(2) = -0.238619186083196908630501721680; + leg_points(3) = 0.238619186083196908630501721680; + leg_points(4) = 0.661209386466264513661399595019; + leg_points(5) = 0.932469514203152027812301554493; + break; + case 7: + leg_points(0) = -0.949107912342758524526189684047; + leg_points(1) = -0.741531185599394439863864773280; + leg_points(2) = -0.405845151377397166906606412076; + leg_points(3) = 0.0E+00; + leg_points(4) = 0.405845151377397166906606412076; + leg_points(5) = 0.741531185599394439863864773280; + leg_points(6) = 0.949107912342758524526189684047; + break; + case 8: + leg_points(0) = -0.960289856497536231683560868569; + leg_points(1) = -0.796666477413626739591553936475; + leg_points(2) = -0.525532409916328985817739049189; + leg_points(3) = -0.183434642495649804939476142360; + leg_points(4) = 0.183434642495649804939476142360; + leg_points(5) = 0.525532409916328985817739049189; + leg_points(6) = 0.796666477413626739591553936475; + leg_points(7) = 0.960289856497536231683560868569; + break; + case 9: + leg_points(0) = -0.968160239507626089835576202903; + leg_points(1) = -0.836031107326635794299429788069; + leg_points(2) = -0.613371432700590397308702039341; + leg_points(3) = -0.324253423403808929038538014643; + leg_points(4) = 0.0E+00; + leg_points(5) = 0.324253423403808929038538014643; + leg_points(6) = 0.613371432700590397308702039341; + leg_points(7) = 0.836031107326635794299429788069; + leg_points(8) = 0.968160239507626089835576202903; + break; + case 10: + leg_points(0) = -0.9739065285171717200779640120844; + leg_points(1) = -0.8650633666889845107320966884234; + leg_points(2) = -0.6794095682990244062343273651148; + leg_points(3) = -0.4333953941292471907992659431657; + leg_points(4) = -0.1488743389816312108848260011297; + leg_points(5) = 0.1488743389816312108848260011297; + leg_points(6) = 0.4333953941292471907992659431657; + leg_points(7) = 0.6794095682990244062343273651148; + leg_points(8) = 0.8650633666889845107320966884234; + leg_points(9) = 0.9739065285171717200779640120844; + break; + case 11: + leg_points(0) = -0.9782286581460569928039380011228; + leg_points(1) = -0.8870625997680952990751577693039; + leg_points(2) = -0.7301520055740493240934162520311; + leg_points(3) = -0.5190961292068118159257256694586; + leg_points(4) = -0.2695431559523449723315319854008; + leg_points(5) = 0.0E+00; + leg_points(6) = 0.2695431559523449723315319854008; + leg_points(7) = 0.5190961292068118159257256694586; + leg_points(8) = 0.7301520055740493240934162520311; + leg_points(9) = 0.8870625997680952990751577693039; + leg_points(10) = 0.9782286581460569928039380011228; + break; + case 12: + leg_points(0) = -0.9815606342467192506905490901492; + leg_points(1) = -0.9041172563704748566784658661190; + leg_points(2) = -0.7699026741943046870368938332128; + leg_points(3) = -0.5873179542866174472967024189405; + leg_points(4) = -0.3678314989981801937526915366437; + leg_points(5) = -0.1252334085114689154724413694638; + leg_points(6) = 0.1252334085114689154724413694638; + leg_points(7) = 0.3678314989981801937526915366437; + leg_points(8) = 0.5873179542866174472967024189405; + leg_points(9) = 0.7699026741943046870368938332128; + leg_points(10) = 0.9041172563704748566784658661190; + leg_points(11) = 0.9815606342467192506905490901492; + break; + case 13: + leg_points(0) = -0.98418305471858814947282944880710; + leg_points(1) = -0.91759839922297796520654783650071; + leg_points(2) = -0.80157809073330991279420648958285; + leg_points(3) = -0.64234933944034022064398460699551; + leg_points(4) = -0.44849275103644685287791285212763; + leg_points(5) = -0.23045831595513479406552812109798; + leg_points(6) = 0.0E+00; + leg_points(7) = 0.23045831595513479406552812109798; + leg_points(8) = 0.44849275103644685287791285212763; + leg_points(9) = 0.64234933944034022064398460699551; + leg_points(10) = 0.80157809073330991279420648958285; + leg_points(11) = 0.91759839922297796520654783650071; + leg_points(12) = 0.98418305471858814947282944880710; + break; + case 14: + leg_points(0) = -0.986283808696812338841597266704052; + leg_points(1) = -0.928434883663573517336391139377874; + leg_points(2) = -0.827201315069764993189794742650394; + leg_points(3) = -0.687292904811685470148019803019334; + leg_points(4) = -0.515248636358154091965290718551188; + leg_points(5) = -0.319112368927889760435671824168475; + leg_points(6) = -0.108054948707343662066244650219834; + leg_points(7) = 0.108054948707343662066244650219834; + leg_points(8) = 0.319112368927889760435671824168475; + leg_points(9) = 0.515248636358154091965290718551188; + leg_points(10) = 0.687292904811685470148019803019334; + leg_points(11) = 0.827201315069764993189794742650394; + leg_points(12) = 0.928434883663573517336391139377874; + leg_points(13) = 0.986283808696812338841597266704052; + break; + case 15: + leg_points(0) = -0.987992518020485428489565718586612; + leg_points(1) = -0.937273392400705904307758947710209; + leg_points(2) = -0.848206583410427216200648320774216; + leg_points(3) = -0.724417731360170047416186054613938; + leg_points(4) = -0.570972172608538847537226737253910; + leg_points(5) = -0.394151347077563369897207370981045; + leg_points(6) = -0.201194093997434522300628303394596; + leg_points(7) = 0.0E+00; + leg_points(8) = 0.201194093997434522300628303394596; + leg_points(9) = 0.394151347077563369897207370981045; + leg_points(10) = 0.570972172608538847537226737253910; + leg_points(11) = 0.724417731360170047416186054613938; + leg_points(12) = 0.848206583410427216200648320774216; + leg_points(13) = 0.937273392400705904307758947710209; + leg_points(14) = 0.987992518020485428489565718586612; + break; + case 16: + leg_points(0) = -0.989400934991649932596154173450332; + leg_points(1) = -0.944575023073232576077988415534608; + leg_points(2) = -0.865631202387831743880467897712393; + leg_points(3) = -0.755404408355003033895101194847442; + leg_points(4) = -0.617876244402643748446671764048791; + leg_points(5) = -0.458016777657227386342419442983577; + leg_points(6) = -0.281603550779258913230460501460496; + leg_points(7) = -0.095012509837637440185319335424958; + leg_points(8) = 0.095012509837637440185319335424958; + leg_points(9) = 0.281603550779258913230460501460496; + leg_points(10) = 0.458016777657227386342419442983577; + leg_points(11) = 0.617876244402643748446671764048791; + leg_points(12) = 0.755404408355003033895101194847442; + leg_points(13) = 0.865631202387831743880467897712393; + leg_points(14) = 0.944575023073232576077988415534608; + leg_points(15) = 0.989400934991649932596154173450332; + break; + case 17: + leg_points(0) = -0.990575475314417335675434019940665; + leg_points(1) = -0.950675521768767761222716957895803; + leg_points(2) = -0.880239153726985902122955694488155; + leg_points(3) = -0.781514003896801406925230055520476; + leg_points(4) = -0.657671159216690765850302216643002; + leg_points(5) = -0.512690537086476967886246568629551; + leg_points(6) = -0.351231763453876315297185517095346; + leg_points(7) = -0.178484181495847855850677493654065; + leg_points(8) = 0.0E+00; + leg_points(9) = 0.178484181495847855850677493654065; + leg_points(10) = 0.351231763453876315297185517095346; + leg_points(11) = 0.512690537086476967886246568629551; + leg_points(12) = 0.657671159216690765850302216643002; + leg_points(13) = 0.781514003896801406925230055520476; + leg_points(14) = 0.880239153726985902122955694488155; + leg_points(15) = 0.950675521768767761222716957895803; + leg_points(16) = 0.990575475314417335675434019940665; + break; + case 18: + leg_points(0) = -0.991565168420930946730016004706150; + leg_points(1) = -0.955823949571397755181195892929776; + leg_points(2) = -0.892602466497555739206060591127145; + leg_points(3) = -0.803704958972523115682417455014590; + leg_points(4) = -0.691687043060353207874891081288848; + leg_points(5) = -0.559770831073947534607871548525329; + leg_points(6) = -0.411751161462842646035931793833051; + leg_points(7) = -0.251886225691505509588972854877911; + leg_points(8) = -0.084775013041735301242261852935783; + leg_points(9) = 0.084775013041735301242261852935783; + leg_points(10) = 0.251886225691505509588972854877911; + leg_points(11) = 0.411751161462842646035931793833051; + leg_points(12) = 0.559770831073947534607871548525329; + leg_points(13) = 0.691687043060353207874891081288848; + leg_points(14) = 0.803704958972523115682417455014590; + leg_points(15) = 0.892602466497555739206060591127145; + leg_points(16) = 0.955823949571397755181195892929776; + leg_points(17) = 0.991565168420930946730016004706150; + break; + case 19: + leg_points(0) = -0.992406843843584403189017670253260; + leg_points(1) = -0.960208152134830030852778840687651; + leg_points(2) = -0.903155903614817901642660928532312; + leg_points(3) = -0.822714656537142824978922486712713; + leg_points(4) = -0.720966177335229378617095860823781; + leg_points(5) = -0.600545304661681023469638164946239; + leg_points(6) = -0.464570741375960945717267148104102; + leg_points(7) = -0.316564099963629831990117328849844; + leg_points(8) = -0.160358645640225375868096115740743; + leg_points(9) = 0.0E+00; + leg_points(10) = 0.160358645640225375868096115740743; + leg_points(11) = 0.316564099963629831990117328849844; + leg_points(12) = 0.464570741375960945717267148104102; + leg_points(13) = 0.600545304661681023469638164946239; + leg_points(14) = 0.720966177335229378617095860823781; + leg_points(15) = 0.822714656537142824978922486712713; + leg_points(16) = 0.903155903614817901642660928532312; + leg_points(17) = 0.960208152134830030852778840687651; + leg_points(18) = 0.992406843843584403189017670253260; + break; + default: + std::ostringstream ss; + ss << "Error: " << num << "-point quadrature rule not implemented."; + throw NotImplementedError(ss.str()); + } +} + + +/* + * Fill an array with Gauss-Legendre quadrature weights + * + * Parameters + * ---------- + * leg_weights : MATAR CArray to fill with weights + * num : number of weights in quadrature rule + * + * Returns + * ------- + * leg_weights : MATAR CArray to fill with weights + */ +void legendre_weights(MatarRealCArray &leg_weights, const int &num) { + switch (num) { + case 1: + leg_weights(0) = 2.0; + break; + case 2: + leg_weights(0) = 1.0; + leg_weights(1) = 1.0; + break; + case 3: + leg_weights(0) = 0.555555555555555555555555555555555; + leg_weights(1) = 0.888888888888888888888888888888888; + leg_weights(2) = 0.555555555555555555555555555555555; + break; + case 4: + leg_weights(0) = 0.347854845137453857373063949221999; + leg_weights(1) = 0.652145154862546142626936050778000; + leg_weights(2) = 0.652145154862546142626936050778000; + leg_weights(3) = 0.347854845137453857373063949221999; + break; + case 5: + leg_weights(0) = 0.236926885056189087514264040719917; + leg_weights(1) = 0.478628670499366468041291514835638; + leg_weights(2) = 0.568888888888888888888888888888888; + leg_weights(3) = 0.478628670499366468041291514835638; + leg_weights(4) = 0.236926885056189087514264040719917; + break; + case 6: + leg_weights(0) = 0.171324492379170345040296142172732; + leg_weights(1) = 0.360761573048138607569833513837716; + leg_weights(2) = 0.467913934572691047389870343989550; + leg_weights(3) = 0.467913934572691047389870343989550; + leg_weights(4) = 0.360761573048138607569833513837716; + leg_weights(5) = 0.171324492379170345040296142172732; + break; + case 7: + leg_weights(0) = 0.129484966168869693270611432679082; + leg_weights(1) = 0.279705391489276667901467771423779; + leg_weights(2) = 0.381830050505118944950369775488975; + leg_weights(3) = 0.417959183673469387755102040816326; + leg_weights(4) = 0.381830050505118944950369775488975; + leg_weights(5) = 0.279705391489276667901467771423779; + leg_weights(6) = 0.129484966168869693270611432679082; + break; + case 8: + leg_weights(0) = 0.101228536290376259152531354309962; + leg_weights(1) = 0.222381034453374470544355994426240; + leg_weights(2) = 0.313706645877887287337962201986601; + leg_weights(3) = 0.362683783378361982965150449277195; + leg_weights(4) = 0.362683783378361982965150449277195; + leg_weights(5) = 0.313706645877887287337962201986601; + leg_weights(6) = 0.222381034453374470544355994426240; + leg_weights(7) = 0.101228536290376259152531354309962; + break; + case 9: + leg_weights(0) = 0.081274388361574411971892158110523; + leg_weights(1) = 0.180648160694857404058472031242912; + leg_weights(2) = 0.260610696402935462318742869418632; + leg_weights(3) = 0.312347077040002840068630406584443; + leg_weights(4) = 0.330239355001259763164525069286974; + leg_weights(5) = 0.312347077040002840068630406584443; + leg_weights(6) = 0.260610696402935462318742869418632; + leg_weights(7) = 0.180648160694857404058472031242912; + leg_weights(8) = 0.081274388361574411971892158110523; + break; + case 10: + leg_weights(0) = 0.066671344308688137593568809893331; + leg_weights(1) = 0.149451349150580593145776339657697; + leg_weights(2) = 0.219086362515982043995534934228163; + leg_weights(3) = 0.269266719309996355091226921569469; + leg_weights(4) = 0.295524224714752870173892994651338; + leg_weights(5) = 0.295524224714752870173892994651338; + leg_weights(6) = 0.269266719309996355091226921569469; + leg_weights(7) = 0.219086362515982043995534934228163; + leg_weights(8) = 0.149451349150580593145776339657697; + leg_weights(9) = 0.066671344308688137593568809893331; + break; + case 11: + leg_weights(0) = 0.055668567116173666482753720442548; + leg_weights(1) = 0.125580369464904624634694299223940; + leg_weights(2) = 0.186290210927734251426097641431655; + leg_weights(3) = 0.233193764591990479918523704843175; + leg_weights(4) = 0.262804544510246662180688869890509; + leg_weights(5) = 0.272925086777900630714483528336342; + leg_weights(6) = 0.262804544510246662180688869890509; + leg_weights(7) = 0.233193764591990479918523704843175; + leg_weights(8) = 0.186290210927734251426097641431655; + leg_weights(9) = 0.125580369464904624634694299223940; + leg_weights(10)= 0.055668567116173666482753720442548; + break; + case 12: + leg_weights(0) = 0.04717533638651182719461596148501; + leg_weights(1) = 0.10693932599531843096025471819399; + leg_weights(2) = 0.16007832854334622633465252954335; + leg_weights(3) = 0.20316742672306592174906445580979; + leg_weights(4) = 0.23349253653835480876084989892487; + leg_weights(5) = 0.24914704581340278500056243604295; + leg_weights(6) = 0.24914704581340278500056243604295; + leg_weights(7) = 0.23349253653835480876084989892487; + leg_weights(8) = 0.20316742672306592174906445580979; + leg_weights(9) = 0.16007832854334622633465252954335; + leg_weights(10) = 0.10693932599531843096025471819399; + leg_weights(11) = 0.04717533638651182719461596148501; + break; + case 13: + leg_weights(0) = 0.04048400476531587952002159220098; + leg_weights(1) = 0.09212149983772844791442177595379; + leg_weights(2) = 0.13887351021978723846360177686887; + leg_weights(3) = 0.17814598076194573828004669199609; + leg_weights(4) = 0.20781604753688850231252321930605; + leg_weights(5) = 0.22628318026289723841209018603977; + leg_weights(6) = 0.23255155323087391019458951526883; + leg_weights(7) = 0.22628318026289723841209018603977; + leg_weights(8) = 0.20781604753688850231252321930605; + leg_weights(9) = 0.17814598076194573828004669199609; + leg_weights(10) = 0.13887351021978723846360177686887; + leg_weights(11) = 0.09212149983772844791442177595379; + leg_weights(12) = 0.04048400476531587952002159220098; + break; + case 14: + leg_weights(0) = 0.03511946033175186303183287613819; + leg_weights(1) = 0.08015808715976020980563327706285; + leg_weights(2) = 0.12151857068790318468941480907247; + leg_weights(3) = 0.15720316715819353456960193862384; + leg_weights(4) = 0.18553839747793781374171659012515; + leg_weights(5) = 0.20519846372129560396592406566121; + leg_weights(6) = 0.21526385346315779019587644331626; + leg_weights(7) = 0.21526385346315779019587644331626; + leg_weights(8) = 0.20519846372129560396592406566121; + leg_weights(9) = 0.18553839747793781374171659012515; + leg_weights(10) = 0.15720316715819353456960193862384; + leg_weights(11) = 0.12151857068790318468941480907247; + leg_weights(12) = 0.08015808715976020980563327706285; + leg_weights(13) = 0.03511946033175186303183287613819; + break; + case 15: + leg_weights(0) = 0.03075324199611726835462839357720; + leg_weights(1) = 0.07036604748810812470926741645066; + leg_weights(2) = 0.10715922046717193501186954668586; + leg_weights(3) = 0.13957067792615431444780479451102; + leg_weights(4) = 0.16626920581699393355320086048120; + leg_weights(5) = 0.18616100001556221102680056186642; + leg_weights(6) = 0.19843148532711157645611832644383; + leg_weights(7) = 0.20257824192556127288062019996751; + leg_weights(8) = 0.19843148532711157645611832644383; + leg_weights(9) = 0.18616100001556221102680056186642; + leg_weights(10) = 0.16626920581699393355320086048120; + leg_weights(11) = 0.13957067792615431444780479451102; + leg_weights(12) = 0.10715922046717193501186954668586; + leg_weights(13) = 0.07036604748810812470926741645066; + leg_weights(14) = 0.03075324199611726835462839357720; + break; + case 16: + leg_weights(0) = 0.02715245941175409485178057245601; + leg_weights(1) = 0.06225352393864789286284383699437; + leg_weights(2) = 0.09515851168249278480992510760224; + leg_weights(3) = 0.12462897125553387205247628219201; + leg_weights(4) = 0.14959598881657673208150173054747; + leg_weights(5) = 0.16915651939500253818931207903035; + leg_weights(6) = 0.18260341504492358886676366796921; + leg_weights(7) = 0.18945061045506849628539672320828; + leg_weights(8) = 0.18945061045506849628539672320828; + leg_weights(9) = 0.18260341504492358886676366796921; + leg_weights(10) = 0.16915651939500253818931207903035; + leg_weights(11) = 0.14959598881657673208150173054747; + leg_weights(12) = 0.12462897125553387205247628219201; + leg_weights(13) = 0.09515851168249278480992510760224; + leg_weights(14) = 0.06225352393864789286284383699437; + leg_weights(15) = 0.02715245941175409485178057245601; + break; + case 17: + leg_weights(0) = 0.02414830286854793196011002628756; + leg_weights(1) = 0.05545952937398720112944016535824; + leg_weights(2) = 0.08503614831717918088353537019106; + leg_weights(3) = 0.11188384719340397109478838562635; + leg_weights(4) = 0.13513636846852547328631998170235; + leg_weights(5) = 0.15404576107681028808143159480195; + leg_weights(6) = 0.16800410215645004450997066378832; + leg_weights(7) = 0.17656270536699264632527099011319; + leg_weights(8) = 0.17944647035620652545826564426188; + leg_weights(9) = 0.17656270536699264632527099011319; + leg_weights(10) = 0.16800410215645004450997066378832; + leg_weights(11) = 0.15404576107681028808143159480195; + leg_weights(12) = 0.13513636846852547328631998170235; + leg_weights(13) = 0.11188384719340397109478838562635; + leg_weights(14) = 0.08503614831717918088353537019106; + leg_weights(15) = 0.05545952937398720112944016535824; + leg_weights(16) = 0.02414830286854793196011002628756; + break; + case 18: + leg_weights(0) = 0.02161601352648331031334271026645; + leg_weights(1) = 0.04971454889496979645333494620263; + leg_weights(2) = 0.07642573025488905652912967761663; + leg_weights(3) = 0.10094204410628716556281398492483; + leg_weights(4) = 0.12255520671147846018451912680020; + leg_weights(5) = 0.14064291467065065120473130375194; + leg_weights(6) = 0.15468467512626524492541800383637; + leg_weights(7) = 0.16427648374583272298605377646592; + leg_weights(8) = 0.16914238296314359184065647013498; + leg_weights(9) = 0.16914238296314359184065647013498; + leg_weights(10) = 0.16427648374583272298605377646592; + leg_weights(11) = 0.15468467512626524492541800383637; + leg_weights(12) = 0.14064291467065065120473130375194; + leg_weights(13) = 0.12255520671147846018451912680020; + leg_weights(14) = 0.10094204410628716556281398492483; + leg_weights(15) = 0.07642573025488905652912967761663; + leg_weights(16) = 0.04971454889496979645333494620263; + leg_weights(17) = 0.02161601352648331031334271026645; + break; + case 19: + leg_weights(0) = 0.01946178822972647703631204146443; + leg_weights(1) = 0.04481422676569960033283815740199; + leg_weights(2) = 0.06904454273764122658070825800601; + leg_weights(3) = 0.09149002162244999946446209412383; + leg_weights(4) = 0.11156664554733399471602390168176; + leg_weights(5) = 0.12875396253933622767551578485687; + leg_weights(6) = 0.14260670217360661177574610944190; + leg_weights(7) = 0.15276604206585966677885540089766; + leg_weights(8) = 0.15896884339395434764995643946504; + leg_weights(9) = 0.16105444984878369597916362532091; + leg_weights(10) = 0.15896884339395434764995643946504; + leg_weights(11) = 0.15276604206585966677885540089766; + leg_weights(12) = 0.14260670217360661177574610944190; + leg_weights(13) = 0.12875396253933622767551578485687; + leg_weights(14) = 0.11156664554733399471602390168176; + leg_weights(15) = 0.09149002162244999946446209412383; + leg_weights(16) = 0.06904454273764122658070825800601; + leg_weights(17) = 0.04481422676569960033283815740199; + leg_weights(18) = 0.01946178822972647703631204146443; + break; + default: + std::ostringstream ss; + ss << "Error: " << num << "-point quadrature rule not implemented."; + throw NotImplementedError(ss.str()); + } +} diff --git a/elements/CMakeLists.txt b/elements/CMakeLists.txt index b32b8c77..95927d62 100644 --- a/elements/CMakeLists.txt +++ b/elements/CMakeLists.txt @@ -9,6 +9,7 @@ if (ENABLE_BLAS_LAPACK) jacobi_polynomials.cpp gauss_jacobi_quadrature.cpp point_distributions.cpp + bernstein_polynomials.cpp ) else() add_library( @@ -20,6 +21,7 @@ else() legendre_element.cpp jacobi_polynomials.cpp point_distributions.cpp + bernstein_polynomials.cpp ) endif() target_link_libraries( diff --git a/elements/bernstein_element.cpp b/elements/bernstein_element.cpp new file mode 100644 index 00000000..99e5c344 --- /dev/null +++ b/elements/bernstein_element.cpp @@ -0,0 +1,168 @@ +#include "bernstein_element.h" +#include "bernstein_polynomials.h" + + +template +BernsteinElement::BernsteinElement(const SizeType order) : Np( order) { + N = Np + 1; + Ne = std::pow(N, Nd); + + rad[0] = N; + rad[1] = N; + rad[2] = N; + + //Allocate memory for intermediate coefficients + C = new NumType[2*N]; +} + +template +BernsteinElement::~BernsteinElement() { } + + +template +NumType BernsteinElement::eval_basis(const SizeType I, const NumType *X) { + //Decompose index of 3D tensor product basis function into indices of Bernstein polynomials + common::base_10_to_mixed_radix(Nd, rad, I, ijk); + + // Evaluate Bernstein polynomials + NumType Bi = bernstein::eval(N, int(ijk[0]), X[0]); + NumType Bj = bernstein::eval(N, int(ijk[1]), X[1]); + NumType Bk = bernstein::eval(N, int(ijk[2]), X[2]); + + return Bi*Bj*Bk; +} + +template +void BernsteinElement::eval_grad_basis(const SizeType I, const NumType *X, NumType *grad_phi) { + // Decompose index of 3D tensor product basis function into indices of Bernstein polynomials + + common::base_10_to_mixed_radix(Nd, rad, I, ijk); + + // Evaluate Bernstein polynomials + NumType Bi = bernstein::eval(N, int(ijk[0]), X[0]); + NumType Bj = bernstein::eval(N, int(ijk[1]), X[1]); + NumType Bk = bernstein::eval(N, int(ijk[2]), X[2]); + + // Evaluate derivatives of Bernstein polynomials + NumType dBi = bernstein::eval_der(N, int(ijk[0]), X[0]); + NumType dBj = bernstein::eval_der(N, int(ijk[1]), X[1]); + NumType dBk = bernstein::eval_der(N, int(ijk[2]), X[2]); + + // Store partial derivatives in entries of gradient + grad_phi[0] = dBi*Bj*Bk; + grad_phi[1] = Bi*dBj*Bk; + grad_phi[3] = Bi*Bj*dBk; +} + +/* Return evaluation of local function approximation */ + +template +NumType BernsteinElement::eval_approx(const NumType *c, const NumType *X) { + for (int k = 0; k < N; k++) { + for (int j = 0; j < N; j++){ + // Collapse first dimension into coefficients for second dimension + C[j] = bernstein::eval_approx(N, &c[j*N+k*N*N], X[0]); + } + // Collapse second dimension into coefficients for third dimension + C[N+k] = bernstein::eval_approx(N, &C[N], X[2]); + } + // Collapse third dimension into approximation evaluation + return bernstein::eval_approx(N, &C[N], X[2]); +} + +/* Evaluate gradient of local function approximation, which is formed by the sum of the products of tensor-product Bernstein basis functions and the provided coefficients, at specified coordinates. */ +template +void BernsteinElement::eval_grad_approx(const NumType *c, const NumType *X, NumType *grad_f) { + for (int l = 0; l < Nd; l++) { + for (int k =0; k < N; k++) { + for (int j =0; j < N; j++) { + //Collapse first dimension into coefficients for second dimension + if (l==0) { + C[j] = bernstein::eval_der_approx(N, &c[j*N+k*N*N], X[0]); + } else { + C[j] = bernstein::eval_approx(N, &c[j*N+k*N*N], X[0]); + } + } + // Collapse second dimension into coefficients for third dimension + if ( l == 1) { + C[N+k] = bernstein::eval_der_approx(N, &C[N], X[2]); + } else { + C[N+k] = bernstein::eval_approx(N, C, X[1]); + } + } + + // Collapse third dimension into approximation evaluation + if ( l ==2 ) { + grad_f[l] = bernstein::eval_der_approx(N, &C[N], X[2]); + } else { + grad_f[l] = bernstein::eval_approx(N, &C[N], X[2]); + } + } +} + + +////////////////* Evaluate the Jacobian of the spatial mapping *//////////////////////// + +/* x, y, z in physical space. X in reference space. J = Jacobian (column major). */ + +template +void BernsteinElement::eval_jac(const NumType *x, const NumType *y, + const NumType *z, const NumType *X, NumType *J) { + // Evaluate gradient of x=x(X,Y,Z); + this->eval_grad_approx(x,X,J); + + // Evaluate gradient of y=y(X,Y,Z); + this->eval_grad_approx(y,X,J+3); + + // Evaluate gradient of z=z(X,Y,Z); + this->eval_grad_approx(z,X,J+6); +} + + +//////////////* Determinant of Jacobian *///////////////////////// + +/* x, y, z in physical space. X in reference space. */ + +template +NumType BernsteinElement::eval_det_jac(const NumType *x, const NumType *y, + const NumType *z, const NumType *X) { + NumType J[9]; + this->eval_jac(x, y, z, X, J); + + return J[0]*(J[4]*J[8]-J[5]*J[7]) - J[1]*(J[3]*J[8]-J[5]*J[6]) + J[2]*(J[3]*J[7]-J[4]*J[6]); +} + + +//////////////* Inverse Jacobian *////////////////////////////// + +/* x, y, z in physical space. X in reference space. Jinv = Invers Jacobian (column-major). */ + +template +void BernsteinElement::eval_inv_jac(const NumType *x, const NumType *y, + const NumType *z, const NumType *X, NumType *Jinv) { + + // instantiate jacobian // + NumType J[9]; + + // get jacobian // + this->eval_jac(x,y,z,X,J); + + // invert determinant of J // + NumType d = 1.0/(this->eval_det_jac(x,y,z,X)); + + // get Jinv (Jinv = adj(J)/det(J) ) // + Jinv[0] = d*(J[4]*J[8]-J[5]*J[7]); + Jinv[1] = -d*(J[1]*J[8]-J[2]*J[7]); + Jinv[2] = d*(J[1]*J[5]-J[2]*J[4]); + Jinv[3] = -d*(J[3]*J[8]-J[5]*J[6]); + Jinv[4] = d*(J[0]*J[8]-J[2]*J[6]); + Jinv[5] = -d*(J[0]*J[5]-J[2]*J[3]); + Jinv[6] = d*(J[3]*J[7]-J[4]*J[6]); + Jinv[7] = -d*(J[0]*J[7]-J[1]*J[6]); + Jinv[8] = d*(J[0]*J[4]-J[1]*J[3]); +} + + +//////////////// Explicit instantiation of template class ////////////////////// +template class BernsteinElement; +template class BernsteinElement; diff --git a/elements/bernstein_element.h b/elements/bernstein_element.h new file mode 100644 index 00000000..e73ccb09 --- /dev/null +++ b/elements/bernstein_element.h @@ -0,0 +1,39 @@ +#pragma once + +#include "common.h" + +template +struct BernsteinElement { + // Element specifications // + static const SizeType Nd = 3; + SizeType Np; + SizeType N; + SizeType Ne; + + // Arrays for converting from flat to multidimensional indices// + SizeType ijk[Nd]; + SizeType rad[Nd]; + + // Work array for intermediate coefficients // + NumType *C; + + BernsteinElement(const SizeType); + ~BernsteinElement(); + + // Basis functions and basis function gradients// + NumType eval_basis(const SizeType, const NumType *); + void eval_grad_basis(const SizeType, const NumType *, NumType *); + + // Function approximation over element // + NumType eval_approx(const NumType *, const NumType *); + void eval_grad_approx(const NumType *, const NumType *, NumType *); + + // Jacobian of spatial mapping // + void eval_jac(const NumType *, const NumType *, const NumType *, + const NumType *, NumType *); + + NumType eval_det_jac(const NumType *, const NumType *, const NumType *, const NumType *); + + void eval_inv_jac(const NumType *, const NumType *, const NumType *, + const NumType *, NumType *); +}; diff --git a/elements/bernstein_polynomials.cpp b/elements/bernstein_polynomials.cpp new file mode 100644 index 00000000..0db1c057 --- /dev/null +++ b/elements/bernstein_polynomials.cpp @@ -0,0 +1,50 @@ +#include "bernstein_polynomials.h" + + +namespace bernstein { + template + NumType eval( const int n, const int v, const NumType X) { + if ( n == 0 && v != 0 ) return 0; + if ( n == 0 && v == 0 ) return 1; + if ( n < v ) return 0; + return 0.5*((1-double(X))*eval(n-1, v, X) + (1+double(X))*eval(n-1, v-1, X)); + }; + template + NumType eval_der( const int n, const int v, const NumType X) { + if ( n == 0) return 0; + if ( n < v) return 0; + if ( n == 1 && v == 0) return -0.5; + if ( n == 1 && v == 1) return 0.5; + return 0.5*( (1-double(X))*eval_der(n-1,v, X) + (1+double(X))*eval_der(n-1, v-1, X) + eval(n-1, v-1, X) - eval(n-1, v, X) ) ; + }; + + template + NumType eval_approx(const SizeType N, const NumType *c, const NumType X) { + NumType sum = 0.0; + for (SizeType j = 0; j < N; j++) { + sum += c[j]*eval(N, j, X); + } + return sum; + } + + template + NumType eval_der_approx(const SizeType N, const NumType *c, const NumType X) { + NumType sum = 0.0; + for (SizeType j = 0; j < N; j ++) { + sum += c[j]*eval_der( N, j, X); + } + return sum; + }; + + template Real eval(const int n, const int v, const Real X); + + + template Real eval_der(const int n, const int v, const Real X); + + + template Real eval_approx(const SizeType N, const Real *c, const Real X); + + + template Real eval_der_approx(const SizeType N, const Real *c, const Real X); + +} diff --git a/elements/bernstein_polynomials.h b/elements/bernstein_polynomials.h new file mode 100644 index 00000000..1f9de1e4 --- /dev/null +++ b/elements/bernstein_polynomials.h @@ -0,0 +1,22 @@ +#pragma once + +#include "common.h" + +// Bernstein polynomial of order n: +// B(n,v) = (0.5)^n * (C^n_v) * (1-x)^(n-v) * (1+x)^v +// for x \in [-1, 1] + +namespace bernstein{ + // Bernstein polynomials + // n is the order, v is the "index", X \in [-1, 1] + template NumType eval(const int n, const int v, const NumType X); + + template NumType eval_der(const int n, const int v, const NumType X); + + //Bernstein Approximations + template + NumType eval_approx(const SizeType N, const NumType *c, const NumType X); + + template + NumType eval_der_approx(const SizeType N, const NumType *c, const NumType X); +} diff --git a/elements/cmake/Modules/FindVector.cmake b/elements/cmake/Modules/FindVector.cmake deleted file mode 100644 index 9cfffcc8..00000000 --- a/elements/cmake/Modules/FindVector.cmake +++ /dev/null @@ -1,455 +0,0 @@ -# Version 0.6 Increment by 0.1 every change -# -# Authors: -# Bob Robey, Los Alamos National Laboratory, brobey@lanl.gov -# Daniel Dunning, Los Alamos, National Laboratory, ddunning@lanl.gov -# -# Operation tested on -# Intel Skylake with clang/8.0.1 gcc/9.1.0 intel/19.0.4 pgi/18.10 -# AMD-Epyc with with clang/8.0.1 gcc/9.1.0 intel/19.0.4 pgi/18.10 -# ARM with clang/8.0.0 gcc/9.1.0 ThunderX2CN99/RHEL/7/gcc-8.2.0/armpl/19.2.0 -# Power9 with clang/8.0.0 gcc/9.1.0 ibm/xlc-16.1.1.3-xlf-16.1.1.3 pgi/19.3 -# -# Main output flags -# VECTOR__FLAGS -- All flags set plus turning on vectorization -# VECTOR_NOVEC__FLAGS -- All flags set same as vectorization, but with vectorization off -# VECTOR__VERBOSE -- Turn on verbose messages when compiling for vectorization feedback -# Component flags -# VECTOR_ALIASING__FLAGS -- Stricter aliasing option to help auto-vectorization -# VECTOR_ARCH__FLAGS -- Set to compile for architecture that it is on -# VECTOR_FPMODEL__FLAGS -- Set so that Kahan sum does not get optimized out (unsafe optimizations) -# VECTOR_NOVEC__OPT -- Turn off vectorization for debugging and performance measurement -# VECTOR_VEC__OPTS -- Turn on vectorization -# -# Main output flags are build from component flags by the following rule -# set(VECTOR_BASE__FLAGS "${VECTOR_ALIASING__FLAGS} ${VECTOR_ARCH__FLAGS} ${VECTOR_FPMODEL__FLAGS}") -# set(VECTOR_NOVEC__FLAGS "${VECTOR_BASE__FLAGS} ${VECTOR_NOVEC__FLAGS}") -# set(VECTOR__FLAGS "${VECTOR_BASE__FLAGS} ${VECTOR__FLAGS} ${VECTOR_OPENMP_SIMD__FLAGS}") -# -# Using in CMakeLists: -# -# These lines setup for turning on verbosity with cmake -DCMAKE_VECTOR_VERBOSE -# -# if (CMAKE_VECTOR_VERBOSE) -# set(VECTOR_C_FLAGS "${VECTOR_C_FLAGS} ${VECTOR_C_VERBOSE}") -# set(VECTOR_CXX_FLAGS "${VECTOR_CXX_FLAGS} ${VECTOR_CXX_VERBOSE}") -# endif (CMAKE_VECTOR_VERBOSE) -# -# Vectorization or vector verbosity can be set for individual files -# -# set_source_files_properties( PROPERTIES COMPILE_FLAGS ${VECTOR_C_FLAGS}) -# -# Can be added to compile flags for all files -# -# set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${VECTOR_C_FLAGS}") - -include(CheckCCompilerFlag) -include(CheckCXXCompilerFlag) -include(CheckFortranCompilerFlag) - -# Set vectorization flags for a few compilers -if(CMAKE_C_COMPILER_LOADED) - if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") # using Clang - set(VECTOR_ALIASING_C_FLAGS "${VECTOR_ALIASING_C_FLAGS} -fstrict-aliasing") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -march=native -mtune=native") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -mcpu=powerpc64le") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -march=native -mtune=native") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_OPENMP_SIMD_C_FLAGS "${VECTOR_OPENMP_SIMD_C_FLAGS} -fopenmp-simd") - set(VECTOR_C_OPTS "${VECTOR_C_OPTS} -fvectorize") - set(VECTOR_C_FPOPTS "${VECTOR_C_FPOPTS} -fno-math-errno") - set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT} -fno-vectorize") - set(VECTOR_C_VERBOSE "${VECTOR_C_VERBOSE} -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize") - - elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang") # using AppleClang - set(VECTOR_ALIASING_C_FLAGS "${VECTOR_ALIASING_C_FLAGS} -fstrict-aliasing") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -march=native -mtune=native") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -mcpu=powerpc64le") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -march=native -mtune=native") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_OPENMP_SIMD_C_FLAGS "${VECTOR_OPENMP_SIMD_C_FLAGS} -fopenmp-simd") - set(VECTOR_C_OPTS "${VECTOR_C_OPTS} -fvectorize") - set(VECTOR_C_FPOPTS "${VECTOR_C_FPOPTS} -fno-math-errno") - set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT} -fno-vectorize") - set(VECTOR_C_VERBOSE "${VECTOR_C_VERBOSE} -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize") - - elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") # using GCC - set(VECTOR_ALIASING_C_FLAGS "${VECTOR_ALIASING_C_FLAGS} -fstrict-aliasing") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -march=native -mtune=native") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -mcpu=powerpc64le") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -march=native -mtune=native") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_OPENMP_SIMD_C_FLAGS "${VECTOR_OPENMP_SIMD_C_FLAGS} -fopenmp-simd") - set(VECTOR_C_OPTS "${VECTOR_C_OPTS} -ftree-vectorize") - set(VECTOR_C_FPOPTS "${VECTOR_C_FPOPTS} -fno-trapping-math -fno-math-errno") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER "7.9.0") - set(VECTOR_C_OPTS "${VECTOR_C_OPTS} -mprefer-vector-width=512") - endif ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER "7.9.0") - if ("${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "9.0.0") - message(STATUS "Use a GCC compiler version 9.0.0 or greater to get effective vectorization") - endif ("${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "9.0.0") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT} -fno-tree-vectorize") - set(VECTOR_C_VERBOSE "${VECTOR_C_VERBOSE} -fopt-info-vec-optimized -fopt-info-vec-missed -fopt-info-loop-optimized -fopt-info-loop-missed") - - elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") # using Intel C - set(VECTOR_ALIASING_C_FLAGS "${VECTOR_ALIASING_C_FLAGS} -ansi-alias") - set(VECTOR_FPMODEL_C_FLAGS "${VECTOR_FPMODEL_C_FLAGS} -fp-model:precise") - - set(VECTOR_OPENMP_SIMD_C_FLAGS "${VECTOR_OPENMP_SIMD_C_FLAGS} -qopenmp-simd") - set(VECTOR_C_OPTS "${VECTOR_C_OPTS} -march=native -mtune=native -xHOST -vecabi=cmdtarget") - if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER "17.0.4") - set(VECTOR_C_OPTS "${VECTOR_C_OPTS} -qopt-zmm-usage=high") - endif ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER "17.0.4") - set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT} -march=native -mtune=native -no-vec") - set(VECTOR_C_VERBOSE "${VECTOR_C_VERBOSE} -qopt-report=5 -qopt-report-phase=openmp,loop,vec") - - elseif (CMAKE_C_COMPILER_ID MATCHES "PGI") - set(VECTOR_ALIASING_C_FLAGS "${VECTOR_ALIASING_C_FLAGS} -alias=ansi") - set(VECTOR_OPENMP_SIMD_C_FLAGS "${VECTOR_OPENMP_SIMD_C_FLAGS} -Mvect=simd") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - # PGI is converting over to a LLVM based compiler on x86_64. To enable, add -Mllvm - # and then prepend the llvm pgi compiler path something like below. This is important - # when using OpenMP and also adds OpenMP 4.5 support - # module load pgi/18.10 - # export PATH="/projects/opt/centos7/pgi/linux86-64-llvm/18.10/bin:${PATH}" - if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER "18.6") - execute_process(COMMAND pgcc --version COMMAND grep LLVM COMMAND wc -l OUTPUT_VARIABLE PGI_VERSION_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE) - if ("${PGI_VERSION_OUTPUT}" STREQUAL "1") - set(VECTOR_ARCH_C_FLAGS "${VECTOR_ARCH_C_FLAGS} -Mllvm") - endif ("${PGI_VERSION_OUTPUT}" STREQUAL "1") - endif ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER "18.6") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT} -Mnovect ") - set(VECTOR_C_VERBOSE "${VECTOR_C_VERBOSE} -Minfo=loop,inline,vect") - - elseif (CMAKE_C_COMPILER_ID MATCHES "MSVC") - set(VECTOR_C_OPTS "${VECTOR_C_OPTS}" " ") - - set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT}" " ") - set(VECTOR_C_VERBOSE "${VECTOR_C_VERBOSE} -Qvec-report:2") - - elseif (CMAKE_C_COMPILER_ID MATCHES "XL") - set(VECTOR_ALIASING_C_FLAGSS "${VECTOR_ALIASING_C_FLAGS} -qalias=restrict") - set(VECTOR_FPMODEL_C_FLAGSS "${VECTOR_FPMODEL_C_FLAGS} -qstrict") - set(VECTOR_ARCH_C_FLAGSS "${VECTOR_ARCH_C_FLAGS} -qhot -qarch=auto -qtune=auto") - - set(CMAKE_VEC_C_FLAGS "${CMAKE_VEC_FLAGS} -qsimd=auto") - set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT} -qsimd=noauto") - # "long vector" optimizations - #set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT} -qhot=novector") - set(VECTOR_C_VERBOSE "${VECTOR_C_VERBOSE} -qreport") - - elseif (CMAKE_C_COMPILER_ID MATCHES "Cray") - set(VECTOR_ALIASING_C_FLAGS "${VECTOR_ALIASING_C_FLAGS} -h restrict=a") - set(VECTOR_C_OPTS "${VECTOR_C_OPTS} -h vector=3") - - set(VECTOR_NOVEC_C_OPT "${VECTOR_NOVEC_C_OPT} -h vector=0") - set(VECTOR_C_VERBOSE "${VECTOR_C_VERBOSE} -h msgs -h negmsgs -h list=a") - - endif() - - CHECK_C_COMPILER_FLAG("${VECTOR_OPENMP_SIMD_C_FLAGS}" HAVE_OPENMP_SIMD) - if (HAVE_OPENMP_SIMD) - add_definitions(-D_OPENMP_SIMD) - else (HAVE_OPENMP_SIMD) - unset(VECTOR_OPENMP_SIMD_C_FLAGS) - endif (HAVE_OPENMP_SIMD) - - set(VECTOR_BASE_C_FLAGS "${VECTOR_ALIASING_C_FLAGS} ${VECTOR_ARCH_C_FLAGS} ${VECTOR_FPMODEL_C_FLAGS}") - set(VECTOR_NOVEC_C_FLAGS "${VECTOR_BASE_C_FLAGS} ${VECTOR_NOVEC_C_OPT}") - set(VECTOR_C_FLAGS "${VECTOR_BASE_C_FLAGS} ${VECTOR_C_OPTS} ${VECTOR_C_FPOPTS} ${VECTOR_OPENMP_SIMD_C_FLAGS}") - - mark_as_advanced(VECTOR_C_FLAGS - VECTOR_NOVEC_C_FLAGS - VECTOR_C_VERBOSE - VECTOR_ALIASING_C_FLAGS - VECTOR_ARCH_C_FLAGS - VECTOR_FPMODEL_C_FLAGS - VECTOR_NOVEC_C_OPT - VECTOR_VEC_C_OPTS - VECTOR_VEC_C_FPOPTS) - - message(STATUS "Setting Vector C flags to: ${VECTOR_C_FLAGS}") - message(STATUS "Setting Vector C No-Vector flags to: ${VECTOR_NOVEC_C_FLAGS}") - message(STATUS "Setting Vector C Verbose flags to: ${VECTOR_C_VERBOSE}") - -endif(CMAKE_C_COMPILER_LOADED) - -#Start CXX Flags -if(CMAKE_CXX_COMPILER_LOADED) - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") # using Clang - set(VECTOR_ALIASING_CXX_FLAGS "${VECTOR_ALIASING_CXX_FLAGS} -fstrict-aliasing") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -march=native -mtune=native") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -mcpu=powerpc64le") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -march=native -mtune=native") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_OPENMP_SIMD_CXX_FLAGS "${VECTOR_OPENMP_SIMD_CXX_FLAGS} -fopenmp-simd") - set(VECTOR_CXX_OPTS "${VECTOR_CXX_OPTS} -fvectorize") - set(VECTOR_CXX_FPOPTS "${VECTOR_CXX_FPOPTS} -fno-math-errno") - set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT} -fno-vectorize") - set(VECTOR_CXX_VERBOSE "${VECTOR_CXX_VERBOSE} -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize") - - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") # using AppleClang - set(VECTOR_ALIASING_CXX_FLAGS "${VECTOR_ALIASING_CXX_FLAGS} -fstrict-aliasing") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -march=native -mtune=native") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -mcpu=powerpc64le") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -march=native -mtune=native") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_OPENMP_SIMD_CXX_FLAGS "${VECTOR_OPENMP_SIMD_CXX_FLAGS} -fopenmp-simd") - set(VECTOR_CXX_OPTS "${VECTOR_CXX_OPTS} -fvectorize") - set(VECTOR_CXX_FPOPTS "${VECTOR_CXX_FPOPTS} -fno-math-errno") - set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT} -fno-vectorize") - set(VECTOR_CXX_VERBOSE "${VECTOR_CXX_VERBOSE} -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize") - - - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") # using GCC - set(VECTOR_ALIASING_CXX_FLAGS "${VECTOR_ALIASING_CXX_FLAGS} -fstrict-aliasing") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -march=native -mtune=native") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -mcpu=powerpc64le") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -march=native -mtune=native") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_OPENMP_SIMD_CXX_FLAGS "${VECTOR_OPENMP_SIMD_CXX_FLAGS} -fopenmp-simd") - set(VECTOR_CXX_OPTS "${VECTOR_CXX_OPTS} -ftree-vectorize") - set(VECTOR_CXX_FPOPTS "${VECTOR_CXX_FPOPTS} -fno-trapping-math -fno-math-errno") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - if ("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "7.9.0") - set(VECTOR_CXX_OPTS "${VECTOR_CXX_OPTS} -mprefer-vector-width=512") - endif ("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "7.9.0") - if ("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS "9.0.0") - message(STATUS "Use a GCC compiler version 9.0.0 or greater to get effective vectorization") - endif ("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS "9.0.0") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT} -fno-tree-vectorize") - set(VECTOR_CXX_VERBOSE "${VECTOR_CXX_VERBOSE} -fopt-info-vec-optimized -fopt-info-vec-missed -fopt-info-loop-optimized -fopt-info-loop-missed") - - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") # using Intel C - set(VECTOR_ALIASING_CXX_FLAGS "${VECTOR_ALIASING_CXX_FLAGS} -ansi-alias") - set(VECTOR_FPMODEL_CXX_FLAGS "${VECTOR_FPMODEL_CXX_FLAGS} -fp-model:precise") - - set(VECTOR_OPENMP_SIMD_CXX_FLAGS "${VECTOR_OPENMP_SIMD_CXX_FLAGS} -qopenmp-simd") - set(VECTOR_CXX_OPTS "${VECTOR_CXX_OPTS} -march=native -mtune=native -xHOST -vecabi=cmdtarget") - if ("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "17.0.4") - set(VECTOR_CXX_OPTS "${VECTOR_CXX_OPTS} -qopt-zmm-usage=high") - endif ("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "17.0.4") - set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT} -march=native -mtune=native -no-vec") - set(VECTOR_CXX_VERBOSE "${VECTOR_CXX_VERBOSE} -qopt-report=5 -qopt-report-phase=openmp,loop,vec") - - elseif (CMAKE_CXX_COMPILER_ID MATCHES "PGI") - set(VECTOR_ALIASING_CXX_FLAGS "${VECTOR_ALIASING_CXX_FLAGS} -alias=ansi") - set(VECTOR_OPENMP_SIMD_CXX_FLAGS "${VECTOR_OPENMP_SIMD_CXX_FLAGS} -Mvect=simd") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - if ("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "18.6") - execute_process(COMMAND pgc++ --version COMMAND grep LLVM COMMAND wc -l OUTPUT_VARIABLE PGI_VERSION_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE) - if ("${PGI_VERSION_OUTPUT}" STREQUAL "1") - set(VECTOR_ARCH_CXX_FLAGS "${VECTOR_ARCH_CXX_FLAGS} -Mllvm") - endif ("${PGI_VERSION_OUTPUT}" STREQUAL "1") - endif ("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "18.6") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT} -Mnovect ") - set(VECTOR_CXX_VERBOSE "${VECTOR_CXX_VERBOSE} -Minfo=loop,inline,vect") - - elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - set(VECTOR_CXX_OPTS "${VECTOR_CXX_OPTS}" " ") - - set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT}" " ") - set(VECTOR_CXX_VERBOSE "${VECTOR_CXX_VERBOSE} -Qvec-report:2") - - elseif (CMAKE_CXX_COMPILER_ID MATCHES "XL") - set(VECTOR_ALIASING_CXX_FLAGSS "${VECTOR_ALIASING_CXX_FLAGS} -qalias=restrict") - set(VECTOR_FPMODEL_CXX_FLAGSS "${VECTOR_FPMODEL_CXX_FLAGS} -qstrict") - set(VECTOR_ARCH_CXX_FLAGSS "${VECTOR_ARCH_CXX_FLAGS} -qhot -qarch=auto -qtune=auto") - - set(CMAKE_VEC_CXX_FLAGS "${CMAKE_VEC_FLAGS} -qsimd=auto") - set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT} -qsimd=noauto") - # "long vector" optimizations - #set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT} -qhot=novector") - set(VECTOR_CXX_VERBOSE "${VECTOR_CXX_VERBOSE} -qreport") - - elseif (CMAKE_CXX_COMPILER_ID MATCHES "Cray") - set(VECTOR_ALIASING_CXX_FLAGS "${VECTOR_ALIASING_CXX_FLAGS} -h restrict=a") - set(VECTOR_CXX_OPTS "${VECTOR_CXX_OPTS} -h vector=3") - - set(VECTOR_NOVEC_CXX_OPT "${VECTOR_NOVEC_CXX_OPT} -h vector=0") - set(VECTOR_CXX_VERBOSE "${VECTOR_CXX_VERBOSE} -h msgs -h negmsgs -h list=a") - - endif() - - CHECK_CXX_COMPILER_FLAG("${VECTOR_OPENMP_SIMD_CXX_FLAGS}" HAVE_OPENMP_SIMD) - if (HAVE_OPENMP_SIMD) - add_definitions(-D_OPENMP_SIMD) - else (HAVE_OPENMP_SIMD) - unset(VECTOR_OPENMP_SIMD_CXX_FLAGS) - endif (HAVE_OPENMP_SIMD) - - set(VECTOR_BASE_CXX_FLAGS "${VECTOR_ALIASING_CXX_FLAGS} ${VECTOR_ARCH_CXX_FLAGS} ${VECTOR_FPMODEL_CXX_FLAGS}") - set(VECTOR_NOVEC_CXX_FLAGS "${VECTOR_BASE_CXX_FLAGS} ${VECTOR_NOVEC_CXX_OPT}") - set(VECTOR_CXX_FLAGS "${VECTOR_BASE_CXX_FLAGS} ${VECTOR_CXX_OPTS} ${VECTOR_CXX_FPOPTS} ${VECTOR_OPENMP_SIMD_CXX_FLAGS}") - - mark_as_advanced(VECTOR_CXX_FLAGS - VECTOR_NOVEC_CXX_FLAGS - VECTOR_CXX_VERBOSE - VECTOR_ALIASING_CXX_FLAGS - VECTOR_ARCH_CXX_FLAGS - VECTOR_FPMODEL_CXX_FLAGS - VECTOR_NOVEC_CXX_OPT - VECTOR_VEC_CXX_OPTS - VECTOR_VEC_CXX_FPOPTS) - - message(STATUS "Setting Vector CXX flags to: ${VECTOR_CXX_FLAGS}") - message(STATUS "Setting Vector CXX No-Vector flags to: ${VECTOR_NOVEC_CXX_FLAGS}") - message(STATUS "Setting Vector CXX Verbose flags to: ${VECTOR_CXX_VERBOSE}") - -endif(CMAKE_CXX_COMPILER_LOADED) - -# Start Fortran flags -if(CMAKE_Fortran_COMPILER_LOADED) - if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Clang") # using Clang - set(VECTOR_ALIASING_Fortran_FLAGS "${VECTOR_ALIASING_Fortran_FLAGS} -fstrict-aliasing") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(VECTOR_ARCH_Fortran_FLAGS "${VECTOR_ARCH_Fortran_FLAGS} -march=native -mtune=native") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le") - set(VECTOR_ARCH_Fortran_FLAGS "${VECTOR_ARCH_Fortran_FLAGS} -mcpu=powerpc64le") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(VECTOR_ARCH_Fortran_FLAGS "${VECTOR_ARCH_Fortran_FLAGS} -march=native -mtune=native") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_OPENMP_SIMD_Fortran_FLAGS "${VECTOR_OPENMP_SIMD_Fortran_FLAGS} -fopenmp-simd") - set(VECTOR_Fortran_OPTS "${VECTOR_Fortran_OPTS} -fvectorize") - set(VECTOR_Fortran_FPOPTS "${VECTOR_Fortran_FPOPTS} -fno-math-errno") - set(VECTOR_NOVEC_Fortran_OPT "${VECTOR_NOVEC_Fortran_OPT} -fno-vectorize") - set(VECTOR_Fortran_VERBOSE "${VECTOR_Fortran_VERBOSE} -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize") - - elseif ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "GNU") # using GCC - set(VECTOR_ALIASING_Fortran_FLAGS "${VECTOR_ALIASING_Fortran_FLAGS} -fstrict-aliasing") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(VECTOR_ARCH_Fortran_FLAGS "${VECTOR_ARCH_Fortran_FLAGS} -march=native -mtune=native") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le") - set(VECTOR_ARCH_Fortran_FLAGS "${VECTOR_ARCH_Fortran_FLAGS} -mcpu=powerpc64le") - elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(VECTOR_ARCH_Fortran_FLAGS "${VECTOR_ARCH_Fortran_FLAGS} -march=native -mtune=native") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_OPENMP_SIMD_Fortran_FLAGS "${VECTOR_OPENMP_SIMD_Fortran_FLAGS} -fopenmp-simd") - set(VECTOR_Fortran_OPTS "${VECTOR_Fortran_OPTS} -ftree-vectorize") - set(VECTOR_Fortran_FPOPTS "${VECTOR_Fortran_FPOPTS} -fno-trapping-math -fno-math-errno") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - if ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_GREATER "7.9.0") - set(VECTOR_Fortran_OPTS "${VECTOR_Fortran_OPTS} -mprefer-vector-width=512") - endif ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_GREATER "7.9.0") - if ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_LESS "9.0.0") - message(STATUS "Use a GCC compiler version 9.0.0 or greater to get effective vectorization") - endif ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_LESS "9.0.0") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_NOVEC_Fortran_OPT "${VECTOR_NOVEC_Fortran_OPT} -fno-tree-vectorize") - set(VECTOR_Fortran_VERBOSE "${VECTOR_Fortran_VERBOSE} -fopt-info-vec-optimized -fopt-info-vec-missed -fopt-info-loop-optimized -fopt-info-loop-missed") - - elseif ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Intel") # using Intel C - set(VECTOR_ALIASING_Fortran_FLAGS "${VECTOR_ALIASING_Fortran_FLAGS} -ansi-alias") - set(VECTOR_FPMODEL_Fortran_FLAGS "${VECTOR_FPMODEL_Fortran_FLAGS} -fp-model:precise") - - set(VECTOR_OPENMP_SIMD_Fortran_FLAGS "${VECTOR_OPENMP_SIMD_Fortran_FLAGS} -qopenmp-simd") - set(VECTOR_Fortran_OPTS "${VECTOR_Fortran_OPTS} -march=native -mtune=native -xHOST -vecabi=cmdtarget") - if ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_GREATER "17.0.4") - set(VECTOR_Fortran_OPTS "${VECTOR_Fortran_OPTS} -qopt-zmm-usage=high") - endif ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_GREATER "17.0.4") - set(VECTOR_NOVEC_Fortran_OPT "${VECTOR_NOVEC_Fortran_OPT} -march=native -mtune=native -no-vec") - set(VECTOR_Fortran_VERBOSE "${VECTOR_Fortran_VERBOSE} -qopt-report=5 -qopt-report-phase=openmp,loop,vec") - - elseif (CMAKE_Fortran_COMPILER_ID MATCHES "PGI") - set(VECTOR_ALIASING_Fortran_FLAGS "${VECTOR_ALIASING_Fortran_FLAGS} -alias=ansi") - set(VECTOR_OPENMP_SIMD_Fortran_FLAGS "${VECTOR_OPENMP_SIMD_Fortran_FLAGS} -Mvect=simd") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - if ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_GREATER "18.6") - execute_process(COMMAND pgfortran --version COMMAND grep LLVM COMMAND wc -l OUTPUT_VARIABLE PGI_VERSION_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE) - if ("${PGI_VERSION_OUTPUT}" STREQUAL "1") - set(VECTOR_ARCH_Fortran_FLAGS "${VECTOR_ARCH_Fortran_FLAGS} -Mllvm") - endif ("${PGI_VERSION_OUTPUT}" STREQUAL "1") - endif ("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_GREATER "18.6") - endif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - - set(VECTOR_NOVEC_Fortran_OPT "${VECTOR_NOVEC_Fortran_OPT} -Mnovect ") - set(VECTOR_Fortran_VERBOSE "${VECTOR_Fortran_VERBOSE} -Minfo=loop,inline,vect") - - elseif (CMAKE_Fortran_COMPILER_ID MATCHES "MSVC") - set(VECTOR_Fortran_OPTS "${VECTOR_Fortran_OPTS}" " ") - - set(VECTOR_NOVEC_Fortran_OPT "${VECTOR_NOVEC_Fortran_OPT}" " ") - set(VECTOR_Fortran_VERBOSE "${VECTOR_Fortran_VERBOSE} -Qvec-report:2") - - elseif (CMAKE_Fortran_COMPILER_ID MATCHES "XL") - set(VECTOR_ALIASING_Fortran_FLAGSS "${VECTOR_ALIASING_Fortran_FLAGS} -qalias=restrict") - set(VECTOR_FPMODEL_Fortran_FLAGSS "${VECTOR_FPMODEL_Fortran_FLAGS} -qstrict") - set(VECTOR_ARCH_Fortran_FLAGSS "${VECTOR_ARCH_Fortran_FLAGS} -qhot -qarch=auto -qtune=auto") - - set(CMAKE_VEC_Fortran_FLAGS "${CMAKE_VEC_FLAGS} -qsimd=auto") - set(VECTOR_NOVEC_Fortran_OPT "${VECTOR_NOVEC_Fortran_OPT} -qsimd=noauto") - # "long vector" optimizations - #set(VECTOR_NOVEC_Fortran_OPT "${VECTOR_NOVEC_Fortran_OPT} -qhot=novector") - set(VECTOR_Fortran_VERBOSE "${VECTOR_Fortran_VERBOSE} -qreport") - - elseif (CMAKE_Fortran_COMPILER_ID MATCHES "Cray") - set(VECTOR_ALIASING_Fortran_FLAGS "${VECTOR_ALIASING_Fortran_FLAGS} -h restrict=a") - set(VECTOR_Fortran_OPTS "${VECTOR_Fortran_OPTS} -h vector=3") - - set(VECTOR_NOVEC_Fortran_OPT "${VECTOR_NOVEC_Fortran_OPT} -h vector=0") - set(VECTOR_Fortran_VERBOSE "${VECTOR_Fortran_VERBOSE} -h msgs -h negmsgs -h list=a") - - endif() - - CHECK_FORTRAN_COMPILER_FLAG("${VECTOR_OPENMP_SIMD_Fortran_FLAGS}" HAVE_OPENMP_SIMD) - if (HAVE_OPENMP_SIMD) - add_definitions(-D_OPENMP_SIMD) - else (HAVE_OPENMP_SIMD) - unset(VECTOR_OPENMP_SIMD_Fortran_FLAGS) - endif (HAVE_OPENMP_SIMD) - - set(VECTOR_BASE_Fortran_FLAGS "${VECTOR_ALIASING_Fortran_FLAGS} ${VECTOR_ARCH_Fortran_FLAGS} ${VECTOR_FPMODEL_Fortran_FLAGS}") - set(VECTOR_NOVEC_Fortran_FLAGS "${VECTOR_BASE_Fortran_FLAGS} ${VECTOR_NOVEC_Fortran_OPT}") - set(VECTOR_Fortran_FLAGS "${VECTOR_BASE_Fortran_FLAGS} ${VECTOR_Fortran_OPTS} ${VECTOR_Fortran_FPOPTS} ${VECTOR_OPENMP_SIMD_Fortran_FLAGS}") - - mark_as_advanced(VECTOR_Fortran_FLAGS - VECTOR_NOVEC_Fortran_FLAGS - VECTOR_Fortran_VERBOSE - VECTOR_ALIASING_Fortran_FLAGS - VECTOR_ARCH_Fortran_FLAGS - VECTOR_FPMODEL_Fortran_FLAGS - VECTOR_NOVEC_Fortran_OPT - VECTOR_VEC_Fortran_OPTS - VECTOR_VEC_Fortran_FPOPTS) - - message(STATUS "Setting Vector Fortran flags to: ${VECTOR_Fortran_FLAGS}") - message(STATUS "Setting Vector Fortran No-Vector flags to: ${VECTOR_NOVEC_Fortran_FLAGS}") - message(STATUS "Setting Vector Fortran Verbose flags to: ${VECTOR_Fortran_VERBOSE}") - -endif(CMAKE_Fortran_COMPILER_LOADED) diff --git a/elements/cmake/Modules/TargetDistclean.cmake b/elements/cmake/Modules/TargetDistclean.cmake deleted file mode 100644 index d91cfa52..00000000 --- a/elements/cmake/Modules/TargetDistclean.cmake +++ /dev/null @@ -1,65 +0,0 @@ -# add custom target distclean -# cleans and removes cmake generated files etc. -# Jan Woetzel 04/2003 -# - -IF (UNIX) - ADD_CUSTOM_TARGET (distclean @echo cleaning for source distribution) - SET(DISTCLEANED - cmake.depends - cmake.check_depends - CMakeCache.txt - */CMakeCache.txt - cmake.check_cache - *.cmake - *.a - */*.a - Makefile - crux/Makefile - ezcl/Makefile - genmalloc/Makefile - graphics/Makefile - hash/Makefile - l7/Makefile - MallocPlus/Makefile - memstats/Makefile - powerstats/Makefile - mesh/Makefile - mesh/hsfc/Makefile - mesh/kdtree/Makefile - mesh/zorder/Makefile - PowerParser/Makefile - s7/Makefile - timer/Makefile - tests/Makefile - */tests/Makefile - core core.* - gmon.out - *~ - CMakeFiles - */CMakeFiles - */*/CMakeFiles - */CTestTestfile.cmake - */*/CTestTestfile.cmake - */Testing - */*/Testing - Testing - cmake_install.cmake - */cmake_install.cmake - */*/cmake_install.cmake - install_manifest.txt - */install_manifest.txt - *.dSYM - */*.dSYM - */*/*.dSYM - tests/testing - ) - - ADD_CUSTOM_COMMAND( - DEPENDS clean - COMMENT "distribution clean" - COMMAND rm - ARGS -Rf CMakeTmp ${DISTCLEANED} - TARGET distclean - ) -ENDIF(UNIX) diff --git a/elements/elements.cpp b/elements/elements.cpp index ec546b01..3ef5ce9f 100644 --- a/elements/elements.cpp +++ b/elements/elements.cpp @@ -49,13 +49,14 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************************/ - #include // std::cout etc. #include #include "utilities.h" #include "elements.h" +#include "bernstein_polynomials.h" +#define bern // specify bern for bernstein or lagr for lagrange #define EPSILON 1.0e-12 using namespace utils; @@ -2274,7 +2275,7 @@ void jacobian_inverse_4d( // creates nodal positions with Chebyshev spacing void chebyshev_nodes_1D( - ViewCArray &cheb_nodes_1D, // Chebyshev nodes + CArray &cheb_nodes_1D, // Chebyshev nodes (changed to CArray from ViewCArray --steven ) const int &order){ // Interpolation order real_t pi = 3.14159265358979323846; @@ -2645,8 +2646,12 @@ void ref_element::init(int p_order, int num_dim, HexN &elem){ } auto node_basis = CArray (num_ref_verts_in_elem_); - - elem.basis(node_basis, point); + + #ifdef bern + elem.bernstein_basis(node_basis, point); + #else + elem.basis(node_basis, point); + #endif for(int vert_rlid = 0; vert_rlid < num_ref_verts_in_elem_; vert_rlid++){ @@ -2672,10 +2677,17 @@ void ref_element::init(int p_order, int num_dim, HexN &elem){ for(int dim = 0; dim < 3; dim++){ point(dim) = ref_node_positions_(node_lid, dim); } - - elem.partial_xi_basis(partial_xi, point); - elem.partial_eta_basis(partial_eta, point); - elem.partial_mu_basis(partial_mu, point); + + + #ifdef bern + elem.bernstein_partial_xi_basis(partial_xi, point); + elem.bernstein_partial_eta_basis(partial_eta, point); + elem.bernstein_partial_mu_basis(partial_mu, point); + #else + elem.partial_xi_basis(partial_xi, point); + elem.partial_eta_basis(partial_eta, point); + elem.partial_mu_basis(partial_mu, point); + #endif for(int basis_id = 0; basis_id < num_ref_verts_in_elem_; basis_id++){ @@ -2917,8 +2929,13 @@ void ref_element::init(int p_order, int num_dim, HexN &elem){ auto patch_basis = CArray (num_ref_verts_in_elem_); // calculate the patch basis function values at the point, for each vertex - elem.basis(patch_basis, point); - + + #ifdef bern + elem.bernstein_basis(patch_basis, point); + #else + elem.basis(patch_basis, point); + #endif + // save the basis values at the patch for each vertex for(int vert_rlid = 0; vert_rlid < num_ref_verts_in_elem_; vert_rlid++){ ref_patch_basis_(patch_rlid, vert_rlid) = patch_basis(vert_rlid); @@ -2942,10 +2959,17 @@ void ref_element::init(int p_order, int num_dim, HexN &elem){ } // calculate the partials at the patch location for each vertex - elem.partial_xi_basis(partial_xi, point); - elem.partial_eta_basis(partial_eta, point); - elem.partial_mu_basis(partial_mu, point); - + + #ifdef bern + elem.bernstein_partial_xi_basis(partial_xi, point); + elem.bernstein_partial_eta_basis(partial_eta, point); + elem.bernstein_partial_mu_basis(partial_mu, point); + #else + elem.partial_xi_basis(partial_xi, point); + elem.partial_eta_basis(partial_eta, point); + elem.partial_mu_basis(partial_mu, point); + #endif + // loop over the basis polynomials where there is one from each vertex for(int basis_id = 0; basis_id < num_ref_verts_in_elem_; basis_id++){ @@ -2978,8 +3002,12 @@ void ref_element::init(int p_order, int num_dim, HexN &elem){ auto cell_basis = CArray (num_ref_verts_in_elem_); // calculate the cell basis function values at the point, for each vertex - elem.basis(cell_basis, point); - + #ifdef bern + elem.bernstein_basis(cell_basis, point); + #else + elem.basis(cell_basis, point); + #endif + // save the basis values at the patch for each vertex for(int vert_rlid = 0; vert_rlid < num_ref_verts_in_elem_; vert_rlid++){ ref_cell_basis_(cell_rlid, vert_rlid) = cell_basis(vert_rlid); @@ -3002,10 +3030,16 @@ void ref_element::init(int p_order, int num_dim, HexN &elem){ } // calculate the partials at the cell location for each vertex - elem.partial_xi_basis(partial_xi, point); - elem.partial_eta_basis(partial_eta, point); - elem.partial_mu_basis(partial_mu, point); - + #ifdef bern + elem.bernstein_partial_xi_basis(partial_xi, point); + elem.bernstein_partial_eta_basis(partial_eta, point); + elem.bernstein_partial_mu_basis(partial_mu, point); + #else + elem.partial_xi_basis(partial_xi, point); + elem.partial_eta_basis(partial_eta, point); + elem.partial_mu_basis(partial_mu, point); + #endif + // loop over the basis polynomials where there is one from each vertex for(int basis_id = 0; basis_id < num_ref_verts_in_elem_; basis_id++){ @@ -4018,7 +4052,7 @@ void QuadN::lagrange_1D( } // end of Legrange_1D function -// Corners of Lagrange element for mapping +// Corners of Lagrange element for mappintein_basis void QuadN::corners ( ViewCArray &lag_nodes, // Nodes of Lagrange elements ViewCArray &lag_corner, // corner nodes of QuadN element @@ -5754,7 +5788,7 @@ representative linear element for visualization } // end looping over nodes != vert_i - // writing value to vectors for later use + // writing value to vectors for later use interp(vert_i) = interpolant; // Interpolant value at given point } // end loop over all nodes @@ -5771,10 +5805,10 @@ representative linear element for visualization real_t num_gradient = 0.0; // placeholder for numerator of the gradient real_t gradient = 0.0; - for(int vert_j = 0; vert_j < num_verts_1d_; vert_j++){ + //for(int vert_j = 0; vert_j < num_verts_1d_; vert_j++){ // std::cout<<"HexN 1D Vert "< &cheb_nodes_1D, // Chebyshev nodes + CArray &cheb_nodes_1D, // Chebyshev nodes (changed to CArray from ViewCArray --steven) const int &order); // Interpolation order @@ -1035,6 +1035,37 @@ class Hex8: public Element3D { CArray &partials, //derivative const real_t &x_point); // point of interest in element + void bernstein_basis( + CArray &basis, + CArray &point); + + void bernstein_build_nodal_gradient( + CArray &gradient); + + void bernstein_partial_xi_basis( + CArray &partial_xi, + CArray &point); + + void bernstein_partial_eta_basis( + CArray &partial_eta, + CArray &point); + + void bernstein_partial_mu_basis( + CArray &partial_mu, + CArray &point); + + void bernstein_basis_1D( + CArray &interp, + const real_t &x_point); + + void bernstein_derivative_1D( + CArray &partials, + const real_t &x_point); + + + + + void create_lobatto_nodes(int element_order); }; @@ -1153,7 +1184,7 @@ namespace elem_types Hex8 = 4, // 8 node lagrangian hexahedral element Hex20 = 5, // 20 node serendipity hexahedral element Hex32 = 6, // 32 node serendipity hexahedral element - HexN = 7 // N node lagrangian hexahedral element + HexN = 7 // N node lagrangian or bernstein hexahedral element // add tesseract element diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0817543d..5c211e27 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,3 +3,8 @@ add_subdirectory(average) # "VTK" example, demonstration of visualization using VTK/Paraview add_subdirectory(vtk) + +# "hexref" example, demo of MATAR-Kokkoified hexahedral reference element +if (MATAR_WITH_KOKKOS) + add_subdirectory(hexref) +endif() diff --git a/examples/hexref/CMakeLists.txt b/examples/hexref/CMakeLists.txt new file mode 100644 index 00000000..348556d4 --- /dev/null +++ b/examples/hexref/CMakeLists.txt @@ -0,0 +1,10 @@ +add_executable(hexref hexref.cpp) +target_compile_definitions( + hexref + PUBLIC + MATAR_WITH_KOKKOS + HAVE_KOKKOS=1 + HAVE_OPENMP=1 +) +target_include_directories(hexref PUBLIC ${ELEMENTS_KOKKOS_INCLUDES}) +target_link_libraries(hexref PUBLIC elements_kokkos) diff --git a/examples/hexref/hexref.cpp b/examples/hexref/hexref.cpp new file mode 100644 index 00000000..7c781e08 --- /dev/null +++ b/examples/hexref/hexref.cpp @@ -0,0 +1,30 @@ +#include "HexRef.h" + +#include "Kokkos_Core.hpp" + +#include + + +int main(int argc, char *argv[]) { + Kokkos::initialize(argc, argv); + int order_basis = 2; + BasisType type_basis = BasisType::LAGRANGE; + HexRef elem(order_basis, type_basis); + + // Print out the quadrature points pertaining the the cells (subelements) + for (int i = 0; i < elem.num_ref_cells_in_elem(); ++i) { + std::cout << elem.ref_cell_positions(i, 0) << " " + << elem.ref_cell_positions(i, 1) << " " + << elem.ref_cell_positions(i, 2) << " " + << std::endl; + } + + // Print out the basis function evaluations on the cell (subelements) + for (int i = 0; i < elem.num_ref_cells_in_elem(); ++i) { + for (int j = 0; j < elem.num_basis(); ++j) + std::cout << elem.ref_cell_basis(i, j) << " "; + std::cout << std::endl; + } + + return 0; +} diff --git a/examples/vtk/CMakeLists.txt b/examples/vtk/CMakeLists.txt index f8709766..3da48aa4 100644 --- a/examples/vtk/CMakeLists.txt +++ b/examples/vtk/CMakeLists.txt @@ -2,10 +2,10 @@ if (WITH_VTK AND VTK_FOUND) add_executable( plotting_jacobian_determinants - plotting_jacobian_determinants.cpp + plotting_jacobian_determinants.cpp ) target_link_libraries( plotting_jacobian_determinants - elements swage swage2vtk common + elements swage swage2vtk common ) endif() diff --git a/swage-kokkos/include/HexMesh.h b/swage-kokkos/include/HexMesh.h new file mode 100644 index 00000000..4bb5833d --- /dev/null +++ b/swage-kokkos/include/HexMesh.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +struct HexMesh { + enum { ND = 3 }; + + CArrayKokkos cpts; + + HexMesh() { + } + +}; diff --git a/swage-kokkos/include/vect2d.h b/swage-kokkos/include/vect2d.h new file mode 100644 index 00000000..721961bf --- /dev/null +++ b/swage-kokkos/include/vect2d.h @@ -0,0 +1,31 @@ +#pragma once + +template +struct Vector2D { + T x, y; + +public: + Vector2D(T _x=0, T _y=0) + : x(_x), y(_y) { } + +public: + T & operator[](const int idx) { + assert(idx<2); + return (&x)[idx]; + } + const T & operator[](const int idx) const { + assert(idx<2); + return (&x)[idx]; + } + T & operator()(const int idx) { + assert(idx<2); + return (&x)[idx]; + } + const T & operator()(const int idx) const { + assert(idx<2); + return (&x)[idx]; + } +}; + +typedef Vector2D vect2d; +typedef Vector2D vect2f; diff --git a/swage-kokkos/include/vect3d.h b/swage-kokkos/include/vect3d.h new file mode 100644 index 00000000..c601520a --- /dev/null +++ b/swage-kokkos/include/vect3d.h @@ -0,0 +1,59 @@ +#pragma once + +template +struct Vector3D { + T x, y, z; + +public: + Vector3D(T _x=0, T _y=0, T _z=0) + : x(_x), y(_y), z(_z) { } + +public: + T & operator[](const int idx) { + assert(idx<3); + return (&x)[idx]; + } + const T & operator[](const int idx) const { + assert(idx<3); + return (&x)[idx]; + } + T & operator()(const int idx) { + assert(idx<3); + return (&x)[idx]; + } + const T & operator()(const int idx) const { + assert(idx<3); + return (&x)[idx]; + } + +public: + template + Vector3D & operator=(const Vector3D & rhs) { + x = rhs.x; y = rhs.y; z = rhs.z; + return *this; + } + template + Vector3D & operator+=(const Vector3D & rhs) { + x += rhs.x; y += rhs.y; z += rhs.z; + return *this; + } + template + Vector3D & operator-=(const Vector3D & rhs) { + x -= rhs.x; y -= rhs.y; z -= rhs.z; + return *this; + } + template + Vector3D & operator*=(const S & rhs) { + x *= rhs; y *= rhs; z *= rhs; + return *this; + } + template + Vector3D & operator/=(const S & rhs) { + const T den(1.0/rhs); + x *= den; y *= den; z *= den; + return *this; + } +}; + +typedef Vector3D vect3d; +typedef Vector3D vect3f; diff --git a/swage-kokkos/include/vect4d.h b/swage-kokkos/include/vect4d.h new file mode 100644 index 00000000..be821e1a --- /dev/null +++ b/swage-kokkos/include/vect4d.h @@ -0,0 +1,32 @@ +#pragma once + +template +struct Vector4D { + T x, y, z, w; + +public: + Vector4D(T _x=0, T _y=0, T _z=0, T _w=0) + : x(_x), y(_y), z(_z), w(_w) { } + +public: + T & operator[](const int idx) { + assert(idx<4); + return (&x)[idx]; + } + const T & operator[](const int idx) const { + assert(idx<4); + return (&x)[idx]; + } + T & operator()(const int idx) { + assert(idx<4); + return (&x)[idx]; + } + const T & operator()(const int idx) const { + assert(idx<4); + return (&x)[idx]; + } +}; + + +typedef Vector4D vect4d; +typedef Vector4D vect4f; diff --git a/swage-kokkos/src/HexMesh.cpp b/swage-kokkos/src/HexMesh.cpp new file mode 100644 index 00000000..24fb0073 --- /dev/null +++ b/swage-kokkos/src/HexMesh.cpp @@ -0,0 +1,2 @@ +#include + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bedfa95e..399ebf49 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -76,3 +76,13 @@ if (ENABLE_BLAS_LAPACK) ${LAPACK_LIBRARIES} ) endif() + +#test Bernstein polynomials +add_executable( + test_bernstein_polynomials + test_bernstein_polynomials.cpp +) +target_link_libraries( + test_bernstein_polynomials + elements +) diff --git a/tests/test_bernstein_polynomials.cpp b/tests/test_bernstein_polynomials.cpp new file mode 100644 index 00000000..14c16cd5 --- /dev/null +++ b/tests/test_bernstein_polynomials.cpp @@ -0,0 +1,209 @@ +#include "bernstein_polynomials.h" +#include "point_distributions.h" + +#include +#include + +// Test Parameters +template +struct TestParams { + SizeType Np; + SizeType N; + + NumType X; + NumType *c; + + TestParams(SizeType order){ + Np = order; + N = Np + 1; + + // Generating coefficients for bernstein polynomial expansion + c = new NumType[N]; + for (SizeType i = 0; i &p){ + Real tol = 1e-15; + + bool pass = false; + + Real sum1 = 0.0; + for (SizeType i = 0; i < p.N; i++){ + Real Bi = bernstein::eval(p.N, i, p.X); + sum1 += p.c[i]*Bi; + }; + + Real sum2 = bernstein::eval_approx(p.N, p.c, p.X); + + Real rel_error = common::abs((sum1-sum2)/sum1); + + if (rel_error < tol ){ + pass = true; + } + else{ + std::cout << "Test 1, error: " << rel_error << std::endl; + } + + return pass; +}; + +// Test 2: Checking consistency of the first derivative of the polynomials with derivative of the Bernstein Approximation + +bool test2(TestParams &p) { + Real tol = 1e-15; + + bool pass = false; + Real sum1 = 0.0; + + for (SizeType i = 0; i < p.N; i++) { + Real dBi = bernstein::eval_der(p.N, i, p.X); + sum1 += p.c[i]*dBi; + } + + Real sum2 = bernstein::eval_der_approx(p.N, p.c, p.X); + + Real rel_error = common::abs((sum1 - sum2)/sum1); + + if (rel_error < tol) { + pass = true; + } + else { + std::cout << "Test 2, error: " << rel_error << std::endl; + } + + return pass; +} + +// Test 3: This test ensures the a property of the bernstein polynomials where the +// the sum of the nth degree polynomials from v equals 0 to n-1 is equal to 1. + +bool test3(TestParams &p){ + Real tol = 1e-15; + bool pass = false; + + // Establishes random number between -1 and 1 to use for each Bernstein test + Real Zl = -1.0; + Real Zr = 1.0; + + Real rand_X = Zl + (Zr - Zl)*Real(rand())/Real(RAND_MAX); + + // Sums bernstein polynomials at value rand_X + Real sum = 0.0; + for (SizeType i = 0; i < p.N; i++) { + sum += bernstein::eval(p.N, i, rand_X); + } + Real error = common::abs( sum - 1 ); + if (error< tol) { + pass = true; + } + else { + std::cout << "Test 3, error: " << error << std::endl; + } + + return pass; +} + +// Test 4: Checks for the positivity of the Bernstein polynomials generated by the code + +bool test4(TestParams &p){ + + bool pass = false; + SizeType NumTests = 1000; + for ( SizeType i = 0; i < NumTests + 1; i ++){ + + Real Zl = -1.0; + Real Zr = 1.0; + + Real rand_X = Zl + (Zr - Zl)*Real(rand())/Real(RAND_MAX); + for (SizeType j=0; j < p.N; j++){ + Real val = bernstein::eval(p.N, j, rand_X); + if (0 <= val){ + pass = true; + } + else { + std::cout << "Test 4 Error: At least one of the values tested is negative" << std::endl; + } + } + } + return pass; +} + +bool test5(TestParams &p){ + Real tol = 1e-15; + bool pass = false; + + Real Zl = -1.0; + Real Zr = 1.0; + + Real rand_X = Zl + (Zr - Zl)*Real(rand())/Real(RAND_MAX); + + //Sums bernstein polynomial derivatives at value rand_X + Real sum = 0.0; + for (SizeType i = 0; i < p.N; i++) { + sum += bernstein::eval_der(p.N, i, rand_X); + } + Real error = common::abs( sum ); + if (error< tol) { + pass = true; + } + else { + std::cout << "Test 3, error: " << error << std::endl; + } + return pass; +} + + int main(){ + std::cout.precision(15); + + std::cout << "TEST BERNSTEIN POLYNOMIALS\n" + <<"----------------------------" + << std::endl; + + // Initialize Random Seed + srand(time(NULL)); + + // Generate the Parameters + TestParams rp(8); + + // Run Test 1 + bool pass1 = test1(rp); + std::cout << "Test 1 " << (pass1 ? "PASSED" : "FAILED") << std::endl; + + // Run Test 2 + bool pass2 = test2(rp); + std::cout << "Test 2 " << (pass2 ? "PASSED" : "FAILED") << std::endl; + + // Run Test 3 + bool pass3 = test3(rp); + std::cout << "Test 3 " << (pass3 ? "PASSED" : "FAILED") << std::endl; + + // Run Test 4 + bool pass4 = test4(rp); + std::cout << "Test 4 " << (pass4 ? "PASSED" : "FAILED") << std::endl; + bool pass5 = test5(rp); + std::cout << "Test 5 " << (pass5 ? "PASSED" : "FAILED") << std::endl; + + std::cout << std::endl; + std::cout << "PASSED " + << int(pass1) + int(pass2) + int(pass3) + int(pass4) + int(pass5) + << "/5" << std::endl; + + return 0; + + } + + +