// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.

#ifndef MFEM_AMGX_SOLVER
#define MFEM_AMGX_SOLVER

#include "../config/config.hpp"

#ifdef MFEM_USE_AMGX

#include <amgx_c.h>

#ifdef MFEM_USE_MPI
#include <mpi.h>
#include "hypre.hpp"
#else
#include "operator.hpp"
#include "sparsemat.hpp"
#endif

namespace mfem
{

/** MFEM wrapper for NVIDIA's multigrid library, AmgX (github.com/NVIDIA/AMGX)

    AmgX requires building MFEM with CUDA and AmgX enabled. For distributed
    memory parallelism, MPI and Hypre (version 2.16.0+) are also required.
    Although CUDA is required for building, the AmgX solver is compatible with
    an MFEM CPU device configuration.

    The AmgXSolver class is designed to work as a solver or preconditioner for
    existing MFEM solvers. The AmgX solver class may be configured in one of
    three ways:

    Serial - Takes a SparseMatrix, solves on a single GPU, and assumes no MPI
    communication.

    Exclusive GPU - Takes a HypreParMatrix and assumes each MPI rank is paired
    with an NVIDIA GPU.

    MPI Teams - Takes a HypreParMatrix and enables flexibility between the
    number of MPI ranks and the number of GPUs. Specifically, MPI ranks are
    grouped with GPUs and a matrix consolidation step is taken so the MPI root
    of each team performs the necessary AmgX library calls. The solution is
    then broadcast to the appropriate ranks. This is particularly useful when
    configuring MFEM's device as CPU.

    This work is based on the AmgXWrapper of Chuang and Barba. Routines were
    adopted and modified for setting up MPI communicators.

    Examples 1/1p in the examples/amgx directory demonstrate configuring the
    wrapper as a solver and preconditioner, as well as configuring and running
    with exclusive GPU or MPI teams modes.

    This work is partially based on:

        Pi-Yueh Chuang and Lorena A. Barba (2017).
        AmgXWrapper: An interface between PETSc and the NVIDIA AmgX library.
        J. Open Source Software, 2(16):280, doi:10.21105/joss.00280

        See https://github.com/barbagroup/AmgXWrapper.
*/
class AmgXSolver : public Solver
{
public:

   /// Flags to configure AmgXSolver as a solver or preconditioner
   enum AMGX_MODE {SOLVER, PRECONDITIONER};

   /// Flag to check for convergence
   bool ConvergenceCheck;

   /** Flags to determine whether user solver settings are defined internally
       in the source code or will be read through an external JSON file. */
   enum CONFIG_SRC {INTERNAL, EXTERNAL, UNDEFINED};

   AmgXSolver();

   /** Configures AmgX with a default configuration based on the AmgX mode and
       verbosity. Assumes no MPI parallelism. */
   AmgXSolver(const AMGX_MODE amgxMode_, const bool verbose);

   /** Once the solver configuration has been established through either the
       ReadParameters method or the constructor, InitSerial will initialize
       the library. If configuring with the constructor, the constructor will
       make this call. */
   void InitSerial();
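   /* Illustrative usage sketch (not part of the class interface): the serial
      path configures the solver, initializes the library, and solves with a
      SparseMatrix on a single GPU. The matrix A and vectors B, X are assumed
      to be assembled by the caller.

        AmgXSolver amgx;
        amgx.DefaultParameters(AmgXSolver::SOLVER, true);
        amgx.InitSerial();
        amgx.SetOperator(A); // A is an mfem::SparseMatrix
        amgx.Mult(B, X);     // solve A*X = B
   */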
#ifdef MFEM_USE_MPI
   /** Configures AmgX with a default configuration based on the AmgX mode and
       verbosity. Pairs each MPI rank with one GPU. */
   AmgXSolver(const MPI_Comm &comm, const AMGX_MODE amgxMode_,
              const bool verbose);

   /** Configures AmgX with a default configuration based on the AmgX mode and
       verbosity. Creates MPI teams around GPUs to support more MPI ranks than
       GPUs. Consolidates linear solver data to avoid multiple ranks sharing
       GPUs. Requires specifying the number of devices in each compute node. */
   AmgXSolver(const MPI_Comm &comm, const int nDevs,
              const AMGX_MODE amgxMode_, const bool verbose);

   /** Once the solver configuration has been established, either through the
       constructor or the ReadParameters method, InitExclusiveGPU will
       initialize the library. If configuring with the constructor, the
       constructor will make this call. */
   void InitExclusiveGPU(const MPI_Comm &comm);

   /** Once the solver configuration has been established, either through the
       constructor or the ReadParameters method, InitMPITeams will initialize
       the library and create MPI teams based on the number of devices on each
       node (nDevs). If configuring with the constructor, the constructor will
       make this call. */
   void InitMPITeams(const MPI_Comm &comm, const int nDevs);
#endif

   /** Sets the Operator for the AmgX library, either an MFEM SparseMatrix or
       a HypreParMatrix. */
   virtual void SetOperator(const Operator &op);

   /** Replaces the matrix coefficients in the AmgX solver. */
   void UpdateOperator(const Operator &op);

   virtual void Mult(const Vector &b, Vector &x) const;

   int GetNumIterations();

   void ReadParameters(const std::string config, CONFIG_SRC source);

   /** @param [in] amgxMode_ AmgXSolver::PRECONDITIONER,
                             AmgXSolver::SOLVER.

       @param [in] verbose true, false. Specifies the level of verbosity.

       When configured as a preconditioner, the default configuration applies
       two iterations of an AMG V-cycle with AmgX's default smoother (block
       Jacobi). When configured as a solver, the preconditioned conjugate
       gradient method is used, with an AMG V-cycle and block Jacobi smoother
       as the preconditioner. */
   void DefaultParameters(const AMGX_MODE amgxMode_, const bool verbose);

   /// Add a check for convergence after applying Mult.
   void SetConvergenceCheck(bool setConvergenceCheck_=true);

   ~AmgXSolver();

   void Finalize();
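   /* Illustrative usage sketch (assumptions: "amgx.json" is a user-provided
      AmgX configuration file, and A is an mfem::HypreParMatrix assembled by
      the caller): the wrapper configured as a preconditioner for MFEM's
      CGSolver in exclusive GPU mode, one GPU per MPI rank.

        AmgXSolver amgx;
        amgx.ReadParameters("amgx.json", AmgXSolver::EXTERNAL);
        amgx.InitExclusiveGPU(MPI_COMM_WORLD);
        amgx.SetOperator(A);

        CGSolver cg(MPI_COMM_WORLD);
        cg.SetOperator(A);
        cg.SetPreconditioner(amgx);
        cg.Mult(B, X);
   */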
private:

   AMGX_MODE amgxMode;

   std::string amgx_config = "";

   CONFIG_SRC configSrc = UNDEFINED;

#ifdef MFEM_USE_MPI
   // Consolidates matrix diagonal and off-diagonal data and uploads the
   // matrix to AmgX.
   void SetMatrixMPIGPUExclusive(const HypreParMatrix &A,
                                 const Array<double> &loc_A,
                                 const Array<int> &loc_I,
                                 const Array<int64_t> &loc_J,
                                 const bool update_mat = false);

   // Consolidates matrix diagonal and off-diagonal data for all ranks in an
   // MPI team. The root rank of each MPI team holds the consolidated data and
   // sets the matrix.
   void SetMatrixMPITeams(const HypreParMatrix &A,
                          const Array<double> &loc_A,
                          const Array<int> &loc_I,
                          const Array<int64_t> &loc_J,
                          const bool update_mat = false);

   // The following methods consolidate array data to the root node in an MPI
   // team.
   void GatherArray(const Array<double> &inArr, Array<double> &outArr,
                    const int mpiTeamSz, const MPI_Comm &mpiTeam) const;

   void GatherArray(const Vector &inArr, Vector &outArr,
                    const int mpiTeamSz, const MPI_Comm &mpiTeam) const;

   void GatherArray(const Array<int> &inArr, Array<int> &outArr,
                    const int mpiTeamSz, const MPI_Comm &mpiTeam) const;

   void GatherArray(const Array<int64_t> &inArr, Array<int64_t> &outArr,
                    const int mpiTeamSz, const MPI_Comm &mpiTeam) const;

   // The following methods consolidate array data to the root node in an MPI
   // team as well as store array partitions and displacements.
   void GatherArray(const Vector &inArr, Vector &outArr,
                    const int mpiTeamSz, const MPI_Comm &mpiTeamComm,
                    Array<int> &Apart, Array<int> &Adisp) const;

   void ScatterArray(const Vector &inArr, Vector &outArr,
                     const int mpiTeamSz, const MPI_Comm &mpi_comm,
                     Array<int> &Apart, Array<int> &Adisp) const;

   void SetMatrix(const HypreParMatrix &A, const bool update_mat = false);
#endif

   void SetMatrix(const SparseMatrix &A, const bool update_mat = false);

   static int count;

   // Indicates whether this instance has been initialized.
   bool isInitialized = false;

#ifdef MFEM_USE_MPI
   // The name of the node that this MPI process belongs to.
   std::string nodeName;

   // Number of local GPU devices used by AmgX.
   int nDevs;

   // The ID of the corresponding GPU device used by this MPI process.
   int devID;

   // A flag indicating if this process will invoke AmgX.
   int gpuProc = MPI_UNDEFINED;

   // Communicator for all MPI ranks.
   MPI_Comm globalCpuWorld = MPI_COMM_NULL;

   // Communicator for ranks in the same node.
   MPI_Comm localCpuWorld;

   // Communicator for ranks sharing a device.
   MPI_Comm devWorld;

   // A communicator for MPI processes that will launch AmgX (root of
   // devWorld).
   MPI_Comm gpuWorld;

   // Global number of MPI procs + rank id.
   int globalSize;

   int myGlobalRank;

   // Total number of MPI procs in a node + rank id.
   int localSize;

   int myLocalRank;

   // Total number of MPI ranks sharing a device + rank id.
   int devWorldSize;

   int myDevWorldRank;

   // Total number of MPI procs calling AmgX + rank id.
   int gpuWorldSize;

   int myGpuWorldRank;
#endif

   // A parameter used by AmgX.
   int ring;

   // Sets the AmgX precision (currently only double is supported).
   AMGX_Mode precision_mode = AMGX_mode_dDDI;

   // AmgX config object.
   AMGX_config_handle cfg = nullptr;

   // AmgX matrix object.
   AMGX_matrix_handle AmgXA = nullptr;

   // AmgX vector object representing unknowns.
   AMGX_vector_handle AmgXP = nullptr;

   // AmgX vector object representing the RHS.
   AMGX_vector_handle AmgXRHS = nullptr;

   // AmgX solver object.
   AMGX_solver_handle solver = nullptr;

   // AmgX resource object.
   static AMGX_resources_handle rsrc;

   // Set the ID of the corresponding GPU used by this process.
   void SetDeviceIDs(const int nDevs);

   // Initialize all MPI communicators.
#ifdef MFEM_USE_MPI
   void InitMPIcomms(const MPI_Comm &comm, const int nDevs);
#endif

   void InitAmgX();

   // Row partition for the HypreParMatrix.
   int64_t mat_local_rows;

   std::string mpi_gpu_mode;
};

}

#endif // MFEM_USE_AMGX
#endif // MFEM_AMGX_SOLVER