Skip to content

Commit

Permalink
Hypre improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastiangrimberg committed Feb 8, 2024
1 parent e40c7c7 commit f6ea580
Show file tree
Hide file tree
Showing 14 changed files with 176 additions and 221 deletions.
71 changes: 27 additions & 44 deletions palace/fem/libceed/operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,15 @@ void Operator::AssembleDiagonal(Vector &diag) const
{
Ceed ceed;
CeedMemType mem;
CeedScalar *data;
MFEM_VERIFY(diag.Size() == height, "Invalid size for diagonal vector!");
diag = 0.0;
PalaceCeedCallBackend(CeedOperatorGetCeed(op[0], &ceed));
PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
if (!mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
{
data = diag.ReadWrite();
}
else
{
data = diag.HostReadWrite();
mem = CEED_MEM_HOST;
}
auto *diag_data = diag.ReadWrite(mem == CEED_MEM_DEVICE);

PalacePragmaOmp(parallel if (op.size() > 1))
{
Expand All @@ -94,7 +89,7 @@ void Operator::AssembleDiagonal(Vector &diag) const
MFEM_ASSERT(id < op.size() && op[id],
"Out of bounds access for thread number " << id << "!");
PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));
PalaceCeedCall(ceed, CeedVectorSetArray(v[id], mem, CEED_USE_POINTER, data));
PalaceCeedCall(ceed, CeedVectorSetArray(v[id], mem, CEED_USE_POINTER, diag_data));
PalaceCeedCall(
ceed, CeedOperatorLinearAssembleAddDiagonal(op[id], v[id], CEED_REQUEST_IMMEDIATE));
PalaceCeedCall(ceed, CeedVectorTakeArray(v[id], mem, nullptr));
Expand All @@ -110,21 +105,14 @@ inline void CeedAddMult(const std::vector<CeedOperator> &op,
{
Ceed ceed;
CeedMemType mem;
const CeedScalar *x_data;
CeedScalar *y_data;
PalaceCeedCallBackend(CeedOperatorGetCeed(op[0], &ceed));
PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
{
x_data = x.Read();
y_data = y.ReadWrite();
}
else
if (!mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
{
x_data = x.HostRead();
y_data = y.HostReadWrite();
mem = CEED_MEM_HOST;
}
const auto *x_data = x.Read(mem == CEED_MEM_DEVICE);
auto *y_data = y.ReadWrite(mem == CEED_MEM_DEVICE);

PalacePragmaOmp(parallel if (op.size() > 1))
{
Expand Down Expand Up @@ -346,57 +334,52 @@ std::unique_ptr<hypre::HypreCSRMatrix> OperatorCOOtoCSR(Ceed ceed, CeedInt m, Ce
Jmap[k + 1] += Jmap[k];
}

// Construct and fill the final CSR matrix. This is correctly a non-OpenMP loop when
// executed on CPU (OpenMP is handled in outer scope above).
// Construct and fill the final CSR matrix. On GPU, MFEM and Hypre share the same memory
// space. On CPU, the inner nested OpenMP loop (if enabled in MFEM) should be ignored.
auto mat = std::make_unique<hypre::HypreCSRMatrix>(m, n, nnz_new);
const bool use_dev = (mat->GetMemoryLocation() == HYPRE_MEMORY_DEVICE);
{
const auto *d_I_old = I.Read(use_dev);
const auto *d_I_old = I.Read();
auto *d_I = mat->GetI();
mfem::forall_switch(use_dev, m + 1,
[=] MFEM_HOST_DEVICE(int i) { d_I[i] = d_I_old[i]; });
mfem::forall(m + 1, [=] MFEM_HOST_DEVICE(int i) { d_I[i] = d_I_old[i]; });
}
{
const auto *d_J_old = J.Read(use_dev);
const auto *d_J_old = J.Read();
auto *d_J = mat->GetJ();
mfem::forall_switch(use_dev, nnz_new,
[=] MFEM_HOST_DEVICE(int k) { d_J[k] = d_J_old[k]; });
mfem::forall(nnz_new, [=] MFEM_HOST_DEVICE(int k) { d_J[k] = d_J_old[k]; });
}
{
auto FillValues = [&](const double *vals_array)
{
const auto *d_perm = perm.Read(use_dev);
const auto *d_Jmap = Jmap.Read(use_dev);
const auto *d_perm = perm.Read();
const auto *d_Jmap = Jmap.Read();
auto *d_A = mat->GetData();
if (set)
{
mfem::forall_switch(use_dev, nnz_new,
[=] MFEM_HOST_DEVICE(int k)
{ d_A[k] = vals_array[d_perm[d_Jmap[k]]]; });
mfem::forall(nnz_new, [=] MFEM_HOST_DEVICE(int k)
{ d_A[k] = vals_array[d_perm[d_Jmap[k]]]; });
}
else
{
mfem::forall_switch(use_dev, nnz_new,
[=] MFEM_HOST_DEVICE(int k)
{
double sum = 0.0;
for (int p = d_Jmap[k]; p < d_Jmap[k + 1]; p++)
{
sum += vals_array[d_perm[p]];
}
d_A[k] = sum;
});
mfem::forall(nnz_new,
[=] MFEM_HOST_DEVICE(int k)
{
double sum = 0.0;
for (int p = d_Jmap[k]; p < d_Jmap[k + 1]; p++)
{
sum += vals_array[d_perm[p]];
}
d_A[k] = sum;
});
}
};
Ceed ceed;
const CeedScalar *vals_array;
PalaceCeedCallBackend(CeedVectorGetCeed(vals, &ceed));
PalaceCeedCall(ceed, CeedVectorGetArrayRead(vals, mem, &vals_array));
if (use_dev && mem != CEED_MEM_DEVICE)
if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem != CEED_MEM_DEVICE)
{
// Copy values to device before filling.
Vector d_vals(nnz);
d_vals.UseDevice(true);
{
auto *d_vals_array = d_vals.HostWrite();
PalacePragmaOmp(parallel for schedule(static))
Expand Down
9 changes: 3 additions & 6 deletions palace/linalg/amg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,10 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print)
// Set additional BoomerAMG options.
int agg_levels = 1; // Number of aggressive coarsening levels
double theta = 0.5; // AMG strength parameter = 0.25 is 2D optimal (0.5-0.8 for 3D)
if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK))
{
HYPRE_MemoryLocation loc;
HYPRE_GetMemoryLocation(&loc);
if (loc == HYPRE_MEMORY_DEVICE) // Modify options for GPU-supported features
{
agg_levels = 0;
}
// Modify options for GPU-supported features.
agg_levels = 0;
}

SetAggressiveCoarsening(agg_levels);
Expand Down
15 changes: 6 additions & 9 deletions palace/linalg/ams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,16 +161,13 @@ void HypreAmsSolver::InitializeSolver()
int relax_type = 2; // 2 = l1-SSOR, 4 = trunc. l1-SSOR, 1 = l1-Jacobi, 16 = Chebyshev
double weight = 1.0;
double omega = 1.0;
if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK))
{
HYPRE_MemoryLocation loc;
HYPRE_GetMemoryLocation(&loc);
if (loc == HYPRE_MEMORY_DEVICE) // Modify options for GPU-supported features
{
coarsen_type = 8;
amg_agg_levels = 0;
amg_relax_type = 18;
relax_type = 1;
}
// Modify options for GPU-supported features.
coarsen_type = 8;
amg_agg_levels = 0;
amg_relax_type = 18;
relax_type = 1;
}

HYPRE_AMSSetSmoothingOptions(ams, relax_type, ams_smooth_it, weight, omega);
Expand Down
5 changes: 4 additions & 1 deletion palace/linalg/divfree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace
const auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l);
auto M_l = std::make_unique<ParOperator>(std::move(m_vec[l]), h1_fespace_l);
M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE);
if (l == h1_fespaces.GetNumLevels() - 1)
{
bdr_tdof_list_M = M_l->GetEssentialTrueDofs();
}
M_mg->AddOperator(std::move(M_l));
}
M = std::move(M_mg);
Expand All @@ -48,7 +52,6 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace
h1_fespaces.GetFinestFESpace(), false);
}
Grad = &h1_fespaces.GetFinestFESpace().GetDiscreteInterpolator();
bdr_tdof_list_M = &h1_bdr_tdof_lists.back();

// The system matrix for the projection is real and SPD.
auto amg = std::make_unique<MfemWrapperSolver<Operator>>(
Expand Down
9 changes: 3 additions & 6 deletions palace/linalg/hypre.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,20 @@ void HypreVector::Update(const Vector &x)
{
vec = hypre_SeqVectorCreate(N);
hypre_SeqVectorSetDataOwner(vec, 0);
hypre_VectorData(vec) = const_cast<double *>(
x.Read(hypre_VectorMemoryLocation(vec) == HYPRE_MEMORY_DEVICE));
hypre_VectorData(vec) = const_cast<double *>(x.Read());
hypre_SeqVectorInitialize(vec);
}
else
{
hypre_SeqVectorSetSize(vec, N);
hypre_VectorData(vec) = const_cast<double *>(
x.Read(hypre_VectorMemoryLocation(vec) == HYPRE_MEMORY_DEVICE));
hypre_VectorData(vec) = const_cast<double *>(x.Read());
}
}

void HypreCSRMatrix::AssembleDiagonal(Vector &diag) const
{
diag.SetSize(height);
hypre_CSRMatrixExtractDiagonal(
mat, diag.Write(hypre_CSRMatrixMemoryLocation(mat) == HYPRE_MEMORY_DEVICE), 0);
hypre_CSRMatrixExtractDiagonal(mat, diag.Write(), 0);
}

namespace
Expand Down
11 changes: 9 additions & 2 deletions palace/linalg/hypre.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@
namespace palace::hypre
{

// Helper function to initialize HYPRE and control use of GPU at runtime. This will call
// HYPRE_SetMemoryLocation and HYPRE_SetExecutionPolicy to match the mfem::Device
// configuration.
inline void Initialize()
{
mfem::Hypre::Init();
// HYPRE_SetSpGemmUseCusparse(1); // MFEM sets to zero, so leave as is for now
}

//
// Wrapper class for HYPRE's hypre_Vector, which can alias an mfem::Vector object for use
// with HYPRE.
Expand All @@ -25,7 +34,6 @@ class HypreVector
HypreVector(const Vector &x) : vec(nullptr) { Update(x); }
~HypreVector() { hypre_SeqVectorDestroy(vec); }

auto GetMemoryLocation() const { return hypre_VectorMemoryLocation(vec); }
auto Size() const { return hypre_VectorSize(vec); }

void Update(const Vector &x);
Expand Down Expand Up @@ -55,7 +63,6 @@ class HypreCSRMatrix : public palace::Operator
}
~HypreCSRMatrix() { hypre_CSRMatrixDestroy(mat); }

auto GetMemoryLocation() const { return hypre_CSRMatrixMemoryLocation(mat); }
auto NNZ() const { return hypre_CSRMatrixNumNonzeros(mat); }

const auto *GetI() const { return hypre_CSRMatrixI(mat); }
Expand Down
Loading

0 comments on commit f6ea580

Please sign in to comment.