Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scalapack usable with distarrays of btas (and other) Tiles. #241

Merged
merged 2 commits into from
Jan 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 46 additions & 15 deletions src/TiledArray/math/linalg/scalapack/block_cyclic.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@
#include <TiledArray/config.h>
#if TILEDARRAY_HAS_SCALAPACK

#include <TiledArray/conversions/make_array.h>
#include <TiledArray/dist_array.h>
#include <TiledArray/error.h>
#include <TiledArray/tensor.h>
#include <TiledArray/tiled_range.h>
#include <TiledArray/conversions/make_array.h>

#include <blacspp/grid.hpp>
#include <blacspp/information.hpp>
Expand All @@ -58,7 +58,10 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {
col_major_mat_t local_mat_; ///< Local block cyclic buffer
std::pair<size_t, size_t> dims_; ///< Dims of the matrix

void put_tile(const Tensor<T>& tile) {
template <typename Tile,
typename = std::enable_if_t<
TiledArray::detail::is_contiguous_tensor_v<Tile>>>
void put_tile(const Tile& tile) {
// Extract Tile information
const auto* lo = tile.range().lobound_data();
const auto* up = tile.range().upbound_data();
Expand Down Expand Up @@ -104,17 +107,33 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {

} else {
// Send the subblock to a remote rank for processing
Tensor<T> subblock(tile.block({i, j}, {i_last, j_last}));
world_base_t::send(owner(i, j), &BlockCyclicMatrix<T>::put_tile,
subblock);
Tensor<T> subblock;
if constexpr (TiledArray::detail::is_ta_tensor_v<Tile>)
subblock = tile.block({i, j}, {i_last, j_last});
else {
auto tile_blk_range = TiledArray::BlockRange(
TiledArray::detail::make_ta_range(tile.range()), {i, j},
{i_last, j_last});
using std::data;
auto tile_blk_view =
TiledArray::make_const_map(data(tile), tile_blk_range);
subblock = tile_blk_view;
}
world_base_t::send(
owner(i, j),
&BlockCyclicMatrix<T>::template put_tile<decltype(subblock)>,
subblock);
}

} // for (j)
} // for (i)

} // put_tile

Tensor<T> extract_submatrix(std::vector<size_t> lo, std::vector<size_t> up) {
template <typename Tile,
typename = std::enable_if_t<
TiledArray::detail::is_contiguous_tensor_v<Tile>>>
Tile extract_submatrix(std::vector<size_t> lo, std::vector<size_t> up) {
assert(bc_dist_.i_own(lo[0], lo[1]));

auto [i_st, j_st] = bc_dist_.local_indx(lo[0], lo[1]);
Expand All @@ -123,7 +142,7 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {
auto j_extent = up[1] - lo[1];

Range range(lo, up);
Tensor<T> tile(range);
Tile tile(range);

auto tile_map = eigen_map(tile);

Expand Down Expand Up @@ -172,7 +191,7 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {
array.trange().dim(1).extent(), MB, NB) {
TA_ASSERT(array.trange().rank() == 2);

for (auto it = array.begin(); it != array.end(); ++it) put_tile(*it);
for (auto it = array.begin(); it != array.end(); ++it) put_tile(it->get());
world_base_t::process_pending();
}

Expand All @@ -197,8 +216,9 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {

template <typename Array>
Array tensor_from_matrix(const TiledRange& trange) const {
auto construct_tile = [&](Tensor<T>& tile, const Range& range) {
tile = Tensor<T>(range);
using Tile = typename Array::value_type;
auto construct_tile = [&](Tile& tile, const Range& range) {
tile = Tile(range);

// Extract Tile information
const auto* lo = tile.range().lobound_data();
Expand Down Expand Up @@ -246,13 +266,24 @@ class BlockCyclicMatrix : public madness::WorldObject<BlockCyclicMatrix<T>> {
std::vector<size_t> lo{i, j};
std::vector<size_t> up{i_last, j_last};
madness::Future<Tensor<T>> remtile_fut = world_base_t::send(
owner(i, j), &BlockCyclicMatrix<T>::extract_submatrix, lo, up);

tile.block(lo, up) = remtile_fut.get();
owner(i, j),
&BlockCyclicMatrix<T>::template extract_submatrix<Tensor<T>>,
lo, up);

if constexpr (TiledArray::detail::is_ta_tensor_v<Tile>)
tile.block(lo, up) = remtile_fut.get();
else {
auto tile_blk_range = TiledArray::BlockRange(
TiledArray::detail::make_ta_range(tile.range()), lo, up);
using std::data;
auto tile_blk_view =
TiledArray::make_map(data(tile), tile_blk_range);
tile_blk_view = remtile_fut.get();
}
}
}

return tile.norm();
return norm(tile);
};

return make_array<Array>(world_base_t::get_world(), trange, construct_tile);
Expand Down Expand Up @@ -298,7 +329,7 @@ std::remove_cv_t<Array> block_cyclic_to_array(
return matrix.template tensor_from_matrix<std::remove_cv_t<Array>>(trange);
}

} // namespace TiledArray
} // namespace TiledArray::math::linalg::scalapack

#endif // TILEDARRAY_HAS_SCALAPACK
#endif // TILEDARRAY_MATH_LINALG_SCALAPACK_TO_BLOCKCYCLIC_H__INCLUDED
1 change: 1 addition & 0 deletions src/TiledArray/tensor/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ class Tensor {
template <typename T1,
typename std::enable_if<is_tensor<T1>::value>::type* = nullptr>
Tensor_& operator=(const T1& other) {
pimpl_ = std::make_shared<Impl>(detail::clone_range(other));
detail::inplace_tensor_op(
[](reference MADNESS_RESTRICT tr,
typename T1::const_reference MADNESS_RESTRICT t1) { tr = t1; },
Expand Down
93 changes: 59 additions & 34 deletions tests/linalg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ struct ReferenceFixture {
#endif
}

inline double make_ta_reference(TA::Tensor<double>& t,
TA::Range const& range) {
t = TA::Tensor<double>(range, 0.0);
template <typename Tile>
inline double make_ta_reference(Tile& t, TA::Range const& range) {
t = Tile(range, 0.0);
auto lo = range.lobound_data();
auto up = range.upbound_data();
for (auto m = lo[0]; m < up[0]; ++m) {
Expand All @@ -55,7 +55,7 @@ struct ReferenceFixture {
}
}

return t.norm();
return norm(t);
};

ReferenceFixture(int64_t N = 1000)
Expand Down Expand Up @@ -308,23 +308,35 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) {

auto trange = gen_trange(N, {static_cast<size_t>(NB)});

auto ref_ta = TA::make_array<TA::TSpArray<double>>(
*GlobalFixture::world, trange,
[this](TA::Tensor<double>& t, TA::Range const& range) -> double {
return this->make_ta_reference(t, range);
});
// test with TA and btas tile
using typelist_t =
std::tuple<TA::Tensor<double>, btas::Tensor<double, TA::Range>>;
typelist_t typevals;

GlobalFixture::world->gop.fence();
auto test_ta = scalapack::block_cyclic_to_array<TA::TSpArray<double>>(
ref_matrix, trange);
GlobalFixture::world->gop.fence();
auto test = [&](const auto& typeval_ref) {
using Tile = std::decay_t<decltype(typeval_ref)>;
using Array = TA::DistArray<Tile, TA::SparsePolicy>;

auto norm_diff =
(ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
auto ref_ta = TA::make_array<Array>(
*GlobalFixture::world, trange,
[this](Tile& t, TA::Range const& range) -> double {
return this->make_ta_reference(t, range);
});

BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
GlobalFixture::world->gop.fence();
auto test_ta = scalapack::block_cyclic_to_array<Array>(ref_matrix, trange);
GlobalFixture::world->gop.fence();

GlobalFixture::world->gop.fence();
auto norm_diff =
(ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();

BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());

GlobalFixture::world->gop.fence();
};

test(std::get<0>(typevals));
test(std::get<1>(typevals));
};

BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) {
Expand All @@ -337,29 +349,42 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) {

auto trange = gen_trange(N, {static_cast<size_t>(NB)});

auto ref_ta = TA::make_array<TA::TSpArray<double>>(
*GlobalFixture::world, trange,
[this](TA::Tensor<double>& t, TA::Range const& range) -> double {
return this->make_ta_reference(t, range);
});
// test with TA and btas tile
using typelist_t =
std::tuple<TA::Tensor<double>, btas::Tensor<double, TA::Range>>;
typelist_t typevals;

GlobalFixture::world->gop.fence();
auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
GlobalFixture::world->gop.fence();
auto test = [&](const auto& typeval_ref) {
using Tile = std::decay_t<decltype(typeval_ref)>;
using Array = TA::DistArray<Tile, TA::SparsePolicy>;

double local_norm_diff =
(test_matrix.local_mat() - ref_matrix.local_mat()).norm();
local_norm_diff *= local_norm_diff;
auto ref_ta = TA::make_array<Array>(
*GlobalFixture::world, trange,
[this](Tile& t, TA::Range const& range) -> double {
return this->make_ta_reference(t, range);
});

double norm_diff;
MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD);
GlobalFixture::world->gop.fence();
auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
GlobalFixture::world->gop.fence();

norm_diff = std::sqrt(norm_diff);
double local_norm_diff =
(test_matrix.local_mat() - ref_matrix.local_mat()).norm();
local_norm_diff *= local_norm_diff;

BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
double norm_diff;
MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD);

GlobalFixture::world->gop.fence();
norm_diff = std::sqrt(norm_diff);

BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());

GlobalFixture::world->gop.fence();
};

test(std::get<0>(typevals));
test(std::get<1>(typevals));
};

BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) {
Expand Down