From 95fc80b5ee3e776801231b95a7d1a5fea4c7e395 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 15 Sep 2024 11:16:34 -0400 Subject: [PATCH] btas <-> ta tensor conversions work for non-0-lobound --- src/TiledArray/conversions/btas.h | 248 ++++++++++++++++++++++++------ tests/CMakeLists.txt | 1 + tests/btas.cpp | 5 +- 3 files changed, 204 insertions(+), 50 deletions(-) diff --git a/src/TiledArray/conversions/btas.h b/src/TiledArray/conversions/btas.h index 28e5790e8f..ab07e97b53 100644 --- a/src/TiledArray/conversions/btas.h +++ b/src/TiledArray/conversions/btas.h @@ -36,6 +36,9 @@ #include #include +#include +#include + namespace TiledArray { // clang-format off @@ -49,11 +52,12 @@ namespace TiledArray { /// \tparam Storage_ The storage type of the source btas::Tensor object /// \tparam Tensor_ A tensor type (e.g., TiledArray::Tensor or btas::Tensor, /// optionally wrapped into TiledArray::Tile) -/// \param[in] src The source object; its subblock defined by the {lower,upper} -/// bounds \c {dst.lobound(),dst.upbound()} will be copied to \c dst +/// \param[in] src The source object; its subblock +/// `{dst.lobound(),dst.upbound()}` +/// will be copied to \c dst /// \param[out] dst The object that will contain the contents of the /// corresponding subblock of src -/// \throw TiledArray::Exception When the dimensions of \c src and \c dst do not +/// \throw TiledArray::Exception When the dimensions of \p src and \p dst do not /// match. // clang-format on template @@ -73,6 +77,57 @@ inline void btas_subtensor_to_tensor( dst_view = src_view; } +// clang-format off +/// Copy a block of a btas::Tensor into a TiledArray::Tensor + +/// A block of btas::Tensor \c src will be copied into TiledArray::Tensor \c +/// dst. The block dimensions will be determined by the dimensions of the range +/// of \c dst . 
+/// \tparam T The tensor element type +/// \tparam Range_ The range type of the source btas::Tensor object +/// \tparam Storage_ The storage type of the source btas::Tensor object +/// \tparam Tensor_ A tensor type (e.g., TiledArray::Tensor or btas::Tensor, +/// optionally wrapped into TiledArray::Tile) +/// \param[in] src The source object; its subblock +/// `{dst.lobound() + offset,dst.upbound() + offset}` +/// will be copied to \c dst +/// \param[out] dst The object that will contain the contents of the +/// corresponding subblock of src +/// \param[in] offset the offset to be applied to the coordinates of `dst.range()` to determine the block in \p src to be copied; this is needed if the DistArray that will contain \p dst will have a range whose lobound is different from `src.lobound()` +/// \throw TiledArray::Exception When the dimensions of \p src and \p dst do not +/// match. +// clang-format on +template < + typename T, typename Range_, typename Storage_, typename Tensor_, + typename IntegerRange, + typename = std::enable_if_t>> +inline void btas_subtensor_to_tensor( + const btas::Tensor& src, Tensor_& dst, + IntegerRange&& offset) { + TA_ASSERT(dst.range().rank() == src.range().rank()); + TA_ASSERT(ranges::size(offset) == src.range().rank()); + + const auto& src_range = src.range(); + const auto& dst_range = dst.range(); + auto src_blk_range = + TiledArray::BlockRange(detail::make_ta_range(src_range), + ranges::views::zip(dst_range.lobound(), offset) | + ranges::views::transform([](auto&& i_j) { + auto&& [i, j] = i_j; + return i + j; + }), + ranges::views::zip(dst_range.upbound(), offset) | + ranges::views::transform([](auto&& i_j) { + auto&& [i, j] = i_j; + return i + j; + })); + using std::data; + auto src_view = TiledArray::make_const_map(data(src), src_blk_range); + auto dst_view = TiledArray::make_map(data(dst), dst_range); + + dst_view = src_view; +} + // clang-format off /// Copy a TiledArray::Tensor into a block of a btas::Tensor @@ -86,8 +141,8 
@@ inline void btas_subtensor_to_tensor( /// \tparam Storage_ The storage type of the destination btas::Tensor object /// \param[in] src The source object whose contents will be copied into /// a subblock of \c dst -/// \param[out] dst The destination object; its subblock defined by the -/// {lower,upper} bounds \c {src.lobound(),src.upbound()} will be +/// \param[out] dst The destination object; its subblock +/// `{src.lobound(),src.upbound()}` will be /// overwritten with the content of \c src /// \throw TiledArray::Exception When the dimensions /// of \c src and \c dst do not match. @@ -109,6 +164,57 @@ inline void tensor_to_btas_subtensor(const Tensor_& src, dst_view = src_view; } +// clang-format off +/// Copy a TiledArray::Tensor into a block of a btas::Tensor + +/// TiledArray::Tensor \c src will be copied into a block of btas::Tensor +/// \c dst. The block dimensions will be determined by the dimensions of the range +/// of \c src . +/// \tparam Tensor_ A tensor type (e.g., TiledArray::Tensor or btas::Tensor, +/// optionally wrapped into TiledArray::Tile) +/// \tparam T The tensor element type +/// \tparam Range_ The range type of the destination btas::Tensor object +/// \tparam Storage_ The storage type of the destination btas::Tensor object +/// \param[in] src The source object whose contents will be copied into +/// a subblock of \c dst +/// \param[out] dst The destination object; its subblock +/// `{src.lobound()+offset,src.upbound()+offset}` will be +/// overwritten with the content of \c src +/// \param[in] offset the offset to be applied to the coordinates of `src.range()` to determine the block in \p dst to be overwritten; this is needed if the DistArray that contains \p src has a range whose lobound is different from `dst.lobound()` +/// \throw TiledArray::Exception When the dimensions +/// of \c src and \c dst do not match. 
+// clang-format on +template < + typename Tensor_, typename T, typename Range_, typename Storage_, + typename IntegerRange, + typename = std::enable_if_t>> +inline void tensor_to_btas_subtensor(const Tensor_& src, + btas::Tensor& dst, + IntegerRange&& offset) { + TA_ASSERT(dst.range().rank() == src.range().rank()); + TA_ASSERT(ranges::size(offset) == src.range().rank()); + + const auto& src_range = src.range(); + const auto& dst_range = dst.range(); + auto dst_blk_range = + TiledArray::BlockRange(detail::make_ta_range(dst_range), + ranges::views::zip(src_range.lobound(), offset) | + ranges::views::transform([](auto&& i_j) { + auto&& [i, j] = i_j; + return i + j; + }), + ranges::views::zip(src_range.upbound(), offset) | + ranges::views::transform([](auto&& i_j) { + auto&& [i, j] = i_j; + return i + j; + })); + using std::data; + auto src_view = TiledArray::make_const_map(data(src), src_range); + auto dst_view = TiledArray::make_map(data(dst), dst_blk_range); + + dst_view = src_view; +} + namespace detail { /// Task function for converting btas::Tensor subblock to a @@ -127,7 +233,13 @@ void counted_btas_subtensor_to_tensor(const BTAS_Tensor_* src, DistArray_* dst, const typename Range::index_type i, madness::AtomicInt* counter) { typename DistArray_::value_type tensor(dst->trange().make_tile_range(i)); - btas_subtensor_to_tensor(*src, tensor); + auto offset = ranges::views::zip(ranges::views::all(src->range().lobound()), + dst->trange().elements_range().lobound()) | + ranges::views::transform([](const auto& s_d) { + auto&& [s, d] = s_d; + return s - d; + }); + btas_subtensor_to_tensor(*src, tensor, offset); dst->set(i, tensor); (*counter)++; } @@ -137,12 +249,24 @@ void counted_btas_subtensor_to_tensor(const BTAS_Tensor_* src, DistArray_* dst, /// \tparam TA_Tensor_ a TiledArray::Tensor type /// \tparam BTAS_Tensor_ a btas::Tensor type /// \param src The source tensor -/// \param dst The destination tensor -/// \param counter The task counter -template -void 
counted_tensor_to_btas_subtensor(const TA_Tensor_& src, BTAS_Tensor_* dst, +/// \param src_array_lobound the lobound of the DistArray that contains \p src; +/// used to compute the offset from `src.range()` to the target block of \p dst +/// \param dst The destination tensor +/// \param counter The task counter +template < + typename TA_Tensor_, typename BTAS_Tensor_, typename IntegerRange, + typename = std::enable_if_t>> +void counted_tensor_to_btas_subtensor(const TA_Tensor_& src, + IntegerRange src_array_lobound, + BTAS_Tensor_* dst, madness::AtomicInt* counter) { - tensor_to_btas_subtensor(src, *dst); + auto offset = ranges::views::zip(ranges::views::all(dst->range().lobound()), + src_array_lobound) | + ranges::views::transform([](const auto& d_s) { + auto&& [d, s] = d_s; + return d - s; + }); + tensor_to_btas_subtensor(src, *dst, offset); (*counter)++; } @@ -267,41 +391,14 @@ DistArray_ btas_tensor_to_array( return array; } -/// Convert a TiledArray::DistArray object into a btas::Tensor object +namespace detail { -/// This function will copy the contents of \c src into a \c btas::Tensor -/// object. The copy operation is done in parallel, and this function will block -/// until all elements of \c src have been copied into the result array tiles. -/// The size of \c src.world().size() must be equal to 1 or \c src must be a -/// replicated TiledArray::DistArray. Usage: -/// \code -/// TiledArray::TArrayD -/// array(world, trange); -/// // Set tiles of array ... -/// -/// auto t = array_to_btas_tensor(array); -/// \endcode -/// \tparam Tile the tile type of \c src -/// \tparam Policy the policy type of \c src -/// \tparam Range_ the range type of the result (either, btas::RangeNd or -/// TiledArray::Range) -/// \tparam Storage_ the storage type of the result -/// \param[in] src The TiledArray::DistArray object whose contents -/// will be copied to the result. 
-/// \return A \c btas::Tensor object that is a copy of \c src -/// \throw TiledArray::Exception When world size is greater than -/// 1 and \c src is not replicated -/// \param[in] target_rank the rank on which to create the BTAS tensor -/// containing the data of \c src ; if \c target_rank=-1 then -/// create the BTAS tensor on every rank (this requires -/// that \c src.is_replicated()==true ) -/// \return BTAS tensor object containing the data of \c src , if my rank equals -/// \c target_rank or \c target_rank==-1 , -/// default-initialized BTAS tensor otherwise. +/// \sa TiledArray::array_to_btas_tensor() template > -btas::Tensor array_to_btas_tensor( - const TiledArray::DistArray& src, int target_rank = -1) { +btas::Tensor +array_to_btas_tensor_impl(const TiledArray::DistArray& src, + const Range_& result_range, int target_rank) { // Test preconditions if (target_rank == -1 && src.world().size() > 1 && !src.pmap()->is_replicated()) @@ -314,13 +411,11 @@ btas::Tensor array_to_btas_tensor( using result_type = btas::Tensor::element_type, Range_, Storage_>; - using result_range_type = typename result_type::range_type; // Construct the result if (target_rank == -1 || src.world().rank() == target_rank) { // if array is sparse must initialize to zero - result_type result( - result_range_type(src.trange().elements_range().extent()), 0.0); + result_type result(result_range, 0.0); // Spawn tasks to copy array tiles to btas::Tensor madness::AtomicInt counter; @@ -329,8 +424,12 @@ btas::Tensor array_to_btas_tensor( for (std::size_t i = 0; i < src.size(); ++i) { if (!src.is_zero(i)) { src.world().taskq.add( - &detail::counted_tensor_to_btas_subtensor, - src.find(i), &result, &counter); + &detail::counted_tensor_to_btas_subtensor< + Tile, result_type, + std::decay_t< + decltype(src.trange().elements_range().lobound())>>, + src.find(i), src.trange().elements_range().lobound(), &result, + &counter); ++n; } } @@ -343,6 +442,59 @@ btas::Tensor array_to_btas_tensor( return 
result_type{}; } +} // namespace detail + +/// Convert a TiledArray::DistArray object into a btas::Tensor object + +/// This function will copy the contents of \c src into a \c btas::Tensor +/// object. The copy operation is done in parallel, and this function will block +/// until all elements of \c src have been copied into the result array tiles. +/// The size of \c src.world().size() must be equal to 1 or \c src must be a +/// replicated TiledArray::DistArray. Usage: +/// \code +/// TiledArray::TArrayD +/// array(world, trange); +/// // Set tiles of array ... +/// +/// auto t = array_to_btas_tensor(array); +/// \endcode +/// \tparam Tile the tile type of \c src +/// \tparam Policy the policy type of \c src +/// \tparam Range_ the range type of the result (either, btas::RangeNd or +/// TiledArray::Range) +/// \tparam Storage_ the storage type of the result +/// \param[in] src The TiledArray::DistArray object whose contents +/// will be copied to the result. +/// \param[in] target_rank the rank on which to create the BTAS tensor +/// containing the data of \c src ; if \c target_rank=-1 then +/// create the BTAS tensor on every rank (this requires +/// that \c src.is_replicated()==true ) +/// \return BTAS tensor object containing the data of \c src , if my rank equals +/// \c target_rank or \c target_rank==-1 , +/// default-initialized BTAS tensor otherwise. +/// \warning The range of \c src is +/// not preserved, i.e. the lobound of the result is zero. Use the +/// variant of this function tagged with preserve_lobound_t to +/// preserve the range. 
+/// \throw TiledArray::Exception When world size is greater than +/// 1 and \c src is not replicated +template > +btas::Tensor array_to_btas_tensor( + const TiledArray::DistArray& src, int target_rank = -1) { + return detail::array_to_btas_tensor_impl( + src, Range_(src.trange().elements_range().extent()), target_rank); +} + +template > +btas::Tensor array_to_btas_tensor( + const TiledArray::DistArray& src, preserve_lobound_t, + int target_rank = -1) { + return detail::array_to_btas_tensor_impl(src, src.trange().elements_range(), + target_rank); +} + } // namespace TiledArray #endif // TILEDARRAY_CONVERSIONS_BTAS_H__INCLUDED diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 823e13bec8..85d30d7728 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -101,6 +101,7 @@ set(ta_test_src_files ta_test.cpp einsum.cpp linalg.cpp cp.cpp + btas.cpp ) if(CUDA_FOUND OR HIP_FOUND) diff --git a/tests/btas.cpp b/tests/btas.cpp index a31329a80d..2f4bc1b527 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -324,8 +324,9 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(dense_array_conversion, bTensor, tensor_types) { // make tiled range using trange1_t = TiledArray::TiledRange1; - TiledArray::TiledRange trange( - {trange1_t(0, 10, 20), trange1_t(0, 11, 22), trange1_t(0, 12, 24)}); + TiledArray::TiledRange trange({trange1_t(0, 10, 20), + trange1_t(0, 11, 22).inplace_shift(1), + trange1_t(0, 12, 24).inplace_shift(2)}); // convert to a replicated DistArray using T = typename bTensor::value_type;