From 84be590e07ea9ab4f7ea88302e678f6e6746d712 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 31 Aug 2024 07:47:55 -0400 Subject: [PATCH 01/62] TiledRange1: can construct using a range of tile hashmarks --- src/TiledArray/tiled_range1.h | 29 ++++++++++++++++++++++++----- tests/tiled_range1.cpp | 15 +++++++++++++++ 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index e25c8a5357..4824dec26e 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -98,19 +98,38 @@ class TiledRange1 { /// Construct a 1D tiled range. - /// This will construct a 1D tiled range with tile boundaries ("hashmarks") - /// {\p t0 , \p t_rest... } + /// This will construct a 1D tiled range from range {t0, t1, t2, ... tn} + /// specifying the tile boundaries (hashmarks). + /// The number of tile boundaries is n + 1, where n is the number of tiles. + /// Tiles are defined as [\p t0 , t1), [t1, t2), [t2, t3), ... + /// Tiles are indexed starting with 0. + /// \tparam Range The type of the range of tile boundaries + /// \param tile_boundaries The list of tile boundaries in order from smallest + /// to largest + /// \note validity of \p tile_boundaries is checked using + /// #TA_ASSERT() only if preprocessor macro \c NDEBUG is not defined + template >> + explicit TiledRange1(Range&& tile_boundaries) { + init_tiles_(tile_boundaries.begin(), tile_boundaries.end(), 0); + } + + /// Construct a 1D tiled range. + + /// This will construct a 1D tiled range from range {t0, t1, t2, ... tn} + /// specifying the tile boundaries (hashmarks). /// The number of tile boundaries is n + 1, where n is the number of tiles. /// Tiles are defined as [\p t0 , t1), [t1, t2), [t2, t3), ... /// Tiles are indexed starting with 0. 
/// \tparam Integer An integral type - /// \param list The list of tile boundaries in order from smallest to largest + /// \param tile_boundaries The list of tile boundaries in order from smallest + /// to largest /// \note validity of the {\p t0 , \p t_rest... } range is checked using /// #TA_ASSERT() only if preprocessor macro \c NDEBUG is not defined template >> - explicit TiledRange1(const std::initializer_list& list) { - init_tiles_(list.begin(), list.end(), 0); + explicit TiledRange1(const std::initializer_list& tile_boundaries) { + init_tiles_(tile_boundaries.begin(), tile_boundaries.end(), 0); } /// Copy assignment operator diff --git a/tests/tiled_range1.cpp b/tests/tiled_range1.cpp index b4aef7f51c..f01a9a208e 100644 --- a/tests/tiled_range1.cpp +++ b/tests/tiled_range1.cpp @@ -110,6 +110,21 @@ BOOST_AUTO_TEST_CASE(constructor) { } } + // check constructor using range of tile boundaries. + { + if (Range1Fixture::ntiles == 5) { + TiledRange1 r(a); + BOOST_CHECK_EQUAL(r.tiles_range().first, tiles.first); + BOOST_CHECK_EQUAL(r.tiles_range().second, tiles.second); + BOOST_CHECK_EQUAL(r.elements_range().first, elements.first); + BOOST_CHECK_EQUAL(r.elements_range().second, elements.second); + for (std::size_t i = 0; i < a.size() - 1; ++i) { + BOOST_CHECK_EQUAL(r.tile(i).first, a[i]); + BOOST_CHECK_EQUAL(r.tile(i).second, a[i + 1]); + } + } + } + // check construction with negative index values #ifdef TA_SIGNED_1INDEX_TYPE { From cf0e0ea0a351717c752546870b914917a1cbb33a Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 31 Aug 2024 08:04:34 -0400 Subject: [PATCH 02/62] <-> Eigen::{Vector,Matrix,Tensor} conversions can handle DistArrays with non-zero base Ranges --- src/TiledArray/conversions/eigen.h | 209 +++++++----- tests/eigen.cpp | 491 ++++++++++++++++------------- tests/range_fixture.h | 47 ++- 3 files changed, 443 insertions(+), 304 deletions(-) diff --git a/src/TiledArray/conversions/eigen.h b/src/TiledArray/conversions/eigen.h index 
816a8bfe24..3caeecc178 100644 --- a/src/TiledArray/conversions/eigen.h +++ b/src/TiledArray/conversions/eigen.h @@ -196,20 +196,26 @@ eigen_map(T& tensor) { /// Copy a block of an Eigen matrix into a tensor -/// A block of \c matrix will be copied into \c tensor. The block -/// dimensions will be determined by the dimensions of the tensor's range. +// clang-format off +/// A block of \c matrix will be copied into \c tensor. If `tensor.rank()==2` +/// the block is `[tensor.range().lobound()[0] - base_offsets[0], tensor.range().upbound()[0] - base_offsets[0]) x `[tensor.range().lobound()[1] - base_offsets[1], tensor.range().upbound()[1] - base_offsets[1])`, +/// else it is `[tensor.range().lobound()[0] - base_offsets[0], tensor.range().upbound()[0] - base_offsets[0])`. +/// /// \tparam T A tensor type, e.g. TiledArray::Tensor /// \tparam Derived The derived type of an Eigen matrix /// \param[in] matrix The object that will be assigned the content of \c tensor -/// \param[out] tensor The object that will be assigned the content of \c matrix +/// \param[out] tensor The object that will contain the block of \c matrix +/// \param[in] base_offsets The base offsets for the tensor range (should be lobound of the array that will contain tensor as a tile) /// \throw TiledArray::Exception When the dimensions of \c tensor are not equal /// to 1 or 2. /// \throw TiledArray::Exception When the range of \c tensor is outside the /// range of \c matrix . 
+// clang-format on template >* = nullptr> -inline void eigen_submatrix_to_tensor(const Eigen::MatrixBase& matrix, - T& tensor) { +inline void eigen_submatrix_to_tensor( + const Eigen::MatrixBase& matrix, T& tensor, + std::array base_offsets = {0, 0}) { [[maybe_unused]] typedef typename T::index1_type size_type; TA_ASSERT((tensor.range().rank() == 2u) || (tensor.range().rank() == 1u)); @@ -223,60 +229,71 @@ inline void eigen_submatrix_to_tensor(const Eigen::MatrixBase& matrix, if (tensor.range().rank() == 2u) { // Get tensor range data - const std::size_t tensor_lower_0 = tensor_lower[0]; - const std::size_t tensor_lower_1 = tensor_lower[1]; - [[maybe_unused]] const std::size_t tensor_upper_0 = tensor_upper[0]; - [[maybe_unused]] const std::size_t tensor_upper_1 = tensor_upper[1]; - const std::size_t tensor_extent_0 = tensor_extent[0]; - const std::size_t tensor_extent_1 = tensor_extent[1]; - - TA_ASSERT(tensor_upper_0 <= std::size_t(matrix.rows())); - TA_ASSERT(tensor_upper_1 <= std::size_t(matrix.cols())); + const size_type tensor_lower_0 = tensor_lower[0]; + const size_type tensor_lower_1 = tensor_lower[1]; + [[maybe_unused]] const size_type tensor_upper_0 = tensor_upper[0]; + [[maybe_unused]] const size_type tensor_upper_1 = tensor_upper[1]; + const size_type tensor_extent_0 = tensor_extent[0]; + const size_type tensor_extent_1 = tensor_extent[1]; + + TA_ASSERT(tensor_extent_0 <= size_type(matrix.rows())); + TA_ASSERT(tensor_extent_1 <= size_type(matrix.cols())); + TA_ASSERT(tensor_lower_0 >= base_offsets[0]); + TA_ASSERT(tensor_lower_1 >= base_offsets[1]); // Copy matrix eigen_map(tensor, tensor_extent_0, tensor_extent_1) = matrix.block( - tensor_lower_0, tensor_lower_1, tensor_extent_0, tensor_extent_1); + tensor_lower_0 - base_offsets[0], tensor_lower_1 - base_offsets[1], + tensor_extent_0, tensor_extent_1); } else { // Get tensor range data - const std::size_t tensor_lower_0 = tensor_lower[0]; - [[maybe_unused]] const std::size_t tensor_upper_0 = 
tensor_upper[0]; - const std::size_t tensor_extent_0 = tensor_extent[0]; + const size_type tensor_lower_0 = tensor_lower[0]; + [[maybe_unused]] const size_type tensor_upper_0 = tensor_upper[0]; + const size_type tensor_extent_0 = tensor_extent[0]; // Check that matrix is a vector. TA_ASSERT((matrix.rows() == 1) || (matrix.cols() == 1)); if (matrix.rows() == 1) { - TA_ASSERT(tensor_upper_0 <= std::size_t(matrix.cols())); + TA_ASSERT(tensor_extent_0 <= size_type(matrix.cols())); + TA_ASSERT(tensor_lower_0 >= base_offsets[0]); // Copy the row vector to tensor eigen_map(tensor, 1, tensor_extent_0) = - matrix.block(0, tensor_lower_0, 1, tensor_extent_0); + matrix.block(0, tensor_lower_0 - base_offsets[0], 1, tensor_extent_0); } else { - TA_ASSERT(tensor_upper_0 <= std::size_t(matrix.rows())); + TA_ASSERT(tensor_extent_0 <= size_type(matrix.rows())); + TA_ASSERT(tensor_lower_0 >= base_offsets[0]); // Copy the column vector to tensor eigen_map(tensor, tensor_extent_0, 1) = - matrix.block(tensor_lower_0, 0, tensor_extent_0, 1); + matrix.block(tensor_lower_0 - base_offsets[0], 0, tensor_extent_0, 1); } } } /// Copy the content of a tensor into an Eigen matrix block -/// The content of tensor will be copied into a block of matrix. The block -/// dimensions will be determined by the dimensions of the tensor's range. -/// \tparam T A tensor type, e.g. TiledArray::Tensor -/// \tparam Derived The derived type of an Eigen matrix -/// \param[in] tensor The object that will be copied to \c matrix -/// \param[out] matrix The object that will be assigned the content of \c tensor -/// \throw TiledArray::Exception When the dimensions of \c tensor are not equal -/// to 1 or 2. -/// \throw TiledArray::Exception When the range of \c tensor is outside the -/// range of \c matrix . +/// The content of tensor will be copied into a block of matrix. 
+/// If `tensor.rank()==2` +/// the block is `[tensor.range().lobound()[0] - base_offsets[0], +/// tensor.range().upbound()[0] - base_offsets[0]) x +/// `[tensor.range().lobound()[1] - base_offsets[1], tensor.range().upbound()[1] +/// - base_offsets[1])`, else it is `[tensor.range().lobound()[0] - +/// base_offsets[0], tensor.range().upbound()[0] - base_offsets[0])`. \tparam T +/// A tensor type, e.g. TiledArray::Tensor \tparam Derived The derived type of +/// an Eigen matrix \param[in] tensor The object that will be copied to \c +/// matrix \param[out] matrix The object that will be assigned the content of \c +/// tensor \param[in] base_offsets The base offsets for the tensor range (should +/// be lobound of the array that will contain tensor as a tile) \throw +/// TiledArray::Exception When the dimensions of \c tensor are not equal to 1 +/// or 2. \throw TiledArray::Exception When the range of \c tensor is outside +/// the range of \c matrix . template >* = nullptr> -inline void tensor_to_eigen_submatrix(const T& tensor, - Eigen::MatrixBase& matrix) { +inline void tensor_to_eigen_submatrix( + const T& tensor, Eigen::MatrixBase& matrix, + std::array base_offsets = {0, 0}) { [[maybe_unused]] typedef typename T::index1_type size_type; TA_ASSERT((tensor.range().rank() == 2u) || (tensor.range().rank() == 1u)); @@ -290,39 +307,44 @@ inline void tensor_to_eigen_submatrix(const T& tensor, if (tensor.range().rank() == 2) { // Get tensor range data - const std::size_t tensor_lower_0 = tensor_lower[0]; - const std::size_t tensor_lower_1 = tensor_lower[1]; - [[maybe_unused]] const std::size_t tensor_upper_0 = tensor_upper[0]; - [[maybe_unused]] const std::size_t tensor_upper_1 = tensor_upper[1]; - const std::size_t tensor_extent_0 = tensor_extent[0]; - const std::size_t tensor_extent_1 = tensor_extent[1]; - - TA_ASSERT(tensor_upper_0 <= std::size_t(matrix.rows())); - TA_ASSERT(tensor_upper_1 <= std::size_t(matrix.cols())); + const size_type tensor_lower_0 = tensor_lower[0]; 
+ const size_type tensor_lower_1 = tensor_lower[1]; + [[maybe_unused]] const size_type tensor_upper_0 = tensor_upper[0]; + [[maybe_unused]] const size_type tensor_upper_1 = tensor_upper[1]; + const size_type tensor_extent_0 = tensor_extent[0]; + const size_type tensor_extent_1 = tensor_extent[1]; + + TA_ASSERT(tensor_extent_0 <= size_type(matrix.rows())); + TA_ASSERT(tensor_extent_1 <= size_type(matrix.cols())); + TA_ASSERT(tensor_lower_0 >= base_offsets[0]); + TA_ASSERT(tensor_lower_1 >= base_offsets[1]); // Copy tensor into matrix - matrix.block(tensor_lower_0, tensor_lower_1, tensor_extent_0, + matrix.block(tensor_lower_0 - base_offsets[0], + tensor_lower_1 - base_offsets[1], tensor_extent_0, tensor_extent_1) = eigen_map(tensor, tensor_extent_0, tensor_extent_1); } else { // Get tensor range data - const std::size_t tensor_lower_0 = tensor_lower[0]; - [[maybe_unused]] const std::size_t tensor_upper_0 = tensor_upper[0]; - const std::size_t tensor_extent_0 = tensor_extent[0]; + const size_type tensor_lower_0 = tensor_lower[0]; + [[maybe_unused]] const size_type tensor_upper_0 = tensor_upper[0]; + const size_type tensor_extent_0 = tensor_extent[0]; TA_ASSERT((matrix.rows() == 1) || (matrix.cols() == 1)); if (matrix.rows() == 1) { - TA_ASSERT(tensor_upper_0 <= std::size_t(matrix.cols())); + TA_ASSERT(tensor_extent_0 <= size_type(matrix.cols())); + TA_ASSERT(tensor_lower_0 >= base_offsets[0]); // Copy tensor into row vector - matrix.block(0, tensor_lower_0, 1, tensor_extent_0) = + matrix.block(0, tensor_lower_0 - base_offsets[0], 1, tensor_extent_0) = eigen_map(tensor, 1, tensor_extent_0); } else { - TA_ASSERT(tensor_upper_0 <= std::size_t(matrix.rows())); + TA_ASSERT(tensor_extent_0 <= size_type(matrix.rows())); + TA_ASSERT(tensor_lower_0 >= base_offsets[0]); // Copy tensor into column vector - matrix.block(tensor_lower_0, 0, tensor_extent_0, 1) = + matrix.block(tensor_lower_0 - base_offsets[0], 0, tensor_extent_0, 1) = eigen_map(tensor, tensor_extent_0, 1); } } @@ 
-344,7 +366,12 @@ void counted_eigen_submatrix_to_tensor(const Eigen::MatrixBase* matrix, const typename A::ordinal_type i, madness::AtomicInt* counter) { typename A::value_type tensor(array->trange().make_tile_range(i)); - eigen_submatrix_to_tensor(*matrix, tensor); + // array lobound, in case not base-0 + const auto* range_lobound_data = + array->trange().elements_range().lobound_data(); + std::array array_lobound{ + {range_lobound_data[0], range_lobound_data[1]}}; + eigen_submatrix_to_tensor(*matrix, tensor, array_lobound); array->set(i, tensor); (*counter)++; } @@ -357,10 +384,11 @@ void counted_eigen_submatrix_to_tensor(const Eigen::MatrixBase* matrix, /// \param tensor The tensor to be copied /// \param counter The task counter template -void counted_tensor_to_eigen_submatrix(const T& tensor, - Eigen::MatrixBase* matrix, - madness::AtomicInt* counter) { - tensor_to_eigen_submatrix(tensor, *matrix); +void counted_tensor_to_eigen_submatrix( + const T& tensor, Eigen::MatrixBase* matrix, + std::array base_offsets, + madness::AtomicInt* counter) { + tensor_to_eigen_submatrix(tensor, *matrix, base_offsets); (*counter)++; } @@ -524,6 +552,12 @@ array_to_eigen(const DistArray& array) { EigenMatrix matrix = EigenMatrix::Zero(array_extent[0], (rank == 2 ? 
array_extent[1] : 1)); + // array lobound, in case not base-0 + const auto* range_lobound_data = + array.trange().elements_range().lobound_data(); + std::array array_lobound{ + {range_lobound_data[0], range_lobound_data[1]}}; + // Spawn tasks to copy array tiles to the Eigen matrix madness::AtomicInt counter; counter = 0; @@ -533,7 +567,7 @@ array_to_eigen(const DistArray& array) { array.world().taskq.add( &detail::counted_tensor_to_eigen_submatrix< EigenMatrix, typename DistArray::value_type>, - array.find(i), &matrix, &counter); + array.find(i), &matrix, array_lobound, &counter); ++n; } } @@ -565,6 +599,7 @@ array_to_eigen(const DistArray& array) { /// // Create a range for the new array object /// std::vector blocks; /// for(std::size_t i = 0ul; i <= 100ul; i += 10ul) +/// // N.B. can create non-0-base range, replace i -> i + base_offset /// blocks.push_back(i); /// std::array blocks2 = /// {{ TiledArray::TiledRange1(blocks.begin(), blocks.end()), @@ -634,6 +669,7 @@ inline A row_major_buffer_to_array( /// // Create a range for the new array object /// std::vector blocks; /// for(std::size_t i = 0ul; i <= 100ul; i += 10ul) +/// // N.B. can create non-0-base range, replace i -> i + base_offset /// blocks.push_back(i); /// std::array blocks2 = /// {{ TiledArray::TiledRange1(blocks.begin(), blocks.end()), @@ -705,11 +741,13 @@ inline A column_major_buffer_to_array( /// match. 
// clang-format on template + typename Tensor_, std::size_t NumIndices_Sz = NumIndices_> inline void eigen_subtensor_to_tensor( const Eigen::Tensor& src, - Tensor_& dst) { + Tensor_& dst, + std::array base_offsets = {}) { TA_ASSERT(dst.range().rank() == NumIndices_); + static_assert(NumIndices_Sz == NumIndices_); auto to_array = [](const auto& seq) { TA_ASSERT(seq.size() == NumIndices_); @@ -718,6 +756,13 @@ inline void eigen_subtensor_to_tensor( return result; }; + auto to_base0 = [&](const auto& arr) { + TA_ASSERT(arr.size() == NumIndices_); + std::array result; + for (int i = 0; i < NumIndices_; ++i) result[i] = arr[i] - base_offsets[i]; + return result; + }; + [[maybe_unused]] auto reverse_extent_indices = []() { std::array result; std::iota(result.rbegin(), result.rend(), 0); @@ -725,8 +770,8 @@ inline void eigen_subtensor_to_tensor( }; const auto& dst_range = dst.range(); - auto src_block = - src.slice(to_array(dst_range.lobound()), to_array(dst_range.extent())); + auto src_block = src.slice(to_base0(to_array(dst_range.lobound())), + to_array(dst_range.extent())); auto dst_eigen_map = Eigen::TensorMap< Eigen::Tensor>( dst.data(), to_array(dst_range.extent())); @@ -758,11 +803,13 @@ inline void eigen_subtensor_to_tensor( /// of \c src and \c dst do not match. 
// clang-format on template + typename IndexType_, std::size_t NumIndices_Sz = NumIndices_> inline void tensor_to_eigen_subtensor( const Tensor_& src, - Eigen::Tensor& dst) { + Eigen::Tensor& dst, + std::array base_offsets = {}) { TA_ASSERT(src.range().rank() == NumIndices_); + static_assert(NumIndices_Sz == NumIndices_); auto to_array = [](const auto& seq) { TA_ASSERT(seq.size() == NumIndices_); @@ -771,6 +818,13 @@ inline void tensor_to_eigen_subtensor( return result; }; + auto to_base0 = [&](const auto& arr) { + TA_ASSERT(arr.size() == NumIndices_); + std::array result; + for (int i = 0; i < NumIndices_; ++i) result[i] = arr[i] - base_offsets[i]; + return result; + }; + [[maybe_unused]] auto reverse_extent_indices = []() { std::array result; std::iota(result.rbegin(), result.rend(), 0); @@ -778,8 +832,8 @@ inline void tensor_to_eigen_subtensor( }; const auto& src_range = src.range(); - auto dst_block = - dst.slice(to_array(src_range.lobound()), to_array(src_range.extent())); + auto dst_block = dst.slice(to_base0(to_array(src_range.lobound())), + to_array(src_range.extent())); auto src_eigen_map = Eigen::TensorMap< Eigen::Tensor>( src.data(), to_array(src_range.extent())); @@ -809,7 +863,13 @@ void counted_eigen_subtensor_to_tensor(const Eigen_Tensor_* src, const typename Range::index_type i, madness::AtomicInt* counter) { typename DistArray_::value_type tensor(dst->trange().make_tile_range(i)); - eigen_subtensor_to_tensor(*src, tensor); + // array lobound, in case not base-0 + const auto* range_lobound_data = + dst->trange().elements_range().lobound_data(); + std::array array_lobound; + std::copy(range_lobound_data, range_lobound_data + dst->trange().rank(), + array_lobound.begin()); + eigen_subtensor_to_tensor(*src, tensor, array_lobound); dst->set(i, tensor); (*counter)++; } @@ -822,10 +882,11 @@ void counted_eigen_subtensor_to_tensor(const Eigen_Tensor_* src, /// \param dst The destination tensor /// \param counter The task counter template -void 
counted_tensor_to_eigen_subtensor(const TA_Tensor_& src, - Eigen_Tensor_* dst, - madness::AtomicInt* counter) { - tensor_to_eigen_subtensor(src, *dst); +void counted_tensor_to_eigen_subtensor( + const TA_Tensor_& src, Eigen_Tensor_* dst, + std::array base_offsets, + madness::AtomicInt* counter) { + tensor_to_eigen_subtensor(src, *dst, base_offsets); (*counter)++; } @@ -1004,6 +1065,12 @@ Tensor array_to_eigen_tensor(const TiledArray::DistArray& src, result_type result(src.trange().elements_range().extent()); result.setZero(); + const auto* range_lobound_data = + src.trange().elements_range().lobound_data(); + std::array array_lobound; + std::copy(range_lobound_data, range_lobound_data + src.trange().rank(), + array_lobound.begin()); + // Spawn tasks to copy array tiles to btas::Tensor madness::AtomicInt counter; counter = 0; @@ -1012,7 +1079,7 @@ Tensor array_to_eigen_tensor(const TiledArray::DistArray& src, if (!src.is_zero(i)) { src.world().taskq.add( &detail::counted_tensor_to_eigen_subtensor, - src.find(i), &result, &counter); + src.find(i), &result, array_lobound, &counter); ++n; } } diff --git a/tests/eigen.cpp b/tests/eigen.cpp index d577804417..11ca7088b1 100644 --- a/tests/eigen.cpp +++ b/tests/eigen.cpp @@ -29,9 +29,16 @@ struct EigenFixture : public TiledRangeFixture { : trange(dims.begin(), dims.begin() + 2), trange1(dims.begin(), dims.begin() + 1), trangeN(dims.begin(), dims.begin() + GlobalFixture::dim), + trange_base1(dims_base1.begin(), dims_base1.begin() + 2), + trange1_base1(dims_base1.begin(), dims_base1.begin() + 1), + trangeN_base1(dims_base1.begin(), + dims_base1.begin() + GlobalFixture::dim), array(*GlobalFixture::world, trange), array1(*GlobalFixture::world, trange1), arrayN(*GlobalFixture::world, trangeN), + array_base1(*GlobalFixture::world, trange_base1), + array1_base1(*GlobalFixture::world, trange1_base1), + arrayN_base1(*GlobalFixture::world, trangeN_base1), matrix(dims[0].elements_range().second, dims[1].elements_range().second), 
rmatrix(dims[0].elements_range().second, @@ -43,9 +50,15 @@ struct EigenFixture : public TiledRangeFixture { TiledRange trange; TiledRange trange1; TiledRange trangeN; + TiledRange trange_base1; // base-1 version of trange + TiledRange trange1_base1; // base-1 version of trange1 + TiledRange trangeN_base1; // base-1 version of trangeN TArrayI array; TArrayI array1; TArrayI arrayN; + TArrayI array_base1; // base-1 version of array + TArrayI array1_base1; // base-1 version of array1 + TArrayI arrayN_base1; // base-1 version of arrayN Eigen::MatrixXi matrix; EigenMatrixXi rmatrix; Eigen::VectorXi vector; @@ -172,15 +185,23 @@ BOOST_AUTO_TEST_CASE(matrix_to_array) { (array = eigen_to_array(*GlobalFixture::world, trange, matrix))); // Check that the data in array is equal to that in matrix - for (Range::const_iterator it = array.tiles_range().begin(); - it != array.tiles_range().end(); ++it) { - Future tile = array.find(*it); - for (Range::const_iterator tile_it = tile.get().range().begin(); - tile_it != tile.get().range().end(); ++tile_it) { - BOOST_CHECK_EQUAL(tile.get()[*tile_it], - matrix((*tile_it)[0], (*tile_it)[1])); + auto test = [&](const auto& array, auto base = 0) { + for (Range::const_iterator it = array.tiles_range().begin(); + it != array.tiles_range().end(); ++it) { + Future tile = array.find(*it); + for (Range::const_iterator tile_it = tile.get().range().begin(); + tile_it != tile.get().range().end(); ++tile_it) { + BOOST_CHECK_EQUAL(tile.get()[*tile_it], + matrix((*tile_it)[0] - base, (*tile_it)[1] - base)); + } } - } + }; + test(array, 0); + + // same with base-1 + BOOST_CHECK_NO_THROW((array_base1 = eigen_to_array( + *GlobalFixture::world, trange_base1, matrix))); + test(array_base1, 1); } BOOST_AUTO_TEST_CASE(vector_to_array) { @@ -193,14 +214,23 @@ BOOST_AUTO_TEST_CASE(vector_to_array) { trange1, vector))); // Check that the data in array matches the data in vector - for (Range::const_iterator it = array1.tiles_range().begin(); - it != 
array1.tiles_range().end(); ++it) { - Future tile = array1.find(*it); - for (Range::const_iterator tile_it = tile.get().range().begin(); - tile_it != tile.get().range().end(); ++tile_it) { - BOOST_CHECK_EQUAL(tile.get()[*tile_it], vector((*tile_it)[0])); + auto test = [&](const auto& array1, auto base = 0) { + for (Range::const_iterator it = array1.tiles_range().begin(); + it != array1.tiles_range().end(); ++it) { + Future tile = array1.find(*it); + for (Range::const_iterator tile_it = tile.get().range().begin(); + tile_it != tile.get().range().end(); ++tile_it) { + BOOST_CHECK_EQUAL(tile.get()[*tile_it], vector((*tile_it)[0] - base)); + } } - } + }; + + test(array1, 0); + + // same with base-1 + BOOST_CHECK_NO_THROW((array1_base1 = eigen_to_array( + *GlobalFixture::world, trange1_base1, vector))); + test(array1_base1, 1); } BOOST_AUTO_TEST_CASE(array_to_matrix) { @@ -208,168 +238,180 @@ BOOST_AUTO_TEST_CASE(array_to_matrix) { return array_to_eigen, DensePolicy, Eigen::RowMajor>(array); }; - if (GlobalFixture::world->size() == 1) { - // Fill the array with random data - GlobalFixture::world->srand(27); - for (Range::const_iterator it = array.tiles_range().begin(); - it != array.tiles_range().end(); ++it) { - TArrayI::value_type tile(array.trange().make_tile_range(*it)); - for (TArrayI::value_type::iterator tile_it = tile.begin(); - tile_it != tile.end(); ++tile_it) { - *tile_it = GlobalFixture::world->rand(); + for (auto base : {0, 1}) { + auto& arr = base == 1 ? 
array_base1 : array; + + if (GlobalFixture::world->size() == 1) { + // Fill the array with random data + GlobalFixture::world->srand(27); + for (Range::const_iterator it = arr.tiles_range().begin(); + it != arr.tiles_range().end(); ++it) { + TArrayI::value_type tile(arr.trange().make_tile_range(*it)); + for (TArrayI::value_type::iterator tile_it = tile.begin(); + tile_it != tile.end(); ++tile_it) { + *tile_it = GlobalFixture::world->rand(); + } + arr.set(*it, tile); } - array.set(*it, tile); - } - - // Convert the array to an Eigen matrices: column-major (matrix) and - // row-major (rmatrix) - BOOST_CHECK_NO_THROW(matrix = array_to_eigen(array)); - BOOST_CHECK_NO_THROW(rmatrix = a_to_e_rowmajor(array)); - // Check that the matrix dimensions are the same as the array - BOOST_CHECK_EQUAL(matrix.rows(), array.trange().elements_range().extent(0)); - BOOST_CHECK_EQUAL(matrix.cols(), array.trange().elements_range().extent(1)); - BOOST_CHECK_EQUAL(rmatrix.rows(), - array.trange().elements_range().extent(0)); - BOOST_CHECK_EQUAL(rmatrix.cols(), - array.trange().elements_range().extent(1)); - - // Check that the data in matrix matches the data in array - for (Range::const_iterator it = array.tiles_range().begin(); - it != array.tiles_range().end(); ++it) { - Future tile = array.find(*it); - for (Range::const_iterator tile_it = tile.get().range().begin(); - tile_it != tile.get().range().end(); ++tile_it) { - BOOST_CHECK_EQUAL(matrix((*tile_it)[0], (*tile_it)[1]), - tile.get()[*tile_it]); - BOOST_CHECK_EQUAL(rmatrix((*tile_it)[0], (*tile_it)[1]), - tile.get()[*tile_it]); + // Convert the array to an Eigen matrices: column-major (matrix) and + // row-major (rmatrix) + BOOST_CHECK_NO_THROW(matrix = array_to_eigen(arr)); + BOOST_CHECK_NO_THROW(rmatrix = a_to_e_rowmajor(arr)); + BOOST_CHECK_NO_THROW(matrix = array_to_eigen(arr)); + BOOST_CHECK_NO_THROW(rmatrix = a_to_e_rowmajor(arr)); + + // Check that the matrix dimensions are the same as the array + 
BOOST_CHECK_EQUAL(matrix.rows(), arr.trange().elements_range().extent(0)); + BOOST_CHECK_EQUAL(matrix.cols(), arr.trange().elements_range().extent(1)); + BOOST_CHECK_EQUAL(rmatrix.rows(), + arr.trange().elements_range().extent(0)); + BOOST_CHECK_EQUAL(rmatrix.cols(), + arr.trange().elements_range().extent(1)); + + // Check that the data in matrix matches the data in array + for (Range::const_iterator it = arr.tiles_range().begin(); + it != arr.tiles_range().end(); ++it) { + Future tile = arr.find(*it); + for (Range::const_iterator tile_it = tile.get().range().begin(); + tile_it != tile.get().range().end(); ++tile_it) { + BOOST_CHECK_EQUAL(matrix((*tile_it)[0] - base, (*tile_it)[1] - base), + tile.get()[*tile_it]); + BOOST_CHECK_EQUAL(rmatrix((*tile_it)[0] - base, (*tile_it)[1] - base), + tile.get()[*tile_it]); + } } - } - } else { - // Check that eigen_to_array throws when there is more than one node - BOOST_CHECK_THROW(array_to_eigen(array), TiledArray::Exception); - - // Fill local tiles with data - GlobalFixture::world->srand(27); - TArrayI::pmap_interface::const_iterator it = array.pmap()->begin(); - TArrayI::pmap_interface::const_iterator end = array.pmap()->end(); - for (; it != end; ++it) { - TArrayI::value_type tile(array.trange().make_tile_range(*it)); - for (TArrayI::value_type::iterator tile_it = tile.begin(); - tile_it != tile.end(); ++tile_it) { - *tile_it = GlobalFixture::world->rand(); + } else { + // Check that eigen_to_array throws when there is more than one node + BOOST_CHECK_THROW(array_to_eigen(arr), TiledArray::Exception); + + // Fill local tiles with data + GlobalFixture::world->srand(27); + TArrayI::pmap_interface::const_iterator it = arr.pmap()->begin(); + TArrayI::pmap_interface::const_iterator end = arr.pmap()->end(); + for (; it != end; ++it) { + TArrayI::value_type tile(arr.trange().make_tile_range(*it)); + for (TArrayI::value_type::iterator tile_it = tile.begin(); + tile_it != tile.end(); ++tile_it) { + *tile_it = 
GlobalFixture::world->rand(); + } + arr.set(*it, tile); } - array.set(*it, tile); - } - - // Distribute the data of array1 to all nodes - array.make_replicated(); - - BOOST_CHECK(array.pmap()->is_replicated()); - - // Convert the array to an Eigen matrix - BOOST_CHECK_NO_THROW(matrix = array_to_eigen(array)); - BOOST_CHECK_NO_THROW(rmatrix = a_to_e_rowmajor(array)); - - // Check that the matrix dimensions are the same as the array - BOOST_CHECK_EQUAL(matrix.rows(), array.trange().elements_range().extent(0)); - BOOST_CHECK_EQUAL(matrix.cols(), array.trange().elements_range().extent(1)); - BOOST_CHECK_EQUAL(rmatrix.rows(), - array.trange().elements_range().extent(0)); - BOOST_CHECK_EQUAL(rmatrix.cols(), - array.trange().elements_range().extent(1)); - // Check that the data in vector matches the data in array - for (Range::const_iterator it = array.tiles_range().begin(); - it != array.tiles_range().end(); ++it) { - BOOST_CHECK(array.is_local(*it)); - - Future tile = array.find(*it); - for (Range::const_iterator tile_it = tile.get().range().begin(); - tile_it != tile.get().range().end(); ++tile_it) { - BOOST_CHECK_EQUAL(matrix((*tile_it)[0], (*tile_it)[1]), - tile.get()[*tile_it]); - BOOST_CHECK_EQUAL(rmatrix((*tile_it)[0], (*tile_it)[1]), - tile.get()[*tile_it]); + // Distribute the data of array1 to all nodes + arr.make_replicated(); + + BOOST_CHECK(arr.pmap()->is_replicated()); + + // Convert the array to an Eigen matrix + BOOST_CHECK_NO_THROW(matrix = array_to_eigen(arr)); + BOOST_CHECK_NO_THROW(rmatrix = a_to_e_rowmajor(arr)); + + // Check that the matrix dimensions are the same as the array + BOOST_CHECK_EQUAL(matrix.rows(), arr.trange().elements_range().extent(0)); + BOOST_CHECK_EQUAL(matrix.cols(), arr.trange().elements_range().extent(1)); + BOOST_CHECK_EQUAL(rmatrix.rows(), + arr.trange().elements_range().extent(0)); + BOOST_CHECK_EQUAL(rmatrix.cols(), + arr.trange().elements_range().extent(1)); + + // Check that the data in vector matches the data in array + 
for (Range::const_iterator it = arr.tiles_range().begin(); + it != arr.tiles_range().end(); ++it) { + BOOST_CHECK(arr.is_local(*it)); + + Future tile = arr.find(*it); + for (Range::const_iterator tile_it = tile.get().range().begin(); + tile_it != tile.get().range().end(); ++tile_it) { + BOOST_CHECK_EQUAL(matrix((*tile_it)[0] - base, (*tile_it)[1] - base), + tile.get()[*tile_it]); + BOOST_CHECK_EQUAL(rmatrix((*tile_it)[0] - base, (*tile_it)[1] - base), + tile.get()[*tile_it]); + } } } - } + + } // base=0,1 } BOOST_AUTO_TEST_CASE(array_to_vector) { - if (GlobalFixture::world->size() == 1) { - // Fill the array with random data - GlobalFixture::world->srand(27); - for (Range::const_iterator it = array1.tiles_range().begin(); - it != array1.tiles_range().end(); ++it) { - TArrayI::value_type tile(array1.trange().make_tile_range(*it)); - for (TArrayI::value_type::iterator tile_it = tile.begin(); - tile_it != tile.end(); ++tile_it) { - *tile_it = GlobalFixture::world->rand(); + for (auto base : {0, 1}) { + auto& arr1 = base == 1 ? 
array1_base1 : array1; + + if (GlobalFixture::world->size() == 1) { + // Fill the array with random data + GlobalFixture::world->srand(27); + for (Range::const_iterator it = arr1.tiles_range().begin(); + it != arr1.tiles_range().end(); ++it) { + TArrayI::value_type tile(arr1.trange().make_tile_range(*it)); + for (TArrayI::value_type::iterator tile_it = tile.begin(); + tile_it != tile.end(); ++tile_it) { + *tile_it = GlobalFixture::world->rand(); + } + arr1.set(*it, tile); } - array1.set(*it, tile); - } - - // Convert the array to an Eigen vector - BOOST_CHECK_NO_THROW(vector = array_to_eigen(array1)); - - // Check that the matrix dimensions are the same as the array - BOOST_CHECK_EQUAL(vector.rows(), - array1.trange().elements_range().extent(0)); - BOOST_CHECK_EQUAL(vector.cols(), 1); - // Check that the data in vector matches the data in array - for (Range::const_iterator it = array1.tiles_range().begin(); - it != array1.tiles_range().end(); ++it) { - Future tile = array1.find(*it); - for (Range::const_iterator tile_it = tile.get().range().begin(); - tile_it != tile.get().range().end(); ++tile_it) { - BOOST_CHECK_EQUAL(vector((*tile_it)[0]), tile.get()[*tile_it]); + // Convert the array to an Eigen vector + BOOST_CHECK_NO_THROW(vector = array_to_eigen(arr1)); + + // Check that the matrix dimensions are the same as the array + BOOST_CHECK_EQUAL(vector.rows(), + arr1.trange().elements_range().extent(0)); + BOOST_CHECK_EQUAL(vector.cols(), 1); + + // Check that the data in vector matches the data in array + for (Range::const_iterator it = arr1.tiles_range().begin(); + it != arr1.tiles_range().end(); ++it) { + Future tile = arr1.find(*it); + for (Range::const_iterator tile_it = tile.get().range().begin(); + tile_it != tile.get().range().end(); ++tile_it) { + BOOST_CHECK_EQUAL(vector((*tile_it)[0] - base), tile.get()[*tile_it]); + } } - } - } else { - // Check that eigen_to_array throws when there is more than one node - BOOST_CHECK_THROW(array_to_eigen(array1), 
TiledArray::Exception); - - // Fill local tiles with data - GlobalFixture::world->srand(27); - TArrayI::pmap_interface::const_iterator it = array1.pmap()->begin(); - TArrayI::pmap_interface::const_iterator end = array1.pmap()->end(); - for (; it != end; ++it) { - TArrayI::value_type tile(array1.trange().make_tile_range(*it)); - for (TArrayI::value_type::iterator tile_it = tile.begin(); - tile_it != tile.end(); ++tile_it) { - *tile_it = GlobalFixture::world->rand(); + } else { + // Check that eigen_to_array throws when there is more than one node + BOOST_CHECK_THROW(array_to_eigen(arr1), TiledArray::Exception); + + // Fill local tiles with data + GlobalFixture::world->srand(27); + TArrayI::pmap_interface::const_iterator it = arr1.pmap()->begin(); + TArrayI::pmap_interface::const_iterator end = arr1.pmap()->end(); + for (; it != end; ++it) { + TArrayI::value_type tile(arr1.trange().make_tile_range(*it)); + for (TArrayI::value_type::iterator tile_it = tile.begin(); + tile_it != tile.end(); ++tile_it) { + *tile_it = GlobalFixture::world->rand(); + } + arr1.set(*it, tile); } - array1.set(*it, tile); - } - // Distribute the data of array1 to all nodes - array1.make_replicated(); + // Distribute the data of array1 to all nodes + arr1.make_replicated(); - BOOST_CHECK(array1.pmap()->is_replicated()); + BOOST_CHECK(arr1.pmap()->is_replicated()); - // Convert the array to an Eigen vector - BOOST_CHECK_NO_THROW(vector = array_to_eigen(array1)); + // Convert the array to an Eigen vector + BOOST_CHECK_NO_THROW(vector = array_to_eigen(arr1)); - // Check that the matrix dimensions are the same as the array - BOOST_CHECK_EQUAL(vector.rows(), - array1.trange().elements_range().extent(0)); - BOOST_CHECK_EQUAL(vector.cols(), 1); + // Check that the matrix dimensions are the same as the array + BOOST_CHECK_EQUAL(vector.rows(), + arr1.trange().elements_range().extent(0)); + BOOST_CHECK_EQUAL(vector.cols(), 1); - // Check that the data in vector matches the data in array - for 
(Range::const_iterator it = array1.tiles_range().begin(); - it != array1.tiles_range().end(); ++it) { - BOOST_CHECK(array1.is_local(*it)); + // Check that the data in vector matches the data in array + for (Range::const_iterator it = arr1.tiles_range().begin(); + it != arr1.tiles_range().end(); ++it) { + BOOST_CHECK(arr1.is_local(*it)); - Future tile = array1.find(*it); - for (Range::const_iterator tile_it = tile.get().range().begin(); - tile_it != tile.get().range().end(); ++tile_it) { - BOOST_CHECK_EQUAL(vector((*tile_it)[0]), tile.get()[*tile_it]); + Future tile = arr1.find(*it); + for (Range::const_iterator tile_it = tile.get().range().begin(); + tile_it != tile.get().range().end(); ++tile_it) { + BOOST_CHECK_EQUAL(vector((*tile_it)[0] - base), tile.get()[*tile_it]); + } } } - } + + } // base=0,1 } BOOST_AUTO_TEST_CASE(subtensor_to_tensor) { @@ -430,22 +472,26 @@ BOOST_AUTO_TEST_CASE(tensor_to_array) { BOOST_CHECK(eq() == true); } - // Copy matrix to array - BOOST_CHECK_NO_THROW((array = eigen_tensor_to_array( - *GlobalFixture::world, trangeN, tensor))); - - // Check that the data in array is equal to that in matrix - for (Range::const_iterator it = array.tiles_range().begin(); - it != array.tiles_range().end(); ++it) { - Future tile = array.find(*it); - for (Range::const_iterator tile_it = tile.get().range().begin(); - tile_it != tile.get().range().end(); ++tile_it) { - std::array idx; - auto& t_idx = *tile_it; - std::copy(t_idx.begin(), t_idx.end(), idx.begin()); - BOOST_CHECK_EQUAL(tile.get()[*tile_it], tensor(idx)); + for (auto base : {0, 1}) { + auto& tr = base == 1 ? trangeN_base1 : trangeN; + auto& arr = base == 1 ? 
arrayN_base1 : arrayN; + // Copy matrix to array + BOOST_CHECK_NO_THROW((arr = eigen_tensor_to_array( + *GlobalFixture::world, tr, tensor))); + + // Check that the data in array is equal to that in matrix + for (Range::const_iterator it = arr.tiles_range().begin(); + it != arr.tiles_range().end(); ++it) { + Future tile = arr.find(*it); + for (Range::const_iterator tile_it = tile.get().range().begin(); + tile_it != tile.get().range().end(); ++tile_it) { + auto& t_idx = *tile_it; + std::array idx; + for (auto d = 0; d != GlobalFixture::dim; ++d) idx[d] = t_idx[d] - base; + BOOST_CHECK_EQUAL(tile.get()[*tile_it], tensor(idx)); + } } - } + } // base } BOOST_AUTO_TEST_CASE(array_to_tensor) { @@ -462,57 +508,70 @@ BOOST_AUTO_TEST_CASE(array_to_tensor) { return result; }; - // Fill local tiles with data - GlobalFixture::world->srand(27); - TArrayI::pmap_interface::const_iterator it = arrayN.pmap()->begin(); - TArrayI::pmap_interface::const_iterator end = arrayN.pmap()->end(); - for (; it != end; ++it) { - TArrayI::value_type tile(arrayN.trange().make_tile_range(*it)); - for (TArrayI::value_type::iterator tile_it = tile.begin(); - tile_it != tile.end(); ++tile_it) { - *tile_it = GlobalFixture::world->rand(); + for (auto base : {0, 1}) { + auto& arr = base == 1 ? 
arrayN_base1 : arrayN; + + auto to_base0 = [&](const auto& arr) { + std::array result; + for (int i = 0; i < GlobalFixture::dim; ++i) result[i] = arr[i] - base; + return result; + }; + + // Fill local tiles with data + GlobalFixture::world->srand(27); + TArrayI::pmap_interface::const_iterator it = arr.pmap()->begin(); + TArrayI::pmap_interface::const_iterator end = arr.pmap()->end(); + for (; it != end; ++it) { + TArrayI::value_type tile(arr.trange().make_tile_range(*it)); + for (TArrayI::value_type::iterator tile_it = tile.begin(); + tile_it != tile.end(); ++tile_it) { + *tile_it = GlobalFixture::world->rand(); + } + arr.set(*it, tile); } - arrayN.set(*it, tile); - } - if (GlobalFixture::world->size() > 1) { - // Check that array_to_eigen_tensor throws when there is more than one node - BOOST_CHECK_THROW(array_to_eigen_tensor(arrayN), - TiledArray::Exception); - } + if (GlobalFixture::world->size() > 1) { + // Check that array_to_eigen_tensor throws when there is more than one + // node + BOOST_CHECK_THROW(array_to_eigen_tensor(arr), + TiledArray::Exception); + } - // Distribute the data of arrayN to all nodes - if (GlobalFixture::world->size() > 1) { - arrayN.make_replicated(); - BOOST_CHECK(arrayN.pmap()->is_replicated()); - } + // Distribute the data of arrayN to all nodes + if (GlobalFixture::world->size() > 1) { + arr.make_replicated(); + BOOST_CHECK(arr.pmap()->is_replicated()); + } + + // Convert the array to an Eigen matrix + BOOST_CHECK_NO_THROW(tensor = array_to_eigen_tensor(arr)); + BOOST_CHECK_NO_THROW(rtensor = a_to_e_rowmajor(arr)); + + // Check that the matrix dimensions are the same as the array + BOOST_CHECK_EQUAL_COLLECTIONS( + tensor.dimensions().begin(), tensor.dimensions().end(), + arr.trange().elements_range().extent().begin(), + arr.trange().elements_range().extent().end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + rtensor.dimensions().begin(), rtensor.dimensions().end(), + arr.trange().elements_range().extent().begin(), + 
arr.trange().elements_range().extent().end()); - // Convert the array to an Eigen matrix - BOOST_CHECK_NO_THROW(tensor = array_to_eigen_tensor(arrayN)); - BOOST_CHECK_NO_THROW(rtensor = a_to_e_rowmajor(arrayN)); - - // Check that the matrix dimensions are the same as the array - BOOST_CHECK_EQUAL_COLLECTIONS( - tensor.dimensions().begin(), tensor.dimensions().end(), - arrayN.trange().elements_range().extent().begin(), - arrayN.trange().elements_range().extent().end()); - BOOST_CHECK_EQUAL_COLLECTIONS( - rtensor.dimensions().begin(), rtensor.dimensions().end(), - arrayN.trange().elements_range().extent().begin(), - arrayN.trange().elements_range().extent().end()); - - // Check that the data in vector matches the data in array - for (Range::const_iterator it = arrayN.tiles_range().begin(); - it != arrayN.tiles_range().end(); ++it) { - BOOST_CHECK(arrayN.is_local(*it)); - - Future tile = arrayN.find(*it); - for (Range::const_iterator tile_it = tile.get().range().begin(); - tile_it != tile.get().range().end(); ++tile_it) { - BOOST_CHECK_EQUAL(tensor(to_array(*tile_it)), tile.get()[*tile_it]); - BOOST_CHECK_EQUAL(rtensor(to_array(*tile_it)), tile.get()[*tile_it]); + // Check that the data in vector matches the data in array + for (Range::const_iterator it = arr.tiles_range().begin(); + it != arr.tiles_range().end(); ++it) { + BOOST_CHECK(arr.is_local(*it)); + + Future tile = arr.find(*it); + for (Range::const_iterator tile_it = tile.get().range().begin(); + tile_it != tile.get().range().end(); ++tile_it) { + BOOST_CHECK_EQUAL(tensor(to_base0(to_array(*tile_it))), + tile.get()[*tile_it]); + BOOST_CHECK_EQUAL(rtensor(to_base0(to_array(*tile_it))), + tile.get()[*tile_it]); + } } - } + } // base=0,1 } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/range_fixture.h b/tests/range_fixture.h index 3eb9afd611..6b0fcd1604 100644 --- a/tests/range_fixture.h +++ b/tests/range_fixture.h @@ -65,37 +65,46 @@ struct RangeFixture { }; struct Range1Fixture { + using index1_type = 
Range1::index1_type; static const size_t ntiles = 5; Range1Fixture() - : a(init_tiling()), - tiles(0, a.size() - 1), - elements(a.front(), a.back()), - tr1(a.begin(), a.end()) {} + : tr1_hashmarks(make_hashmarks()), + a(tr1_hashmarks), + tiles(0, tr1_hashmarks.size() - 1), + elements(tr1_hashmarks.front(), tr1_hashmarks.back()), + tr1(tr1_hashmarks), + tr1_base1(make_hashmarks(1)) {} ~Range1Fixture() {} template - static std::array init_tiling() { - std::array result; - result[0] = 0u; + static std::array make_hashmarks(index1_type offset = 0) { + std::array result; + result[0] = offset; for (std::size_t i = 1; i < D; ++i) result[i] = result[i - 1] + GlobalFixture::primes[i - 1]; return result; } - const std::array a; - const TiledRange1::range_type tiles; - const TiledRange1::range_type elements; - TiledRange1 tr1; + const std::array tr1_hashmarks; + const std::array + a; // copy of tr1_hashmarks, to make legacy tests build + const TiledRange1::range_type tiles; // = tr1.tiles_range() + const TiledRange1::range_type elements; // = tr1.elements_range() + TiledRange1 tr1; // base-0 TiledRange1 std::array tile; + TiledRange1 tr1_base1; // base-1 TiledRange1 }; struct TiledRangeFixtureBase : public Range1Fixture { TiledRangeFixtureBase() { std::fill(dims.begin(), dims.end(), tr1); std::fill(extents.begin(), extents.end(), tr1.extent()); + std::fill(dims_base1.begin(), dims_base1.end(), tr1_base1); } - std::array dims; + std::array dims; // base-0 TiledRange1's + std::array + dims_base1; // base-1 version of dims std::array extents; }; // struct TiledRangeFixtureBase @@ -106,17 +115,21 @@ struct TiledRangeFixture : public RangeFixture, public TiledRangeFixtureBase { TiledRangeFixture() : tiles_range(TiledRangeFixture::index(GlobalFixture::dim, 0), TiledRangeFixture::index(GlobalFixture::dim, 5)), - elements_range(TiledRangeFixture::tile_index(GlobalFixture::dim, 0), - TiledRangeFixture::tile_index(GlobalFixture::dim, a[5])), - tr(dims.begin(), dims.end()) {} + 
elements_range(TiledRangeFixture::tile_index(GlobalFixture::dim, + tr1_hashmarks.front()), + TiledRangeFixture::tile_index(GlobalFixture::dim, + tr1_hashmarks.back())), + tr(dims.begin(), dims.end()), + tr_base1(dims_base1.begin(), dims_base1.end()) {} ~TiledRangeFixture() {} static tile_index fill_tile_index(TRangeN::range_type::index::value_type); const TRangeN::range_type tiles_range; - const TRangeN::range_type elements_range; - TRangeN tr; + const TRangeN::range_type elements_range; // elements range of tr + TRangeN tr; // base-0 TiledRangeN + TRangeN tr_base1; // base-1 version of tr }; #endif // TILEDARRAY_RANGE_FIXTURE_H__INCLUDED From 9129da9b5f5ff9104878819b1b6b0d9a81411e15 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 3 Sep 2024 23:33:05 -0400 Subject: [PATCH 03/62] [skip ci] typo --- src/TiledArray/conversions/concat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TiledArray/conversions/concat.h b/src/TiledArray/conversions/concat.h index cc55f91e17..dd35e09456 100644 --- a/src/TiledArray/conversions/concat.h +++ b/src/TiledArray/conversions/concat.h @@ -64,7 +64,7 @@ DistArray concat( using std::begin; using std::end; - index b(r), e(r); // updated for concatted modes only + index b(r), e(r); // updated for concatenated modes only std::fill(begin(b), end(b), 0); for (auto i = 0ul; i != arrays.size(); ++i) { auto& tr = arrays[i].trange(); From 959adb1bcafc9bef05b8f1eb05f9b21e3437d47a Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 3 Sep 2024 23:34:27 -0400 Subject: [PATCH 04/62] remove duplicate block copy in concat --- src/TiledArray/conversions/concat.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/TiledArray/conversions/concat.h b/src/TiledArray/conversions/concat.h index dd35e09456..e7b3e9da55 100644 --- a/src/TiledArray/conversions/concat.h +++ b/src/TiledArray/conversions/concat.h @@ -97,9 +97,6 @@ DistArray concat( result.make_tsrexpr(annot).block(tile_begin_end[i].first,
tile_begin_end[i].second) = arrays[i].make_tsrexpr(annot); - result.make_tsrexpr(annot).block(tile_begin_end[i].first, - tile_begin_end[i].second) = - arrays[i].make_tsrexpr(annot); } } result.world().gop.fence(); From 4ed437c0ee1525a730725bf4906bccea3c390c69 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 3 Sep 2024 23:35:10 -0400 Subject: [PATCH 05/62] introduced tile_ranges_match_trange(DistArray) for validating tile ranges against trange --- src/TiledArray/dist_array.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index c2645dd7ce..3bc9fe3c62 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -1779,6 +1779,22 @@ auto rank(const DistArray& a) { return a.trange().tiles_range().rank(); } +/// Checks if for every tile `i` its range matches the tile range produced by +/// `a.trange()` + +/// @return `a.get(i)->range() == a.trange().make_tile_range(i)` for every tile +/// `i` +template +bool tile_ranges_match_trange(const DistArray& a) { + auto end = a.end(); + for (auto it = a.begin(); it != end; ++it) { + if (it->is_local() && !a.is_zero(it.index())) + if ((*it).get().range() != a.trange().make_tile_range(it.index())) + return false; + } + return true; +} + /// /// \brief Get the total elements in the non-zero tiles of an array. 
/// For tensor-of-tensor tiles, the total is the sum of the number of From 7f687b306faad027608dc915b1d1840b789aeb64 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 3 Sep 2024 23:35:47 -0400 Subject: [PATCH 06/62] SizeArray is a viewable range --- src/TiledArray/size_array.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/size_array.h b/src/TiledArray/size_array.h index bd52139ce5..ef2ed1e121 100644 --- a/src/TiledArray/size_array.h +++ b/src/TiledArray/size_array.h @@ -26,6 +26,8 @@ #include #include +#include + namespace TiledArray { namespace detail { @@ -445,6 +447,20 @@ class SizeArray { }; // class SizeArray +} // namespace detail +} // namespace TiledArray + +namespace ranges { +template +inline constexpr bool enable_view> = true; +} // namespace ranges + +static_assert(ranges::range>); +static_assert( + ranges::viewable_range>); + +namespace TiledArray::detail { + template std::enable_if_t< is_sized_range_v> && @@ -473,7 +489,6 @@ inline std::ostream& operator<<(std::ostream& os, return os; } -} // namespace detail -} // namespace TiledArray +} // namespace TiledArray::detail #endif // TILEDARRAY_SIZE_ARRAY_H__INCLUDED From 65b8520945baebedfb90d2785bc42b3841d3b58c Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 3 Sep 2024 23:38:19 -0400 Subject: [PATCH 07/62] fixed assignment to block expression from an expression with nonzero base --- src/TiledArray/expressions/expr.h | 13 +++++++++++-- tests/expressions_fixture.h | 19 +++++++++++++++++++ tests/expressions_impl.h | 29 +++++++++++++++++++++++++++++ tests/range_fixture.h | 2 +- 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/TiledArray/expressions/expr.h b/src/TiledArray/expressions/expr.h index c3fdd6423b..8d52990eef 100644 --- a/src/TiledArray/expressions/expr.h +++ b/src/TiledArray/expressions/expr.h @@ -47,6 +47,9 @@ #include +#include +#include + namespace TiledArray::expressions { template @@ -509,8 +512,14 @@ 
class Expr { if (tsr.array().trange().tiles_range().volume() != 0) { // N.B. must deep copy TA_ASSERT(tsr.array().trange().tiles_range().includes(tsr.lower_bound())); - const container::svector shift = - tsr.array().trange().make_tile_range(tsr.lower_bound()).lobound(); + // N.B. this expression's range, + // dist_eval.trange().elements_range().lobound(), may not be zero! + const auto shift = + ranges::views::zip_with( + [](auto a, auto b) { return a - b; }, + tsr.array().trange().make_tile_range(tsr.lower_bound()).lobound(), + dist_eval.trange().elements_range().lobound()) | + ranges::to>(); std::shared_ptr shift_op = std::make_shared(shift_op_type(shift)); diff --git a/tests/expressions_fixture.h b/tests/expressions_fixture.h index 94c09a7449..7a7be4c9af 100644 --- a/tests/expressions_fixture.h +++ b/tests/expressions_fixture.h @@ -57,6 +57,8 @@ struct ExpressionsFixture : public TiledRangeFixture { ExpressionsFixture() : s_tr_1(make_random_sparseshape(tr)), s_tr_2(make_random_sparseshape(tr)), + s_tr_base1_1(make_random_sparseshape(tr_base1)), + s_tr_base1_2(make_random_sparseshape(tr_base1)), s_tr1_1(make_random_sparseshape(trange1)), s_tr1_2(make_random_sparseshape(trange1)), s_tr2(make_random_sparseshape(trange2)), @@ -65,6 +67,9 @@ struct ExpressionsFixture : public TiledRangeFixture { a(*GlobalFixture::world, tr, s_tr_1), b(*GlobalFixture::world, tr, s_tr_2), c(*GlobalFixture::world, tr, s_tr_2), + a_base1(*GlobalFixture::world, tr_base1, s_tr_base1_1), + b_base1(*GlobalFixture::world, tr_base1, s_tr_base1_2), + c_base1(*GlobalFixture::world, tr_base1, s_tr_base1_2), aC(*GlobalFixture::world, trangeC, s_trC), aC_f(*GlobalFixture::world, trangeC_f, s_trC_f), u(*GlobalFixture::world, trange1, s_tr1_1), @@ -72,12 +77,16 @@ struct ExpressionsFixture : public TiledRangeFixture { w(*GlobalFixture::world, trange2, s_tr2) { random_fill(a); random_fill(b); + random_fill(a_base1); + random_fill(b_base1); random_fill(u); random_fill(v); random_fill(aC); 
GlobalFixture::world->gop.fence(); a.truncate(); b.truncate(); + a_base1.truncate(); + b_base1.truncate(); u.truncate(); v.truncate(); } @@ -89,6 +98,9 @@ struct ExpressionsFixture : public TiledRangeFixture { : a(*GlobalFixture::world, tr), b(*GlobalFixture::world, tr), c(*GlobalFixture::world, tr), + a_base1(*GlobalFixture::world, tr_base1), + b_base1(*GlobalFixture::world, tr_base1), + c_base1(*GlobalFixture::world, tr_base1), u(*GlobalFixture::world, trange1), v(*GlobalFixture::world, trange1), w(*GlobalFixture::world, trange2), @@ -96,6 +108,8 @@ struct ExpressionsFixture : public TiledRangeFixture { aC_f(*GlobalFixture::world, trangeC_f) { random_fill(a); random_fill(b); + random_fill(a_base1); + random_fill(b_base1); random_fill(u); random_fill(v); random_fill(aC); @@ -229,6 +243,8 @@ struct ExpressionsFixture : public TiledRangeFixture { SparseShape s_tr_1; SparseShape s_tr_2; + SparseShape s_tr_base1_1; + SparseShape s_tr_base1_2; SparseShape s_tr1_1; SparseShape s_tr1_2; SparseShape s_tr2; @@ -237,6 +253,9 @@ struct ExpressionsFixture : public TiledRangeFixture { TArray a; TArray b; TArray c; + TArray a_base1; + TArray b_base1; + TArray c_base1; TArray u; TArray v; TArray w; diff --git a/tests/expressions_impl.h b/tests/expressions_impl.h index 268b118568..ca8027c03d 100644 --- a/tests/expressions_impl.h +++ b/tests/expressions_impl.h @@ -32,6 +32,7 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(tensor_factories, F, Fixtures, F) { auto& a = F::a; auto& c = F::c; auto& aC = F::aC; + auto& a_base1 = F::a_base1; const auto& ca = a; const std::array lobound{{3, 3, 3}}; @@ -66,6 +67,8 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(tensor_factories, F, Fixtures, F) { BOOST_CHECK_NO_THROW(c("a,b,c") = ca("a,b,c").block(iv(3, 3, 3), iv(5, 5, 5))); + BOOST_CHECK_NO_THROW(c("a,b,c") = a_base1("a,b,c").block(lobound, upbound)); + // make sure that c("abc") = a("abc") does a deep copy { BOOST_CHECK_NO_THROW(c("a,b,c") = a("a, b, c")); @@ -291,6 +294,7 @@ 
BOOST_FIXTURE_TEST_CASE_TEMPLATE(block, F, Fixtures, F) { auto& a = F::a; auto& b = F::b; auto& c = F::c; + auto& a_base1 = F::a_base1; BlockRange block_range(a.trange().tiles_range(), {3, 3, 3}, {5, 5, 5}); @@ -683,6 +687,31 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(assign_subblock_block, F, Fixtures, F) { } } +BOOST_FIXTURE_TEST_CASE_TEMPLATE(assign_subblock_block_base1, F, Fixtures, F) { + auto& a = F::a; + auto& b = F::b; + auto& c = F::c; + auto& a_base1 = F::a_base1; + auto& c_base1 = F::c_base1; + auto& ntiles = F::ntiles; + + c.fill_local(0.0); + c_base1.fill_local(0.0); + + BOOST_REQUIRE_NO_THROW(c("a,b,c").block({3, 3, 3}, {5, 5, 5}) = + a_base1("a,b,c").block({3, 3, 3}, {5, 5, 5})); + BOOST_REQUIRE(tile_ranges_match_trange(c)); + BOOST_REQUIRE_NO_THROW(c_base1("a,b,c").block({3, 3, 3}, {5, 5, 5}) = + a("a,b,c").block({3, 3, 3}, {5, 5, 5})); + BOOST_REQUIRE(tile_ranges_match_trange(c_base1)); + BOOST_REQUIRE_NO_THROW(c("a,b,c").block({0, 0, 0}, {ntiles, ntiles, ntiles}) = + a_base1("a,b,c")); + BOOST_REQUIRE(tile_ranges_match_trange(c)); + BOOST_REQUIRE_NO_THROW( + c_base1("a,b,c").block({0, 0, 0}, {ntiles, ntiles, ntiles}) = a("a,b,c")); + BOOST_REQUIRE(tile_ranges_match_trange(c_base1)); +} + BOOST_FIXTURE_TEST_CASE_TEMPLATE(assign_subblock_permute_block, F, Fixtures, F) { auto& a = F::a; diff --git a/tests/range_fixture.h b/tests/range_fixture.h index 6b0fcd1604..5a554eab7c 100644 --- a/tests/range_fixture.h +++ b/tests/range_fixture.h @@ -66,7 +66,7 @@ struct RangeFixture { struct Range1Fixture { using index1_type = Range1::index1_type; - static const size_t ntiles = 5; + static const inline size_t ntiles = 5; Range1Fixture() : tr1_hashmarks(make_hashmarks()), From d9eb67738424ad4adfbdf24f3ce2c88a2d4d171a Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 4 Sep 2024 11:24:53 -0400 Subject: [PATCH 08/62] [skip ci] to_container.hpp -> range/conversion.hpp to_container.hpp is deprecated --- src/TiledArray/expressions/expr.h | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/TiledArray/expressions/expr.h b/src/TiledArray/expressions/expr.h index 8d52990eef..f6d2ff1376 100644 --- a/src/TiledArray/expressions/expr.h +++ b/src/TiledArray/expressions/expr.h @@ -47,7 +47,7 @@ #include -#include +#include #include namespace TiledArray::expressions { From 6e5df0f44e79c93ef79a1356f3ba1480188356ba Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 4 Sep 2024 11:52:45 -0400 Subject: [PATCH 09/62] device::Env::initialize: use correct page sizes for Umpire allocations + do not allocate anything at the start --- src/TiledArray/external/device.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/TiledArray/external/device.h b/src/TiledArray/external/device.h index 44d9c77a68..38bcbbc745 100644 --- a/src/TiledArray/external/device.h +++ b/src/TiledArray/external/device.h @@ -503,8 +503,7 @@ class Env { /// \param page_size memory added to the pools supporting `this->um_allocator()`, `this->device_allocator()`, and `this->pinned_allocator()` in chunks of at least /// this size (bytes) [default=2^25] /// \param pinned_alloc_limit the maximum total amount of memory (in bytes) that - /// allocator returned by `this->pinned_allocator()` can allocate; - /// this allocator is not used by default [default=0] + /// allocator returned by `this->pinned_allocator()` can allocate [default=2^40] // clang-format on static void initialize(World& world = TiledArray::get_default_world(), const std::uint64_t page_size = (1ul << 25), @@ -563,8 +562,9 @@ class Env { // allocate all currently-free memory for UM pool auto um_dynamic_pool = rm.makeAllocator( - "UMDynamicPool", rm.getAllocator("UM"), mem_total_free.second, - pinned_alloc_limit); + "UMDynamicPool", rm.getAllocator("UM"), + /* first_minimum_pool_allocation_size = */ 0, + /* next_minimum_pool_allocation_size = */ page_size); // allocate zero memory for device pool auto dev_size_limited_alloc = @@ -573,8 +573,9 @@ class Env 
{ mem_total_free.first); auto dev_dynamic_pool = rm.makeAllocator( - "DEVICEDynamicPool", dev_size_limited_alloc, 0, - pinned_alloc_limit); + "DEVICEDynamicPool", dev_size_limited_alloc, + /* first_minimum_pool_allocation_size = */ 0, + /* next_minimum_pool_allocation_size = */ page_size); // allocate pinned_alloc_limit in pinned memory auto pinned_size_limited_alloc = @@ -584,7 +585,9 @@ class Env { auto pinned_dynamic_pool = rm.makeAllocator( "QuickPool_SizeLimited_PINNED", pinned_size_limited_alloc, - page_size, page_size, /* alignment */ TILEDARRAY_ALIGN_SIZE); + /* first_minimum_pool_allocation_size = */ 0, + /* next_minimum_pool_allocation_size = */ page_size, + /* alignment */ TILEDARRAY_ALIGN_SIZE); auto env = std::unique_ptr(new Env( world, num_visible_devices, compute_devices, num_streams_per_device, From bf89f5919675aa390377eccaf2cf932c2926f286 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 4 Sep 2024 16:13:56 -0400 Subject: [PATCH 10/62] [skip ci] svd dox fixup --- src/TiledArray/math/linalg/non-distributed/svd.h | 10 +++++----- src/TiledArray/math/linalg/scalapack/svd.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/TiledArray/math/linalg/non-distributed/svd.h b/src/TiledArray/math/linalg/non-distributed/svd.h index e0094ef906..3e3608240e 100644 --- a/src/TiledArray/math/linalg/non-distributed/svd.h +++ b/src/TiledArray/math/linalg/non-distributed/svd.h @@ -34,16 +34,16 @@ namespace TiledArray::math::linalg::non_distributed { /** - * @brief Compute the singular value decomposition (SVD) via ScaLAPACK + * @brief Compute the singular value decomposition (SVD) via LAPACK * * A(i,j) = S(k) U(i,k) conj(V(j,k)) * * Example Usage: * - * auto S = svd (A, ...) - * auto [S, U] = svd (A, ...) - * auto [S, VT] = svd(A, ...) - * auto [S, U, VT] = svd (A, ...) + * auto S = svd (A, ...) + * auto [S, U] = svd (A, ...) + * auto [S, VT] = svd(A, ...) + * auto [S, U, VT] = svd (A, ...) 
* * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * diff --git a/src/TiledArray/math/linalg/scalapack/svd.h b/src/TiledArray/math/linalg/scalapack/svd.h index dc68d374c5..aa9f459ba9 100644 --- a/src/TiledArray/math/linalg/scalapack/svd.h +++ b/src/TiledArray/math/linalg/scalapack/svd.h @@ -42,10 +42,10 @@ namespace TiledArray::math::linalg::scalapack { * * Example Usage: * - * auto S = svd (A, ...) - * auto [S, U] = svd (A, ...) - * auto [S, VT] = svd(A, ...) - * auto [S, U, VT] = svd (A, ...) + * auto S = svd (A, ...) + * auto [S, U] = svd (A, ...) + * auto [S, VT] = svd(A, ...) + * auto [S, U, VT] = svd (A, ...) * * @tparam Array Input array type, must be convertible to BlockCyclicMatrix * From e78e231741882563167859cf13db4146f3e8df1d Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 8 Sep 2024 15:50:17 -0400 Subject: [PATCH 11/62] TiledRange1{int x} constructs an empty element range at [x,x) --- src/TiledArray/tiled_range1.h | 13 ++++++++----- tests/tiled_range1.cpp | 21 ++++++++++++++++++++- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index 4824dec26e..102ea1bcc8 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -338,10 +338,11 @@ class TiledRange1 { /// Validates tile_boundaries template static void valid_(RandIter first, RandIter last) { - // Verify at least 2 elements are present if the vector is not empty. - TA_ASSERT((std::distance(first, last) >= 2) && - "TiledRange1 construction failed: You need at least 2 " - "elements in the tile boundary list."); + // Need at least 1 tile hashmark to position the element range + // (zero hashmarks is handled by the default ctor) + TA_ASSERT((std::distance(first, last) >= 1) && + "TiledRange1 construction failed: You need at least 1 " + "element in the tile boundary list."); // Verify the requirement that a0 <= a1 <= a2 <= ... 
for (; first != (last - 1); ++first) { TA_ASSERT( @@ -364,7 +365,9 @@ class TiledRange1 { valid_(first, last); #endif // NDEBUG range_.first = start_tile_index; - range_.second = start_tile_index + last - first - 1; + using std::distance; + range_.second = + start_tile_index + static_cast(distance(first, last)) - 1; elements_range_.first = *first; elements_range_.second = *(last - 1); for (; first != (last - 1); ++first) diff --git a/tests/tiled_range1.cpp b/tests/tiled_range1.cpp index f01a9a208e..056f752e33 100644 --- a/tests/tiled_range1.cpp +++ b/tests/tiled_range1.cpp @@ -63,6 +63,25 @@ BOOST_AUTO_TEST_CASE(constructor) { BOOST_CHECK_TA_ASSERT(r.tile(0), Exception); } + // check construction with single tile boundary (hence zero tiles) + { + { + BOOST_REQUIRE_NO_THROW(TiledRange1 r(0)); + TiledRange1 r(0); + BOOST_CHECK_EQUAL(r, TiledRange1{}); + } + { + BOOST_REQUIRE_NO_THROW(TiledRange1 r(1)); + TiledRange1 r(1); + BOOST_CHECK_NE(r, TiledRange1{}); + BOOST_CHECK_EQUAL(r.tiles_range().first, 0); + BOOST_CHECK_EQUAL(r.tiles_range().second, 0); + BOOST_CHECK_EQUAL(r.elements_range().first, 1); + BOOST_CHECK_EQUAL(r.elements_range().second, 1); + BOOST_CHECK_TA_ASSERT(r.tile(0), Exception); + } + } + // check construction with a iterators and the range info. 
{ BOOST_REQUIRE_NO_THROW(TiledRange1 r(a.begin(), a.end())); @@ -200,7 +219,7 @@ BOOST_AUTO_TEST_CASE(constructor) { BOOST_CHECK_TA_ASSERT(TiledRange1 r(boundaries.begin(), boundaries.end()), Exception); BOOST_CHECK_TA_ASSERT(TiledRange1 r(a.begin(), a.begin()), Exception); - BOOST_CHECK_TA_ASSERT(TiledRange1 r(a.begin(), a.begin() + 1), Exception); + BOOST_CHECK_NO_THROW(TiledRange1 r(a.begin(), a.begin() + 1)); boundaries.push_back(2); boundaries.push_back(0); BOOST_CHECK_TA_ASSERT(TiledRange1 r(boundaries.begin(), boundaries.end()), From 8185cc539d4f7450b0d680707740aa69894629fd Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 8 Sep 2024 23:35:03 -0400 Subject: [PATCH 12/62] make Range1 printable and shiftable --- src/TiledArray/range1.h | 34 +++++++++++++++++++++++++++++++++- tests/range1.cpp | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/range1.h b/src/TiledArray/range1.h index dbb4b05a67..8b185936d4 100644 --- a/src/TiledArray/range1.h +++ b/src/TiledArray/range1.h @@ -32,7 +32,8 @@ namespace TiledArray { /// an integer range `[first,second)` /// @note previously represented by std::pair, hence the design struct Range1 { - typedef TA_1INDEX_TYPE index1_type; + using index1_type = TA_1INDEX_TYPE; + using signed_index1_type = std::make_signed_t; index1_type first = 0; index1_type second = 0; //< N.B. 
second >= first @@ -164,6 +165,31 @@ struct Range1 { /// @} + /// shifts this Range1 + + /// @param[in] shift the shift to apply + /// @return reference to this + Range1& inplace_shift(signed_index1_type shift) { + if (shift == 0) return *this; + // ensure that it's safe to shift + TA_ASSERT(shift <= 0 || upbound() <= 0 || + (shift <= (std::numeric_limits::max() - upbound()))); + TA_ASSERT(shift >= 0 || lobound() >= 0 || + (std::abs(shift) <= + (lobound() - std::numeric_limits::min()))); + first += shift; + second += shift; + return *this; + } + + /// creates a shifted Range1 + + /// @param[in] shift the shift value + /// @return a copy of this shifted by @p shift + [[nodiscard]] Range1 shift(signed_index1_type shift) const { + return Range1(*this).inplace_shift(shift); + } + template >>::type* = nullptr> @@ -190,6 +216,12 @@ inline void swap(Range1& r0, Range1& r1) { // no throw r0.swap(r1); } +/// Range1 ostream operator +inline std::ostream& operator<<(std::ostream& out, const Range1& rng) { + out << "[ " << rng.first << ", " << rng.second << " )"; + return out; +} + /// Test that two Range1 objects are congruent /// This function tests that the sizes of the two Range1 objects coincide. 
diff --git a/tests/range1.cpp b/tests/range1.cpp index ba49515cd7..f8d05ed4c0 100644 --- a/tests/range1.cpp +++ b/tests/range1.cpp @@ -137,6 +137,43 @@ BOOST_AUTO_TEST_CASE(comparison) { BOOST_CHECK(r1 != r4); } +BOOST_AUTO_TEST_CASE(shift) { + Range1 r0; + Range1 r0_plus_1; + BOOST_REQUIRE_NO_THROW(r0_plus_1 = r0.shift(1)); + BOOST_CHECK_EQUAL(r0_plus_1, Range1(1, 1)); + BOOST_REQUIRE_NO_THROW(r0_plus_1.inplace_shift(-1)); + BOOST_CHECK_EQUAL(r0_plus_1, r0); + + using index1_type = Range1::index1_type; + BOOST_CHECK_TA_ASSERT((Range1{std::numeric_limits::max() - 1, + std::numeric_limits::max()} + .inplace_shift(1)), + Exception); + BOOST_CHECK_TA_ASSERT((Range1{std::numeric_limits::min(), + std::numeric_limits::min() + 1} + .inplace_shift(-1)), + Exception); + Range1 tmp; + BOOST_CHECK_TA_ASSERT( + tmp = (Range1{std::numeric_limits::max() - 1, + std::numeric_limits::max()} + .shift(1)), + Exception); + BOOST_CHECK_TA_ASSERT( + tmp = (Range1{std::numeric_limits::min(), + std::numeric_limits::min() + 1} + .shift(-1)), + Exception); + + Range1 r1{1, 3}; + Range1 r1_minus_1; + BOOST_REQUIRE_NO_THROW(r1_minus_1 = r1.shift(-1)); + BOOST_CHECK_EQUAL(r1_minus_1, Range1(0, 2)); + BOOST_REQUIRE_NO_THROW(r1_minus_1.inplace_shift(1)); + BOOST_CHECK_EQUAL(r1_minus_1, r1); +} + BOOST_AUTO_TEST_CASE(serialization) { Range1 r{1, 10}; From f10d61b9bf9c80a2f024b1939b4941366463d564 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 8 Sep 2024 17:02:24 -0400 Subject: [PATCH 13/62] make TiledRange1 shiftable --- src/TiledArray/tiled_range1.h | 48 +++++++++++++++++++++++++++++++++++ tests/tiled_range1.cpp | 30 ++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index 102ea1bcc8..9ea5769203 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -50,6 +50,7 @@ class TiledRange1 { public: using range_type = Range1; using index1_type = range_type::index1_type; + using 
signed_index1_type = range_type::signed_index1_type; using const_iterator = std::vector::const_iterator; /// Default constructor creates an empty range (tile and element ranges are @@ -305,6 +306,53 @@ class TiledRange1 { return make_uniform(Range1(0, range_extent), target_tile_size); } + /// shifts this TiledRange1 + + /// @param[in] shift the shift to apply + /// @return reference to this + TiledRange1& inplace_shift(signed_index1_type shift) { + if (shift == 0) return *this; + // ensure that it's safe to shift + TA_ASSERT(shift <= 0 || elements_range().upbound() <= 0 || + (shift <= (std::numeric_limits::max() - + elements_range().upbound()))); + TA_ASSERT(shift >= 0 || elements_range().lobound() >= 0 || + (std::abs(shift) <= (elements_range().lobound() - + std::numeric_limits::min()))); + elements_range_.inplace_shift(shift); + for (auto& tile : tiles_ranges_) { + tile.inplace_shift(shift); + } + elem2tile_.reset(); + return *this; + } + + /// creates a shifted TiledRange1 + + /// equivalent to (but more efficient than) `TiledRange1(*this).inplace_shift(shift)` + /// @param[in] shift the shift value + [[nodiscard]] TiledRange1 shift(signed_index1_type shift) const { + if (shift == 0) return *this; + // ensure that it's safe to shift + TA_ASSERT(shift <= 0 || elements_range().upbound() <= 0 || + (shift <= (std::numeric_limits::max() - + elements_range().upbound()))); + TA_ASSERT(shift >= 0 || elements_range().lobound() >= 0 || + (std::abs(shift) <= (elements_range().lobound() - + std::numeric_limits::min()))); + std::vector hashmarks; + hashmarks.reserve(tile_extent() + 1); + if (tiles_ranges_.empty()) + hashmarks.emplace_back(elements_range_.lobound() + shift); + else { + for (auto& t : tiles_ranges_) { + hashmarks.push_back(t.first + shift); + } + hashmarks.push_back(elements_range_.upbound() + shift); + } + return TiledRange1(hashmarks.begin(), hashmarks.end()); + } + /// swapper /// \param other the range with which the contents of this range will be diff --git
a/tests/tiled_range1.cpp b/tests/tiled_range1.cpp index 056f752e33..2fe958bd2d 100644 --- a/tests/tiled_range1.cpp +++ b/tests/tiled_range1.cpp @@ -360,4 +360,34 @@ BOOST_AUTO_TEST_CASE(make_uniform) { (TiledRange1{0, 10, 20, 30, 40, 50, 59})); } +BOOST_AUTO_TEST_CASE(shift) { + TiledRange1 r0; + TiledRange1 r0_plus_1; + BOOST_REQUIRE_NO_THROW(r0_plus_1 = r0.shift(1)); + BOOST_CHECK_EQUAL(r0_plus_1, TiledRange1(1)); + BOOST_REQUIRE_NO_THROW(r0_plus_1.inplace_shift(-1)); + BOOST_CHECK_EQUAL(r0_plus_1, r0); + + BOOST_CHECK_TA_ASSERT( + TiledRange1{std::numeric_limits::max()}.inplace_shift(1), + Exception); + BOOST_CHECK_TA_ASSERT( + TiledRange1{std::numeric_limits::min()}.inplace_shift(-1), + Exception); + TiledRange1 tmp; + BOOST_CHECK_TA_ASSERT( + tmp = TiledRange1{std::numeric_limits::max()}.shift(1), + Exception); + BOOST_CHECK_TA_ASSERT( + tmp = TiledRange1{std::numeric_limits::min()}.shift(-1), + Exception); + + TiledRange1 r1{1, 3, 7, 9}; + TiledRange1 r1_minus_1; + BOOST_REQUIRE_NO_THROW(r1_minus_1 = r1.shift(-1)); + BOOST_CHECK_EQUAL(r1_minus_1, TiledRange1(0, 2, 6, 8)); + BOOST_REQUIRE_NO_THROW(r1_minus_1.inplace_shift(1)); + BOOST_CHECK_EQUAL(r1_minus_1, r1); +} + BOOST_AUTO_TEST_SUITE_END() From ddce13e607f83504ab2878f8c57b89f725196b94 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 8 Sep 2024 23:37:24 -0400 Subject: [PATCH 14/62] TiledRange1 printer reimplemented in terms of Range1 printer --- src/TiledArray/tiled_range1.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index 9ea5769203..46c4b37adc 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -485,10 +485,8 @@ inline bool operator!=(const TiledRange1& r1, const TiledRange1& r2) { /// TiledRange1 ostream operator inline std::ostream& operator<<(std::ostream& out, const TiledRange1& rng) { - out << "( tiles = [ " << rng.tiles_range().first << ", " - << rng.tiles_range().second 
<< " ), elements = [ " - << rng.elements_range().first << ", " << rng.elements_range().second - << " ) )"; + out << "( tiles = " << rng.tiles_range() + << ", elements = " << rng.elements_range() << " )"; return out; } From e00554832f49fa06c51363e49c25e35c46b8ebd6 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 9 Sep 2024 00:00:28 -0400 Subject: [PATCH 15/62] [skip ci] dox++ --- src/TiledArray/range.h | 8 ++++---- src/TiledArray/tile.h | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h index 25e4852118..c3ce5aa7f7 100644 --- a/src/TiledArray/range.h +++ b/src/TiledArray/range.h @@ -949,7 +949,7 @@ class Range { return *this; } - /// Shift the lower and upper bound of this range + /// Shifts the lower and upper bounds of this range /// \tparam Index An integral range type /// \param bound_shift The shift to be applied to the range @@ -987,7 +987,7 @@ class Range { return *this; } - /// Shift the lower and upper bound of this range + /// Shifts the lower and upper bounds of this range /// \tparam Index An integral type /// \param bound_shift The shift to be applied to the range @@ -998,7 +998,7 @@ class Range { return inplace_shift>(bound_shift); } - /// Create a Range with shiften lower and upper bounds + /// Create a Range with shifted lower and upper bounds /// \tparam Index An integral range type /// \param bound_shift The shift to be applied to the range @@ -1011,7 +1011,7 @@ class Range { return result; } - /// Create a Range with shiften lower and upper bounds + /// Create a Range with shifted lower and upper bounds /// \tparam Index An integral type /// \param bound_shift The shift to be applied to the range diff --git a/src/TiledArray/tile.h b/src/TiledArray/tile.h index 7d568f7200..b8c62d95b8 100644 --- a/src/TiledArray/tile.h +++ b/src/TiledArray/tile.h @@ -39,19 +39,19 @@ namespace TiledArray { /// object to be used in TiledArray expressions, users must also define the /// 
following functions: /// \li \c add -/// \li \c add_to +/// \li \c add_to (in-place add) /// \li \c subt -/// \li \c subt_to +/// \li \c subt_to (in-place subt) /// \li \c mult -/// \li \c mult_to +/// \li \c mult_to (in-place mult) /// \li \c scale -/// \li \c scale_to +/// \li \c scale_to (in-place scale) /// \li \c gemm /// \li \c neg /// \li \c permute /// \li \c empty /// \li \c shift -/// \li \c shift_to +/// \li \c shift_to (in-place shift) /// \li \c trace /// \li \c sum /// \li \c product From ffd81511bc06704aaddc6ac5698fab1220cb48e6 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 9 Sep 2024 00:00:04 -0400 Subject: [PATCH 16/62] Range::shift is const --- src/TiledArray/range.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h index c3ce5aa7f7..1363d6b992 100644 --- a/src/TiledArray/range.h +++ b/src/TiledArray/range.h @@ -1005,7 +1005,7 @@ class Range { /// \return A shifted copy of this range template >> - Range_ shift(const Index& bound_shift) { + Range_ shift(const Index& bound_shift) const { Range_ result(*this); result.inplace_shift(bound_shift); return result; @@ -1018,7 +1018,7 @@ class Range { /// \return A shifted copy of this range template >> - Range_ shift(const std::initializer_list& bound_shift) { + Range_ shift(const std::initializer_list& bound_shift) const { Range_ result(*this); result.inplace_shift(bound_shift); return result; From cb9f08503787195dda91ff830f099a9524938498 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 9 Sep 2024 00:22:31 -0400 Subject: [PATCH 17/62] Range::shift is nodiscard --- src/TiledArray/range.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/range.h b/src/TiledArray/range.h index 1363d6b992..cdebd7ddfc 100644 --- a/src/TiledArray/range.h +++ b/src/TiledArray/range.h @@ -1005,7 +1005,7 @@ class Range { /// \return A shifted copy of this range template >> - Range_ shift(const Index& 
bound_shift) const { + [[nodiscard]] Range_ shift(const Index& bound_shift) const { Range_ result(*this); result.inplace_shift(bound_shift); return result; @@ -1018,7 +1018,8 @@ class Range { /// \return A shifted copy of this range template >> - Range_ shift(const std::initializer_list& bound_shift) const { + [[nodiscard]] Range_ shift( + const std::initializer_list& bound_shift) const { Range_ result(*this); result.inplace_shift(bound_shift); return result; From 57907cc6e0df335a129b35072d9828b68855c141 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 9 Sep 2024 00:23:18 -0400 Subject: [PATCH 18/62] TiledRange is shiftable --- src/TiledArray/tiled_range.h | 55 ++++++++++++++++++++++++++++++++++++ tests/tiled_range.cpp | 11 ++++++++ 2 files changed, 66 insertions(+) diff --git a/src/TiledArray/tiled_range.h b/src/TiledArray/tiled_range.h index 27e559da1c..bfcd4c86fc 100644 --- a/src/TiledArray/tiled_range.h +++ b/src/TiledArray/tiled_range.h @@ -324,6 +324,61 @@ class TiledRange { std::swap(ranges_, other.ranges_); } + /// Shifts the lower and upper bounds of this range + + /// \tparam Index An integral range type + /// \param bound_shift The shift to be applied to the range + /// \return A reference to this range + template >> + TiledRange_& inplace_shift(const Index& bound_shift) { + elements_range_.inplace_shift(bound_shift); + using std::begin; + auto bound_shift_it = begin(bound_shift); + for (std::size_t d = 0; d != rank(); ++d, ++bound_shift_it) { + ranges_[d].inplace_shift(*bound_shift_it); + } + return *this; + } + + /// Shifts the lower and upper bound of this range + + /// \tparam Index An integral type + /// \param bound_shift The shift to be applied to the range + /// \return A reference to this range + template >> + TiledRange_& inplace_shift(const std::initializer_list& bound_shift) { + return inplace_shift>(bound_shift); + } + + /// Create a TiledRange with shifted lower and upper bounds + + /// \tparam Index An integral range type + /// 
\param bound_shift The shift to be applied to the range + /// \return A shifted copy of this range + template >> + [[nodiscard]] TiledRange_ shift(const Index& bound_shift) const { + TiledRange_ result(*this); + result.inplace_shift(bound_shift); + return result; + } + + /// Create a TiledRange with shifted lower and upper bounds + + /// \tparam Index An integral type + /// \param bound_shift The shift to be applied to the range + /// \return A shifted copy of this range + template >> + [[nodiscard]] TiledRange_ shift( + const std::initializer_list& bound_shift) const { + TiledRange_ result(*this); + result.inplace_shift(bound_shift); + return result; + } + template >>::type* = nullptr> diff --git a/tests/tiled_range.cpp b/tests/tiled_range.cpp index 76702831a3..577b395927 100644 --- a/tests/tiled_range.cpp +++ b/tests/tiled_range.cpp @@ -155,6 +155,17 @@ BOOST_AUTO_TEST_CASE(permutation) { r1); // check that the permutation was assigned correctly. } +BOOST_AUTO_TEST_CASE(shift) { + TiledRange tr1 = tr; + const auto shift = std::vector(GlobalFixture::dim, 1); + BOOST_CHECK_NO_THROW(tr1.inplace_shift(shift)); + BOOST_CHECK_EQUAL(tr1.tiles_range(), tr.tiles_range()); + BOOST_CHECK_EQUAL(tr1.elements_range(), tr.elements_range().shift(shift)); + TiledRange tr1_copy; + BOOST_CHECK_NO_THROW(tr1_copy = tr.shift(shift)); + BOOST_CHECK_EQUAL(tr1, tr1_copy); +} + BOOST_AUTO_TEST_CASE(make_tiles_range) { tile_index start(GlobalFixture::dim); tile_index finish(GlobalFixture::dim); From 6e8624ac59d66204cf9e06143ce7c8ad1ffe7617 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 11 Sep 2024 15:53:25 -0400 Subject: [PATCH 19/62] introduced BlkTsrExpr::{{set_,}trange_lobound,preserve_lobound}() that allow to use block tensor expressions even with DistArrays that have non-zero lobound --- src/TiledArray/expressions/blk_tsr_engine.h | 54 +++++-- src/TiledArray/expressions/blk_tsr_expr.h | 36 +++++ src/TiledArray/expressions/expr.h | 11 ++ src/TiledArray/expressions/fwd.h | 13 
+- src/TiledArray/expressions/tsr_expr.h | 148 ++++++++++++++++++-- tests/expressions_impl.h | 52 +++++++ 6 files changed, 293 insertions(+), 21 deletions(-) diff --git a/src/TiledArray/expressions/blk_tsr_engine.h b/src/TiledArray/expressions/blk_tsr_engine.h index e85aac7925..9b6e750bb5 100644 --- a/src/TiledArray/expressions/blk_tsr_engine.h +++ b/src/TiledArray/expressions/blk_tsr_engine.h @@ -158,22 +158,29 @@ class BlkTsrEngineBase : public LeafEngine { using LeafEngine_::array_; container::svector - lower_bound_; ///< Lower bound of the tile block + lower_bound_; ///< Tile coordinates of the lower bound of the tile block + ///< in the host array container::svector - upper_bound_; ///< Upper bound of the tile block + upper_bound_; ///< Tile coordinates of the upper bound of the tile block + ///< in the host array + std::optional + trange_lobound_; ///< Lobound of the result trange, modulo permutation + ///< (i.e. referring to the modes of the host array) public: template BlkTsrEngineBase(const BlkTsrExpr& expr) : LeafEngine_(expr), lower_bound_(expr.lower_bound()), - upper_bound_(expr.upper_bound()) {} + upper_bound_(expr.upper_bound()), + trange_lobound_(expr.trange_lobound()) {} template BlkTsrEngineBase(const ScalBlkTsrExpr& expr) : LeafEngine_(expr), lower_bound_(expr.lower_bound()), - upper_bound_(expr.upper_bound()) {} + upper_bound_(expr.upper_bound()), + trange_lobound_(expr.trange_lobound()) {} /// Non-permuting tiled range factory function @@ -199,9 +206,12 @@ class BlkTsrEngineBase : public LeafEngine { if (lower_d != upper_d) { auto i = lower_d; const auto base_d = trange[d].tile(i).first; - trange1_data.emplace_back(0ul); + const auto trange1_lobound = + trange_lobound_ ? 
(*trange_lobound_)[d] : 0ul; + trange1_data.emplace_back(trange1_lobound); for (; i < upper_d; ++i) - trange1_data.emplace_back(trange[d].tile(i).second - base_d); + trange1_data.emplace_back(trange[d].tile(i).extent() + + trange1_data.back()); // Add the trange1 to the tiled range data trange_data.emplace_back(trange1_data.begin(), trange1_data.end()); trange1_data.resize(0ul); @@ -241,9 +251,12 @@ class BlkTsrEngineBase : public LeafEngine { // Copy, shift, and permute the tiling of the block auto i = lower_i; const auto base_d = trange[inv_perm_d].tile(i).first; - trange1_data.emplace_back(0ul); + const auto trange1_lobound = + trange_lobound_ ? (*trange_lobound_)[inv_perm_d] : 0ul; + trange1_data.emplace_back(trange1_lobound); for (; i < upper_i; ++i) - trange1_data.emplace_back(trange[inv_perm_d].tile(i).second - base_d); + trange1_data.emplace_back(trange[inv_perm_d].tile(i).extent() + + trange1_data.back()); // Add the trange1 to the tiled range data trange_data.emplace_back(trange1_data.begin(), trange1_data.end()); @@ -341,6 +354,7 @@ class BlkTsrEngine protected: // Import base class variables to this scope using BlkTsrEngineBase_::lower_bound_; + using BlkTsrEngineBase_::trange_lobound_; using BlkTsrEngineBase_::upper_bound_; using ExprEngine_::implicit_permute_inner_; using ExprEngine_::implicit_permute_outer_; @@ -391,8 +405,12 @@ class BlkTsrEngine const auto lower_d = lower[d]; const auto upper_d = upper[d]; if (lower_d != upper_d) { + // element lobound of the block in the host const auto base_d = trange[d].tile(lower_d).first; - range_shift.emplace_back(-base_d); + // element lobound of the target of this expression + const auto target_base_d = + trange_lobound_ ? 
(*trange_lobound_)[d] : 0ul; + range_shift.emplace_back(target_base_d - base_d); } else { range_shift.emplace_back(0l); } @@ -427,8 +445,11 @@ class BlkTsrEngine const auto lower_d = lower[d]; const auto upper_d = upper[d]; if (lower_d != upper_d) { + // element lobound of the block in the host const auto base_d = trange[d].tile(lower_d).first; - range_shift[perm_d] = -base_d; + // element lobound of the target of this expression + const auto target_base_d = trange_lobound_ ? (*trange_lobound_)[d] : 0; + range_shift[perm_d] = target_base_d - base_d; } } @@ -496,6 +517,7 @@ class ScalBlkTsrEngine protected: // Import base class variables to this scope using BlkTsrEngineBase_::lower_bound_; + using BlkTsrEngineBase_::trange_lobound_; using BlkTsrEngineBase_::upper_bound_; using ExprEngine_::implicit_permute_inner_; using ExprEngine_::implicit_permute_outer_; @@ -549,8 +571,12 @@ class ScalBlkTsrEngine const auto lower_d = lower[d]; const auto upper_d = upper[d]; if (lower_d != upper_d) { + // element lobound of the block in the host const auto base_d = trange[d].tile(lower_d).first; - range_shift.emplace_back(-base_d); + // element lobound of the target of this expression + const auto target_base_d = + trange_lobound_ ? (*trange_lobound_)[d] : 0ul; + range_shift.emplace_back(target_base_d - base_d); } else range_shift.emplace_back(0); } @@ -584,8 +610,12 @@ class ScalBlkTsrEngine const auto lower_d = lower[d]; const auto upper_d = upper[d]; if (lower_d != upper_d) { + // element lobound of the block in the host const auto base_d = trange[d].tile(lower_d).first; - range_shift[perm_d] = -base_d; + // element lobound of the target of this expression + const auto target_base_d = + trange_lobound_ ? 
(*trange_lobound_)[d] : 0ul; + range_shift[perm_d] = target_base_d - base_d; } } diff --git a/src/TiledArray/expressions/blk_tsr_expr.h b/src/TiledArray/expressions/blk_tsr_expr.h index 5d6612d5cc..661e2ff666 100644 --- a/src/TiledArray/expressions/blk_tsr_expr.h +++ b/src/TiledArray/expressions/blk_tsr_expr.h @@ -32,6 +32,8 @@ #include #include "blk_tsr_engine.h" +#include + namespace TiledArray { namespace expressions { @@ -118,6 +120,10 @@ class BlkTsrExprBase : public Expr { lower_bound_; ///< Lower bound of the tile block container::svector upper_bound_; ///< Upper bound of the tile block + /// If non-null, element lobound of the expression trange (else zeros will be + /// used) Fusing permutation does not affect this (i.e. this refers to the + /// modes of the host array). + std::optional trange_lobound_; void check_valid() const { TA_ASSERT(array_); @@ -285,6 +291,36 @@ class BlkTsrExprBase : public Expr { /// \return The block upper bound const auto& upper_bound() const { return upper_bound_; } + /// Sets result trange lobound + /// @param[in] trange_lobound The result trange lobound + template >> + Derived& set_trange_lobound(const Index1& trange_lobound) { + trange_lobound_.emplace(std::begin(trange_lobound), + std::end(trange_lobound)); + return static_cast(*this); + } + + /// Sets result trange lobound + /// @param[in] trange_lobound The result trange lobound + template >> + Derived& set_trange_lobound(std::initializer_list trange_lobound) { + return this->set_trange_lobound>( + trange_lobound); + } + + /// Sets result trange lobound such that the tile lobounds are not changed + Derived& preserve_lobound() { + return set_trange_lobound( + array_.trange().make_tile_range(lower_bound()).lobound()); + } + + /// @return optional to result trange lobound; if null, the result trange + /// lobound is zero + const auto& trange_lobound() const { return trange_lobound_; } + }; // class BlkTsrExprBase /// Block expression diff --git 
a/src/TiledArray/expressions/expr.h b/src/TiledArray/expressions/expr.h index f6d2ff1376..8e3f925310 100644 --- a/src/TiledArray/expressions/expr.h +++ b/src/TiledArray/expressions/expr.h @@ -47,6 +47,7 @@ #include +#include #include #include @@ -464,6 +465,16 @@ class Expr { // set even though this is a requirement. #endif // NDEBUG + // Assignment to block expression uses trange of the array it is bound to + // Assert that the user did not try to override the trange by accident using + // set_trange_lobound or at least that it matches tsr.array's trange + TA_ASSERT(!tsr.trange_lobound().has_value() || + (ranges::equal(tsr.trange_lobound().value(), + tsr.array() + .trange() + .make_tile_range(tsr.lower_bound()) + .lobound()))); + // Get the target world. World& world = tsr.array().world(); diff --git a/src/TiledArray/expressions/fwd.h b/src/TiledArray/expressions/fwd.h index 7960baf648..1d234b6dc5 100644 --- a/src/TiledArray/expressions/fwd.h +++ b/src/TiledArray/expressions/fwd.h @@ -28,7 +28,6 @@ #include - namespace TiledArray::expressions { template @@ -43,6 +42,10 @@ class BlkTsrExpr; template class ScalBlkTsrExpr; +/// used to indicate that block tensor expression should preserve the underlying +/// tensor's trange lobound +struct preserve_lobound_t {}; + template struct is_aliased : std::true_type {}; @@ -68,6 +71,14 @@ class ScalTsrExpr; template class ScalTsrEngine; +} // namespace TiledArray::expressions + +namespace TiledArray { + +/// used to tag block tensor expression methods that preserve the underlying +/// tensor's trange lobound +inline constexpr expressions::preserve_lobound_t preserve_lobound; + } // namespace TiledArray #endif // TILEDARRAY_EXPRESSIONS_FWD_H__INCLUDED diff --git a/src/TiledArray/expressions/tsr_expr.h b/src/TiledArray/expressions/tsr_expr.h index 8430a3c852..68e036f4c4 100644 --- a/src/TiledArray/expressions/tsr_expr.h +++ b/src/TiledArray/expressions/tsr_expr.h @@ -197,7 +197,7 @@ class TsrExpr : public Expr> { return
TsrExpr(array(), annotation_); } - /// immutable Block expression factory + /// makes an immutable Block expression /// \tparam Index1 An integral range type /// \tparam Index2 An integral range type @@ -213,7 +213,26 @@ class TsrExpr : public Expr> { upper_bound); } - /// immutable Block expression factory + /// makes an immutable Block expression that preserves the underlying tensor's + /// trange lobound + + /// \tparam Index1 An integral range type + /// \tparam Index2 An integral range type + /// \param lower_bound The lower_bound of the block + /// \param upper_bound The upper_bound of the block + template && + TiledArray::detail::is_integral_range_v>> + BlkTsrExpr block(const Index1& lower_bound, + const Index2& upper_bound, + preserve_lobound_t) const { + return BlkTsrExpr(array_, annotation_, lower_bound, + upper_bound) + .preserve_lobound(); + } + + /// makes an immutable Block expression /// \tparam Index1 An integral type /// \tparam Index2 An integral type @@ -229,7 +248,26 @@ class TsrExpr : public Expr> { upper_bound); } - /// immutable Block expression factory + /// makes an immutable Block expression that preserves the underlying tensor's + /// trange lobound + + /// \tparam Index1 An integral type + /// \tparam Index2 An integral type + /// \param lower_bound The lower_bound of the block + /// \param upper_bound The upper_bound of the block + template && + std::is_integral_v>> + BlkTsrExpr block( + const std::initializer_list& lower_bound, + const std::initializer_list& upper_bound, + preserve_lobound_t) const { + return BlkTsrExpr(array_, annotation_, lower_bound, + upper_bound) + .preserve_lobound(); + } + + /// makes an immutable Block expression /// \tparam PairRange Type representing a range of generalized pairs (see /// TiledArray::detail::is_gpair_v ) \param bounds The {lower,upper} bounds of @@ -241,7 +279,22 @@ class TsrExpr : public Expr> { return BlkTsrExpr(array_, annotation_, bounds); } - /// immutable Block expression factory + /// 
makes an immutable Block expression that preserves the underlying tensor's + /// trange lobound + + /// \tparam PairRange Type representing a range of generalized pairs (see + /// TiledArray::detail::is_gpair_v ) \param bounds The {lower,upper} bounds of + /// the block + template >> + BlkTsrExpr block(const PairRange& bounds, + preserve_lobound_t) const { + return BlkTsrExpr(array_, annotation_, bounds) + .preserve_lobound(); + } + + /// makes an immutable Block expression /// \tparam Index An integral type /// \param bounds The {lower,upper} bounds of the block @@ -252,7 +305,21 @@ class TsrExpr : public Expr> { return BlkTsrExpr(array_, annotation_, bounds); } - /// mutable Block expression factory + /// makes an immutable Block expression that preserves the underlying tensor's + /// trange lobound + + /// \tparam Index An integral type + /// \param bounds The {lower,upper} bounds of the block + template >> + BlkTsrExpr block( + const std::initializer_list>& bounds, + preserve_lobound_t) const { + return BlkTsrExpr(array_, annotation_, bounds) + .preserve_lobound(); + } + + /// makes a mutable Block expression /// \tparam Index1 An integral range type /// \tparam Index2 An integral range type @@ -268,7 +335,26 @@ class TsrExpr : public Expr> { upper_bound); } - /// mutable Block expression factory + /// makes a mutable Block expression that preserves the underlying tensor's + /// trange lobound + + /// \tparam Index1 An integral range type + /// \tparam Index2 An integral range type + /// \param lower_bound The lower_bound of the block + /// \param upper_bound The upper_bound of the block + template && + TiledArray::detail::is_integral_range_v>> + BlkTsrExpr block(const Index1& lower_bound, + const Index2& upper_bound, + preserve_lobound_t) { + return BlkTsrExpr(array_, annotation_, lower_bound, + upper_bound) + .preserve_lobound(); + } + + /// makes a mutable Block expression /// \tparam Index1 An integral type /// \tparam Index2 An integral type @@ -284,7 
+370,25 @@ class TsrExpr : public Expr> { upper_bound); } - /// mutable Block expression factory + /// makes a mutable Block expression that preserves the underlying tensor's + /// trange lobound + + /// \tparam Index1 An integral type + /// \tparam Index2 An integral type + /// \param lower_bound The lower_bound of the block + /// \param upper_bound The upper_bound of the block + template && + std::is_integral_v>> + BlkTsrExpr block( + const std::initializer_list& lower_bound, + const std::initializer_list& upper_bound, preserve_lobound_t) { + return BlkTsrExpr(array_, annotation_, lower_bound, + upper_bound) + .preserve_lobound(); + } + + /// makes a mutable Block expression /// \tparam PairRange Type representing a range of generalized pairs (see /// TiledArray::detail::is_gpair_v ) \param bounds The {lower,upper} bounds of @@ -296,7 +400,21 @@ class TsrExpr : public Expr> { return BlkTsrExpr(array_, annotation_, bounds); } - /// mutable Block expression factory + /// makes a mutable Block expression that preserves the underlying tensor's + /// trange lobound + + /// \tparam PairRange Type representing a range of generalized pairs (see + /// TiledArray::detail::is_gpair_v ) \param bounds The {lower,upper} bounds of + /// the block + template >> + BlkTsrExpr block(const PairRange& bounds, preserve_lobound_t) { + return BlkTsrExpr(array_, annotation_, bounds) + .preserve_lobound(); + } + + /// makes a mutable Block expression /// \tparam Index An integral type /// \param bounds The {lower,upper} bounds of the block @@ -307,6 +425,20 @@ class TsrExpr : public Expr> { return BlkTsrExpr(array_, annotation_, bounds); } + /// makes a mutable Block expression that preserves the underlying tensor's + /// trange lobound + + /// \tparam Index An integral type + /// \param bounds The {lower,upper} bounds of the block + template >> + BlkTsrExpr block( + const std::initializer_list>& bounds, + preserve_lobound_t) { + return BlkTsrExpr(array_, annotation_, bounds) + 
.preserve_lobound(); + } + /// Conjugated-tensor expression factor /// \return A conjugated expression object diff --git a/tests/expressions_impl.h b/tests/expressions_impl.h index ca8027c03d..e7c781ccc6 100644 --- a/tests/expressions_impl.h +++ b/tests/expressions_impl.h @@ -619,6 +619,7 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(assign_subblock_block, F, Fixtures, F) { for (int repeat = 0; repeat != nrepeats; ++repeat) BOOST_REQUIRE_NO_THROW(c("a,b,c").block({3, 3, 3}, {5, 5, 5}) = 2 * a("a,b,c").block({3, 3, 3}, {5, 5, 5})); + BOOST_REQUIRE(tile_ranges_match_trange(c)); BlockRange block_range(a.trange().tiles_range(), {3, 3, 3}, {5, 5, 5}); @@ -698,18 +699,69 @@ BOOST_FIXTURE_TEST_CASE_TEMPLATE(assign_subblock_block_base1, F, Fixtures, F) { c.fill_local(0.0); c_base1.fill_local(0.0); + // block expressions by default have trange lobound (=base) set to 0 ... + // this is done to allow block expressions involving multiple arrays with + // different lobounds all work correctly BOOST_REQUIRE_NO_THROW(c("a,b,c").block({3, 3, 3}, {5, 5, 5}) = a_base1("a,b,c").block({3, 3, 3}, {5, 5, 5})); BOOST_REQUIRE(tile_ranges_match_trange(c)); BOOST_REQUIRE_NO_THROW(c_base1("a,b,c").block({3, 3, 3}, {5, 5, 5}) = a("a,b,c").block({3, 3, 3}, {5, 5, 5})); BOOST_REQUIRE(tile_ranges_match_trange(c_base1)); + BOOST_REQUIRE_NO_THROW(c_base1("a,b,c").block({3, 3, 3}, {5, 5, 5}) = + a_base1("a,b,c").block({3, 3, 3}, {5, 5, 5})); + BOOST_REQUIRE(tile_ranges_match_trange(c_base1)); BOOST_REQUIRE_NO_THROW(c("a,b,c").block({0, 0, 0}, {ntiles, ntiles, ntiles}) = a_base1("a,b,c")); BOOST_REQUIRE(tile_ranges_match_trange(c)); BOOST_REQUIRE_NO_THROW( c_base1("a,b,c").block({0, 0, 0}, {ntiles, ntiles, ntiles}) = a("a,b,c")); BOOST_REQUIRE(tile_ranges_match_trange(c_base1)); + + // however user can override the trange lobound using set_trange_lobound + { + decltype(F::c) a_block; + // default trange lobound is 0 + BOOST_REQUIRE_NO_THROW(a_block("a,b,c") = + a_base1("a,b,c").block({3, 3, 3}, {5, 5, 5})); + 
BOOST_REQUIRE_EQUAL(a_block.trange().elements_range().lobound(), + (Range::index_type{0, 0, 0})); + + // this preserves tile's lobounds, so that tile {0,0,0} in a_block has + // identical range to that of tile {3, 3, 3} in a_base1 + BOOST_REQUIRE_NO_THROW(a_block("a,b,c") = a_base1("a,b,c").block( + {3, 3, 3}, {5, 5, 5}, preserve_lobound)); + BOOST_REQUIRE_EQUAL(a_block.trange().elements_range().lobound(), + a_base1.trange().make_tile_range({3, 3, 3}).lobound()); + // this explicitly makes the trange lobound of a_block to be {1,1,1} + BOOST_REQUIRE_NO_THROW(a_block("a,b,c") = + a("a,b,c") + .block({3, 3, 3}, {5, 5, 5}) + .set_trange_lobound({1, 1, 1})); + BOOST_REQUIRE_EQUAL(a_block.trange().elements_range().lobound(), + Range::index_type({1, 1, 1})); + // trange of source block is ignored when it is assigned to a block of an + // existing array + BOOST_REQUIRE_NO_THROW(a_block("a,b,c").block({0, 0, 0}, {2, 2, 2}) = + a_base1("a,b,c") + .block({3, 3, 3}, {5, 5, 5}) + .set_trange_lobound({0, 0, 0})); + // overriding trange of result block is not allowed ... + BOOST_REQUIRE_THROW( + a_block("a,b,c") + .block({0, 0, 0}, {2, 2, 2}) + .set_trange_lobound({0, 0, 0}) = a_base1("a,b,c") + .block({3, 3, 3}, {5, 5, 5}) + .set_trange_lobound({0, 0, 0}), + Exception); + // ... 
unless makes it same as trange lobound of the underlying array + BOOST_REQUIRE_NO_THROW(a_block("a,b,c") + .block({0, 0, 0}, {2, 2, 2}) + .set_trange_lobound({1, 1, 1}) = + a_base1("a,b,c") + .block({3, 3, 3}, {5, 5, 5}) + .set_trange_lobound({0, 0, 0})); + } } BOOST_FIXTURE_TEST_CASE_TEMPLATE(assign_subblock_permute_block, F, Fixtures, From 5cc3ce3e2c76f12749be0f3e64c44d1a40f56484 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 13 Sep 2024 05:53:48 -0400 Subject: [PATCH 20/62] dox fixup [skip ci] --- src/TiledArray/tiled_range1.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index 46c4b37adc..8cc830046b 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -523,9 +523,8 @@ inline TiledRange1 concat(const TiledRange1& r1, const TiledRange1& r2) { /// Test that two TiledRange1 objects are congruent /// This function tests that the tile sizes of the two ranges coincide. 
-/// \tparam Range The range type -/// \param r1 an TiledRange1 object -/// \param r2 an TiledRange1 object +/// \param r1 a TiledRange1 object +/// \param r2 a TiledRange1 object inline bool is_congruent(const TiledRange1& r1, const TiledRange1& r2) { return r1.tile_extent() == r2.tile_extent() && std::equal(r1.begin(), r1.end(), r2.begin(), From 2527e8057d360fe57ab37459dac384234a4efc64 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 13 Sep 2024 06:06:37 -0400 Subject: [PATCH 21/62] introduced TiledRange::is_congruent --- src/TiledArray/tiled_range.h | 13 +++++++++++++ tests/tiled_range.cpp | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/TiledArray/tiled_range.h b/src/TiledArray/tiled_range.h index bfcd4c86fc..fb73512560 100644 --- a/src/TiledArray/tiled_range.h +++ b/src/TiledArray/tiled_range.h @@ -423,6 +423,19 @@ inline bool operator==(const TiledRange& r1, const TiledRange& r2) { std::equal(r1.data().begin(), r1.data().end(), r2.data().begin()); } +/// Test that two TiledRange objects are congruent + +/// Two tranges are congruent if one is a translation of another (i.e. 
their +/// ranks and extents of all tiles agree) \param r1 a TiledRange object \param +/// r2 a TiledRange object +inline bool is_congruent(const TiledRange& r1, const TiledRange& r2) { + return r1.rank() == r2.rank() && + std::equal(r1.begin(), r1.end(), r2.begin(), + [](const auto& tr1_1, const auto& tr1_2) { + return is_congruent(tr1_1, tr1_2); + }); +} + inline bool operator!=(const TiledRange& r1, const TiledRange& r2) { return !operator==(r1, r2); } diff --git a/tests/tiled_range.cpp b/tests/tiled_range.cpp index 577b395927..eb557b761f 100644 --- a/tests/tiled_range.cpp +++ b/tests/tiled_range.cpp @@ -119,6 +119,7 @@ BOOST_AUTO_TEST_CASE(comparison) { TiledRange r1{{0, 2, 4, 6, 8, 10}, {0, 2, 4, 6, 8, 10}}; TiledRange r2{{0, 2, 4, 6, 8, 10}, {0, 2, 4, 6, 8, 10}}; TiledRange r3{{0, 3, 6, 9, 12, 15}, {0, 3, 6, 9, 12, 15}}; + BOOST_CHECK(r1 == r1); // self-comparison BOOST_CHECK(r1 == r2); // check equality operator BOOST_CHECK(!(r1 != r2)); // check not-equal operator BOOST_CHECK( @@ -126,6 +127,18 @@ BOOST_AUTO_TEST_CASE(comparison) { BOOST_CHECK(r1 != r3); } +BOOST_AUTO_TEST_CASE(congruency) { + TiledRange r1{{0, 2, 4, 6, 8, 10}, {0, 2, 4, 6, 8, 10}}; + TiledRange r2{{1, 3, 5, 7, 9, 11}, {2, 4, 6, 8, 10, 12}}; + TiledRange r3{{0, 3, 6, 9, 12, 15}, {0, 3, 6, 9, 12, 15}}; + BOOST_CHECK(r1 == r1 && is_congruent(r1, r1)); // congruent with self + BOOST_CHECK(r1 != r2 && + is_congruent(r1, r2)); // r1 and r2 are not equal but congruent + BOOST_CHECK( + r1 != r3 && + !is_congruent(r1, r3)); // r1 and r3 are not equal and not congruent +} + BOOST_AUTO_TEST_CASE(assignment) { TiledRange r1; From 1471c8b94f94aec6abecf96419527e3887f4689a Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 13 Sep 2024 06:07:29 -0400 Subject: [PATCH 22/62] like contraction, reduction expression and binary expression can ignore absolute positions if ignore_tile_position() is on --- src/TiledArray/expressions/binary_engine.h | 22 +++++++++++++++++----- 
src/TiledArray/expressions/expr.h | 17 +++++++++++++++-- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/TiledArray/expressions/binary_engine.h b/src/TiledArray/expressions/binary_engine.h index 33318b57a6..486c5421a1 100644 --- a/src/TiledArray/expressions/binary_engine.h +++ b/src/TiledArray/expressions/binary_engine.h @@ -235,18 +235,30 @@ class BinaryEngine : public ExprEngine { left_.init_struct(left_indices_); right_.init_struct(right_indices_); #ifndef NDEBUG - if (left_.trange() != right_.trange()) { + if (ignore_tile_position()) { + if (!is_congruent(left_.trange(), right_.trange())) { + if (TiledArray::get_default_world().rank() == 0) { + TA_USER_ERROR_MESSAGE( + "The TiledRanges of the left- and right-hand arguments the " + "binary " + "expression are not congruent:" + << "\n left = " << left_.trange() + << "\n right = " << right_.trange()); + } + TA_EXCEPTION( + "The TiledRange objects of a binary expression are not congruent."); + } + } else if (left_.trange() != right_.trange()) { if (TiledArray::get_default_world().rank() == 0) { TA_USER_ERROR_MESSAGE( - "The TiledRanges of the left- and right-hand arguments of the " - "binary operation are not equal:" + "The TiledRanges of the left- and right-hand arguments the binary " + "expression are not equal:" << "\n left = " << left_.trange() << "\n right = " << right_.trange()); } TA_EXCEPTION( - "The TiledRanges of the left- and right-hand arguments " - "of the binary operation are not equal."); + "The TiledRange objects of a binary expression are not equal."); } #endif // NDEBUG ExprEngine_::init_struct(target_indices); diff --git a/src/TiledArray/expressions/expr.h b/src/TiledArray/expressions/expr.h index 8e3f925310..3b1e9f43be 100644 --- a/src/TiledArray/expressions/expr.h +++ b/src/TiledArray/expressions/expr.h @@ -664,7 +664,20 @@ class Expr { right_dist_eval.eval(); #ifndef NDEBUG - if (left_dist_eval.trange() != right_dist_eval.trange()) { + if (ignore_tile_position()) { + if 
(!is_congruent(left_dist_eval.trange(), right_dist_eval.trange())) { + if (TiledArray::get_default_world().rank() == 0) { + TA_USER_ERROR_MESSAGE( + "The TiledRanges of the left- and right-hand arguments the " + "binary " + "reduction are not congruent:" + << "\n left = " << left_dist_eval.trange() + << "\n right = " << right_dist_eval.trange()); + } + TA_EXCEPTION( + "The TiledRange objects of a binary reduction are not congruent."); + } + } else if (left_dist_eval.trange() != right_dist_eval.trange()) { if (TiledArray::get_default_world().rank() == 0) { TA_USER_ERROR_MESSAGE( "The TiledRanges of the left- and right-hand arguments the binary " @@ -674,7 +687,7 @@ class Expr { } TA_EXCEPTION( - "The TiledRange objects of a binary expression are not equal."); + "The TiledRange objects of a binary reduction are not equal."); } #endif // NDEBUG From d4142d7c7b1eff6e766ad851a95799aebfe00a1b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 14 Sep 2024 07:22:55 -0400 Subject: [PATCH 23/62] introduced TiledRange1::{lo,up}bound which feel to have unambiguous meaning --- src/TiledArray/tiled_range1.h | 12 ++++++++++++ tests/tiled_range1.cpp | 24 +++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index 8cc830046b..5fbe87c64d 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -180,6 +180,18 @@ class TiledRange1 { /// \return the number of elements in the range index1_type extent() const { return TiledArray::extent(elements_range_); } + // clang-format off + /// Elements range lobound accessor + /// \return lower bound of the elements range (i.e., the smallest index in the elements range, `a` in `[a,b)`) + // clang-format on + index1_type lobound() const { return elements_range_.lobound(); } + + // clang-format off + /// Elements range upbound accessor + /// \return upper bound of the elements range (i.e., the smallest index greater than any 
in the elements range, `b` in `[a,b)`) + // clang-format on + index1_type upbound() const { return elements_range_.upbound(); } + /// Computes hashmarks /// \return the hashmarks of the tiled range, consisting of the following /// values: diff --git a/tests/tiled_range1.cpp b/tests/tiled_range1.cpp index 2fe958bd2d..12b94578b5 100644 --- a/tests/tiled_range1.cpp +++ b/tests/tiled_range1.cpp @@ -32,6 +32,10 @@ BOOST_AUTO_TEST_CASE(range_accessor) { BOOST_CHECK_EQUAL(tr1.tiles_range().second, tiles.second); BOOST_CHECK_EQUAL(tr1.elements_range().first, elements.first); BOOST_CHECK_EQUAL(tr1.elements_range().second, elements.second); + BOOST_CHECK_EQUAL(tr1.tile_extent(), tiles.second - tiles.first); + BOOST_CHECK_EQUAL(tr1.extent(), elements.second - elements.first); + BOOST_CHECK_EQUAL(tr1.lobound(), elements.first); + BOOST_CHECK_EQUAL(tr1.upbound(), elements.second); // Check individual tiles for (std::size_t i = 0; i < a.size() - 1; ++i) { @@ -43,12 +47,30 @@ BOOST_AUTO_TEST_CASE(range_accessor) { BOOST_AUTO_TEST_CASE(range_info) { BOOST_CHECK_EQUAL(tr1.tiles_range().first, 0ul); BOOST_CHECK_EQUAL(tr1.tiles_range().second, a.size() - 1); - BOOST_CHECK_EQUAL(tr1.elements_range().first, 0ul); + BOOST_CHECK_EQUAL(tr1.elements_range().first, a.front()); BOOST_CHECK_EQUAL(tr1.elements_range().second, a.back()); + BOOST_CHECK_EQUAL(tr1.tile_extent(), a.size() - 1); + BOOST_CHECK_EQUAL(tr1.extent(), a.back() - a.front()); + BOOST_CHECK_EQUAL(tr1.lobound(), a.front()); + BOOST_CHECK_EQUAL(tr1.upbound(), a.back()); for (std::size_t i = 0; i < a.size() - 1; ++i) { BOOST_CHECK_EQUAL(tr1.tile(i).first, a[i]); BOOST_CHECK_EQUAL(tr1.tile(i).second, a[i + 1]); } + + auto a_base1 = make_hashmarks(1); + BOOST_CHECK_EQUAL(tr1_base1.tiles_range().first, 0ul); + BOOST_CHECK_EQUAL(tr1_base1.tiles_range().second, a_base1.size() - 1); + BOOST_CHECK_EQUAL(tr1_base1.elements_range().first, a_base1.front()); + BOOST_CHECK_EQUAL(tr1_base1.elements_range().second, a_base1.back()); + 
BOOST_CHECK_EQUAL(tr1_base1.tile_extent(), a_base1.size() - 1); + BOOST_CHECK_EQUAL(tr1_base1.extent(), a_base1.back() - a_base1.front()); + BOOST_CHECK_EQUAL(tr1_base1.lobound(), a_base1.front()); + BOOST_CHECK_EQUAL(tr1_base1.upbound(), a_base1.back()); + for (std::size_t i = 0; i < a.size() - 1; ++i) { + BOOST_CHECK_EQUAL(tr1_base1.tile(i).first, a_base1[i]); + BOOST_CHECK_EQUAL(tr1_base1.tile(i).second, a_base1[i + 1]); + } } BOOST_AUTO_TEST_CASE(constructor) { From 2f80dc0c9e268fc614addcd25615e6586ea79a91 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 14 Sep 2024 07:24:17 -0400 Subject: [PATCH 24/62] more TsrExpr::block variants tagged by preserve_lobound_t --- src/TiledArray/expressions/tsr_expr.h | 67 ++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/expressions/tsr_expr.h b/src/TiledArray/expressions/tsr_expr.h index 68e036f4c4..e17ee2ddfa 100644 --- a/src/TiledArray/expressions/tsr_expr.h +++ b/src/TiledArray/expressions/tsr_expr.h @@ -523,6 +523,24 @@ class TsrExpr : public Expr> { /// Block expression + /// \tparam Index1 An integral range type + /// \tparam Index2 An integral range type + /// \param lower_bound The lower_bound of the block + /// \param upper_bound The upper_bound of the block + template && + TiledArray::detail::is_integral_range_v>> + BlkTsrExpr block(const Index1& lower_bound, + const Index2& upper_bound, + preserve_lobound_t) const { + return BlkTsrExpr(array_, annotation_, lower_bound, + upper_bound) + .preserve_lobound(); + } + + /// Block expression + /// \tparam Index1 An integral type /// \tparam Index2 An integral type /// \param lower_bound The lower_bound of the block @@ -539,8 +557,27 @@ class TsrExpr : public Expr> { /// Block expression + /// \tparam Index1 An integral type + /// \tparam Index2 An integral type + /// \param lower_bound The lower_bound of the block + /// \param upper_bound The upper_bound of the block + template && + std::is_integral_v>> + 
BlkTsrExpr block( + const std::initializer_list& lower_bound, + const std::initializer_list& upper_bound, + preserve_lobound_t) const { + return BlkTsrExpr(array_, annotation_, lower_bound, + upper_bound) + .preserve_lobound(); + } + + /// Block expression + /// \tparam PairRange Type representing a range of generalized pairs (see - /// TiledArray::detail::is_gpair_v ) \param bounds The {lower,upper} bounds of + /// TiledArray::detail::is_gpair_v ) + /// \param bounds The {lower,upper} bounds of /// the block template : public Expr> { /// Block expression + /// \tparam PairRange Type representing a range of generalized pairs (see + /// TiledArray::detail::is_gpair_v ) + /// \param bounds The {lower,upper} bounds of + /// the block + template >> + BlkTsrExpr block(const PairRange& bounds, + preserve_lobound_t) const { + return BlkTsrExpr(array_, annotation_, bounds) + .preserve_lobound(); + } + + /// Block expression + /// \tparam Index An integral type /// \param bounds The {lower,upper} bounds of the block template : public Expr> { return BlkTsrExpr(array_, annotation_, bounds); } + /// Block expression + + /// \tparam Index An integral type + /// \param bounds The {lower,upper} bounds of the block + template >> + BlkTsrExpr block( + const std::initializer_list>& bounds, + preserve_lobound_t) const { + return BlkTsrExpr(array_, annotation_, bounds) + .preserve_lobound(); + } + /// Conjugated-tensor expression factor /// \return A conjugated expression object From 7820909905ebded48ada01283def6cba62dacee0 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 15 Sep 2024 11:15:38 -0400 Subject: [PATCH 25/62] TA::expressions::preserve_lobound_t -> TA::preserve_lobound_t --- src/TiledArray/expressions/fwd.h | 12 ------------ src/TiledArray/fwd.h | 8 ++++++++ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/TiledArray/expressions/fwd.h b/src/TiledArray/expressions/fwd.h index 1d234b6dc5..e56dea8b83 100644 --- a/src/TiledArray/expressions/fwd.h +++ 
b/src/TiledArray/expressions/fwd.h @@ -42,10 +42,6 @@ class BlkTsrExpr; template class ScalBlkTsrExpr; -/// used to indicate that block tensor expression should preserve the underlying -/// tensor's trange lobound -struct preserve_lobound_t {}; - template struct is_aliased : std::true_type {}; @@ -73,12 +69,4 @@ class ScalTsrEngine; } // namespace TiledArray::expressions -namespace TiledArray { - -/// used to tag block tensor expression methods that preserve the underlying -/// tensor's trange lobound -inline constexpr expressions::preserve_lobound_t preserve_lobound; - -} // namespace TiledArray - #endif // TILEDARRAY_EXPRESSIONS_FWD_H__INCLUDED diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index 073e8bacd3..97d91a9a00 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -207,6 +207,14 @@ struct to; } // namespace conversions +/// used to indicate that block tensor expression should preserve the underlying +/// tensor's trange lobound +struct preserve_lobound_t {}; + +/// used to tag block tensor expression methods that preserve the underlying +/// tensor's trange lobound +inline constexpr preserve_lobound_t preserve_lobound; + } // namespace TiledArray #ifndef TILEDARRAY_DISABLE_NAMESPACE_TA From 4d4c06bf160cb8a0b74f37196d7852fd6b4fd574 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 15 Sep 2024 11:16:34 -0400 Subject: [PATCH 26/62] btas <-> ta tensor conversions work for non-0-lobound --- src/TiledArray/conversions/btas.h | 248 ++++++++++++++++++++++++------ tests/CMakeLists.txt | 1 + tests/btas.cpp | 21 ++- 3 files changed, 220 insertions(+), 50 deletions(-) diff --git a/src/TiledArray/conversions/btas.h b/src/TiledArray/conversions/btas.h index 28e5790e8f..ab07e97b53 100644 --- a/src/TiledArray/conversions/btas.h +++ b/src/TiledArray/conversions/btas.h @@ -36,6 +36,9 @@ #include #include +#include +#include + namespace TiledArray { // clang-format off @@ -49,11 +52,12 @@ namespace TiledArray { /// \tparam Storage_ The storage 
type of the source btas::Tensor object /// \tparam Tensor_ A tensor type (e.g., TiledArray::Tensor or btas::Tensor, /// optionally wrapped into TiledArray::Tile) -/// \param[in] src The source object; its subblock defined by the {lower,upper} -/// bounds \c {dst.lobound(),dst.upbound()} will be copied to \c dst +/// \param[in] src The source object; its subblock +/// `{dst.lobound(),dst.upbound()}` +/// will be copied to \c dst /// \param[out] dst The object that will contain the contents of the /// corresponding subblock of src -/// \throw TiledArray::Exception When the dimensions of \c src and \c dst do not +/// \throw TiledArray::Exception When the dimensions of \p src and \p dst do not /// match. // clang-format on template @@ -73,6 +77,57 @@ inline void btas_subtensor_to_tensor( dst_view = src_view; } +// clang-format off +/// Copy a block of a btas::Tensor into a TiledArray::Tensor + +/// A block of btas::Tensor \c src will be copied into TiledArray::Tensor \c +/// dst. The block dimensions will be determined by the dimensions of the range +/// of \c dst . +/// \tparam T The tensor element type +/// \tparam Range_ The range type of the source btas::Tensor object +/// \tparam Storage_ The storage type of the source btas::Tensor object +/// \tparam Tensor_ A tensor type (e.g., TiledArray::Tensor or btas::Tensor, +/// optionally wrapped into TiledArray::Tile) +/// \param[in] src The source object; its subblock +/// `{dst.lobound() + offset,dst.upbound() + offset}` +/// will be copied to \c dst +/// \param[out] dst The object that will contain the contents of the +/// corresponding subblock of src +/// \param[in] offset the offset to be applied to the coordinates of `dst.range()` to determine the block in \p src to be copied; this is needed if the DistArray that will contain \p dst will have a range whose lobound is different from `src.lobound()` +/// \throw TiledArray::Exception When the dimensions of \p src and \p dst do not +/// match. 
+// clang-format on +template < + typename T, typename Range_, typename Storage_, typename Tensor_, + typename IntegerRange, + typename = std::enable_if_t>> +inline void btas_subtensor_to_tensor( + const btas::Tensor& src, Tensor_& dst, + IntegerRange&& offset) { + TA_ASSERT(dst.range().rank() == src.range().rank()); + TA_ASSERT(ranges::size(offset) == src.range().rank()); + + const auto& src_range = src.range(); + const auto& dst_range = dst.range(); + auto src_blk_range = + TiledArray::BlockRange(detail::make_ta_range(src_range), + ranges::views::zip(dst_range.lobound(), offset) | + ranges::views::transform([](auto&& i_j) { + auto&& [i, j] = i_j; + return i + j; + }), + ranges::views::zip(dst_range.upbound(), offset) | + ranges::views::transform([](auto&& i_j) { + auto&& [i, j] = i_j; + return i + j; + })); + using std::data; + auto src_view = TiledArray::make_const_map(data(src), src_blk_range); + auto dst_view = TiledArray::make_map(data(dst), dst_range); + + dst_view = src_view; +} + // clang-format off /// Copy a TiledArray::Tensor into a block of a btas::Tensor @@ -86,8 +141,8 @@ inline void btas_subtensor_to_tensor( /// \tparam Storage_ The storage type of the destination btas::Tensor object /// \param[in] src The source object whose contents will be copied into /// a subblock of \c dst -/// \param[out] dst The destination object; its subblock defined by the -/// {lower,upper} bounds \c {src.lobound(),src.upbound()} will be +/// \param[out] dst The destination object; its subblock +/// `{src.lobound(),src.upbound()}` will be /// overwritten with the content of \c src /// \throw TiledArray::Exception When the dimensions /// of \c src and \c dst do not match. @@ -109,6 +164,57 @@ inline void tensor_to_btas_subtensor(const Tensor_& src, dst_view = src_view; } +// clang-format off +/// Copy a TiledArray::Tensor into a block of a btas::Tensor + +/// TiledArray::Tensor \c src will be copied into a block of btas::Tensor +/// \c dst. 
The block dimensions will be determined by the dimensions of the range +/// of \c src . +/// \tparam Tensor_ A tensor type (e.g., TiledArray::Tensor or btas::Tensor, +/// optionally wrapped into TiledArray::Tile) +/// \tparam T The tensor element type +/// \tparam Range_ The range type of the destination btas::Tensor object +/// \tparam Storage_ The storage type of the destination btas::Tensor object +/// \param[in] src The source object whose contents will be copied into +/// a subblock of \c dst +/// \param[out] dst The destination object; its subblock +/// `{src.lobound()+offset,src.upbound()+offset}` will be +/// overwritten with the content of \c src +/// \param[in] offset the offset to be applied to the coordinates of `src.range()` to determine the block in \p dst to be copied into; this is needed if the DistArray that contains \p src has a range whose lobound is different from `dst.lobound()` +/// \throw TiledArray::Exception When the dimensions +/// of \c src and \c dst do not match. 
+// clang-format on +template < + typename Tensor_, typename T, typename Range_, typename Storage_, + typename IntegerRange, + typename = std::enable_if_t>> +inline void tensor_to_btas_subtensor(const Tensor_& src, + btas::Tensor& dst, + IntegerRange&& offset) { + TA_ASSERT(dst.range().rank() == src.range().rank()); + TA_ASSERT(ranges::size(offset) == src.range().rank()); + + const auto& src_range = src.range(); + const auto& dst_range = dst.range(); + auto dst_blk_range = + TiledArray::BlockRange(detail::make_ta_range(dst_range), + ranges::views::zip(src_range.lobound(), offset) | + ranges::views::transform([](auto&& i_j) { + auto&& [i, j] = i_j; + return i + j; + }), + ranges::views::zip(src_range.upbound(), offset) | + ranges::views::transform([](auto&& i_j) { + auto&& [i, j] = i_j; + return i + j; + })); + using std::data; + auto src_view = TiledArray::make_const_map(data(src), src_range); + auto dst_view = TiledArray::make_map(data(dst), dst_blk_range); + + dst_view = src_view; +} + namespace detail { /// Task function for converting btas::Tensor subblock to a @@ -127,7 +233,13 @@ void counted_btas_subtensor_to_tensor(const BTAS_Tensor_* src, DistArray_* dst, const typename Range::index_type i, madness::AtomicInt* counter) { typename DistArray_::value_type tensor(dst->trange().make_tile_range(i)); - btas_subtensor_to_tensor(*src, tensor); + auto offset = ranges::views::zip(ranges::views::all(src->range().lobound()), + dst->trange().elements_range().lobound()) | + ranges::views::transform([](const auto& s_d) { + auto&& [s, d] = s_d; + return s - d; + }); + btas_subtensor_to_tensor(*src, tensor, offset); dst->set(i, tensor); (*counter)++; } @@ -137,12 +249,24 @@ void counted_btas_subtensor_to_tensor(const BTAS_Tensor_* src, DistArray_* dst, /// \tparam TA_Tensor_ a TiledArray::Tensor type /// \tparam BTAS_Tensor_ a btas::Tensor type /// \param src The source tensor -/// \param dst The destination tensor -/// \param counter The task counter -template -void 
counted_tensor_to_btas_subtensor(const TA_Tensor_& src, BTAS_Tensor_* dst, +/// \param src_array_lobound the lobound of the DistArray that contains src, +/// used to compute the offset to be applied to the coordinates of `src.range()` +/// to determine the block in \p dst to be copied into \param dst The +/// destination tensor \param counter The task counter +template < + typename TA_Tensor_, typename BTAS_Tensor_, typename IntegerRange, + typename = std::enable_if_t>> +void counted_tensor_to_btas_subtensor(const TA_Tensor_& src, + IntegerRange src_array_lobound, + BTAS_Tensor_* dst, madness::AtomicInt* counter) { - tensor_to_btas_subtensor(src, *dst); + auto offset = ranges::views::zip(ranges::views::all(dst->range().lobound()), + src_array_lobound) | + ranges::views::transform([](const auto& d_s) { + auto&& [d, s] = d_s; + return d - s; + }); + tensor_to_btas_subtensor(src, *dst, offset); (*counter)++; } @@ -267,41 +391,14 @@ DistArray_ btas_tensor_to_array( return array; } -/// Convert a TiledArray::DistArray object into a btas::Tensor object +namespace detail { -/// This function will copy the contents of \c src into a \c btas::Tensor -/// object. The copy operation is done in parallel, and this function will block -/// until all elements of \c src have been copied into the result array tiles. -/// The size of \c src.world().size() must be equal to 1 or \c src must be a -/// replicated TiledArray::DistArray. Usage: -/// \code -/// TiledArray::TArrayD -/// array(world, trange); -/// // Set tiles of array ... -/// -/// auto t = array_to_btas_tensor(array); -/// \endcode -/// \tparam Tile the tile type of \c src -/// \tparam Policy the policy type of \c src -/// \tparam Range_ the range type of the result (either, btas::RangeNd or -/// TiledArray::Range) -/// \tparam Storage_ the storage type of the result -/// \param[in] src The TiledArray::DistArray object whose contents -/// will be copied to the result. 
-/// \return A \c btas::Tensor object that is a copy of \c src -/// \throw TiledArray::Exception When world size is greater than -/// 1 and \c src is not replicated -/// \param[in] target_rank the rank on which to create the BTAS tensor -/// containing the data of \c src ; if \c target_rank=-1 then -/// create the BTAS tensor on every rank (this requires -/// that \c src.is_replicated()==true ) -/// \return BTAS tensor object containing the data of \c src , if my rank equals -/// \c target_rank or \c target_rank==-1 , -/// default-initialized BTAS tensor otherwise. +/// \sa TiledArray::array_to_btas_tensor() template > -btas::Tensor array_to_btas_tensor( - const TiledArray::DistArray& src, int target_rank = -1) { +btas::Tensor +array_to_btas_tensor_impl(const TiledArray::DistArray& src, + const Range_& result_range, int target_rank) { // Test preconditions if (target_rank == -1 && src.world().size() > 1 && !src.pmap()->is_replicated()) @@ -314,13 +411,11 @@ btas::Tensor array_to_btas_tensor( using result_type = btas::Tensor::element_type, Range_, Storage_>; - using result_range_type = typename result_type::range_type; // Construct the result if (target_rank == -1 || src.world().rank() == target_rank) { // if array is sparse must initialize to zero - result_type result( - result_range_type(src.trange().elements_range().extent()), 0.0); + result_type result(result_range, 0.0); // Spawn tasks to copy array tiles to btas::Tensor madness::AtomicInt counter; @@ -329,8 +424,12 @@ btas::Tensor array_to_btas_tensor( for (std::size_t i = 0; i < src.size(); ++i) { if (!src.is_zero(i)) { src.world().taskq.add( - &detail::counted_tensor_to_btas_subtensor, - src.find(i), &result, &counter); + &detail::counted_tensor_to_btas_subtensor< + Tile, result_type, + std::decay_t< + decltype(src.trange().elements_range().lobound())>>, + src.find(i), src.trange().elements_range().lobound(), &result, + &counter); ++n; } } @@ -343,6 +442,59 @@ btas::Tensor array_to_btas_tensor( return 
result_type{}; } +} // namespace detail + +/// Convert a TiledArray::DistArray object into a btas::Tensor object + +/// This function will copy the contents of \c src into a \c btas::Tensor +/// object. The copy operation is done in parallel, and this function will block +/// until all elements of \c src have been copied into the result array tiles. +/// The size of \c src.world().size() must be equal to 1 or \c src must be a +/// replicated TiledArray::DistArray. Usage: +/// \code +/// TiledArray::TArrayD +/// array(world, trange); +/// // Set tiles of array ... +/// +/// auto t = array_to_btas_tensor(array); +/// \endcode +/// \tparam Tile the tile type of \c src +/// \tparam Policy the policy type of \c src +/// \tparam Range_ the range type of the result (either, btas::RangeNd or +/// TiledArray::Range) +/// \tparam Storage_ the storage type of the result +/// \param[in] src The TiledArray::DistArray object whose contents +/// will be copied to the result. +/// \param[in] target_rank the rank on which to create the BTAS tensor +/// containing the data of \c src ; if \c target_rank=-1 then +/// create the BTAS tensor on every rank (this requires +/// that \c src.is_replicated()==true ) +/// \return BTAS tensor object containing the data of \c src , if my rank equals +/// \c target_rank or \c target_rank==-1 , +/// default-initialized BTAS tensor otherwise. +/// \warning The range of \c src is +/// not preserved, i.e. the lobound of the result is zero. Use the +/// variant of this function tagged with preserve_lobound_t to +/// preserve the range. 
+/// \throw TiledArray::Exception When world size is greater than +/// 1 and \c src is not replicated +template > +btas::Tensor array_to_btas_tensor( + const TiledArray::DistArray& src, int target_rank = -1) { + return detail::array_to_btas_tensor_impl( + src, Range_(src.trange().elements_range().extent()), target_rank); +} + +template > +btas::Tensor array_to_btas_tensor( + const TiledArray::DistArray& src, preserve_lobound_t, + int target_rank = -1) { + return detail::array_to_btas_tensor_impl(src, src.trange().elements_range(), + target_rank); +} + } // namespace TiledArray #endif // TILEDARRAY_CONVERSIONS_BTAS_H__INCLUDED diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 823e13bec8..85d30d7728 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -101,6 +101,7 @@ set(ta_test_src_files ta_test.cpp einsum.cpp linalg.cpp cp.cpp + btas.cpp ) if(CUDA_FOUND OR HIP_FOUND) diff --git a/tests/btas.cpp b/tests/btas.cpp index a31329a80d..9c15540e9a 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -324,8 +324,9 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(dense_array_conversion, bTensor, tensor_types) { // make tiled range using trange1_t = TiledArray::TiledRange1; - TiledArray::TiledRange trange( - {trange1_t(0, 10, 20), trange1_t(0, 11, 22), trange1_t(0, 12, 24)}); + TiledArray::TiledRange trange({trange1_t(0, 10, 20), + trange1_t(0, 11, 22).inplace_shift(1), + trange1_t(0, 12, 24).inplace_shift(2)}); // convert to a replicated DistArray using T = typename bTensor::value_type; @@ -371,6 +372,22 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(dense_array_conversion, bTensor, tensor_types) { BOOST_CHECK(src_copy == btas::Tensor{}); } } + + // convert the replicated DistArray back to a btas::Tensor while preserving + // the DistArray range + { + btas::Tensor src_copy; + BOOST_REQUIRE_NO_THROW( + src_copy = array_to_btas_tensor(dst, TiledArray::preserve_lobound)); + BOOST_CHECK(ranges::equal(src_copy.range().lobound(), + dst.trange().elements_range().lobound())); + for (const 
auto& i : src.range()) { + auto i_copy = i; + i_copy[1] += 1; + i_copy[2] += 2; + BOOST_CHECK_EQUAL(src(i), src_copy(i_copy)); + } + } } BOOST_AUTO_TEST_CASE_TEMPLATE(sparse_array_conversion, bTensor, tensor_types) { From 057df5aad0f3e28e5637a293af29d3918bd8e863 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 15 Sep 2024 23:40:47 -0400 Subject: [PATCH 27/62] introduced member versions of TiledRange1::make_uniform --- src/TiledArray/tiled_range1.h | 15 +++++++++++++++ tests/tiled_range1.cpp | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index 5fbe87c64d..e78e647c10 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -318,6 +318,21 @@ class TiledRange1 { return make_uniform(Range1(0, range_extent), target_tile_size); } + /// same as make_uniform(const Range1&, std::size_t), using the element_range + /// of this TiledRange1 + TiledRange1 make_uniform(std::size_t target_tile_size) const { + return make_uniform(this->elements_range(), target_tile_size); + } + + /// make as uniformly-tiled range as possible out of this TiledRange1, with + /// the same number of tiles as this + TiledRange1 make_uniform() const { + return make_uniform( + this->elements_range(), + (this->elements_range().extent() + this->tile_extent() - 1) / + this->tile_extent()); + } + /// shifts this TiledRange1 /// @param[in] shift the shift to apply diff --git a/tests/tiled_range1.cpp b/tests/tiled_range1.cpp index 12b94578b5..39bd7fa7c4 100644 --- a/tests/tiled_range1.cpp +++ b/tests/tiled_range1.cpp @@ -380,6 +380,14 @@ BOOST_AUTO_TEST_CASE(make_uniform) { BOOST_REQUIRE_NO_THROW(TiledRange1::make_uniform(59, 10)); BOOST_CHECK(TiledRange1::make_uniform(59, 10) == (TiledRange1{0, 10, 20, 30, 40, 50, 59})); + + // member versions + BOOST_REQUIRE_NO_THROW((TiledRange1{0, 10, 20, 30, 40, 50}.make_uniform(30))); + BOOST_CHECK((TiledRange1{0, 10, 20, 30, 40, 50}.make_uniform(30) == + 
TiledRange1{0, 25, 50})); + BOOST_REQUIRE_NO_THROW((TiledRange1{0, 40, 50}.make_uniform())); + BOOST_CHECK( + (TiledRange1{0, 40, 50}.make_uniform() == TiledRange1{0, 25, 50})); } BOOST_AUTO_TEST_CASE(shift) { From 34092359a6e5c02c0435fe36109be7a585d9625e Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 17 Sep 2024 11:40:43 -0400 Subject: [PATCH 28/62] TiledRange1::make_uniform(rng) with empty range preserves its lobound --- src/TiledArray/tiled_range1.h | 2 +- tests/tiled_range1.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/tiled_range1.h b/src/TiledArray/tiled_range1.h index e78e647c10..aa75916442 100644 --- a/src/TiledArray/tiled_range1.h +++ b/src/TiledArray/tiled_range1.h @@ -308,7 +308,7 @@ class TiledRange1 { hashmarks.push_back(range.upbound()); return TiledRange1(hashmarks.begin(), hashmarks.end()); } else - return TiledRange1{}; + return TiledRange1{range.lobound()}; } /// same as make_uniform(const Range1&, std::size_t) for a 0-based range diff --git a/tests/tiled_range1.cpp b/tests/tiled_range1.cpp index 39bd7fa7c4..947142f6dc 100644 --- a/tests/tiled_range1.cpp +++ b/tests/tiled_range1.cpp @@ -350,8 +350,10 @@ BOOST_AUTO_TEST_CASE(concatenation) { } BOOST_AUTO_TEST_CASE(make_uniform) { + BOOST_REQUIRE_NO_THROW(TiledRange1::make_uniform(Range1{0, 0}, 0)); + BOOST_CHECK(TiledRange1::make_uniform(Range1{0, 0}, 0) == TiledRange1{}); BOOST_REQUIRE_NO_THROW(TiledRange1::make_uniform(Range1{1, 1}, 0)); - BOOST_CHECK(TiledRange1::make_uniform(Range1{1, 1}, 0) == TiledRange1{}); + BOOST_CHECK(TiledRange1::make_uniform(Range1{1, 1}, 0) == TiledRange1{1}); BOOST_REQUIRE_NO_THROW(TiledRange1::make_uniform(Range1{3, 6}, 10)); BOOST_CHECK(TiledRange1::make_uniform(Range1{3, 6}, 10) == (TiledRange1{3, 6})); From 456b7905f0a9e1cf2c413dad607e9ca740239aff Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 17 Sep 2024 11:41:22 -0400 Subject: [PATCH 29/62] heig: work around the n=0 corner case --- 
src/TiledArray/math/linalg/rank-local.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/TiledArray/math/linalg/rank-local.cpp b/src/TiledArray/math/linalg/rank-local.cpp index d23f3b4e3f..6db050ee5c 100644 --- a/src/TiledArray/math/linalg/rank-local.cpp +++ b/src/TiledArray/math/linalg/rank-local.cpp @@ -121,6 +121,7 @@ void heig(Matrix& A, std::vector>& W) { integer lda = A.rows(); W.resize(n); auto* w = W.data(); + if (n == 0) return; if constexpr (TiledArray::detail::is_complex_v) TA_LAPACK(heev, jobz, uplo, n, a, lda, w); else @@ -140,6 +141,7 @@ void heig(Matrix& A, Matrix& B, integer ldb = B.rows(); W.resize(n); auto* w = W.data(); + if (n == 0) return; if constexpr (TiledArray::detail::is_complex_v) TA_LAPACK(hegv, itype, jobz, uplo, n, a, lda, b, ldb, w); else From d6223831afcb5ddb9e9962732f0768c43610000a Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 17 Sep 2024 11:52:20 -0400 Subject: [PATCH 30/62] bump MAD tag to pull in https://github.com/m-a-d-n-e-s-s/madness/pull/547 --- INSTALL.md | 2 +- external/versions.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 96e7259ed5..db11ed24df 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -43,7 +43,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20 and later. - [BTAS](http://github.com/ValeevGroup/BTAS), tag 4e8f5233aa7881dccdfcc37ce07128833926d3c2 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. -- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 96ac90e8f193ccfaf16f346b4652927d2d362e75 . +- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 95589b0d020a076f93d02eead6da654b23dd3d91 . 
Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*. diff --git a/external/versions.cmake b/external/versions.cmake index a005bcdec5..87804775f9 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -11,8 +11,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG 96ac90e8f193ccfaf16f346b4652927d2d362e75) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG 3d0ae2fad1b97e347ca6dd98b9f1b9e74e629f52) +set(TA_TRACKED_MADNESS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG 96ac90e8f193ccfaf16f346b4652927d2d362e75) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) From a0c04508a2c32e5c56c528bc917c5acf8fd9fc17 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 18 Sep 2024 00:10:14 -0400 Subject: [PATCH 31/62] BinaryExpr: account for ignore_tile_position when checking preconditions --- src/TiledArray/dist_eval/binary_eval.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/dist_eval/binary_eval.h b/src/TiledArray/dist_eval/binary_eval.h index 62bbdb64ce..87cce91656 100644 --- a/src/TiledArray/dist_eval/binary_eval.h +++ b/src/TiledArray/dist_eval/binary_eval.h @@ -107,7 +107,10 @@ class BinaryEvalImpl : public DistEvalImpl, right_ntiles_discarded_(0) #endif { - TA_ASSERT(left.trange() == right.trange()); + TA_ASSERT(ignore_tile_position() + ? 
left.trange().elements_range().extent() == + right.trange().elements_range().extent() + : left.trange() == right.trange()); } virtual ~BinaryEvalImpl() {} From 2ba8f8baae8bacdd269a521235f0114a98c40c11 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 18 Sep 2024 17:09:24 -0400 Subject: [PATCH 32/62] [python] simplify make_trange by using TiledRange1::make_uniform --- python/src/TiledArray/python/trange.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/python/src/TiledArray/python/trange.h b/python/src/TiledArray/python/trange.h index 488421291d..8c008c1fa9 100644 --- a/python/src/TiledArray/python/trange.h +++ b/python/src/TiledArray/python/trange.h @@ -45,7 +45,6 @@ auto list(const TiledRange &trange) { return v; } -// template<> inline TiledRange make_trange(std::vector > trange) { std::vector trange1; for (auto tr : trange) { @@ -58,11 +57,7 @@ inline TiledRange make_trange(std::vector > trange) { inline TiledRange make_trange(std::vector shape, size_t block) { std::vector trange1; for (size_t i = 0; i < shape.size(); ++i) { - std::vector tr1; - for (size_t j = 0; j <= (shape[i] + block - 1); j += block) { - tr1.push_back(std::min(j, shape[i])); - } - trange1.push_back(TiledRange1(tr1.begin(), tr1.end())); + trange1.emplace_back(TiledRange1::make_uniform(shape[i], block)); } return TiledRange(trange1.begin(), trange1.end()); } From ae1cf06592aca8171904be8605cf0a95973fa31d Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 18 Sep 2024 19:21:45 -0400 Subject: [PATCH 33/62] [ci] greatly reduce the gitlab matrix, replace rel/deb builds with relwithdebinfo --- .gitlab-ci.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b57a210430..02c3edc266 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -57,22 +57,22 @@ ubuntu: metrics: build/metrics.txt parallel: matrix: - - IMAGE : [ "ubuntu:22.04", "ubuntu:20.04" ] + - IMAGE : [ "ubuntu:20.04" ] CXX: [ g++ ] - 
BUILD_TYPE : [ "Release" ] + BUILD_TYPE : [ "RelWithDebInfo" ] BLA_VENDOR : [ "BLAS_PREFERENCE_LIST=IntelMKL" ] BLA_THREADS : [ "IntelMKL_THREAD_LAYER=tbb" ] # ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] TA_PYTHON : [ "TA_PYTHON=OFF" ] # needs to be fixed for MKL RUNNER_TAGS: [ saas-linux-small-amd64 ] - - IMAGE : [ "ubuntu:22.04", "ubuntu:20.04" ] + - IMAGE : [ "ubuntu:22.04" ] CXX: [ g++, clang++-13 ] - BUILD_TYPE : [ "Release", "Debug" ] + BUILD_TYPE : [ "RelWithDebInfo" ] ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] RUNNER_TAGS: [ saas-linux-small-amd64 ] - - IMAGE : [ "ubuntu:22.04", "ubuntu:20.04" ] + - IMAGE : [ "ubuntu:22.04" ] CXX: [ g++ ] - BUILD_TYPE : [ "Release", "Debug" ] + BUILD_TYPE : [ "RelWithDebInfo" ] ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] TA_TARGETS : [ "tiledarray examples-tiledarray check_serial-tiledarray" ] RUNNER_TAGS: [ cuda ] From f294db31bea86d08b8d875d218f24c65221dca76 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 22 Sep 2024 06:32:39 -0400 Subject: [PATCH 34/62] TA::host_allocator is serializable, so that btas::Tensor can be used as a tile again --- src/CMakeLists.txt | 2 - src/TiledArray/device/allocators.h | 138 ---------------------------- src/TiledArray/device/um_storage.cu | 2 +- src/TiledArray/device/um_storage.h | 2 +- src/TiledArray/external/device.h | 15 ++- src/TiledArray/external/umpire.h | 83 ++++++++++++++++- src/TiledArray/fwd.h | 32 ++++--- src/TiledArray/host/allocator.h | 78 ---------------- src/TiledArray/host/env.h | 10 ++ src/TiledArray/tensor/tensor.h | 3 +- 10 files changed, 127 insertions(+), 238 deletions(-) delete mode 100644 src/TiledArray/device/allocators.h delete mode 100644 src/TiledArray/host/allocator.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c426d1ffbe..3d6b94ea9a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -134,7 +134,6 @@ TiledArray/external/btas.h TiledArray/external/madness.h TiledArray/external/umpire.h 
TiledArray/host/env.h -TiledArray/host/allocator.h TiledArray/math/blas.h TiledArray/math/gemm_helper.h TiledArray/math/outer.h @@ -223,7 +222,6 @@ if(CUDA_FOUND OR HIP_FOUND) TiledArray/device/kernel/thrust/reduce_kernel.h TiledArray/device/platform.h TiledArray/device/thrust.h - TiledArray/device/allocators.h TiledArray/device/um_storage.h) if(CUDA_FOUND) list(APPEND TILEDARRAY_HEADER_FILES diff --git a/src/TiledArray/device/allocators.h b/src/TiledArray/device/allocators.h deleted file mode 100644 index 2bda79e768..0000000000 --- a/src/TiledArray/device/allocators.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2018 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Eduard Valeyev - * Department of Chemistry, Virginia Tech - * Jan 31, 2018 - * - */ - -#ifndef TILEDARRAY_DEVICE_ALLOCATORS_H___INCLUDED -#define TILEDARRAY_DEVICE_ALLOCATORS_H___INCLUDED - -#include - -#ifdef TILEDARRAY_HAS_DEVICE - -#include -#include - -#include - -#include -#include - -namespace TiledArray { - -template -class umpire_based_allocator - : public umpire_based_allocator_impl { - public: - using base_type = umpire_based_allocator_impl; - using typename base_type::const_pointer; - using typename base_type::const_reference; - using typename base_type::pointer; - using typename base_type::reference; - using typename base_type::value_type; - - umpire_based_allocator() noexcept : base_type(&UmpireAllocatorAccessor{}()) {} - - template - umpire_based_allocator( - const umpire_based_allocator& - rhs) noexcept - : base_type( - static_cast&>( - rhs)) {} - - template - friend bool operator==( - const umpire_based_allocator& - lhs, - const umpire_based_allocator& - rhs) noexcept; -}; // class umpire_based_allocator - -template -bool operator==( - const umpire_based_allocator& lhs, - const umpire_based_allocator& - rhs) noexcept { - return lhs.umpire_allocator() == rhs.umpire_allocator(); -} - -template -bool operator!=( - const umpire_based_allocator& lhs, - const umpire_based_allocator& - rhs) noexcept { - return !(lhs == rhs); -} - -namespace detail { - -struct get_um_allocator { - umpire::Allocator& operator()() { - return deviceEnv::instance()->um_allocator(); - } -}; - -struct get_pinned_allocator { - umpire::Allocator& operator()() { - return deviceEnv::instance()->pinned_allocator(); - } -}; - -} // namespace detail - -} // namespace TiledArray - -namespace madness { -namespace archive { - -template -struct ArchiveLoadImpl> { - static inline void load( - const Archive& ar, - TiledArray::umpire_based_allocator& allocator) { - allocator = TiledArray::umpire_based_allocator{}; - } -}; - -template -struct ArchiveStoreImpl> { - static inline void 
store( - const Archive& ar, - const TiledArray::umpire_based_allocator< - T, StaticLock, UmpireAllocatorAccessor>& allocator) {} -}; - -} // namespace archive -} // namespace madness - -#endif // TILEDARRAY_HAS_DEVICE - -#endif // TILEDARRAY_DEVICE_ALLOCATORS_H___INCLUDED diff --git a/src/TiledArray/device/um_storage.cu b/src/TiledArray/device/um_storage.cu index cc3a1aae55..8879c246f8 100644 --- a/src/TiledArray/device/um_storage.cu +++ b/src/TiledArray/device/um_storage.cu @@ -22,7 +22,7 @@ */ -#include +#include #include #ifdef TILEDARRAY_HAS_CUDA diff --git a/src/TiledArray/device/um_storage.h b/src/TiledArray/device/um_storage.h index d151a3c316..d91c032312 100644 --- a/src/TiledArray/device/um_storage.h +++ b/src/TiledArray/device/um_storage.h @@ -24,7 +24,7 @@ #ifndef TILEDARRAY_DEVICE_UM_VECTOR_H__INCLUDED #define TILEDARRAY_DEVICE_UM_VECTOR_H__INCLUDED -#include +#include #ifdef TILEDARRAY_HAS_DEVICE diff --git a/src/TiledArray/external/device.h b/src/TiledArray/external/device.h index 38bcbbc745..597643b225 100644 --- a/src/TiledArray/external/device.h +++ b/src/TiledArray/external/device.h @@ -798,9 +798,22 @@ class Env { static std::unique_ptr instance_{nullptr}; return instance_; } -}; +}; // class Env namespace detail { + +struct get_um_allocator { + umpire::Allocator& operator()() { + return deviceEnv::instance()->um_allocator(); + } +}; + +struct get_pinned_allocator { + umpire::Allocator& operator()() { + return deviceEnv::instance()->pinned_allocator(); + } +}; + // in a madness device task point to its local optional stream to use by // madness_task_stream_opt; set to nullptr after task callable finished inline std::optional*& madness_task_stream_opt_ptr_accessor() { diff --git a/src/TiledArray/external/umpire.h b/src/TiledArray/external/umpire.h index e8d0d48632..ac23a60260 100644 --- a/src/TiledArray/external/umpire.h +++ b/src/TiledArray/external/umpire.h @@ -156,6 +156,54 @@ bool operator!=( return !(lhs == rhs); } +template +class 
umpire_based_allocator + : public umpire_based_allocator_impl { + public: + using base_type = umpire_based_allocator_impl; + using typename base_type::const_pointer; + using typename base_type::const_reference; + using typename base_type::pointer; + using typename base_type::reference; + using typename base_type::value_type; + + umpire_based_allocator() noexcept : base_type(&UmpireAllocatorAccessor{}()) {} + + template + umpire_based_allocator( + const umpire_based_allocator& + rhs) noexcept + : base_type( + static_cast&>( + rhs)) {} + + template + friend bool operator==( + const umpire_based_allocator& + lhs, + const umpire_based_allocator& + rhs) noexcept; +}; // class umpire_based_allocator + +template +bool operator==( + const umpire_based_allocator& lhs, + const umpire_based_allocator& + rhs) noexcept { + return lhs.umpire_allocator() == rhs.umpire_allocator(); +} + +template +bool operator!=( + const umpire_based_allocator& lhs, + const umpire_based_allocator& + rhs) noexcept { + return !(lhs == rhs); +} + /// see /// https://stackoverflow.com/questions/21028299/is-this-behavior-of-vectorresizesize-type-n-under-c11-and-boost-container/21028912#21028912 template @@ -202,7 +250,7 @@ struct ArchiveLoadImpl& allocator) { std::string allocator_name; - ar& allocator_name; + ar & allocator_name; allocator = TiledArray::umpire_based_allocator_impl( umpire::ResourceManager::getInstance().getAllocator(allocator_name)); } @@ -214,7 +262,7 @@ struct ArchiveStoreImpl< static inline void store( const Archive& ar, const TiledArray::umpire_based_allocator_impl& allocator) { - ar& allocator.umpire_allocator()->getName(); + ar & allocator.umpire_allocator()->getName(); } }; @@ -224,7 +272,7 @@ struct ArchiveLoadImpl> { TiledArray::default_init_allocator& allocator) { if constexpr (!std::allocator_traits::is_always_equal::value) { A base_allocator; - ar& base_allocator; + ar & base_allocator; allocator = TiledArray::default_init_allocator(base_allocator); } } @@ -244,4 +292,33 
@@ struct ArchiveStoreImpl> { } // namespace archive } // namespace madness +namespace madness { +namespace archive { + +template +struct ArchiveLoadImpl> { + static inline void load( + const Archive& ar, + TiledArray::umpire_based_allocator& allocator) { + allocator = TiledArray::umpire_based_allocator{}; + } +}; + +template +struct ArchiveStoreImpl> { + static inline void store( + const Archive& ar, + const TiledArray::umpire_based_allocator< + T, StaticLock, UmpireAllocatorAccessor>& allocator) {} +}; + +} // namespace archive +} // namespace madness + #endif // TILEDARRAY_EXTERNAL_UMPIRE_H___INCLUDED diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index 97d91a9a00..6127db32f3 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -36,12 +36,27 @@ class aligned_allocator; // fwddecl host_allocator namespace TiledArray { -template -class host_allocator_impl; -template +namespace detail { +struct get_host_allocator; +struct NullLock; +template +class MutexLock; +} // namespace detail + +template +class umpire_based_allocator; + +template > class default_init_allocator; + +class hostEnv; + +/// pooled thread-safe host memory allocator template -using host_allocator = default_init_allocator>; +using host_allocator = + default_init_allocator, + detail::get_host_allocator>>; } // namespace TiledArray namespace madness { @@ -87,18 +102,9 @@ class Env; } using deviceEnv = device::Env; -template -class umpire_based_allocator; - -template > -class default_init_allocator; - namespace detail { struct get_um_allocator; struct get_pinned_allocator; -struct NullLock; -template -class MutexLock; } // namespace detail /// pooled thread-safe unified memory (UM) allocator for device computing diff --git a/src/TiledArray/host/allocator.h b/src/TiledArray/host/allocator.h deleted file mode 100644 index a22613fb38..0000000000 --- a/src/TiledArray/host/allocator.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * This file is a part of TiledArray. 
- * Copyright (C) 2021 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Eduard Valeyev - * Department of Chemistry, Virginia Tech - * Jan 31, 2018 - * - */ - -#ifndef TILEDARRAY_HOST_ALLOCATOR_H___INCLUDED -#define TILEDARRAY_HOST_ALLOCATOR_H___INCLUDED - -#include - -#include -#include - -#include - -#include -#include - -namespace TiledArray { - -/// pooled, thread-safe allocator for host memory -template -class host_allocator_impl - : public umpire_based_allocator_impl> { - public: - using base_type = umpire_based_allocator_impl>; - using typename base_type::const_pointer; - using typename base_type::const_reference; - using typename base_type::pointer; - using typename base_type::reference; - using typename base_type::value_type; - - host_allocator_impl() noexcept - : base_type(&hostEnv::instance()->host_allocator()) {} - - template - host_allocator_impl(const host_allocator_impl& rhs) noexcept - : base_type(static_cast>&>(rhs)) {} - - template - friend bool operator==(const host_allocator_impl& lhs, - const host_allocator_impl& rhs) noexcept; -}; // class host_allocator_impl - -template -bool operator==(const host_allocator_impl& lhs, - const host_allocator_impl& rhs) noexcept { - return lhs.umpire_allocator() == rhs.umpire_allocator(); -} - -template -bool operator!=(const host_allocator_impl& lhs, - const host_allocator_impl& rhs) noexcept { - return !(lhs == rhs); 
-} - -} // namespace TiledArray - -#endif // TILEDARRAY_HOST_ALLOCATOR_H___INCLUDED diff --git a/src/TiledArray/host/env.h b/src/TiledArray/host/env.h index 1b3c4f277f..be1de5369c 100644 --- a/src/TiledArray/host/env.h +++ b/src/TiledArray/host/env.h @@ -148,6 +148,16 @@ class hostEnv { } }; +namespace detail { + +struct get_host_allocator { + umpire::Allocator& operator()() { + return hostEnv::instance()->host_allocator(); + } +}; + +} // namespace detail + } // namespace TiledArray #endif // TILEDARRAY_HOST_ENV_H__INCLUDED diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index 12479ef53c..171dac2eea 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -22,7 +22,8 @@ #include "TiledArray/config.h" -#include "TiledArray/host/allocator.h" +#include "TiledArray/external/umpire.h" +#include "TiledArray/host/env.h" #include "TiledArray/math/blas.h" #include "TiledArray/math/gemm_helper.h" From f613831844410bde0be87c9833448511eb2eb4fd Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 22 Sep 2024 06:34:00 -0400 Subject: [PATCH 35/62] introduced TA::Tile::at_ordinal + strengthen disambiguation checks for potential at_ordinal uses --- src/TiledArray/tensor/tensor.h | 20 ++++++++++--- src/TiledArray/tile.h | 52 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index 171dac2eea..bf729e59d9 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -705,7 +705,7 @@ class Tensor { const_reference operator[](const Ordinal ord) const { TA_ASSERT(!this->empty()); // can't distinguish between operator[](Index...) 
and operator[](ordinal) - // thus assume at_ordinal() if this->rank()==1 + // thus insist on at_ordinal() if this->rank()==1 TA_ASSERT(this->range_.rank() != 1 && "use Tensor::operator[](index) or " "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); @@ -726,7 +726,7 @@ class Tensor { reference operator[](const Ordinal ord) { TA_ASSERT(!this->empty()); // can't distinguish between operator[](Index...) and operator[](ordinal) - // thus assume at_ordinal() if this->rank()==1 + // thus insist on at_ordinal() if this->rank()==1 TA_ASSERT(this->range_.rank() != 1 && "use Tensor::operator[](index) or " "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); @@ -848,7 +848,7 @@ class Tensor { TA_ASSERT(!this->empty()); TA_ASSERT(this->nbatch() == 1); // can't distinguish between operator[](Index...) and operator[](ordinal) - // thus assume at_ordinal() if this->rank()==1 + // thus insist on at_ordinal() if this->rank()==1 TA_ASSERT(this->range_.rank() != 1 && "use Tensor::operator()(index) or " "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); @@ -869,7 +869,7 @@ class Tensor { TA_ASSERT(!this->empty()); TA_ASSERT(this->nbatch() == 1); // can't distinguish between operator[](Index...) and operator[](ordinal) - // thus assume at_ordinal() if this->rank()==1 + // thus insist on at_ordinal() if this->rank()==1 TA_ASSERT(this->range_.rank() != 1 && "use Tensor::operator()(index) or " "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); @@ -960,6 +960,12 @@ class Tensor { const_reference operator()(const Index&... i) const { TA_ASSERT(!this->empty()); TA_ASSERT(this->nbatch() == 1); + TA_ASSERT(this->range().rank() == sizeof...(Index)); + // can't distinguish between operator()(Index...) 
and operator()(ordinal) + // thus insist on at_ordinal() if this->rank()==1 + TA_ASSERT(this->range_.rank() != 1 && + "use Tensor::operator()(index) or " + "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); using Int = std::common_type_t; const auto iord = this->range_.ordinal( std::array{{static_cast(i)...}}); @@ -982,6 +988,12 @@ class Tensor { reference operator()(const Index&... i) { TA_ASSERT(!this->empty()); TA_ASSERT(this->nbatch() == 1); + TA_ASSERT(this->range().rank() == sizeof...(Index)); + // can't distinguish between operator()(Index...) and operator()(ordinal) + // thus insist on at_ordinal() if this->rank()==1 + TA_ASSERT(this->range_.rank() != 1 && + "use Tensor::operator()(index) or " + "Tensor::at_ordinal(index_ordinal) if this->range().rank()==1"); using Int = std::common_type_t; const auto iord = this->range_.ordinal( std::array{{static_cast(i)...}}); diff --git a/src/TiledArray/tile.h b/src/TiledArray/tile.h index b8c62d95b8..39fca37d9e 100644 --- a/src/TiledArray/tile.h +++ b/src/TiledArray/tile.h @@ -250,6 +250,11 @@ class Tile { std::enable_if_t::value>* = nullptr> const_reference operator[](const Ordinal ord) const { TA_ASSERT(pimpl_); + // can't distinguish between operator[](Index...) and operator[](ordinal) + // thus insist on at_ordinal() if this->rank()==1 + TA_ASSERT(this->range().rank() != 1 && + "use Tile::operator[](index) or " + "Tile::at_ordinal(index_ordinal) if this->range().rank()==1"); TA_ASSERT(tensor().range().includes_ordinal(ord)); return tensor().data()[ord]; } @@ -264,6 +269,41 @@ class Tile { template ::value>* = nullptr> reference operator[](const Ordinal ord) { + TA_ASSERT(pimpl_); + // can't distinguish between operator[](Index...) 
and operator[](ordinal) + // thus insist on at_ordinal() if this->rank()==1 + TA_ASSERT(this->range().rank() != 1 && + "use Tile::operator[](index) or " + "Tile::at_ordinal(index_ordinal) if this->range().rank()==1"); + TA_ASSERT(tensor().range().includes_ordinal(ord)); + return tensor().data()[ord]; + } + + /// Const element accessor + + /// \tparam Ordinal an integer type that represents an ordinal + /// \param[in] ord an ordinal index + /// \return Const reference to the element at position \c ord . + /// \note This asserts (using TA_ASSERT) that this is not empty and ord is + /// included in the range + template ::value>* = nullptr> + const_reference at_ordinal(const Ordinal ord) const { + TA_ASSERT(pimpl_); + TA_ASSERT(tensor().range().includes_ordinal(ord)); + return tensor().data()[ord]; + } + + /// Element accessor + + /// \tparam Ordinal an integer type that represents an ordinal + /// \param[in] ord an ordinal index + /// \return Reference to the element at position \c ord . + /// \note This asserts (using TA_ASSERT) that this is not empty and ord is + /// included in the range + template ::value>* = nullptr> + reference at_ordinal(const Ordinal ord) { TA_ASSERT(pimpl_); TA_ASSERT(tensor().range().includes_ordinal(ord)); return tensor().data()[ord]; @@ -401,6 +441,12 @@ class Tile { detail::is_integral_list::value>* = nullptr> const_reference operator()(const Index&... i) const { TA_ASSERT(pimpl_); + TA_ASSERT(this->range().rank() == sizeof...(Index)); + // can't distinguish between operator()(Index...) and operator()(ordinal) + // thus insist on at_ordinal() if this->rank()==1 + TA_ASSERT(this->range().rank() != 1 && + "use Tile::operator()(index) or " + "Tile::at_ordinal(index_ordinal) if this->range().rank()==1"); TA_ASSERT(tensor().range().includes(i...)); return tensor().data()[tensor().range().ordinal(i...)]; } @@ -417,6 +463,12 @@ class Tile { detail::is_integral_list::value>* = nullptr> reference operator()(const Index&... 
i) { TA_ASSERT(pimpl_); + TA_ASSERT(this->range().rank() == sizeof...(Index)); + // can't distinguish between operator()(Index...) and operator()(ordinal) + // thus insist on at_ordinal() if this->rank()==1 + TA_ASSERT(this->range().rank() != 1 && + "use Tile::operator()(index) or " + "Tile::at_ordinal(index_ordinal) if this->range().rank()==1"); TA_ASSERT(tensor().range().includes(i...)); return tensor().data()[tensor().range().ordinal(i...)]; } From 64723263b72a13eb0b494cfe1ed535fdd29f4554 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 07:39:35 -0400 Subject: [PATCH 36/62] hostEnv -> host::Env + fixup to make f294db31bea86d08b8d875d218f24c65221dca76 build --- src/CMakeLists.txt | 7 ++--- src/TiledArray/external/device.h | 40 +++++++++++++++++---------- src/TiledArray/fwd.h | 5 +++- src/TiledArray/host/env.cpp | 36 ++++++++++++++++++++++++ src/TiledArray/host/env.h | 47 ++++++++++++++++---------------- 5 files changed, 92 insertions(+), 43 deletions(-) create mode 100644 src/TiledArray/host/env.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3d6b94ea9a..80f2a49710 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -133,6 +133,7 @@ TiledArray/expressions/index_list.h TiledArray/external/btas.h TiledArray/external/madness.h TiledArray/external/umpire.h +TiledArray/host/env.cpp TiledArray/host/env.h TiledArray/math/blas.h TiledArray/math/gemm_helper.h @@ -206,11 +207,7 @@ TiledArray/util/vector.h if(HIP_FOUND OR CUDA_FOUND) list(APPEND TILEDARRAY_HEADER_FILES TiledArray/external/device.h - TiledArray/external/librett.h) -endif() - -if(CUDA_FOUND OR HIP_FOUND) - list(APPEND TILEDARRAY_HEADER_FILES + TiledArray/external/librett.h TiledArray/device/blas.cpp TiledArray/device/blas.h TiledArray/device/btas.h diff --git a/src/TiledArray/external/device.h b/src/TiledArray/external/device.h index 597643b225..4f9d365e0a 100644 --- a/src/TiledArray/external/device.h +++ b/src/TiledArray/external/device.h @@ -41,8 +41,6 @@ #include 
#endif -#include - #include #include #include @@ -51,6 +49,20 @@ #include #include +#include + +namespace TiledArray::detail { + +struct get_um_allocator { + inline umpire::Allocator& operator()(); +}; + +struct get_pinned_allocator { + inline umpire::Allocator& operator()(); +}; + +} // namespace TiledArray::detail + #if defined(TILEDARRAY_HAS_CUDA) inline void __DeviceSafeCall(cudaError err, const char* file, const int line) { @@ -802,18 +814,6 @@ class Env { namespace detail { -struct get_um_allocator { - umpire::Allocator& operator()() { - return deviceEnv::instance()->um_allocator(); - } -}; - -struct get_pinned_allocator { - umpire::Allocator& operator()() { - return deviceEnv::instance()->pinned_allocator(); - } -}; - // in a madness device task point to its local optional stream to use by // madness_task_stream_opt; set to nullptr after task callable finished inline std::optional*& madness_task_stream_opt_ptr_accessor() { @@ -905,6 +905,18 @@ device::Stream stream_for(const Range& range) { } // namespace device +namespace detail { + +inline umpire::Allocator& get_um_allocator::operator()() { + return deviceEnv::instance()->um_allocator(); +} + +inline umpire::Allocator& get_pinned_allocator::operator()() { + return deviceEnv::instance()->pinned_allocator(); +} + +} // namespace detail + #endif // TILEDARRAY_HAS_DEVICE #ifdef TILEDARRAY_HAS_CUDA diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index 6127db32f3..652b835fab 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -49,7 +49,10 @@ class umpire_based_allocator; template > class default_init_allocator; -class hostEnv; +namespace host { +class Env; +} +using hostEnv = host::Env; /// pooled thread-safe host memory allocator template diff --git a/src/TiledArray/host/env.cpp b/src/TiledArray/host/env.cpp new file mode 100644 index 0000000000..16d3a71a50 --- /dev/null +++ b/src/TiledArray/host/env.cpp @@ -0,0 +1,36 @@ +/* + * This file is a part of TiledArray. 
+ * Copyright (C) 2021 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Chong Peng + * Department of Chemistry, Virginia Tech + * July 23, 2018 + * + */ + +#include + +namespace TiledArray { + +namespace detail { + +umpire::Allocator& get_host_allocator::operator()() { + return TiledArray::host::Env::instance()->host_allocator(); +} + +} // namespace detail + +} // namespace TiledArray diff --git a/src/TiledArray/host/env.h b/src/TiledArray/host/env.h index be1de5369c..b469704a72 100644 --- a/src/TiledArray/host/env.h +++ b/src/TiledArray/host/env.h @@ -41,24 +41,34 @@ namespace TiledArray { +namespace detail { + +struct get_host_allocator { + umpire::Allocator& operator()(); +}; + +} // namespace detail + +namespace host { + /** - * hostEnv maintains the (host-side, as opposed to device-side) environment, + * Env maintains the (host-side, as opposed to device-side) environment, * such as memory allocators * * \note this is a Singleton */ -class hostEnv { +class Env { public: - ~hostEnv() = default; + ~Env() = default; - hostEnv(const hostEnv&) = delete; - hostEnv(hostEnv&&) = delete; - hostEnv& operator=(const hostEnv&) = delete; - hostEnv& operator=(hostEnv&&) = delete; + Env(const Env&) = delete; + Env(Env&&) = delete; + Env& operator=(const Env&) = delete; + Env& operator=(Env&&) = delete; /// access the singleton instance; if not initialized will be - /// initialized 
via hostEnv::initialize() with the default params - static std::unique_ptr& instance() { + /// initialized via Env::initialize() with the default params + static std::unique_ptr& instance() { if (!instance_accessor()) { initialize(); } @@ -103,8 +113,7 @@ class hostEnv { "QuickPool_SizeLimited_HOST", host_size_limited_alloc, page_size, page_size, /* alignment */ TILEDARRAY_ALIGN_SIZE); - auto host_env = - std::unique_ptr(new hostEnv(world, host_dynamic_pool)); + auto host_env = std::unique_ptr(new Env(world, host_dynamic_pool)); instance_accessor() = std::move(host_env); } } @@ -131,7 +140,7 @@ class hostEnv { } protected: - hostEnv(World& world, umpire::Allocator host_alloc) + Env(World& world, umpire::Allocator host_alloc) : world_(&world), host_allocator_(host_alloc) {} private: @@ -142,21 +151,13 @@ class hostEnv { // N.B. not thread safe, so must be wrapped into umpire_based_allocator_impl umpire::Allocator host_allocator_; - inline static std::unique_ptr& instance_accessor() { - static std::unique_ptr instance_{nullptr}; + inline static std::unique_ptr& instance_accessor() { + static std::unique_ptr instance_{nullptr}; return instance_; } }; -namespace detail { - -struct get_host_allocator { - umpire::Allocator& operator()() { - return hostEnv::instance()->host_allocator(); - } -}; - -} // namespace detail +} // namespace host } // namespace TiledArray From 57eb4e14098b35481f028e8a85cf3d0c51e10930 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 15:43:49 -0400 Subject: [PATCH 37/62] [ci] do not use gcc toolchain on macos, instead try linux + enable ccache --- .github/workflows/ci.yml | 55 +++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2339070e54..8e71db9403 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,16 +12,26 @@ jobs: strategy: fail-fast: false matrix: - os : [ macos-latest ] - cxx : [ clang++, 
/opt/homebrew/bin/g++-11 ] + os : [ macos-latest, ubuntu-22.04 ] build_type : [ Release, Debug ] task_backend: [ Pthreads, PaRSEC ] - prerequisites : [ gcc@11 boost eigen open-mpi bison scalapack ] + include: + - os: ubuntu-22.04 + cc: /usr/bin/gcc-12 + cxx: /usr/bin/g++-12 + - os: macos-latest + cc: clang + cxx: clang++ name: "${{ matrix.os }}: ${{ matrix.cxx }} ${{ matrix.build_type }} ${{ matrix.task_backend }}" runs-on: ${{ matrix.os }} env: CXX : ${{ matrix.cxx }} + CCACHE_DIR : ${{github.workspace}}/build/.ccache + CCACHE_COMPRESS : true + CCACHE_COMPRESSLEVEL : 6 + OMPI_MCA_btl_vader_single_copy_mechanism : none + PARSEC_MCA_runtime_bind_threads : 0 BUILD_CONFIG : > -DMADNESS_TASK_BACKEND=${{ matrix.task_backend }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} @@ -33,18 +43,40 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: maxim-lobanov/setup-xcode@v1 - with: - xcode-version: 'latest-stable' - - name: Host system info shell: bash run: cmake -P ${{github.workspace}}/ci/host_system_info.cmake - - name: Install ${{matrix.prerequisites}} + + - name: Install prerequisite MacOS packages + if: ${{ matrix.os == 'macos-latest' }} + run: | + brew install ninja boost eigen open-mpi bison scalapack ccache + echo "MPIEXEC=/opt/homebrew/bin/mpiexec" >> $GITHUB_ENV + + - name: Install prerequisites Ubuntu packages + if: ${{ matrix.os == 'ubuntu-22.04' }} run: | - brew install ${{matrix.prerequisites}} - echo "/usr/local/opt/bison/bin" >> $GITHUB_PATH + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null + sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" + sudo apt-get update + sudo apt-get -y install ninja-build g++-12 liblapack-dev libboost-dev libboost-serialization-dev libboost-random-dev libeigen3-dev openmpi-bin libopenmpi-dev libtbb-dev ccache flex bison libscalapack-openmpi-dev cmake doxygen + echo "MPIEXEC=/usr/bin/mpiexec" 
>> $GITHUB_ENV + + - name: Prepare ccache timestamp + id: ccache_cache_timestamp + shell: cmake -P {0} + run: | + string(TIMESTAMP current_date "%Y-%m-%d-%H;%M;%S" UTC) + message("::set-output name=timestamp::${current_date}") + + - name: Setup ccache cache files + uses: actions/cache@v1.1.0 + with: + path: ${{github.workspace}}/build/.ccache + key: ${{ matrix.config.name }}-ccache-${{ steps.ccache_cache_timestamp.outputs.timestamp }} + restore-keys: | + ${{ matrix.config.name }}-ccache- - name: "Configure build: ${{ env.BUILD_CONFIG }}" shell: bash @@ -56,8 +88,7 @@ jobs: working-directory: ${{github.workspace}}/build shell: bash run: | - cmake --build . --target tiledarray - cmake --build . --target examples + ccache -p && ccache -z && cmake --build . --target tiledarray && cmake --build . --target examples && ccache -s - name: Test working-directory: ${{github.workspace}}/build From 7343ae68308807ebe55e0c83413be75b1fe36ce5 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 16:02:46 -0400 Subject: [PATCH 38/62] [ci] try symlinking libscalapack-openmpi.so to libscalapack.so to help out FindReferenceSCALAPACK --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8e71db9403..143c88f8ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,8 @@ jobs: wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" sudo apt-get update - sudo apt-get -y install ninja-build g++-12 liblapack-dev libboost-dev libboost-serialization-dev libboost-random-dev libeigen3-dev openmpi-bin libopenmpi-dev libtbb-dev ccache flex bison libscalapack-openmpi-dev cmake doxygen + sudo apt-get -y install ninja-build g++-12 liblapack-dev libboost-dev 
libboost-serialization-dev libboost-random-dev libeigen3-dev openmpi-bin libopenmpi-dev libtbb-dev ccache flex bison libscalapack-mpi-dev cmake doxygen + sudo ln -s /usr/lib/x86_64-linux-gnu/libscalapack-openmpi.so /usr/lib/x86_64-linux-gnu/libscalapack.so echo "MPIEXEC=/usr/bin/mpiexec" >> $GITHUB_ENV - name: Prepare ccache timestamp @@ -82,7 +83,7 @@ jobs: shell: bash run: | set -x; - cmake -B${{github.workspace}}/build $BUILD_CONFIG || (cat CMakeFiles/CMakeOutput.log && cat CMakeFiles/CMakeError.log) + cmake -B${{github.workspace}}/build $BUILD_CONFIG || (cat CMakeFiles/CMakeConfigureLog.yaml) - name: Build working-directory: ${{github.workspace}}/build From df09400150a095500421a04397b41f54567da86a Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 16:48:02 -0400 Subject: [PATCH 39/62] [unit] [cuda] another disambiguation via at_ordinal --- tests/expressions_device_um.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/expressions_device_um.cpp b/tests/expressions_device_um.cpp index e624756561..d49b425372 100644 --- a/tests/expressions_device_um.cpp +++ b/tests/expressions_device_um.cpp @@ -85,7 +85,8 @@ struct UMExpressionsFixture : public TiledRangeFixture { template static Tile make_rand_tile(const typename TA::Range& r) { Tile tile(r); - for (std::size_t i = 0ul; i < tile.size(); ++i) set_random(tile[i]); + for (std::size_t i = 0ul; i < tile.size(); ++i) + set_random(tile.at_ordinal(i)); return tile; } From 0680b70b453751178c7f582259c8f9e623525376 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 16:48:25 -0400 Subject: [PATCH 40/62] [ci] disable Gitlab jobs except CUDA --- .gitlab-ci.yml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 02c3edc266..8b675a692c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -57,22 +57,10 @@ ubuntu: metrics: build/metrics.txt parallel: matrix: - - IMAGE : [ "ubuntu:20.04" ] - CXX: [ g++ ] - 
BUILD_TYPE : [ "RelWithDebInfo" ] - BLA_VENDOR : [ "BLAS_PREFERENCE_LIST=IntelMKL" ] - BLA_THREADS : [ "IntelMKL_THREAD_LAYER=tbb" ] - # ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] - TA_PYTHON : [ "TA_PYTHON=OFF" ] # needs to be fixed for MKL - RUNNER_TAGS: [ saas-linux-small-amd64 ] - - IMAGE : [ "ubuntu:22.04" ] - CXX: [ g++, clang++-13 ] - BUILD_TYPE : [ "RelWithDebInfo" ] - ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] - RUNNER_TAGS: [ saas-linux-small-amd64 ] - IMAGE : [ "ubuntu:22.04" ] CXX: [ g++ ] BUILD_TYPE : [ "RelWithDebInfo" ] + TA_PYTHON : [ "TA_PYTHON=OFF" ] ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] TA_TARGETS : [ "tiledarray examples-tiledarray check_serial-tiledarray" ] RUNNER_TAGS: [ cuda ] From 016cc8155878b70b0216c517b8a12dcca98fd196 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 20:42:56 -0400 Subject: [PATCH 41/62] introduce {Tensor,Tile}::c{begin,end} --- src/TiledArray/tensor/tensor.h | 28 ++++++++++++++++++++++++++-- src/TiledArray/tile.h | 20 ++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index bf729e59d9..bd72af487c 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -1003,7 +1003,7 @@ class Tensor { /// Iterator factory - /// \return An iterator to the first data element + /// \return A const iterator to the first data element const_iterator begin() const { return (this->data() ? this->data() : NULL); } /// Iterator factory @@ -1013,7 +1013,7 @@ class Tensor { /// Iterator factory - /// \return An iterator to the last data element + /// \return A const iterator to the last data element const_iterator end() const { return (this->data() ? this->data() + this->size() : NULL); } @@ -1023,6 +1023,30 @@ class Tensor { /// \return An iterator to the last data element iterator end() { return (this->data() ? 
this->data() + this->size() : NULL); } + /// Iterator factory + + /// \return A const iterator to the first data element + const_iterator cbegin() const { return (this->data() ? this->data() : NULL); } + + /// Iterator factory + + /// \return A const iterator to the first data element + const_iterator cbegin() { return (this->data() ? this->data() : NULL); } + + /// Iterator factory + + /// \return A const iterator to the last data element + const_iterator cend() const { + return (this->data() ? this->data() + this->size() : NULL); + } + + /// Iterator factory + + /// \return A const iterator to the last data element + const_iterator cend() { + return (this->data() ? this->data() + this->size() : NULL); + } + /// Read-only access to the data /// \return A const pointer to the tensor data diff --git a/src/TiledArray/tile.h b/src/TiledArray/tile.h index 39fca37d9e..90f7366bbc 100644 --- a/src/TiledArray/tile.h +++ b/src/TiledArray/tile.h @@ -201,6 +201,26 @@ class Tile { /// \return A const iterator to the last data element decltype(auto) end() const { return std::end(tensor()); } + /// Iterator factory + + /// \return A const iterator to the first data element + decltype(auto) cbegin() { return std::cbegin(tensor()); } + + /// Iterator factory + + /// \return A const iterator to the first data element + decltype(auto) cbegin() const { return std::cbegin(tensor()); } + + /// Iterator factory + + /// \return A const iterator to the last data element + decltype(auto) cend() { return std::cend(tensor()); } + + /// Iterator factory + + /// \return A const iterator to the last data element + decltype(auto) cend() const { return std::cend(tensor()); } + // Data accessor ------------------------------------------------------- /// Data direct access From a125ad7f1de1b0f9bc54454d35fe46970290cec6 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 20:44:21 -0400 Subject: [PATCH 42/62] TensorInterface is a range --- src/TiledArray/tensor/tensor_interface.h | 76 
++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/src/TiledArray/tensor/tensor_interface.h b/src/TiledArray/tensor/tensor_interface.h index 7a23307036..7a2e350a2f 100644 --- a/src/TiledArray/tensor/tensor_interface.h +++ b/src/TiledArray/tensor/tensor_interface.h @@ -273,6 +273,82 @@ class TensorInterface { return data_[range_.ordinal(idx...)]; } + /// \brief Tensor interface iterator type + /// + /// Iterates over elements of a tensor interface whose range is iterable + template + class Iterator : public boost::iterator_facade< + Iterator, + std::conditional_t, + const typename TI::value_type, + typename TI::value_type>, + boost::forward_traversal_tag> { + public: + using range_iterator = typename TI::range_type::const_iterator; + + Iterator(range_iterator idx_it, TI& ti) : idx_it(idx_it), ti(ti) {} + + private: + range_iterator idx_it; + TI& ti; + + friend class boost::iterator_core_access; + + /// \brief increments this iterator + void increment() { ++idx_it; } + + /// \brief Iterator comparer + /// \return true, if \c `*this==*other` + bool equal(Iterator const& other) const { + return this->idx_it == other.idx_it; + } + + /// \brief dereferences this iterator + /// \return const reference to the current index + auto& dereference() const { + return ti.at_ordinal(ti.range().ordinal(*idx_it)); + } + }; + friend class Iterator; + friend class Iterator; + + typedef Iterator iterator; ///< Iterator type + typedef Iterator const_iterator; ///< Iterator type + + /// Const begin iterator + + /// \return An iterator that points to the beginning of this tensor view + const_iterator begin() const { + return const_iterator(range().begin(), *this); + } + + /// Const end iterator + + /// \return An iterator that points to the end of this tensor view + const_iterator end() const { return const_iterator(range().end(), *this); } + + /// Nonconst begin iterator + + /// \return An iterator that points to the beginning of this tensor view + iterator begin() { 
return iterator(range().begin(), *this); } + /// Nonconst end iterator + + /// \return An iterator that points to the end of this tensor view + iterator end() { return iterator(range().end(), *this); } + + /// Const begin iterator + + /// \return An iterator that points to the beginning of this tensor view + const_iterator cbegin() const { + return const_iterator(range().begin(), *this); + } + + /// Const end iterator + + /// \return An iterator that points to the end of this tensor view + const_iterator cend() const { return const_iterator(range().end(), *this); } + /// Check for empty view /// \return \c false From 81a3af5c3643e8d6a9a6f7acc010499d8f50a939 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 20:44:41 -0400 Subject: [PATCH 43/62] dox fixup --- src/TiledArray/range1.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/TiledArray/range1.h b/src/TiledArray/range1.h index 8b185936d4..a29e0d607c 100644 --- a/src/TiledArray/range1.h +++ b/src/TiledArray/range1.h @@ -163,8 +163,6 @@ struct Range1 { /// \return An iterator that points to the beginning of the local element set const_iterator cend() const { return end(); } - /// @} - /// shifts this Range1 /// @param[in] shift the shift to apply From 87b024b908f46dafd65cc30d1332c385957bc659 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 23 Sep 2024 20:46:12 -0400 Subject: [PATCH 44/62] btas::Tensor can be copied into from Tensor and TensorInterface ...
this allows TA::retile on DistArrays of btas::Tensors --- src/TiledArray/external/btas.h | 7 +++++++ tests/btas.cpp | 15 +++++++++++++++ tests/expressions_btas.cpp | 2 ++ 3 files changed, 24 insertions(+) diff --git a/src/TiledArray/external/btas.h b/src/TiledArray/external/btas.h index fe84e6f0c6..c22afd3813 100644 --- a/src/TiledArray/external/btas.h +++ b/src/TiledArray/external/btas.h @@ -62,6 +62,13 @@ class boxrange_iteration_order { static constexpr int value = row_major; }; +template +class is_tensor> : public std::true_type {}; + +template +class is_tensor> + : public std::true_type {}; + } // namespace btas namespace TiledArray { diff --git a/tests/btas.cpp b/tests/btas.cpp index 9c15540e9a..4e972cfc28 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -256,6 +256,21 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(tensor_ctor, Tensor, tensor_types) { BOOST_REQUIRE_NO_THROW(Tensor t1 = t0); Tensor t1 = t0; BOOST_CHECK(t1.empty()); + + // can copy TA::Tensor to btas::Tensor + TA::Tensor ta_tensor(r); + BOOST_REQUIRE_NO_THROW(Tensor(ta_tensor)); + Tensor t2(ta_tensor); + for (auto i : r) { + BOOST_CHECK_EQUAL(ta_tensor(i), t2(i)); + } + + // can copy TA::TensorInterface to btas::Tensor + BOOST_REQUIRE_NO_THROW(Tensor(ta_tensor.block(r.lobound(), r.upbound()))); + Tensor t3(ta_tensor.block(r.lobound(), r.upbound())); + for (auto i : r) { + BOOST_CHECK_EQUAL(ta_tensor(i), t3(i)); + } } BOOST_AUTO_TEST_CASE_TEMPLATE(copy, Array, array_types) { diff --git a/tests/expressions_btas.cpp b/tests/expressions_btas.cpp index 83ff4b1ed0..7b1ae422ce 100644 --- a/tests/expressions_btas.cpp +++ b/tests/expressions_btas.cpp @@ -23,6 +23,8 @@ * */ +#include + #ifdef TILEDARRAY_HAS_BTAS #include "expressions_fixture.h" From f95e0dbe609c8090d2e5ab0203030b19e3ea8fb6 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 24 Sep 2024 08:37:10 -0400 Subject: [PATCH 45/62] [unit] btas_suite/tensor_ctor: initialize ta_tensor properly --- tests/btas.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/tests/btas.cpp b/tests/btas.cpp index 4e972cfc28..ebaf2f02a4 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -258,7 +258,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(tensor_ctor, Tensor, tensor_types) { BOOST_CHECK(t1.empty()); // can copy TA::Tensor to btas::Tensor - TA::Tensor ta_tensor(r); + TA::Tensor ta_tensor; + ta_tensor = make_rand_tile(r); BOOST_REQUIRE_NO_THROW(Tensor(ta_tensor)); Tensor t2(ta_tensor); for (auto i : r) { From 6a926a9fc0f6168d142717a347b72afb614fd1be Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 24 Sep 2024 08:55:11 -0400 Subject: [PATCH 46/62] fixup TensorInterface::Iterator::dereference() --- src/TiledArray/tensor/tensor_interface.h | 12 +++++------- tests/btas.cpp | 13 +++++++++---- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/TiledArray/tensor/tensor_interface.h b/src/TiledArray/tensor/tensor_interface.h index 7a2e350a2f..46663aad2f 100644 --- a/src/TiledArray/tensor/tensor_interface.h +++ b/src/TiledArray/tensor/tensor_interface.h @@ -259,8 +259,8 @@ class TensorInterface { /// \param idx The index pack template reference operator()(const Index&... idx) { - TA_ASSERT(range_.includes(idx...)); - return data_[range_.ordinal(idx...)]; + const auto ord = range_.ordinal(idx...); + return data_[ord]; } /// Element accessor @@ -269,8 +269,8 @@ class TensorInterface { /// \param idx The index pack template const_reference operator()(const Index&...
idx) const { - TA_ASSERT(range_.includes(idx...)); - return data_[range_.ordinal(idx...)]; + const auto ord = range_.ordinal(idx...); + return data_[ord]; } /// \brief Tensor interface iterator type @@ -305,9 +305,7 @@ class TensorInterface { /// \brief dereferences this iterator /// \return const reference to the current index - auto& dereference() const { - return ti.at_ordinal(ti.range().ordinal(*idx_it)); - } + auto& dereference() const { return ti(*idx_it); } }; friend class Iterator; friend class Iterator; diff --git a/tests/btas.cpp b/tests/btas.cpp index ebaf2f02a4..c396110a2f 100644 --- a/tests/btas.cpp +++ b/tests/btas.cpp @@ -267,10 +267,15 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(tensor_ctor, Tensor, tensor_types) { } // can copy TA::TensorInterface to btas::Tensor - BOOST_REQUIRE_NO_THROW(Tensor(ta_tensor.block(r.lobound(), r.upbound()))); - Tensor t3(ta_tensor.block(r.lobound(), r.upbound())); - for (auto i : r) { - BOOST_CHECK_EQUAL(ta_tensor(i), t3(i)); + { + const auto l = {3, 3, 3}; + const auto u = r.upbound(); + BOOST_REQUIRE(r.includes(l)); + BOOST_REQUIRE_NO_THROW(Tensor(ta_tensor.block(l, u))); + Tensor t3(ta_tensor.block(l, u)); + for (auto i : t3.range()) { + BOOST_CHECK_EQUAL(ta_tensor(i), t3(i)); + } } } From 3c2f7e579668062debe9fd9105dbd8cfaf33f857 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 24 Sep 2024 09:05:19 -0400 Subject: [PATCH 47/62] pull in https://github.com/ValeevGroup/BTAS/pull/179 --- INSTALL.md | 2 +- external/versions.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index db11ed24df..0e573bb050 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -41,7 +41,7 @@ Both methods are supported. 
However, for most users we _strongly_ recommend to b - Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing* - Boost.Range: header-only, *only used for unit testing* - [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20 and later. -- [BTAS](http://github.com/ValeevGroup/BTAS), tag 4e8f5233aa7881dccdfcc37ce07128833926d3c2 . If usable BTAS installation is not found, TiledArray will download and compile +- [BTAS](http://github.com/ValeevGroup/BTAS), tag 4b3757cc2b5862f93589afc1e37523e543779c7a . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. - [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 95589b0d020a076f93d02eead6da654b23dd3d91 . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. diff --git a/external/versions.cmake b/external/versions.cmake index 87804775f9..3363908bf3 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -16,8 +16,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 96ac90e8f193ccfaf16f346b4652927d2d362e75) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 4e8f5233aa7881dccdfcc37ce07128833926d3c2) -set(TA_TRACKED_BTAS_PREVIOUS_TAG b7b2ea7513b087e35c6f1b26184a3904ac1e6b14) +set(TA_TRACKED_BTAS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) +set(TA_TRACKED_BTAS_PREVIOUS_TAG 4e8f5233aa7881dccdfcc37ce07128833926d3c2) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83) From b1210c497b70b27c102c2aa4dcebccc38a2c916b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 25 Sep 2024 15:34:12 -0400 Subject: [PATCH 48/62] ccache needs to be discovered before other prereqs --- CMakeLists.txt | 19 ++++++++++--------- 1 file 
changed, 10 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 101b1b0d16..8763e0da18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -299,6 +299,15 @@ include_directories(${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src) ########################## add_custom_target(External-tiledarray) +# ccache is an optional dep but must be found first so that the rest of dependencies can use it +find_program(CCACHE ccache) +if(CCACHE) + mark_as_advanced(CCACHE) + message (STATUS "Found ccache: ${CCACHE}") + set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C++") + set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C") +endif(CCACHE) + # required deps: # 1. derive runtime (CUDA/HIP/...) first since others may depend on it if(ENABLE_CUDA) @@ -336,15 +345,7 @@ if(ENABLE_SCALAPACK) include(external/scalapackpp.cmake) endif() -# optional deps: -# 1. ccache -find_program(CCACHE ccache) -if(CCACHE) - mark_as_advanced(CCACHE) - message (STATUS "Found ccache: ${CCACHE}") - set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C++") - set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C") -endif(CCACHE) +# other optional deps: # 2. TTG # N.B. 
make sure TA configures MADNESS correctly #if (TA_TTG) From 144c55b9d5ec72cbe7d3041dbb9cca86eea80c1f Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 25 Sep 2024 15:35:29 -0400 Subject: [PATCH 49/62] [ci] use hendrikmuhs/ccache-action@v1.2 for proper use of ccache --- .github/workflows/ci.yml | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 143c88f8ea..b085f9c8f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,20 +64,10 @@ jobs: sudo ln -s /usr/lib/x86_64-linux-gnu/libscalapack-openmpi.so /usr/lib/x86_64-linux-gnu/libscalapack.so echo "MPIEXEC=/usr/bin/mpiexec" >> $GITHUB_ENV - - name: Prepare ccache timestamp - id: ccache_cache_timestamp - shell: cmake -P {0} - run: | - string(TIMESTAMP current_date "%Y-%m-%d-%H;%M;%S" UTC) - message("::set-output name=timestamp::${current_date}") - - - name: Setup ccache cache files - uses: actions/cache@v1.1.0 + - name: Setup ccache + uses: hendrikmuhs/ccache-action@v1.2 with: - path: ${{github.workspace}}/build/.ccache - key: ${{ matrix.config.name }}-ccache-${{ steps.ccache_cache_timestamp.outputs.timestamp }} - restore-keys: | - ${{ matrix.config.name }}-ccache- + key: ccache-${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.task_backend }} - name: "Configure build: ${{ env.BUILD_CONFIG }}" shell: bash From a263802aec44da64bf0d8d720511d05fbb8299a6 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 25 Sep 2024 16:14:46 -0400 Subject: [PATCH 50/62] use ccache for CUDA --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8763e0da18..a130211293 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -306,6 +306,7 @@ if(CCACHE) message (STATUS "Found ccache: ${CCACHE}") set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C++") set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to 
use for compiling C") + set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling CUDA") endif(CCACHE) # required deps: From 4523aa68b563949a86ce76b4401759bcb9efd99b Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 25 Sep 2024 16:15:11 -0400 Subject: [PATCH 51/62] pass compiler launchers to LibreTT & Umpire --- external/librett.cmake | 7 +++++++ external/umpire.cmake | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/external/librett.cmake b/external/librett.cmake index afebabb486..5eca3314ce 100644 --- a/external/librett.cmake +++ b/external/librett.cmake @@ -98,6 +98,13 @@ else() "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") endif(CMAKE_TOOLCHAIN_FILE) + foreach(lang C CXX CUDA) + if (DEFINED CMAKE_${lang}_COMPILER_LAUNCHER) + list(APPEND LIBRETT_CMAKE_ARGS + "-DCMAKE_${lang}_COMPILER_LAUNCHER=${CMAKE_${lang}_COMPILER_LAUNCHER}") + endif() + endforeach() + if (BUILD_SHARED_LIBS) set(LIBRETT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) else(BUILD_SHARED_LIBS) diff --git a/external/umpire.cmake b/external/umpire.cmake index 37152e98d2..c6abe2dfd0 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -152,6 +152,13 @@ else() ) endif(CMAKE_TOOLCHAIN_FILE) + foreach(lang C CXX CUDA) + if (DEFINED CMAKE_${lang}_COMPILER_LAUNCHER) + list(APPEND UMPIRE_CMAKE_ARGS + "-DCMAKE_${lang}_COMPILER_LAUNCHER=${CMAKE_${lang}_COMPILER_LAUNCHER}") + endif() + endforeach() + if (BUILD_SHARED_LIBS) set(UMPIRE_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) else(BUILD_SHARED_LIBS) From 23e0eaaadda0e42ecb5e513743c0b9d65f9b5f0d Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 25 Sep 2024 16:28:08 -0400 Subject: [PATCH 52/62] [ci] build ta_test as part of "Build" step --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b085f9c8f1..4c6a097d9a 100644 --- a/.github/workflows/ci.yml +++ 
b/.github/workflows/ci.yml @@ -79,7 +79,7 @@ jobs: working-directory: ${{github.workspace}}/build shell: bash run: | - ccache -p && ccache -z && cmake --build . --target tiledarray && cmake --build . --target examples && ccache -s + ccache -p && ccache -z && cmake --build . --target tiledarray ta_test examples && ccache -s - name: Test working-directory: ${{github.workspace}}/build @@ -87,5 +87,4 @@ jobs: #run: ctest -C $${{matrix.build_type}} run: | source ${{github.workspace}}/ci/openmpi.env - cmake --build . --target ta_test cmake --build . --target check-tiledarray From af4c88e08f4d32ba5412ddd349cfbbbdafd73a00 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 25 Sep 2024 16:56:45 -0400 Subject: [PATCH 53/62] [cmake] pull in https://github.com/ValeevGroup/BTAS/pull/179 to speed up btas::Tensor construction from TensorInterface --- INSTALL.md | 2 +- external/versions.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 0e573bb050..1b8a5de202 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -41,7 +41,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing* - Boost.Range: header-only, *only used for unit testing* - [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20 and later. -- [BTAS](http://github.com/ValeevGroup/BTAS), tag 4b3757cc2b5862f93589afc1e37523e543779c7a . If usable BTAS installation is not found, TiledArray will download and compile +- [BTAS](http://github.com/ValeevGroup/BTAS), tag 1cfcb12647c768ccd83b098c64cda723e1275e49 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. - [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 95589b0d020a076f93d02eead6da654b23dd3d91 . 
Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. diff --git a/external/versions.cmake b/external/versions.cmake index 3363908bf3..6c87fa5a72 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -16,8 +16,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 96ac90e8f193ccfaf16f346b4652927d2d362e75) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) -set(TA_TRACKED_BTAS_PREVIOUS_TAG 4e8f5233aa7881dccdfcc37ce07128833926d3c2) +set(TA_TRACKED_BTAS_TAG 1cfcb12647c768ccd83b098c64cda723e1275e49) +set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83) From 90d2ef72b6e199b981a3d0a33d45b4ef10b54be2 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 25 Sep 2024 18:28:39 -0400 Subject: [PATCH 54/62] [ci] control location of ccache cache + monitor ccache stats --- .gitlab-ci.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8b675a692c..33a8d0c9bf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -25,6 +25,14 @@ before_script: # TODO optimize ta_test build memory consumption - export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:=1} - echo "CMAKE_BUILD_PARALLEL_LEVEL=$CMAKE_BUILD_PARALLEL_LEVEL" + # configure ccache + - export CCACHE_DIR=/root/.ccache + - export CCACHE_COMPRESS=true + - export CCACHE_COMPRESSLEVEL=6 + # print out the ccache configuration + - ccache -p + # zero out the ccache statistics + - ccache -z ubuntu: stage: build @@ -64,3 +72,8 @@ ubuntu: ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] TA_TARGETS : [ "tiledarray examples-tiledarray check_serial-tiledarray" ] RUNNER_TAGS: [ cuda ] + + +after_script: + # print out the ccache statistics + - ccache -s From 
5a43925eb506efb07fb5c0b3e7ba402d2d10f7d6 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 27 Sep 2024 16:57:48 -0400 Subject: [PATCH 55/62] pull in Umpire https://github.com/LLNL/Umpire/pull/913 which makes https://github.com/ValeevGroup/tiledarray/commit/2e4572af6dae9c2ed92a3ace8807925f9acf99a3 obsolete --- INSTALL.md | 2 +- external/umpire.cmake | 2 -- external/umpire.finalize_io.patch | 47 ------------------------------- external/versions.cmake | 4 +-- 4 files changed, 3 insertions(+), 52 deletions(-) delete mode 100644 external/umpire.finalize_io.patch diff --git a/INSTALL.md b/INSTALL.md index 1b8a5de202..f2891672e2 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -69,7 +69,7 @@ Optional prerequisites: - [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on NVIDIA's CUDA-enabled accelerators. CUDA 11 or later is required. - [HIP/ROCm compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on AMD's ROCm-enabled accelerators. Note that TiledArray does not use ROCm directly but its C++ Heterogeneous-Compute Interface for Portability, `HIP`; although HIP can also be used to program CUDA-enabled devices, in TiledArray it is used only to program ROCm devices, hence ROCm and HIP will be used interchangeably. - [LibreTT](github.com/victor-anisimov/LibreTT) -- free tensor transpose library for CUDA, ROCm, and SYCL platforms that is based on the [original cuTT library](github.com/ap-hynninen/cutt) extended to provide thread-safety improvements (via github.com/ValeevGroup/cutt) and extended to non-CUDA platforms by [@victor-anisimov](github.com/victor-anisimov) (tag 6eed30d4dd2a5aa58840fe895dcffd80be7fbece). - - [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag v2024.02.1). + - [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag 8c85866107f78a58403e20a2ae8e1f24c9852287). 
- [Doxygen](http://www.doxygen.nl/) -- for building documentation (version 1.8.12 or later). - [ScaLAPACK](http://www.netlib.org/scalapack/) -- a distributed-memory linear algebra package. If detected, the following C++ components will also be sought and downloaded, if missing: - [scalapackpp](https://github.com/wavefunction91/scalapackpp.git) -- a modern C++ (C++17) wrapper for ScaLAPACK (tag 6397f52cf11c0dfd82a79698ee198a2fce515d81); pulls and builds the following additional prerequisite diff --git a/external/umpire.cmake b/external/umpire.cmake index c6abe2dfd0..ee2fa490e1 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -177,8 +177,6 @@ else() DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} GIT_REPOSITORY ${UMPIRE_URL} GIT_TAG ${UMPIRE_TAG} - #--Patch step----------------- - PATCH_COMMAND patch -p1 -i ${CMAKE_CURRENT_SOURCE_DIR}/external/umpire.finalize_io.patch #--Configure step------------- SOURCE_DIR ${EXTERNAL_SOURCE_DIR} LIST_SEPARATOR :: diff --git a/external/umpire.finalize_io.patch b/external/umpire.finalize_io.patch deleted file mode 100644 index fa78727d7f..0000000000 --- a/external/umpire.finalize_io.patch +++ /dev/null @@ -1,47 +0,0 @@ -diff --git a/src/umpire/util/io.cpp b/src/umpire/util/io.cpp -index 806fb9e3..551c5e82 100644 ---- a/src/umpire/util/io.cpp -+++ b/src/umpire/util/io.cpp -@@ -52,10 +52,23 @@ std::ostream& error() - - namespace util { - -+namespace detail { -+OutputBuffer& s_log_buffer_accessor() -+{ -+ static OutputBuffer buffer; -+ return buffer; -+} -+OutputBuffer& s_error_buffer_accessor() -+{ -+ static OutputBuffer buffer; -+ return buffer; -+} -+} -+ - void initialize_io(const bool enable_log) - { -- static util::OutputBuffer s_log_buffer; -- static util::OutputBuffer s_error_buffer; -+ OutputBuffer& s_log_buffer = detail::s_log_buffer_accessor(); -+ OutputBuffer& s_error_buffer = detail::s_error_buffer_accessor(); - - s_log_buffer.setConsoleStream(nullptr); - s_error_buffer.setConsoleStream(&std::cerr); -@@ -121,6 +134,16 
@@ void initialize_io(const bool enable_log) - MPI::logMpiInfo(); - } - -+void finalize_io() -+{ -+ detail::s_log_buffer_accessor().sync(); -+ detail::s_log_buffer_accessor().setConsoleStream(nullptr); -+ detail::s_log_buffer_accessor().setFileStream(nullptr); -+ detail::s_error_buffer_accessor().sync(); -+ detail::s_error_buffer_accessor().setConsoleStream(nullptr); -+ detail::s_error_buffer_accessor().setFileStream(nullptr); -+} -+ - void flush_files() - { - log().flush(); diff --git a/external/versions.cmake b/external/versions.cmake index 6c87fa5a72..909a969c28 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -22,8 +22,8 @@ set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83) -set(TA_TRACKED_UMPIRE_TAG v2024.02.1) -set(TA_TRACKED_UMPIRE_PREVIOUS_TAG 20839b2e8e8972070dd8f75c7f00d50d6c399716) +set(TA_TRACKED_UMPIRE_TAG 8c85866107f78a58403e20a2ae8e1f24c9852287) +set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v2024.02.1) set(TA_TRACKED_SCALAPACKPP_TAG 6397f52cf11c0dfd82a79698ee198a2fce515d81) set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 711ef363479a90c88788036f9c6c8adb70736cbf ) From 7ad066d0d93ad5736dda4efa729cf047298cf79b Mon Sep 17 00:00:00 2001 From: Jonathon Misiewicz Date: Mon, 7 Oct 2024 16:40:18 -0400 Subject: [PATCH 56/62] Update CMakeLists.txt Silences a CMake warning. 
--- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 80f2a49710..a16c05d0b2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -225,7 +225,7 @@ if(HIP_FOUND OR CUDA_FOUND) TiledArray/external/cuda.h TiledArray/device/cpu_cuda_vector.h) endif(CUDA_FOUND) -endif(CUDA_FOUND OR HIP_FOUND) +endif(HIP_FOUND OR CUDA_FOUND) set(TILEDARRAY_SOURCE_FILES TiledArray/tiledarray.cpp From 482ab5ea08227860116bcb0ae54901c9be2c9c37 Mon Sep 17 00:00:00 2001 From: Jonathon Misiewicz Date: Tue, 8 Oct 2024 15:54:19 -0400 Subject: [PATCH 57/62] Update umpire.cmake --- external/umpire.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/external/umpire.cmake b/external/umpire.cmake index ee2fa490e1..5b7a4f4078 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -223,6 +223,8 @@ else() "$;$;$;$;$;$;$" INTERFACE_LINK_LIBRARIES "$;$" + INTERFACE_COMPILE_DEFINITIONS + FMT_HEADER_ONLY=1 ) install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray) From a414cab8bf34040fa9c05072aa1dbb526109ea34 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 8 Oct 2024 20:05:27 -0400 Subject: [PATCH 58/62] bump MAD tag to pull in https://github.com/m-a-d-n-e-s-s/madness/pull/550 --- INSTALL.md | 2 +- external/versions.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index f2891672e2..ed0ba5046c 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -43,7 +43,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20 and later. - [BTAS](http://github.com/ValeevGroup/BTAS), tag 1cfcb12647c768ccd83b098c64cda723e1275e49 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. 
-- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 95589b0d020a076f93d02eead6da654b23dd3d91 . +- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 93a9a5cec2a8fa87fba3afe8056607e6062a9058 . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*. diff --git a/external/versions.cmake b/external/versions.cmake index 909a969c28..d9d47a3bf2 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -11,8 +11,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG 96ac90e8f193ccfaf16f346b4652927d2d362e75) +set(TA_TRACKED_MADNESS_TAG 93a9a5cec2a8fa87fba3afe8056607e6062a9058) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) From d856c6a4f4de592e21c1904dba93f5c22ad7b633 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 8 Oct 2024 20:01:41 -0400 Subject: [PATCH 59/62] DistArray: init_{tiles,elements} and fill* are parametrized by fence template parameter that controls whether operation uses local, global, or no fence (default, same as before) --- src/TiledArray/array_impl.h | 45 ++++++++++++++++++++++++++++++++----- src/TiledArray/dist_array.h | 43 +++++++++++++++++++++++------------ src/TiledArray/fwd.h | 8 +++++++ 3 files changed, 77 insertions(+), 19 deletions(-) diff --git a/src/TiledArray/array_impl.h b/src/TiledArray/array_impl.h index df7138a9e7..9dbf5640c4 100644 --- a/src/TiledArray/array_impl.h +++ b/src/TiledArray/array_impl.h @@ -198,6 
+198,17 @@ std::ostream& operator<<(std::ostream& os, const TileConstReference& a) { return os; } +/// Callback used to update counter (typically, task counter) +template +struct IncrementCounter : public madness::CallbackInterface { + AtomicInt& counter; + IncrementCounter(AtomicInt& counter) : counter(counter) {} + void notify() override { + ++counter; + delete this; + } +}; + } // namespace detail } // namespace TiledArray @@ -770,20 +781,24 @@ class ArrayImpl : public TensorImpl, /// \tparam Op The type of the functor/function /// \param[in] op The operation used to generate tiles /// \param[in] skip_set If false, will throw if any tiles are already set + /// \return the total number of tiles that have been (or will be) initialized /// \throw TiledArray::Exception if the PIMPL is not set. Strong throw /// guarantee. /// \throw TiledArray::Exception if a tile is already set and skip_set is /// false. Weak throw guarantee. - template - void init_tiles(Op&& op, bool skip_set = false) { + template + std::int64_t init_tiles(Op&& op, bool skip_set = false) { // lifetime management of op depends on whether it is a lvalue ref (i.e. 
has // an external owner) or an rvalue ref // - if op is an lvalue ref: pass op to tasks // - if op is an rvalue ref pass make_shared_function(op) to tasks auto op_shared_handle = make_op_shared_handle(std::forward(op)); + std::int64_t ntiles_initialized{0}; auto it = this->pmap()->begin(); const auto end = this->pmap()->end(); + std::atomic ntask_completed{0}; for (; it != end; ++it) { const auto& index = *it; if (!this->is_zero(index)) { @@ -792,19 +807,39 @@ class ArrayImpl : public TensorImpl, if (fut.probe()) continue; } if constexpr (Exec == HostExecutor::MADWorld) { - Future tile = this->world().taskq.add( - [this_sptr = this->shared_from_this(), - index = ordinal_type(index), op_shared_handle]() -> value_type { + Future tile = + this->world().taskq.add([this_sptr = this->shared_from_this(), + index = ordinal_type(index), + op_shared_handle, this]() -> value_type { return op_shared_handle( this_sptr->trange().make_tile_range(index)); }); + ++ntiles_initialized; + if constexpr (fence == Fence::Local) { + tile.register_callback( + new IncrementCounter( + ntask_completed)); + } set(index, std::move(tile)); } else { static_assert(Exec == HostExecutor::Thread); set(index, op_shared_handle(this->trange().make_tile_range(index))); + ++ntiles_initialized; } } } + + if constexpr (fence == Fence::Local) { + if constexpr (Exec == HostExecutor::MADWorld) { + if (ntiles_initialized > 0) + this->world().await([&ntask_completed, ntiles_initialized]() { + return ntask_completed == ntiles_initialized; + }); + } + } else if constexpr (fence == Fence::Global) { + this->world().gop.fence(); + } + return ntiles_initialized; } }; // class ArrayImpl diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 3bc9fe3c62..1aa90ce351 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -906,23 +906,29 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// guarantee. 
/// \throw TiledArray::Exception if skip_set is false and a local tile is /// already set. Weak throw guarantee. - void fill_local(const element_type& value = element_type(), - bool skip_set = false) { - init_tiles( + template + std::int64_t fill_local(const element_type& value = element_type(), + bool skip_set = false) { + return init_tiles( [value](const range_type& range) { return value_type(range, value); }, skip_set); } /// Fill all local tiles with the specified value + /// \tparam fence If Fence::No, the operation will return early, + /// before the tasks have completed /// \param[in] value What each local tile should be filled with. /// \param[in] skip_set If false, will throw if any tiles are already set + /// \return the total number of tiles that have been (or will be) initialized /// \throw TiledArray::Exception if the PIMPL is uninitialized. Strong throw /// guarantee. /// \throw TiledArray::Exception if skip_set is false and a local tile is /// already set. Weak throw guarantee. - void fill(const element_type& value = numeric_type(), bool skip_set = false) { - fill_local(value, skip_set); + template + std::int64_t fill(const element_type& value = numeric_type(), + bool skip_set = false) { + return fill_local(value, skip_set); } /// Fill all local tiles with random values @@ -934,18 +940,21 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// generate random values of type T this function will be disabled via SFINAE /// and attempting to use it will lead to a compile-time error. /// + /// \tparam fence If Fence::No, the operation will return early, + /// before the tasks have completed /// \tparam T The type of random value to generate. Defaults to /// element_type. /// \param[in] skip_set If false, will throw if any tiles are already set + /// \return the total number of tiles that have been (or will be) initialized /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong /// throw guarantee. 
/// \throw TiledArray::Exception if skip_set is false and a local tile is /// already initialized. Weak throw guarantee. template > - void fill_random(bool skip_set = false) { - init_elements( + std::int64_t fill_random(bool skip_set = false) { + return init_elements( [](const auto&) { return detail::MakeRandom::generate_value(); }); } @@ -978,6 +987,8 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// return tile; /// }); /// \endcode + /// \tparam fence If Fence::No, the operation will return early, + /// before the tasks have completed /// \tparam Op The type of the functor/function /// \param[in] op The operation used to generate tiles /// \param[in] skip_set If false, will throw if any tiles are already set @@ -985,9 +996,11 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// guarantee. /// \throw TiledArray::Exception if a tile is already set and skip_set is /// false. Weak throw guarantee. - template - void init_tiles(Op&& op, bool skip_set = false) { - impl_ref().template init_tiles(std::forward(op), skip_set); + template + std::int64_t init_tiles(Op&& op, bool skip_set = false) { + return impl_ref().template init_tiles(std::forward(op), + skip_set); } /// Initialize elements of local, non-zero tiles with a user provided functor @@ -1009,15 +1022,17 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// \tparam Op Type of the function/functor which will generate the elements. /// \param[in] op The operation used to generate elements /// \param[in] skip_set If false, will throw if any tiles are already set + /// \return the total number of tiles that have been (or will be) initialized /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong /// throw guarnatee. /// \throw TiledArray::Exception if skip_set is false and a local, non-zero /// tile is already initialized. Weak throw /// guarantee. 
- template - void init_elements(Op&& op, bool skip_set = false) { + template + std::int64_t init_elements(Op&& op, bool skip_set = false) { auto op_shared_handle = make_op_shared_handle(std::forward(op)); - init_tiles( + return init_tiles( [op = std::move(op_shared_handle)]( const TiledArray::Range& range) -> value_type { // Initialize the tile with the given range object diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index 652b835fab..e33aea5c18 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -203,6 +203,14 @@ using Array enum class HostExecutor { Thread, MADWorld, Default = MADWorld }; +/// fence types +enum class Fence { + Global, //!< global fence (`world.gop.fence()`) + Local, //!< local fence (all local work done, equivalent to + //!< `world.taskq.fence()` in absence of active messages) + No //!< no fence +}; + namespace conversions { /// user defined conversions From 6d661ab2a103ad88c633f82c6fad1c9fef102872 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 8 Oct 2024 20:02:30 -0400 Subject: [PATCH 60/62] diagonal_array: instead of taskq.fence, use more robust fence mechanism of init_tiles --- src/TiledArray/special/diagonal_array.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/TiledArray/special/diagonal_array.h b/src/TiledArray/special/diagonal_array.h index d60b23db94..eac0c65e92 100644 --- a/src/TiledArray/special/diagonal_array.h +++ b/src/TiledArray/special/diagonal_array.h @@ -157,7 +157,8 @@ std::enable_if_t::value, void> write_diag_tiles_to_array_rng(Array &A, RandomAccessIterator diagonals_begin) { using Tile = typename Array::value_type; - A.init_tiles( + // N.B. 
Fence::Local ensures lifetime of the diagonals range + A.template init_tiles( // Task to create each tile [diagonals_begin](const Range &rng) { // Compute range of diagonal elements in the tile @@ -221,7 +222,6 @@ diagonal_array(World &world, TiledRange const &trange, if constexpr (is_dense_v) { Array A(world, trange); detail::write_diag_tiles_to_array_rng(A, diagonals_begin); - A.world().taskq.fence(); // ensure tasks outlive the diagonals_begin view return A; } else { // Compute shape and init the Array @@ -231,7 +231,6 @@ diagonal_array(World &world, TiledRange const &trange, ShapeType shape(shape_norm, trange); Array A(world, trange, shape); detail::write_diag_tiles_to_array_rng(A, diagonals_begin); - A.world().taskq.fence(); // ensure tasks outlive the diagonals_begin view return A; } abort(); // unreachable From c955339a3c1f6139a39fb081f13414c0b05a11f7 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 9 Oct 2024 19:34:56 -0400 Subject: [PATCH 61/62] foreach and make_array use callbacks instead of atomic counters for local completion checks --- src/TiledArray/conversions/foreach.h | 23 +++++++++------- src/TiledArray/conversions/make_array.h | 36 ++++++++++++++++++------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/src/TiledArray/conversions/foreach.h b/src/TiledArray/conversions/foreach.h index 20f2d36ec3..2c77c91a0f 100644 --- a/src/TiledArray/conversions/foreach.h +++ b/src/TiledArray/conversions/foreach.h @@ -283,11 +283,10 @@ inline std:: arg.trange().tiles_range(), 0); // Construct the task function used to construct the result tiles. - madness::AtomicInt counter; - counter = 0; - int task_count = 0; + std::atomic ntask_completed{0}; + std::int64_t ntask_created{0}; auto op_shared_handle = make_op_shared_handle(std::forward(op)); - const auto task = [op_shared_handle, &counter, &tile_norms]( + const auto task = [op_shared_handle, &tile_norms]( const ordinal_type ord, const_if_t& arg_tile, const ArgTiles&... 
arg_tiles) -> result_value_type { @@ -295,7 +294,6 @@ inline std:: auto result_tile = op_caller(std::move(op_shared_handle), tile_norms.at_ordinal(ord), arg_tile, arg_tiles...); - ++counter; return result_tile; }; @@ -310,7 +308,9 @@ inline std:: continue; auto result_tile = world.taskq.add(task, ord, arg.find_local(ord), args.find(ord)...); - ++task_count; + ++ntask_created; + result_tile.register_callback( + new IncrementCounter(ntask_completed)); tiles.emplace_back(ord, std::move(result_tile)); if (op_returns_void) // if Op does not evaluate norms, use the (scaled) // norms of the first arg @@ -324,7 +324,9 @@ inline std:: auto result_tile = world.taskq.add(task, ord, detail::get_sparse_tile(ord, arg), detail::get_sparse_tile(ord, args)...); - ++task_count; + ++ntask_created; + result_tile.register_callback( + new IncrementCounter(ntask_completed)); tiles.emplace_back(ord, std::move(result_tile)); if (op_returns_void) // if Op does not evaluate norms, find max // (scaled) norms of all args @@ -339,9 +341,10 @@ inline std:: } // Wait for tile norm data to be collected. 
- if (task_count > 0) - world.await( - [&counter, task_count]() -> bool { return counter == task_count; }); + if (ntask_created > 0) + world.await([&ntask_completed, ntask_created]() -> bool { + return ntask_created == ntask_completed; + }); // Construct the new array result_array_type result( diff --git a/src/TiledArray/conversions/make_array.h b/src/TiledArray/conversions/make_array.h index 6f5ada0bba..1295e6f8e4 100644 --- a/src/TiledArray/conversions/make_array.h +++ b/src/TiledArray/conversions/make_array.h @@ -26,6 +26,7 @@ #ifndef TILEDARRAY_CONVERSIONS_MAKE_ARRAY_H__INCLUDED #define TILEDARRAY_CONVERSIONS_MAKE_ARRAY_H__INCLUDED +#include "TiledArray/array_impl.h" #include "TiledArray/external/madness.h" #include "TiledArray/shape.h" #include "TiledArray/type_traits.h" @@ -79,6 +80,10 @@ inline Array make_array( // Make an empty result array Array result(world, trange); + // Construct the task function used to construct the result tiles. + std::atomic ntask_completed{0}; + std::int64_t ntask_created{0}; + // Iterate over local tiles of arg for (const auto index : *result.pmap()) { // Spawn a task to evaluate the tile @@ -89,11 +94,20 @@ inline Array make_array( return tile; }, trange.make_tile_range(index)); - + ++ntask_created; + tile.register_callback( + new detail::IncrementCounter( + ntask_completed)); // Store result tile - result.set(index, tile); + result.set(index, std::move(tile)); } + // Wait for tile tasks to complete + if (ntask_created > 0) + world.await([&ntask_completed, ntask_created]() -> bool { + return ntask_completed == ntask_created; + }); + return result; } @@ -150,26 +164,28 @@ inline Array make_array( trange.tiles_range(), 0); // Construct the task function used to construct the result tiles. 
- madness::AtomicInt counter; - counter = 0; - int task_count = 0; + std::atomic ntask_completed{0}; + std::int64_t ntask_created{0}; auto task = [&](const ordinal_type index) -> value_type { value_type tile; tile_norms.at_ordinal(index) = op(tile, trange.make_tile_range(index)); - ++counter; return tile; }; for (const auto index : *pmap) { auto result_tile = world.taskq.add(task, index); - ++task_count; + ++ntask_created; + result_tile.register_callback( + new detail::IncrementCounter( + ntask_completed)); tiles.emplace_back(index, std::move(result_tile)); } // Wait for tile norm data to be collected. - if (task_count > 0) - world.await( - [&counter, task_count]() -> bool { return counter == task_count; }); + if (ntask_created > 0) + world.await([&ntask_completed, ntask_created]() -> bool { + return ntask_completed == ntask_created; + }); // Construct the new array Array result(world, trange, From 1986ccf0c6220dd2765da0342e94a2b27aecebb0 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sat, 12 Oct 2024 03:28:01 -0400 Subject: [PATCH 62/62] [python] amends PyTA for d856c6a4f4de592e21c1904dba93f5c22ad7b633 --- python/src/TiledArray/python/array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/TiledArray/python/array.h b/python/src/TiledArray/python/array.h index 782846df4c..e3cc1c79b7 100644 --- a/python/src/TiledArray/python/array.h +++ b/python/src/TiledArray/python/array.h @@ -208,7 +208,7 @@ void make_array_class(py::object m, const char *name) { py::return_value_policy::reference) .def_property_readonly("trange", &array::trange) .def_property_readonly("shape", &array::shape) - .def("fill", &Array::fill, py::arg("value"), + .def("fill", &Array::template fill<>, py::arg("value"), py::arg("skip_set") = false) .def("init", &array::init_tiles) // Array object needs be alive while iterator is used */