diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fd9c49aefa..b6fe18c009 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,6 +7,7 @@ default: variables: MAD_NUM_THREADS : 2 + OMP_NUM_THREADS : 1 TA_TARGETS : "tiledarray examples-tiledarray ta_test check-tiledarray" # Debug builds with ScaLAPACK=ON need increased TA_UT_CTEST_TIMEOUT TA_CONFIG : > @@ -18,6 +19,7 @@ variables: ${BLA_VENDOR} ${BLA_THREADS} ${ENABLE_SCALAPACK} + ${ENABLE_SLATE} before_script: # NB: if CMAKE_BUILD_PARALLEL_LEVEL is not set (i.e. using shared runner), use 1 to ensure we have enough memory @@ -35,6 +37,7 @@ ubuntu: variables: TA_PYTHON : "TA_PYTHON=ON" ENABLE_SCALAPACK : "ENABLE_SCALAPACK=OFF" + ENABLE_SLATE : "ENABLE_SLATE=OFF" script: - ./ci/.build-project --build ./build @@ -69,6 +72,7 @@ ubuntu: CXX: [ g++, clang++-13 ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] + ENABLE_SLATE : [ "ENABLE_SLATE=ON", "ENABLE_SLATE=OFF" ] RUNNER_TAGS: [ linux ] - IMAGE : [ "ubuntu:22.04", "ubuntu:20.04" ] CXX: [ g++ ] diff --git a/CMakeLists.txt b/CMakeLists.txt index a50f0a789f..cd0c2759fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,6 +121,9 @@ add_feature_info(MPI ENABLE_MPI "Message-Passing Interface supports distributed- option(ENABLE_SCALAPACK "Enable ScaLAPACK Bindings in TiledArray" OFF) add_feature_info(ScaLAPACK ENABLE_SCALAPACK "ScaLAPACK provides distributed linear algebra") +option(ENABLE_SLATE "Enable SLATE Bindings in TiledArray" OFF) +add_feature_info(SLATE ENABLE_SLATE "SLATE provides distributed linear algebra on GPU architectures") + option(ENABLE_WFN91_LINALG_DISCOVERY_KIT "Use linear algebra discovery kit from github.com/wavefunction91 [recommended]" ON) add_feature_info(WFN91LinearAlgebraDiscoveryKit ENABLE_WFN91_LINALG_DISCOVERY_KIT "Linear algebra discovery kit from github.com/wavefunction91 supports many more corner cases than the default CMake modules and/or ICL's BLAS++/LAPACK++ modules") @@ -338,6 +341,9 @@ include(${PROJECT_SOURCE_DIR}/cmake/modules/FindOrFetchBoost.cmake) if(ENABLE_SCALAPACK) include(external/scalapackpp.cmake) endif() +if(ENABLE_SLATE) + include(external/slate.cmake) +endif() # optional deps: # 1. ccache diff --git a/cmake/modules/FindOrFetchSLATE.cmake b/cmake/modules/FindOrFetchSLATE.cmake new file mode 100644 index 0000000000..d5a34c2b5a --- /dev/null +++ b/cmake/modules/FindOrFetchSLATE.cmake @@ -0,0 +1,32 @@ +# Try find_package +if (NOT TARGET slate) + find_package(slate QUIET CONFIG) + if (TARGET slate) + message(STATUS "Found SLATE CONFIG at ${slate_CONFIG}") + endif (TARGET slate) +endif (NOT TARGET slate) + +# If not found, build via FetchContent +if (NOT TARGET slate) + + # Make sure BLAS++/LAPACK++ are already in place + # (will typically be loaded from BTAS) + include(${vg_cmake_kit_SOURCE_DIR}/modules/FindOrFetchLinalgPP.cmake) + + if (NOT TILEDARRAY_HAS_CUDA) + set(gpu_backend none CACHE STRING "Device Backend for ICL Linalg++/SLATE") + endif (NOT TILEDARRAY_HAS_CUDA) + + include(FetchContent) + FetchContent_Declare( + slate + GIT_REPOSITORY https://github.com/icl-utk-edu/slate.git + GIT_TAG ${TA_TRACKED_SLATE_TAG} + ) + FetchContent_MakeAvailable(slate) + +endif (NOT TARGET slate) + +if (NOT TARGET slate) + message( FATAL_ERROR "FindOrFetchSLATE could not make slate target available") +endif (NOT TARGET slate) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index f74d35345a..d670a22015 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -33,6 +33,7 @@ add_subdirectory (cuda) add_subdirectory (dgemm) add_subdirectory (demo) add_subdirectory (scalapack) +add_subdirectory (slate) add_subdirectory (fock) add_subdirectory (mpi_tests) add_subdirectory (pmap_test) diff --git a/examples/slate/CMakeLists.txt b/examples/slate/CMakeLists.txt new file mode 100644 index 0000000000..e704ac94e4 --- /dev/null +++ b/examples/slate/CMakeLists.txt @@ -0,0 +1,38 @@ +# +# This file is a part of TiledArray. +# Copyright (C) 2016 Virginia Tech +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Drew Lewis +# Department of Chemistry, Virginia Tech +# +# CMakeLists.txt +# Dec 6th, 2016 +# + +# Create example executable + +if(ENABLE_SLATE) + +foreach(_exec conversion) + + # Add executable + add_executable(slate-${_exec} EXCLUDE_FROM_ALL ${_exec}.cpp) + target_link_libraries(slate-${_exec} PRIVATE tiledarray) + add_dependencies(examples-tiledarray slate-${_exec}) + +endforeach() + +endif(ENABLE_SLATE) diff --git a/examples/slate/conversion.cpp b/examples/slate/conversion.cpp new file mode 100644 index 0000000000..851f066743 --- /dev/null +++ b/examples/slate/conversion.cpp @@ -0,0 +1,140 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * conversion.cpp + * Created: 7 Feb, 2020 + * Edited: 13 May, 2020 + * + */ + +#include + +#include +#include +#include +#include + + +template +int64_t div_ceil(Integral1 x, Integral2 y) { + int64_t x_ll = x; + int64_t y_ll = y; + + auto d = std::div(x_ll, y_ll); + return d.quot + !!d.rem; +} + +TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { + assert(TA_NBs.size() > 0); + + std::default_random_engine gen(0); + std::uniform_int_distribution<> dist(0, TA_NBs.size() - 1); + auto rand_indx = [&]() { return dist(gen); }; + auto rand_nb = [&]() { return TA_NBs[rand_indx()]; }; + + std::vector t_boundaries = {0}; + auto TA_NB = rand_nb(); + while (t_boundaries.back() + TA_NB < N) { + t_boundaries.emplace_back(t_boundaries.back() + TA_NB); + TA_NB = rand_nb(); + } + t_boundaries.emplace_back(N); + + std::vector ranges( + 2, TA::TiledRange1(t_boundaries.begin(), t_boundaries.end())); + + return TA::TiledRange(ranges.begin(), ranges.end()); +}; + + + +auto make_square_proc_grid(MPI_Comm comm) { + int mpi_size; MPI_Comm_size(comm, &mpi_size); + int p,q; + for(p = int( sqrt( mpi_size ) ); p > 0; --p) { + q = int( mpi_size / p ); + if(p*q == mpi_size) break; + } + return std::make_pair(p,q); +} + + + + + + + + + +int main(int argc, char** argv) { + auto& world = TA::initialize(argc, argv); + { + int64_t N = argc > 1 ? std::stoi(argv[1]) : 1000; + size_t NB = argc > 2 ? std::stoi(argv[2]) : 128; + + auto make_ta_reference = + [&](TA::Tensor& t, TA::Range const& range) { + + t = TA::Tensor(range, 0.0); + auto lo = range.lobound_data(); + auto up = range.upbound_data(); + for (int m = lo[0]; m < up[0]; ++m) { + for (int n = lo[1]; n < up[1]; ++n) { + t(m, n) = m - n; + } + } + + return t.norm(); + }; + + // Generate Reference TA tensor. + auto trange = gen_trange(N, {NB}); + auto ref_ta = + TA::make_array >(world, trange, make_ta_reference); + + // Do Conversion + auto A = TA::array_to_slate( ref_ta ); + auto A_ta = TA::slate_to_array>(A, world); + world.gop.fence(); + + // Slate matrix to eigen + Eigen::MatrixXd slate_eigen = Eigen::MatrixXd::Zero(N,N); + for (int64_t j = 0; j < A.nt(); ++j) + for (int64_t i = 0; i < A.mt(); ++i) { + A.tileBcast(i,j, A, slate::Layout::ColMajor); + auto T = A(i,j); + Eigen::Map T_map( T.data(), T.mb(), T.nb() ); + slate_eigen.block(i*NB,j*NB,T.mb(), T.nb()) = T_map; + } + //if(!world.rank()) { + //std::cout << "SLATE\n" << slate_eigen << std::endl; + //} + + A_ta.make_replicated(); + world.gop.fence(); + auto A_eigen = TA::array_to_eigen(A_ta); + //if(!world.rank()) std::cout << "TA\n" << A_eigen << std::endl; + std::cout << (A_eigen - slate_eigen).norm() << std::endl; + + + } + + TA::finalize(); +} diff --git a/external/slate.cmake b/external/slate.cmake new file mode 100644 index 0000000000..9c177d32af --- /dev/null +++ b/external/slate.cmake @@ -0,0 +1,21 @@ +if (NOT TARGET slate) + set(VGCMAKEKIT_TRACKED_SLATE_TAG ${TA_TRACKED_SLATE_TAG} CACHE STRING "slate tag") + include(FindOrFetchSLATE) +endif() + +# built {blacs,scalapack}pp as a subproject? install as part of tiledarray export as well +# to be able to use TiledArray_SLATE from the build tree +if (TARGET slate) + install( TARGETS slate EXPORT tiledarray COMPONENT tiledarray ) + # Add these dependencies to External + add_dependencies(External-tiledarray slate ) +endif() + +if (TARGET slate) + add_library( TiledArray_SLATE INTERFACE ) + target_link_libraries( TiledArray_SLATE INTERFACE slate ) + + install( TARGETS TiledArray_SLATE EXPORT tiledarray COMPONENT tiledarray ) + + set( TILEDARRAY_HAS_SLATE 1 ) +endif() diff --git a/external/versions.cmake b/external/versions.cmake index 6f6c05c977..19c50d392b 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -36,6 +36,9 @@ set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v6.0.0) set(TA_TRACKED_SCALAPACKPP_TAG 6397f52cf11c0dfd82a79698ee198a2fce515d81) set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 711ef363479a90c88788036f9c6c8adb70736cbf ) +set(TA_TRACKED_SLATE_TAG 04d552f12e0d181c27652ecd3ebedb265b7eec07) +set(TA_TRACKED_SLATE_PREVIOUS_TAG 8651441aa87cd69b560d4dac8c5ceb0e7f8c32a4) + set(TA_TRACKED_RANGEV3_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864) set(TA_TRACKED_RANGEV3_PREVIOUS_TAG dbdaa247a25a0daa24c68f1286a5693c72ea0006) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a8b3d6f4fd..b5ecc25f4b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -274,6 +274,9 @@ endif(CUDA_FOUND) if( TARGET TiledArray_SCALAPACK ) list(APPEND _TILEDARRAY_DEPENDENCIES TiledArray_SCALAPACK) endif() +if( TARGET TiledArray_SLATE ) + list(APPEND _TILEDARRAY_DEPENDENCIES TiledArray_SLATE) +endif() list(APPEND _TILEDARRAY_DEPENDENCIES "${LAPACK_LIBRARIES}") if( TARGET ttg-parsec ) diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in index 0c4d5d5cbc..f136ea523c 100644 --- a/src/TiledArray/config.h.in +++ b/src/TiledArray/config.h.in @@ -74,6 +74,9 @@ /* Define if TA has enabled ScaLAPACK Bindings */ #cmakedefine TILEDARRAY_HAS_SCALAPACK 1 +/* Define if TA has enabled SLATE Bindings */ +#cmakedefine TILEDARRAY_HAS_SLATE 1 + /* Define if TiledArray configured with CUDA support */ #cmakedefine TILEDARRAY_HAS_CUDA @TILEDARRAY_HAS_CUDA@ #cmakedefine TILEDARRAY_CHECK_CUDA_ERROR @TILEDARRAY_CHECK_CUDA_ERROR@ diff --git a/src/TiledArray/conversions/slate.h b/src/TiledArray/conversions/slate.h new file mode 100644 index 0000000000..3212b763f6 --- /dev/null +++ b/src/TiledArray/conversions/slate.h @@ -0,0 +1,367 @@ +#ifndef TILEDARRAY_CONVERSIONS_SLATE_H +#define TILEDARRAY_CONVERSIONS_SLATE_H + +#include // TILEDARRAY_HAS_SLATE +#if TILEDARRAY_HAS_SLATE + +#include // slate::Matrix +#include // TA::numeric_type +#include // is_dense +#include // {,User}Pmap +#include // Eigen::{Matrix,Map} +#include // MADNESS + +namespace TiledArray { +namespace detail { + +/// C++14-esque typename wrapper for `numeric_type +template +using numeric_type_t = typename numeric_type::type; + +/// Deduce SLATE Matrix type from Array type +template +using slate_type_from_array_t = + typename slate::Matrix>; + + +template +auto slate_to_ta_tile(slate::Tile slate_tile, TA::Range const& range) { + + using col_major_mat_t = Eigen::Matrix; + using row_major_mat_t = Eigen::Matrix; + + using col_major_map_t = Eigen::Map; + using row_major_map_t = Eigen::Map; + + Tile tile(range, 0.0); + + // Create Maps + auto local_m = slate_tile.mb(); + auto local_n = slate_tile.nb(); + col_major_map_t slate_map(slate_tile.data(), local_m, local_n); + + auto local_m_ta = range.dim(0).extent(); + auto local_n_ta = range.dim(1).extent(); + row_major_map_t ta_map(tile.data(), local_m_ta, local_n_ta); + + // Copy data + ta_map = slate_map; + + return tile; +} + +template +Array make_array_from_slate_dense(Matrix& matrix, TA::TiledRange const& trange, + std::shared_ptr pmap, TA::World& world) { + + using value_type = typename Array::value_type; // Tile type + using element_type = typename std::remove_cv_t::element_type; + + + using col_major_mat_t = Eigen::Matrix; + using row_major_mat_t = Eigen::Matrix; + + using col_major_map_t = Eigen::Map; + using row_major_map_t = Eigen::Map; + + Array array(world, trange, pmap); + for (int64_t it = 0; it < matrix.mt(); ++it) + for (int64_t jt = 0; jt < matrix.nt(); ++jt) + if( matrix.tileIsLocal(it,jt) ) { + auto local_ordinal = trange.tiles_range().ordinal(it,jt); + + auto tile = world.taskq.add(slate_to_ta_tile, + matrix(it,jt), trange.make_tile_range(local_ordinal)); + + array.set(local_ordinal, tile); + } + + return array; +} + +template +Array make_array_from_slate_sparse(Matrix& matrix, TA::TiledRange const& trange, + std::shared_ptr pmap, TA::World& world) { + + typedef typename Array::value_type value_type; + typedef typename Array::ordinal_type ordinal_type; + typedef std::pair > datum_type; + + // Create a vector to hold local tiles + std::vector tiles; + tiles.reserve(pmap->size()); + + // Construct a tensor to hold updated tile norms for the result shape. + TA::Tensor::value_type> tile_norms( + trange.tiles_range(), 0); + + // Construct the task function used to construct the result tiles. + madness::AtomicInt counter; + counter = 0; + int task_count = 0; + auto task = [&](const ordinal_type index) -> value_type { + + value_type tile; + auto coords = trange.tiles_range().idx(index); + auto it = coords[0]; + auto jt = coords[1]; + if(!matrix.tileIsLocal(it,jt)) { + tile_norms[index] = 0.0; + } else { + tile = slate_to_ta_tile(matrix(it,jt), trange.make_tile_range(index)); + tile_norms[index] = norm(tile); + } + ++counter; + return tile; + + }; + + for (const auto index : *pmap) { + auto result_tile = world.taskq.add(task, index); + ++task_count; + tiles.emplace_back(index, std::move(result_tile)); + } + + // Wait for tile norm data to be collected. + if (task_count > 0) + world.await( + [&counter, task_count]() -> bool { return counter == task_count; }); + + // Construct the new array + Array result(world, trange, + typename Array::shape_type(world, tile_norms, trange), pmap); + for (auto& it : tiles) { + const auto index = it.first; + if (!result.is_zero(index)) result.set(it.first, it.second); + } + + return result; + +} + +} // namespace TiledArray::detail + +class SlateFunctors { + +public: + + using slate_int = int64_t; + using slate_process_idx = std::tuple; + using dim_functor_t = std::function; + using tile_functor_t = std::function; + + SlateFunctors( const dim_functor_t& Mb, const dim_functor_t& Nb, + const tile_functor_t& Rank, const tile_functor_t& Dev ) : + tileMb_(Mb), tileNb_(Nb), tileRank_(Rank), tileDevice_(Dev) { } + + template + SlateFunctors( TiledRange trange, PMapInterfacePointer pmap_ptr ) { + if( trange.rank() != 2 ) + throw std::runtime_error("Cannot Convert General Tensor to SLATE (RANK != 2)"); + + // Tile row dimension (MB) + tileMb_ = [trange](slate_int i) { return trange.dim(0).tile(i).extent(); }; + + // Tile col dimension (NB) + tileNb_ = [trange](slate_int i) { return trange.dim(1).tile(i).extent(); }; + + // Tile rank assignment + tileRank_ = [pmap_ptr, trange] (slate_process_idx ij) { + auto [i,j] = ij; + return pmap_ptr->owner(trange.tiles_range().ordinal(i,j)); + }; + + // Tile device assignment + // TODO: Needs to be more robust + tileDevice_ = [](slate_process_idx) { return 0; }; + + } + + auto& tileMb() { return tileMb_; } + auto& tileNb() { return tileNb_; } + auto& tileRank() { return tileRank_; } + auto& tileDevice() { return tileDevice_; } + + + template + SlateMatrixType make_matrix(int64_t M, int64_t N, MPI_Comm comm) { + return SlateMatrixType(M, N, tileMb_, tileNb_, tileRank_, tileDevice_, comm); + } + +private: + + dim_functor_t tileMb_, tileNb_; + tile_functor_t tileRank_, tileDevice_; +}; + + + + + +template +std::shared_ptr make_pmap_from_slate( SlateMatrixType&& matrix, World& world ) { + + // Compute SLATE Tile Statistics + size_t total_tiles = matrix.nt() * matrix.mt(); + size_t local_tiles = 0; + + // Create a map from tile ordinal to rank + // to avoid lifetime issues in the internal + // TA Pmap + std::vector tile2rank(total_tiles); + for (int64_t it = 0; it < matrix.mt(); ++it) + for (int64_t jt = 0; jt < matrix.nt(); ++jt) { + size_t ordinal = it*matrix.nt() + jt; // TODO: Use Range + tile2rank[ordinal] = matrix.tileRank( it, jt ); + if(matrix.tileIsLocal(it,jt)) local_tiles++; + } + + + // Create TA PMap + std::function ta_tile_functor = + [t2r = std::move(tile2rank)](size_t ordinal) { + return t2r[ordinal]; + }; + + return std::make_shared(world, total_tiles, local_tiles, + ta_tile_functor); +} + + + + + +/** + * @brief Convert Array to SLATE matrix + * + * @tparam Array Type of input Array + * + * @param[in] array Array to convert to SLATE. Must be rank-2. + * @returns SLATE representation `array` + */ +template +detail::slate_type_from_array_t +array_to_slate( const Array& array ) { + + using element_type = typename std::remove_cv_t::element_type; + using slate_matrix_t = typename slate::Matrix; + + using col_major_mat_t = Eigen::Matrix; + using row_major_mat_t = Eigen::Matrix; + + using col_major_map_t = Eigen::Map; + using row_major_map_t = Eigen::Map; + + /*******************************/ + /*** Generate SLATE Functors ***/ + /*******************************/ + auto& world = array.world(); + const auto& trange = array.trange(); + auto pmap = array.pmap(); + + SlateFunctors slate_functors( trange, pmap ); + + + /*********************************/ + /*** Create empty slate matrix ***/ + /*********************************/ + const auto M = trange.dim(0).extent(); + const auto N = trange.dim(1).extent(); + auto matrix = slate_functors.make_matrix(M, N, + world.mpi.comm().Get_mpi_comm()); + + /************************/ + /*** Copy TA -> SLATE ***/ + /************************/ + matrix.insertLocalTiles(); + + // Loop over local tiles via ordinal + // TODO: Make async + for( auto local_ordinal : *pmap ) { + // Compute coordinate of tile ordinal + auto local_coordinate = trange.tiles_range().idx(local_ordinal); + const auto it = local_coordinate[0]; + const auto jt = local_coordinate[1]; + + // Sanity Check + if(!matrix.tileIsLocal(it,jt)) + throw std::runtime_error("SLATE PMAP is not valid"); + + // Extract shallow copy of local SLATE tile and create + // data map + auto local_tile_slate = matrix(it,jt); + auto local_m = local_tile_slate.mb(); + auto local_n = local_tile_slate.nb(); + col_major_map_t slate_map(local_tile_slate.data(), local_m, local_n); + + // Create data map for TA tile + // TODO: This should be async in a MADNESS task + auto& local_tile = array.find_local(local_ordinal).get(); + auto local_m_ta = local_tile.range().dim(0).extent(); + auto local_n_ta = local_tile.range().dim(1).extent(); + row_major_map_t ta_map(local_tile.data(), local_m_ta, local_n_ta); + + // Copy TA tile to SLATE tile + slate_map = ta_map; + } // Loop over local tiles + + return matrix; +} // array_to_slate + +/** + * @brief Convert a SLATE matrix to an Array + */ +template +auto slate_to_array( /*const*/ detail::slate_type_from_array_t& matrix, World& world ) { + // TODO: SLATE Tile accessor is not const-accessible + // https://github.com/icl-utk-edu/slate/issues/59 + + using value_type = typename Array::value_type; // Tile type + using element_type = typename std::remove_cv_t::element_type; + using slate_matrix_t = typename slate::Matrix; + + using col_major_mat_t = Eigen::Matrix; + using row_major_mat_t = Eigen::Matrix; + + using col_major_map_t = Eigen::Map; + using row_major_map_t = Eigen::Map; + + // Create TA PMap from SLATE metadata + auto slate_pmap = make_pmap_from_slate(matrix, world); + + // Create TiledRange + std::vector row_tiling(matrix.mt()+1), col_tiling(matrix.nt()+1); + + row_tiling[0] = 0; + for(auto i = 0; i < matrix.mt(); ++i) + row_tiling[i+1] = row_tiling[i] + matrix.tileMb(i); + + col_tiling[0] = 0; + for(auto i = 0; i < matrix.nt(); ++i) + col_tiling[i+1] = col_tiling[i] + matrix.tileNb(i); + + + std::vector ranges = { + TA::TiledRange1(row_tiling.begin(), row_tiling.end()), + TA::TiledRange1(col_tiling.begin(), col_tiling.end()) + }; + TA::TiledRange trange(ranges.begin(), ranges.end()); + + // Create TArray + Array array; + if constexpr (is_dense::value) { + array = detail::make_array_from_slate_dense(matrix, + trange, slate_pmap, world); + } else { + array = detail::make_array_from_slate_sparse(matrix, + trange, slate_pmap, world); + } + + world.gop.fence(); + return array; +} + +} // namespace TiledArray + +#endif // TILEDARRAY_HAS_SLATE +#endif // TILEDARRAY_CONVERSIONS_SLATE_H diff --git a/src/TiledArray/math/linalg/slate/cholesky.h b/src/TiledArray/math/linalg/slate/cholesky.h new file mode 100644 index 0000000000..6f1cb17b28 --- /dev/null +++ b/src/TiledArray/math/linalg/slate/cholesky.h @@ -0,0 +1,183 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2023 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Applied Mathematics and Computational Research Division, + * Lawrence Berkeley National Laboratory + * + * cholesky.h + * Created: 24 July, 2023 + * + */ +#ifndef TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SLATE + +#include +#include +namespace TiledArray::math::linalg::slate { + +/** + * @brief Compute the Cholesky factorization of a HPD rank-2 tensor + * + * A(i,j) = L(i,k) * conj(L(j,k)) + * + * Example Usage: + * + * auto L = cholesky(A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be factorized. Must be rank-2 + * + * @returns The lower triangular Cholesky factor L in TA format + */ +template +auto cholesky(const Array& A) { + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + // Convert to SLATE + auto matrix = array_to_slate(A); + + // Perform POTRF + world.gop.fence(); // stage SLATE execution + ::slate::HermitianMatrix AH(::slate::Uplo::Lower, matrix); + ::slate::potrf(AH); + zero_triangle(::slate::Uplo::Upper, matrix); + world.gop.fence(); // stage SLATE execution + + // Convert L to TA and return + return slate_to_array(matrix, world); + +} + + + +template +auto cholesky_linv(const Array& A) { + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + auto matrix = array_to_slate(A); + + // Perform POTRF + world.gop.fence(); // stage SLATE execution + ::slate::HermitianMatrix AH(::slate::Uplo::Lower, matrix); + ::slate::potrf(AH); + zero_triangle(::slate::Uplo::Upper, matrix); + + // Copy L if needed + using matrix_type = std::decay_t; + std::shared_ptr L_ptr = nullptr; + if constexpr (Both) { + L_ptr = std::make_shared(slate_to_array(matrix,world)); + world.gop.fence(); // Make sure copy is done before inverting L + } + + // Perform TRTRI + ::slate::TriangularMatrix L_slate(::slate::Uplo::Lower, + ::slate::Diag::NonUnit, matrix); + ::slate::trtri(L_slate); + + // Convert Linv to TA + auto Linv = slate_to_array(matrix, world); + world.gop.fence(); // Make sure copy is done before return + + // Return Linv or L + Linv (in that order) + if constexpr (Both) { + return std::make_tuple( *L_ptr, Linv ); + } else { + return Linv; + } + +} + + + + +template +auto cholesky_solve(const AArray& A, const BArray& B) { + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + /* + if( world != B.world() ) { + TA_EXCEPTION("A and B must be distributed on same MADWorld context"); + } + */ + + // Convert to SLATE + auto A_slate = array_to_slate(A); + auto B_slate = array_to_slate(B); + + // Solve linear system + world.gop.fence(); // stage SLATE execution + ::slate::HermitianMatrix AH(::slate::Uplo::Lower, A_slate); + ::slate::posv( AH, B_slate ); + + // Convert solution to TA + return slate_to_array(B_slate, world); + +} + + + +template +auto cholesky_lsolve(Op trans, const AArray& A, const BArray& B) { + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + /* + if( world != B.world() ) { + TA_EXCEPTION("A and B must be distributed on same MADWorld context"); + } + */ + + // Convert to SLATE + auto A_slate = array_to_slate(A); + auto B_slate = array_to_slate(B); + world.gop.fence(); // stage SLATE execution + + // Factorize A + ::slate::HermitianMatrix AH(::slate::Uplo::Lower, A_slate); + ::slate::potrf(AH); + + // Solve linear system OP(L) * X = B + ::slate::TriangularMatrix L_slate(::slate::Uplo::Lower, + ::slate::Diag::NonUnit, A_slate); + if( trans == Op::Trans ) L_slate = ::slate::transpose(L_slate); + if( trans == Op::ConjTrans ) L_slate = ::slate::conj_transpose(L_slate); + ::slate::trsm( ::slate::Side::Left, 1.0, L_slate, B_slate ); + + // Zero out the upper triangle + zero_triangle(::slate::Uplo::Upper, A_slate); + + // Convert solution and L to TA + auto L = slate_to_array(A_slate, world); + auto X = slate_to_array(B_slate, world); + return std::make_tuple(L, X); + +} + +} // namespace TiledArray::math::linalg::slate + +#endif // TILEDARRAY_HAS_SLATE + +#endif // TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED diff --git a/src/TiledArray/math/linalg/slate/heig.h b/src/TiledArray/math/linalg/slate/heig.h new file mode 100644 index 0000000000..d0bbc483f0 --- /dev/null +++ b/src/TiledArray/math/linalg/slate/heig.h @@ -0,0 +1,115 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2023 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Applied Mathematics and Computational Research Division, + * Lawrence Berkeley National Laboratory + * + * cholesky.h + * Created: 24 July, 2023 + * + */ +#ifndef TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SLATE + +#include +#include +#include + +namespace TiledArray::math::linalg::slate { + +template +auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + + + // Convert to SLATE + auto matrix = array_to_slate(A); + + // Allocate space for singular values + const auto M = matrix.m(); + const auto N = matrix.n(); + if (M != N) TA_EXCEPTION("Matrix must be square for EVP"); + + std::vector<::blas::real_type> W(N); + + // Perform Eigenvalue Decomposition + world.gop.fence(); // stage SLATE execution + + ::slate::HermitianMatrix AH(::slate::Uplo::Lower, matrix); + auto Z = matrix.emptyLike(); Z.insertLocalTiles(); + ::slate::eig(AH, W, Z); + + + // Convert eigenvectors back to TA + auto Z_ta = slate_to_array(Z, world); + if(evec_trange.rank() != 0 and evec_trange != A.trange()) { + Z_ta = retile(Z_ta, evec_trange); + } + world.gop.fence(); // Maintain lifetimes of SLATE data + + return std::tuple(W, Z_ta); +} + + +template +auto heig(const ArrayA& A, const ArrayB& B) { + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + + // Convert to SLATE + auto matrix_A = array_to_slate(A); + auto matrix_B = array_to_slate(B); + + // Allocate space for singular values + const auto M = matrix_A.m(); + const auto N = matrix_A.n(); + if (M != N) TA_EXCEPTION("Matrix must be square for EVP"); + if (matrix_B.m() != matrix_B.n()) + TA_EXCEPTION("Metric must be square for EVP"); + if(matrix_B.m() != M) + TA_EXCEPTION("Matrix and Metric must be the same size"); + + std::vector<::blas::real_type> W(N); + + // Perform Eigenvalue Decomposition + world.gop.fence(); // stage SLATE execution + + ::slate::HermitianMatrix AH(::slate::Uplo::Lower, matrix_A); + ::slate::HermitianMatrix BH(::slate::Uplo::Lower, matrix_B); + auto Z = matrix_A.emptyLike(); Z.insertLocalTiles(); + ::slate::eig(1, AH, BH, W, Z); // AX = BXE + + + // Convert eigenvectors back to TA + auto Z_ta = slate_to_array(Z, world); + world.gop.fence(); // Maintain lifetimes of SLATE data + + return std::tuple(W, Z_ta); +} + +} // namespace TiledArray::math::linalg::slate + +#endif // TILEDARRAY_HAS_SLATE + +#endif // TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED diff --git a/src/TiledArray/math/linalg/slate/lu.h b/src/TiledArray/math/linalg/slate/lu.h new file mode 100644 index 0000000000..0c3e675d0c --- /dev/null +++ b/src/TiledArray/math/linalg/slate/lu.h @@ -0,0 +1,91 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2023 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Applied Mathematics and Computational Research Division, + * Lawrence Berkeley National Laboratory + * + * cholesky.h + * Created: 24 July, 2023 + * + */ +#ifndef TILEDARRAY_MATH_LINALG_SLATE_LU_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_SLATE_LU_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SLATE + +#include +#include +namespace TiledArray::math::linalg::slate { + +template +auto lu_solve(const ArrayA& A, const ArrayB& B) { + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + /* + if( world != B.world() ) { + TA_EXCEPTION("A and B must be distributed on same MADWorld context"); + } + */ + + // Convert to SLATE + world.gop.fence(); // stage SLATE execution + auto A_slate = array_to_slate(A); + auto B_slate = array_to_slate(B); + world.gop.fence(); // stage SLATE execution + + // Solve Linear System + ::slate::lu_solve( A_slate, B_slate ); + + // Convert solution to TA + auto X = slate_to_array(B_slate, world); + world.gop.fence(); // stage SLATE execution + + return X; +} + +template +auto lu_inv(const Array& A) { + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + + // Convert to SLATE + world.gop.fence(); // stage SLATE execution + auto A_slate = array_to_slate(A); + world.gop.fence(); // stage SLATE execution + + // Perform LU Factorization + ::slate::Pivots pivots; + ::slate::lu_factor(A_slate, pivots); + + // Invert from factors + ::slate::lu_inverse_using_factor(A_slate, pivots); + + // Convert inverse to TA + auto X = slate_to_array(A_slate, world); + world.gop.fence(); // stage SLATE execution + + return X; +} + +} // namespace TiledArray::math::linalg::scalapack + +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_MATH_LINALG_SCALAPACK_LU_H__INCLUDED diff --git a/src/TiledArray/math/linalg/slate/qr.h b/src/TiledArray/math/linalg/slate/qr.h new file mode 100644 index 0000000000..4e8096c3fc --- /dev/null +++ b/src/TiledArray/math/linalg/slate/qr.h @@ -0,0 +1,100 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2023 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Applied Mathematics and Computational Research Division, + * Lawrence Berkeley National Laboratory + * + * qr.h + * Created: 2 August, 2023 + * + */ +#ifndef TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SLATE + +#include +#include + +namespace TiledArray::math::linalg::slate { + +template +auto householder_qr( const Array& V, TiledRange q_trange = TiledRange(), + TiledRange r_trange = TiledRange() ) { + + if(q_trange.rank() == 0) { + q_trange = V.trange(); + } + + if(r_trange.rank() == 0) { + auto col_tiling = V.trange().dim(1); + r_trange = TiledRange( {col_tiling, col_tiling} ); + } + + // SLATE does not yet have ORGQR/UNGQR + // https://github.com/icl-utk-edu/slate/issues/80 + + using element_type = typename std::remove_cv_t::element_type; + auto& world = V.world(); + + // Convert to SLATE + auto matrix = array_to_slate(V); + + // Perform GETRF + ::slate::TriangularFactors T; + ::slate::geqrf(matrix, T); + + // Form Q + auto Q = matrix.emptyLike(); Q.insertLocalTiles(); + ::slate::set(0.0, 1.0, Q); + ::slate::unmqr(::slate::Side::Left, ::slate::Op::NoTrans, matrix, T, Q); + + auto Q_ta = slate_to_array(Q, world); + + if constexpr (QOnly) { + return Q_ta; + } else { + SlateFunctors r_functors( r_trange, V.pmap() ); + const auto N = V.trange().dim(1).extent(); + auto comm = world.mpi.comm().Get_mpi_comm(); + auto R = r_functors.make_matrix<::slate::Matrix>(N,N,comm); + R.insertLocalTiles(); + ::slate::set(0.0, 0.0, R); + + // Triangular views of target operand matrices + ::slate::TriangularMatrix + R_tri(::slate::Uplo::Upper, ::slate::Diag::NonUnit, R); + ::slate::TriangularMatrix + A_tri(::slate::Uplo::Upper, ::slate::Diag::NonUnit, matrix); + + // Copy upper triangle of QR factors into R + ::slate::copy(A_tri, R_tri); + + // Convert to TA + auto R_ta = slate_to_array(R, world); + return std::tuple(Q_ta, R_ta); + } + +} + +} // namespace TiledArray::math::linalg::slate + +#endif // TILEDARRAY_HAS_SLATE + +#endif // TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED diff --git a/src/TiledArray/math/linalg/slate/svd.h b/src/TiledArray/math/linalg/slate/svd.h new file mode 100644 index 0000000000..652dc67e17 --- /dev/null +++ b/src/TiledArray/math/linalg/slate/svd.h @@ -0,0 +1,102 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2023 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Applied Mathematics and Computational Research Division, + * Lawrence Berkeley National Laboratory + * + * cholesky.h + * Created: 24 July, 2023 + * + */ +#ifndef TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SLATE + +#include +#include +#include + +namespace TiledArray::math::linalg::slate { + +template +auto svd(const Array& A, TA::TiledRange u_trange, TA::TiledRange vt_trange) { + + constexpr bool need_uv = (Vectors == SVD::AllVectors); + constexpr bool need_u = (Vectors == SVD::LeftVectors) or need_uv; + constexpr bool need_vt = (Vectors == SVD::RightVectors) or need_uv; + constexpr bool vals_only = not need_u and not need_vt; + + using element_type = typename std::remove_cv_t::element_type; + auto& world = A.world(); + auto comm = world.mpi.comm().Get_mpi_comm(); + + // Convert to SLATE + auto matrix = array_to_slate(A); + using slate_matrix_t = std::decay_t; + + // Allocate space for singular values + const auto M = matrix.m(); + const auto N = matrix.n(); + const auto SVD_SIZE = std::min(M,N); + std::vector<::blas::real_type> S(SVD_SIZE); + + world.gop.fence(); // stage SLATE execution + + // Generate functors + SlateFunctors u_functors(u_trange, A.pmap()); + SlateFunctors vt_functors(vt_trange, A.pmap()); + + slate_matrix_t U, VT; + + // Allocate if required + if(need_u) { + U = u_functors.make_matrix(M, SVD_SIZE, comm); + U.insertLocalTiles(); + } + if(need_vt) { + VT = vt_functors.make_matrix(SVD_SIZE, N, comm); + VT.insertLocalTiles(); + } + + // Do SVD + // If U/VT are default state, they will not be used + ::slate::svd(matrix, S, U, VT); + + Array U_ta, VT_ta; + if(need_u) { U_ta = slate_to_array(U, world); } + if(need_vt) { VT_ta = slate_to_array(VT, world); } + + if constexpr (need_uv) { + return std::tuple(S, U_ta, VT_ta); + } else if constexpr (need_u) { + return std::tuple(S, U_ta); + } else if constexpr (need_vt) { + return std::tuple(S, VT_ta); + } else { + return S; + } + +} + +} // namespace TiledArray::math::linalg::slate + +#endif // TILEDARRAY_HAS_SLATE + +#endif // TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED diff --git a/src/TiledArray/math/linalg/slate/util.h b/src/TiledArray/math/linalg/slate/util.h new file mode 100644 index 0000000000..ca65fe3dce --- /dev/null +++ b/src/TiledArray/math/linalg/slate/util.h @@ -0,0 +1,91 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2023 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Applied Mathematics and Computational Research Division, + * Lawrence Berkeley National Laboratory + * + * util.h + * Created: 25 July, 2023 + * + */ +#ifndef TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SLATE + +#include +namespace TiledArray::math::linalg::slate { + +template +void zero_triangle(::slate::Uplo tri, SlateMatrixType& A, bool zero_diag = false ) { + + const auto nt = A.nt(); // Number of column tiles + const auto mt = A.mt(); // Number of row tiles + + auto zero_block = [&](auto it, auto jt) { + if( A.tileIsLocal(it,jt) ) { + auto tile = A(it,jt); + const auto stride = tile.stride(); + const auto mb = tile.mb(); + const auto nb = tile.nb(); + auto* data = tile.data(); + for(int j = 0; j < nb; ++j) + for(int i = 0; i < mb; ++i) { + data[i + j*stride] = 0.0; + } + } + }; + + auto zero_tri = [&](auto it, auto jt) { + if( A.tileIsLocal(it,jt) ) { + auto tile = A(it,jt); + const auto stride = tile.stride(); + const auto mb = tile.mb(); + const auto nb = tile.nb(); + auto* data = tile.data(); + if( tri == ::slate::Uplo::Lower ) { + for(int j = 0; j < nb; ++j) + for(int i = j+1; i < mb; ++i) { + data[i + j*stride] = 0.0; + } + } else { + for(int j = 0; j < nb; ++j) + for(int i = 0; i < j; ++i) { + data[i + j*stride] = 0.0; + } + } + } + }; + + // TODO: Should be done in parallel + for(auto jt = 0; jt < nt; ++jt) { + zero_tri(jt, jt); // Handle diagonal block + for(auto it = jt + 1; it < mt; ++it) { + if( tri == ::slate::Uplo::Lower ) zero_block(it,jt); + else zero_block(jt,it); + } + } + +} + +} // namespace TiledArray::math::linalg::slate + +#endif // TILEDARRAY_HAS_SLATE + +#endif // TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED diff --git a/src/TiledArray/pmap/cyclic_pmap.h b/src/TiledArray/pmap/cyclic_pmap.h index 6d2df0088b..43357e8f30 100644 --- a/src/TiledArray/pmap/cyclic_pmap.h +++ b/src/TiledArray/pmap/cyclic_pmap.h @@ -31,6 +31,11 @@ namespace TiledArray { namespace detail { +enum class CyclicPmapOrder { + RowMajor, + ColMajor +}; + /// Maps cyclically a sequence of indices onto a 2-d matrix of processes /// Consider a sequence of indices \f$ \{ k | k \in [0,N) \} \f$, @@ -45,6 +50,7 @@ namespace detail { /// col} \} \f$ /// /// \note This class is used to map tile indices to processes. +template class CyclicPmap : public Pmap { protected: // Import Pmap protected variables @@ -63,7 +69,12 @@ class CyclicPmap : public Pmap { size_type local_cols_ = 0; ///< The number of columns that belong to this rank + inline size_type coordinate_to_index(size_type i, size_type j) const noexcept { + if constexpr (is_row_major_v) return i*cols_ + j; + else return i + j*rows_; + } public: + static constexpr bool is_row_major_v = (Order == CyclicPmapOrder::RowMajor); typedef Pmap::size_type size_type; ///< Size type /// Construct process map @@ -96,8 +107,13 @@ class CyclicPmap : public Pmap { // Compute local size_, if have any if (rank_ < (proc_rows_ * proc_cols_)) { // Compute rank coordinates - rank_row_ = rank_ / proc_cols_; - rank_col_ = rank_ % proc_cols_; + if constexpr (is_row_major_v) { + rank_row_ = rank_ / proc_cols_; + rank_col_ = rank_ % proc_cols_; + } else { + rank_row_ = rank_ % proc_rows_; + rank_col_ = rank_ / proc_rows_; + } local_rows_ = (rows_ / proc_rows_) + ((rows_ % proc_rows_) > rank_row_ ? 1ul : 0ul); @@ -127,13 +143,14 @@ class CyclicPmap : public Pmap { virtual size_type owner(const size_type tile) const { TA_ASSERT(tile < size_); // Compute tile coordinate in tile grid - const size_type tile_row = tile / cols_; - const size_type tile_col = tile % cols_; + const size_type tile_row = is_row_major_v ? tile / cols_ : tile % rows_; + const size_type tile_col = is_row_major_v ? tile % cols_ : tile / rows_; // Compute process coordinate of tile in the process grid const size_type proc_row = tile_row % proc_rows_; const size_type proc_col = tile_col % proc_cols_; // Compute the process that owns tile - const size_type proc = proc_row * proc_cols_ + proc_col; + const size_type proc = is_row_major_v ? + proc_row * proc_cols_ + proc_col : proc_row + proc_col * proc_rows_; TA_ASSERT(proc < procs_); @@ -149,48 +166,59 @@ class CyclicPmap : public Pmap { } private: +#if 0 virtual void advance(size_type& value, bool increment) const { - if (increment) { - auto row = value / cols_; - const auto row_end = (row + 1) * cols_; - value += proc_cols_; - if (value >= row_end) { // if past the end of row ... - row += proc_rows_; - if (row < rows_) { // still have tiles - value = row * cols_ + rank_col_; // first tile in this row - } else // done - value = size_; - } - } else { // decrement - auto row = value / cols_; - const auto row_begin = row * cols_; - if (value < proc_cols_) { // protect against unsigned wraparound - return; - } - value -= proc_cols_; - if (value < row_begin) { // if past the beginning of row ... - if (row < proc_rows_) // protect against unsigned wraparound + if constexpr (is_row_major_v) { + if (increment) { + auto row = value / cols_; + const auto row_end = (row + 1) * cols_; + value += proc_cols_; + if (value >= row_end) { // if past the end of row ... + row += proc_rows_; + if (row < rows_) { // still have tiles + value = coordinate_to_index(row, rank_col_); // first tile in this row + } else // done + value = size_; + } + } else { // decrement + auto row = value / cols_; + const auto row_begin = row * cols_; + if (value < proc_cols_) { // protect against unsigned wraparound return; - row -= proc_rows_; - value = row * cols_ + rank_col_ + - (local_cols_ - 1) * proc_cols_; // last tile in this row + } + value -= proc_cols_; + if (value < row_begin) { // if past the beginning of row ... + if (row < proc_rows_) // protect against unsigned wraparound + return; + row -= proc_rows_; + value = coordinate_to_index(row, rank_col_) + + (local_cols_ - 1) * proc_cols_; // last tile in this row + } } + } else { + Pmap::advance(value, increment); } } +#endif public: + +#if 0 virtual const_iterator begin() const { + const auto proc_index = coordinate_to_index(rank_row_, rank_col_); return this->local_size_ > 0 - ? Iterator(*this, rank_row_ * cols_ + rank_col_, this->size_, - rank_row_ * cols_ + rank_col_, false, true) + ? Iterator(*this, proc_index, this->size_, proc_index, + false, is_row_major_v) : end(); // make end() if empty } virtual const_iterator end() const { + const auto proc_index = coordinate_to_index(rank_row_, rank_col_); return this->local_size_ > 0 - ? Iterator(*this, rank_row_ * cols_ + rank_col_, this->size_, - this->size_, false, true) - : Iterator(*this, 0, this->size_, this->size_, false, true); + ? Iterator(*this, proc_index, this->size_, + this->size_, false, is_row_major_v) + : Iterator(*this, 0, this->size_, this->size_, false, is_row_major_v); } +#endif }; // class CyclicPmap diff --git a/src/TiledArray/pmap/pmap.h b/src/TiledArray/pmap/pmap.h index 0682901bab..528321f3f9 100644 --- a/src/TiledArray/pmap/pmap.h +++ b/src/TiledArray/pmap/pmap.h @@ -150,7 +150,7 @@ class Pmap { /// \name Iteration /// @{ - private: + protected: /// customizes how to iterate over local indices /// overload this and construct Iterator with `use_pmap_advance=true` diff --git a/src/TiledArray/pmap/user_pmap.h b/src/TiledArray/pmap/user_pmap.h index 50966f5744..0a74b660d7 100644 --- a/src/TiledArray/pmap/user_pmap.h +++ b/src/TiledArray/pmap/user_pmap.h @@ -87,12 +87,12 @@ class UserPmap : public Pmap { virtual bool known_local_size() const { return known_local_size_; } - virtual const_iterator begin() const { - return Iterator(*this, 0, this->size_, 0, false); - } - virtual const_iterator end() const { - return Iterator(*this, 0, this->size_, this->size_, false); - } + //virtual const_iterator begin() const { + // return Iterator(*this, 0, this->size_, 0, false); + //} + //virtual const_iterator end() const { + // return Iterator(*this, 0, this->size_, this->size_, false); + //} private: bool known_local_size_ = false; diff --git a/src/TiledArray/proc_grid.h b/src/TiledArray/proc_grid.h index a401e0ac1e..4594553700 100644 --- a/src/TiledArray/proc_grid.h +++ b/src/TiledArray/proc_grid.h @@ -515,7 +515,7 @@ class ProcGrid { std::shared_ptr make_pmap() const { TA_ASSERT(world_); - return std::make_shared(*world_, rows_, cols_, proc_rows_, + return std::make_shared>(*world_, rows_, cols_, proc_rows_, proc_cols_); } @@ -528,7 +528,7 @@ class ProcGrid { std::shared_ptr make_col_phase_pmap(const size_type rows) const { TA_ASSERT(world_); - return std::make_shared(*world_, rows, cols_, proc_rows_, + return std::make_shared>(*world_, rows, cols_, proc_rows_, proc_cols_); } @@ -541,7 +541,7 @@ class ProcGrid { std::shared_ptr make_row_phase_pmap(const size_type cols) const { TA_ASSERT(world_); - return std::make_shared(*world_, rows_, cols, proc_rows_, + return std::make_shared>(*world_, rows_, cols, proc_rows_, proc_cols_); } }; // class Grid diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 217c522018..6360082c6c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -98,9 +98,16 @@ set(ta_test_src_files ta_test.cpp # t_tot_tot_contract_.cpp # tot_tot_tot_contract_.cpp einsum.cpp - linalg.cpp + linalg/linalg.cpp + linalg/non_dist.cpp cp.cpp ) +if(ENABLE_SCALAPACK) + list(APPEND ta_test_src_files linalg/scalapack.cpp) +endif() +if(ENABLE_SLATE) + list(APPEND ta_test_src_files linalg/slate.cpp) +endif() if(CUDA_FOUND) list(APPEND ta_test_src_files librett.cpp expressions_cuda_um.cpp tensor_um.cpp) diff --git a/tests/cyclic_pmap.cpp b/tests/cyclic_pmap.cpp index 509b9f92bf..aa387150a2 100644 --- a/tests/cyclic_pmap.cpp +++ b/tests/cyclic_pmap.cpp @@ -32,7 +32,21 @@ struct CyclicPmapFixture { BOOST_FIXTURE_TEST_SUITE(cyclic_pmap_suite, CyclicPmapFixture) -BOOST_AUTO_TEST_CASE(constructor) { +template +struct cyclic_pmap_order_wrapper { + static constexpr auto value = Order; +}; + + +using cyclic_pmap_orders = boost::mpl::list< + cyclic_pmap_order_wrapper, + cyclic_pmap_order_wrapper +>; + +BOOST_AUTO_TEST_CASE_TEMPLATE(constructor, Order, cyclic_pmap_orders) { + + using pmap_type = TiledArray::detail::CyclicPmap; + for (ProcessID x = 1ul; x <= GlobalFixture::world->size(); ++x) { for (ProcessID y = 1ul; y <= GlobalFixture::world->size(); ++y) { // Compute the limits for process rows @@ -48,10 +62,9 @@ BOOST_AUTO_TEST_CASE(constructor) { max_proc_rows)); const std::size_t p_cols = GlobalFixture::world->size() / p_rows; - BOOST_REQUIRE_NO_THROW(TiledArray::detail::CyclicPmap pmap( - *GlobalFixture::world, x, y, p_rows, p_cols)); - TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows, - p_cols); + BOOST_REQUIRE_NO_THROW( pmap_type pmap( *GlobalFixture::world, x, y, + p_rows, p_cols)); + pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols); BOOST_CHECK_EQUAL(pmap.rank(), GlobalFixture::world->rank()); BOOST_CHECK_EQUAL(pmap.procs(), GlobalFixture::world->size()); BOOST_CHECK_EQUAL(pmap.size(), x * y); @@ -60,32 +73,27 @@ BOOST_AUTO_TEST_CASE(constructor) { ProcessID size = GlobalFixture::world->size(); - BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, - 0ul, 10ul, 1, 1), + BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 0ul, 10ul, 1, 1), TiledArray::Exception); - BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, - 10ul, 0ul, 1, 1), + BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 0ul, 1, 1), TiledArray::Exception); - BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, - 10ul, 10ul, 0, 1), + BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 0, 1), TiledArray::Exception); - BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, - 10ul, 10ul, 1, 0), + BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 1, 0), TiledArray::Exception); - BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap( - *GlobalFixture::world, 10ul, 10ul, size * 2, 1), + BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, size * 2, 1), TiledArray::Exception); - BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap( - *GlobalFixture::world, 10ul, 10ul, 1, size * 2), + BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 1, size * 2), TiledArray::Exception); if (size > 1) { - BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap( - *GlobalFixture::world, 10ul, 10ul, size, size), + BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, size, size), TiledArray::Exception); } } -BOOST_AUTO_TEST_CASE(owner) { +BOOST_AUTO_TEST_CASE_TEMPLATE(owner, Order, cyclic_pmap_orders) { + + using pmap_type = TiledArray::detail::CyclicPmap; const std::size_t rank = GlobalFixture::world->rank(); const std::size_t size = GlobalFixture::world->size(); @@ -108,8 +116,7 @@ BOOST_AUTO_TEST_CASE(owner) { const std::size_t p_cols = GlobalFixture::world->size() / p_rows; const std::size_t tiles = x * y; - TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows, - p_cols); + pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols); for (std::size_t tile = 0; tile < tiles; ++tile) { std::fill_n(p_owner, size, 0); @@ -121,6 +128,18 @@ BOOST_AUTO_TEST_CASE(owner) { // Make sure everyone agrees on who owns what. for (std::size_t p = 0ul; p < size; ++p) BOOST_CHECK_EQUAL(p_owner[p], p_owner[rank]); + + size_t true_owner; + if(Order::value == TiledArray::detail::CyclicPmapOrder::RowMajor) { + auto proc_row = (tile / pmap.ncols()) % pmap.nrows_proc(); + auto proc_col = (tile % pmap.ncols()) % pmap.ncols_proc(); + true_owner = proc_row * pmap.ncols_proc() + proc_col; + } else { + auto proc_row = (tile % pmap.nrows()) % pmap.nrows_proc(); + auto proc_col = (tile / pmap.nrows()) % pmap.ncols_proc(); + true_owner = proc_row + proc_col * pmap.nrows_proc(); + } + BOOST_CHECK_EQUAL(p_owner[rank], true_owner); } } } @@ -128,7 +147,8 @@ BOOST_AUTO_TEST_CASE(owner) { delete[] p_owner; } -BOOST_AUTO_TEST_CASE(local_size) { +BOOST_AUTO_TEST_CASE_TEMPLATE(local_size, Order, cyclic_pmap_orders) { + using pmap_type = TiledArray::detail::CyclicPmap; for (std::size_t x = 1ul; x < 10ul; ++x) { for (std::size_t y = 1ul; y < 10ul; ++y) { // Compute the limits for process rows @@ -145,8 +165,7 @@ BOOST_AUTO_TEST_CASE(local_size) { const std::size_t p_cols = GlobalFixture::world->size() / p_rows; const std::size_t tiles = x * y; - TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows, - p_cols); + pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols); std::size_t total_size = pmap.local_size(); GlobalFixture::world->gop.sum(total_size); @@ -159,7 +178,8 @@ BOOST_AUTO_TEST_CASE(local_size) { } } -BOOST_AUTO_TEST_CASE(local_group) { +BOOST_AUTO_TEST_CASE_TEMPLATE(local_group, Order, cyclic_pmap_orders) { + using pmap_type = TiledArray::detail::CyclicPmap; ProcessID tile_owners[100]; for (std::size_t x = 1ul; x < 10ul; ++x) { @@ -178,18 +198,15 @@ BOOST_AUTO_TEST_CASE(local_group) { const std::size_t p_cols = GlobalFixture::world->size() / p_rows; const std::size_t tiles = x * y; - TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows, - p_cols); + pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols); // Check that all local elements map to this rank - for (detail::CyclicPmap::const_iterator it = pmap.begin(); - it != pmap.end(); ++it) { + for (auto it = pmap.begin(); it != pmap.end(); ++it) { BOOST_CHECK_EQUAL(pmap.owner(*it), GlobalFixture::world->rank()); } std::fill_n(tile_owners, tiles, 0); - for (detail::CyclicPmap::const_iterator it = pmap.begin(); - it != pmap.end(); ++it) { + for (auto it = pmap.begin(); it != pmap.end(); ++it) { tile_owners[*it] += GlobalFixture::world->rank(); } diff --git a/tests/linalg.cpp b/tests/linalg.cpp deleted file mode 100644 index 5c84d0b5e4..0000000000 --- a/tests/linalg.cpp +++ /dev/null @@ -1,1079 +0,0 @@ -#include -#include -#include "TiledArray/config.h" -//#include "range_fixture.h" -#include "unit_test_config.h" - -#include "TiledArray/math/linalg/non-distributed/cholesky.h" -#include "TiledArray/math/linalg/non-distributed/heig.h" -#include "TiledArray/math/linalg/non-distributed/lu.h" -#include "TiledArray/math/linalg/non-distributed/svd.h" - -#include "TiledArray/math/linalg/cholesky.h" -#include "TiledArray/math/linalg/heig.h" -#include "TiledArray/math/linalg/lu.h" -#include "TiledArray/math/linalg/svd.h" - -namespace TA = TiledArray; -namespace non_dist = TA::math::linalg::non_distributed; - -#if TILEDARRAY_HAS_SCALAPACK -namespace scalapack = TA::math::linalg::scalapack; -#include "TiledArray/math/linalg/scalapack/all.h" -#define TILEDARRAY_SCALAPACK_TEST(F, E) \ - GlobalFixture::world->gop.fence(); \ - compare("TiledArray::scalapack", non_dist::F, scalapack::F, E); \ - GlobalFixture::world->gop.fence(); \ - compare("TiledArray", non_dist::F, TiledArray::F, E); -#else -#define TILEDARRAY_SCALAPACK_TEST(...) -#endif - -#if TILEDARRAY_HAS_TTG -#include "TiledArray/math/linalg/ttg/cholesky.h" -#define TILEDARRAY_TTG_TEST(F, E) \ - GlobalFixture::world->gop.fence(); \ - compare("TiledArray::ttg", non_dist::F, TiledArray::math::linalg::ttg::F, E); -#else -#define TILEDARRAY_TTG_TEST(...) -#endif - -struct ReferenceFixture { - size_t N; - std::vector htoeplitz_vector; - std::vector exact_evals; - - inline double matrix_element_generator(int64_t i, int64_t j) { -#if 0 - // Generates a Hankel matrix: absurd condition number - return i+j; -#else - // Generates a Circulant matrix: good condition number - return htoeplitz_vector[std::abs(i - j)]; -#endif - } - - template - inline double make_ta_reference(Tile& t, TA::Range const& range) { - t = Tile(range, 0.0); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) { - for (auto n = lo[1]; n < up[1]; ++n) { - t(m, n) = matrix_element_generator(m, n); - } - } - - return norm(t); - }; - - ReferenceFixture(int64_t N = 1000) - : N(N), htoeplitz_vector(N), exact_evals(N) { - // Generate an hermitian Circulant vector - std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); - htoeplitz_vector[0] = 100; - std::default_random_engine gen(0); - std::uniform_real_distribution<> dist(0., 1.); - for (int64_t i = 1; i <= (N / 2); ++i) { - double val = dist(gen); - htoeplitz_vector[i] = val; - htoeplitz_vector[N - i] = val; - } - - // Compute exact eigenvalues - const double ff = 2. * M_PI / N; - for (int64_t j = 0; j < N; ++j) { - double val = htoeplitz_vector[0]; - ; - for (int64_t k = 1; k < N; ++k) - val += htoeplitz_vector[N - k] * std::cos(ff * j * k); - exact_evals[j] = val; - } - - std::sort(exact_evals.begin(), exact_evals.end()); - } -}; - -struct LinearAlgebraFixture : ReferenceFixture { -#if TILEDARRAY_HAS_SCALAPACK - - blacspp::Grid grid; - scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? - - LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) - : ReferenceFixture(N), - grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? - ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) { - for (size_t i = 0; i < N; ++i) { - for (size_t j = 0; j < N; ++j) { - if (ref_matrix.dist().i_own(i, j)) { - auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j); - ref_matrix.local_mat()(i_local, j_local) = - matrix_element_generator(i, j); - } - } - } - } -#endif - - template - static void compare(const char* context, const A& non_dist, const A& result, - double e) { - // clang-format off - BOOST_TEST_CONTEXT(context) - ; - // clang-format on - auto diff_with_non_dist = (non_dist("i,j") - result("i,j")).norm().get(); - BOOST_CHECK_SMALL(diff_with_non_dist, e); - } - - template - static void for_each_pair_of_tuples_impl(T&& t1, T&& t2, F f, - std::integer_sequence) { - auto l = {(f(std::get(t1), std::get(t2)), 0)...}; - } - - template - static void for_each_pair_of_tuples(std::tuple const& t1, - std::tuple const& t2, F f) { - for_each_pair_of_tuples_impl( - t1, t2, f, std::make_integer_sequence()); - } - - template - static void compare(const char* context, const std::tuple& non_dist, - const std::tuple& result, double e) { - for_each_pair_of_tuples(non_dist, result, [&](auto& arg1, auto& arg2) { - compare(context, arg1, arg2, e); - }); - } -}; - -TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { - TA_ASSERT(TA_NBs.size() > 0); - - std::default_random_engine gen(0); - std::uniform_int_distribution<> dist(0, TA_NBs.size() - 1); - auto rand_indx = [&]() { return dist(gen); }; - auto rand_nb = [&]() { return TA_NBs[rand_indx()]; }; - - std::vector t_boundaries = {0}; - auto TA_NB = rand_nb(); - while (t_boundaries.back() + TA_NB < N) { - t_boundaries.emplace_back(t_boundaries.back() + TA_NB); - TA_NB = rand_nb(); - } - t_boundaries.emplace_back(N); - - std::vector ranges( - 2, TA::TiledRange1(t_boundaries.begin(), t_boundaries.end())); - - return TA::TiledRange(ranges.begin(), ranges.end()); -}; - -BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture) - -#if TILEDARRAY_HAS_SCALAPACK - -BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { - GlobalFixture::world->gop.fence(); - - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); - - auto NB = ref_matrix.dist().nb(); - - auto trange = gen_trange(N, {static_cast(NB)}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - GlobalFixture::world->gop.fence(); - auto test_ta = - scalapack::block_cyclic_to_array>(ref_matrix, trange); - GlobalFixture::world->gop.fence(); - - auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - - BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); -}; - -BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { - GlobalFixture::world->gop.fence(); - - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); - - auto NB = ref_matrix.dist().nb(); - - auto trange = gen_trange(N, {static_cast(NB / 2)}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - GlobalFixture::world->gop.fence(); - auto test_ta = - scalapack::block_cyclic_to_array>(ref_matrix, trange); - GlobalFixture::world->gop.fence(); - - auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - - BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); -}; - -BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { - GlobalFixture::world->gop.fence(); - - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); - - auto NB = ref_matrix.dist().nb(); - - auto trange = gen_trange(N, {static_cast(NB)}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - GlobalFixture::world->gop.fence(); - auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); - GlobalFixture::world->gop.fence(); - - double local_norm_diff = - (test_matrix.local_mat() - ref_matrix.local_mat()).norm(); - local_norm_diff *= local_norm_diff; - - double norm_diff; - MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM, - MPI_COMM_WORLD); - - norm_diff = std::sqrt(norm_diff); - - BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); -}; - -BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { - GlobalFixture::world->gop.fence(); - - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); - - [[maybe_unused]] auto NB = ref_matrix.dist().nb(); - - auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - GlobalFixture::world->gop.fence(); - auto test_ta = - scalapack::block_cyclic_to_array>(ref_matrix, trange); - GlobalFixture::world->gop.fence(); - - auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - - BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); -}; - -BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { - GlobalFixture::world->gop.fence(); - - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); - - auto NB = ref_matrix.dist().nb(); - - auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - GlobalFixture::world->gop.fence(); - auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); - GlobalFixture::world->gop.fence(); - - double local_norm_diff = - (test_matrix.local_mat() - ref_matrix.local_mat()).norm(); - local_norm_diff *= local_norm_diff; - - double norm_diff; - MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM, - MPI_COMM_WORLD); - - norm_diff = std::sqrt(norm_diff); - - BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); -}; - -BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { - GlobalFixture::world->gop.fence(); - - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); - - auto NB = ref_matrix.dist().nb(); - - auto trange = gen_trange(N, {static_cast(NB)}); - - // test with TA and btas tile - using typelist_t = - std::tuple, btas::Tensor>; - typelist_t typevals; - - auto test = [&](const auto& typeval_ref) { - using Tile = std::decay_t; - using Array = TA::DistArray; - - auto ref_ta = TA::make_array( - *GlobalFixture::world, trange, - [this](Tile& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - GlobalFixture::world->gop.fence(); - auto test_ta = scalapack::block_cyclic_to_array(ref_matrix, trange); - GlobalFixture::world->gop.fence(); - - auto norm_diff = - (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get(); - - BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); - }; - - test(std::get<0>(typevals)); - test(std::get<1>(typevals)); -}; - -BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) { - GlobalFixture::world->gop.fence(); - - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); - - auto NB = ref_matrix.dist().nb(); - - auto trange = gen_trange(N, {static_cast(NB)}); - - // test with TA and btas tile - using typelist_t = - std::tuple, btas::Tensor>; - typelist_t typevals; - - auto test = [&](const auto& typeval_ref) { - using Tile = std::decay_t; - using Array = TA::DistArray; - - auto ref_ta = TA::make_array( - *GlobalFixture::world, trange, - [this](Tile& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - GlobalFixture::world->gop.fence(); - auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); - GlobalFixture::world->gop.fence(); - - double local_norm_diff = - (test_matrix.local_mat() - ref_matrix.local_mat()).norm(); - local_norm_diff *= local_norm_diff; - - double norm_diff; - MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM, - MPI_COMM_WORLD); - - norm_diff = std::sqrt(norm_diff); - - BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); - }; - - test(std::get<0>(typevals)); - test(std::get<1>(typevals)); -}; - -BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { - GlobalFixture::world->gop.fence(); - - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); - - auto NB = ref_matrix.dist().nb(); - - auto trange = gen_trange(N, {static_cast(NB)}); - - const TA::TArray ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - GlobalFixture::world->gop.fence(); - auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); - GlobalFixture::world->gop.fence(); - - double local_norm_diff = - (test_matrix.local_mat() - ref_matrix.local_mat()).norm(); - local_norm_diff *= local_norm_diff; - - double norm_diff; - MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM, - MPI_COMM_WORLD); - - norm_diff = std::sqrt(norm_diff); - - BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); -}; - -#endif // TILEDARRAY_HAS_SCALAPACK - -BOOST_AUTO_TEST_CASE(heig_same_tiling) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - const auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto [evals, evecs] = non_dist::heig(ref_ta); - auto [evals_non_dist, evecs_non_dist] = non_dist::heig(ref_ta); - // auto evals = heig( ref_ta ); - - BOOST_CHECK(evecs.trange() == ref_ta.trange()); - - // check eigenvectors against non_dist only, for now ... - decltype(evecs) evecs_error; - evecs_error("i,j") = evecs_non_dist("i,j") - evecs("i,j"); - // TODO need to fix phases of the eigenvectors to be able to compare ... - // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(), - // N * N * std::numeric_limits::epsilon()); - - // Check eigenvalue correctness - double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) { - BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); - BOOST_CHECK_SMALL(std::abs(evals_non_dist[i] - exact_evals[i]), tol); - } - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(heig_diff_tiling) { - GlobalFixture::world->gop.fence(); - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto new_trange = gen_trange(N, {64ul}); - auto [evals, evecs] = non_dist::heig(ref_ta, new_trange); - auto [evals_non_dist, evecs_non_dist] = non_dist::heig(ref_ta, new_trange); - - BOOST_CHECK(evecs.trange() == new_trange); - - // check eigenvectors against non_dist only, for now ... - decltype(evecs) evecs_error; - evecs_error("i,j") = evecs_non_dist("i,j") - evecs("i,j"); - // TODO need to fix phases of the eigenvectors to be able to compare ... - // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(), - // N * N * std::numeric_limits::epsilon()); - - // Check eigenvalue correctness - double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) { - BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); - BOOST_CHECK_SMALL(std::abs(evals_non_dist[i] - exact_evals[i]), tol); - } - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(heig_generalized) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto dense_iden = TA::make_array>( - *GlobalFixture::world, trange, - [](TA::Tensor& t, TA::Range const& range) -> double { - t = TA::Tensor(range, 0.0); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if (m == n) t(m, n) = 1.; - - return t.norm(); - }); - - GlobalFixture::world->gop.fence(); - auto [evals, evecs] = non_dist::heig(ref_ta, dense_iden); - // auto evals = heig( ref_ta ); - - BOOST_CHECK(evecs.trange() == ref_ta.trange()); - - // TODO: Check validity of eigenvectors, not crucial for the time being - - // Check eigenvalue correctness - double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) - BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol); - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(cholesky) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto A = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto L = non_dist::cholesky(A); - - BOOST_CHECK(L.trange() == A.trange()); - - decltype(A) A_minus_LLt; - A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj(); - - const double epsilon = N * N * std::numeric_limits::epsilon(); - - BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), epsilon); - - // check against NON_DIST also - auto L_ref = non_dist::cholesky(A); - decltype(L) L_diff; - L_diff("i,j") = L("i,j") - L_ref("i,j"); - - BOOST_CHECK_SMALL(L_diff("i,j").norm().get(), epsilon); - - TILEDARRAY_SCALAPACK_TEST(cholesky(A), epsilon); - - TILEDARRAY_TTG_TEST(cholesky(A), epsilon); -} - -BOOST_AUTO_TEST_CASE(cholesky_linv) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto A = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - decltype(A) Acopy = A.clone(); - - auto Linv = TA::cholesky_linv(A); - - BOOST_CHECK(Linv.trange() == A.trange()); - - TA::TArray tmp(*GlobalFixture::world, trange); - tmp("i,j") = Linv("i,k") * A("k,j"); - A("i,j") = tmp("i,k") * Linv("j,k"); - - TA::foreach_inplace(A, [](TA::Tensor& tile) { - auto range = tile.range(); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if (m == n) { - tile(m, n) -= 1.; - } - }); - - double epsilon = N * N * std::numeric_limits::epsilon(); - double norm = A("i,j").norm().get(); - - BOOST_CHECK_SMALL(norm, epsilon); - - TILEDARRAY_SCALAPACK_TEST(cholesky_linv(Acopy), epsilon); - - TILEDARRAY_TTG_TEST(cholesky_linv(Acopy), epsilon); -} - -BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto A = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto [L, Linv] = TA::cholesky_linv(A); - - BOOST_CHECK(Linv.trange() == A.trange()); - BOOST_CHECK(L.trange() == A.trange()); - - TA::TArray tmp(*GlobalFixture::world, trange); - tmp("i,j") = Linv("i,k") * L("k,j"); - - TA::foreach_inplace(tmp, [](TA::Tensor& tile) { - auto range = tile.range(); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if (m == n) { - tile(m, n) -= 1.; - } - }); - - double epsilon = N * N * std::numeric_limits::epsilon(); - double norm = tmp("i,j").norm(*GlobalFixture::world).get(); - - BOOST_CHECK_SMALL(norm, epsilon); - - TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); - - TILEDARRAY_TTG_TEST(cholesky_linv(A), epsilon); -} - -BOOST_AUTO_TEST_CASE(cholesky_solve) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto A = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto iden = non_dist::cholesky_solve(A, A); - BOOST_CHECK(iden.trange() == A.trange()); - - auto iden_non_dist = non_dist::cholesky_solve(A, A); - decltype(iden) iden_error; - iden_error("i,j") = iden("i,j") - iden_non_dist("i,j"); - BOOST_CHECK_SMALL(iden_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); - - TA::foreach_inplace(iden, [](TA::Tensor& tile) { - auto range = tile.range(); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if (m == n) { - tile(m, n) -= 1.; - } - }); - - double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(cholesky_lsolve) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto A = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - // Should produce X = L**H - auto [L, X] = non_dist::cholesky_lsolve(TA::NoTranspose, A, A); - BOOST_CHECK(X.trange() == A.trange()); - BOOST_CHECK(L.trange() == A.trange()); - - // first, test against NON_DIST - auto [L_non_dist, X_non_dist] = - non_dist::cholesky_lsolve(TA::NoTranspose, A, A); - decltype(L) L_error; - L_error("i,j") = L("i,j") - L_non_dist("i,j"); - BOOST_CHECK_SMALL(L_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); - decltype(X) X_error; - X_error("i,j") = X("i,j") - X_non_dist("i,j"); - BOOST_CHECK_SMALL(X_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); - - X("i,j") -= L("j,i"); - - double norm = X("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(lu_solve) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto iden = non_dist::lu_solve(ref_ta, ref_ta); - - BOOST_CHECK(iden.trange() == ref_ta.trange()); - - TA::foreach_inplace(iden, [](TA::Tensor& tile) { - auto range = tile.range(); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if (m == n) { - tile(m, n) -= 1.; - } - }); - - double epsilon = N * N * std::numeric_limits::epsilon(); - double norm = iden("i,j").norm(*GlobalFixture::world).get(); - - BOOST_CHECK_SMALL(norm, epsilon); - TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon); - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(lu_inv) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - TA::TArray iden(*GlobalFixture::world, trange); - - auto Ainv = non_dist::lu_inv(ref_ta); - iden("i,j") = Ainv("i,k") * ref_ta("k,j"); - - BOOST_CHECK(iden.trange() == ref_ta.trange()); - - TA::foreach_inplace(iden, [](TA::Tensor& tile) { - auto range = tile.range(); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) - for (auto n = lo[1]; n < up[1]; ++n) - if (m == n) { - tile(m, n) -= 1.; - } - }); - - double epsilon = N * N * std::numeric_limits::epsilon(); - double norm = iden("i,j").norm(*GlobalFixture::world).get(); - - BOOST_CHECK_SMALL(norm, epsilon); - TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon); - - GlobalFixture::world->gop.fence(); -} - -#if 1 -BOOST_AUTO_TEST_CASE(svd_values_only) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto S = non_dist::svd(ref_ta, trange, trange); - - std::vector exact_singular_values = exact_evals; - std::sort(exact_singular_values.begin(), exact_singular_values.end(), - std::greater()); - - // Check singular value correctness - double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) - BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(svd_leftvectors) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto [S, U] = non_dist::svd(ref_ta, trange, trange); - - std::vector exact_singular_values = exact_evals; - std::sort(exact_singular_values.begin(), exact_singular_values.end(), - std::greater()); - - // Check singular value correctness - double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) - BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(svd_rightvectors) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto [S, VT] = non_dist::svd(ref_ta, trange, trange); - - std::vector exact_singular_values = exact_evals; - std::sort(exact_singular_values.begin(), exact_singular_values.end(), - std::greater()); - - // Check singular value correctness - double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) - BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(svd_allvectors) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - auto [S, U, VT] = non_dist::svd(ref_ta, trange, trange); - - std::vector exact_singular_values = exact_evals; - std::sort(exact_singular_values.begin(), exact_singular_values.end(), - std::greater()); - - // Check singular value correctness - double tol = N * N * std::numeric_limits::epsilon(); - for (int64_t i = 0; i < N; ++i) - BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol); - GlobalFixture::world->gop.fence(); -} -#endif - -template -void householder_qr_q_only_test(const ArrayT& A, double tol) { - using value_type = typename ArrayT::element_type; - -#if TILEDARRAY_HAS_SCALAPACK - auto Q = use_scalapack ? scalapack::householder_qr(A) - : non_dist::householder_qr(A); -#else - static_assert(not use_scalapack); - auto Q = non_dist::householder_qr(A); -#endif - - // Make sure the Q is orthogonal at least - TA::TArray Iden; - Iden("i,j") = Q("k,i") * Q("k,j"); - Iden.make_replicated(); - auto I_eig = TA::array_to_eigen(Iden); - const auto N = A.trange().dim(1).extent(); - BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); -} - -template -void householder_qr_test(const ArrayT& A, double tol) { -#if TILEDARRAY_HAS_SCALAPACK - auto [Q, R] = use_scalapack ? scalapack::householder_qr(A) - : non_dist::householder_qr(A); -#else - static_assert(not use_scalapack); - auto [Q, R] = non_dist::householder_qr(A); -#endif - - // Check reconstruction error - TA::TArray QR_ERROR; - QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); - BOOST_CHECK_SMALL(QR_ERROR("i,j").norm().get(), tol); - - // Check orthonormality of Q - TA::TArray Iden; - Iden("i,j") = Q("k,i") * Q("k,j"); - Iden.make_replicated(); - auto I_eig = TA::array_to_eigen(Iden); - const auto N = A.trange().dim(1).extent(); - BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); -} - -BOOST_AUTO_TEST_CASE(householder_qr_q_only) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - double tol = N * N * std::numeric_limits::epsilon(); - householder_qr_q_only_test(ref_ta, tol); -#if TILEDARRAY_HAS_SCALAPACK - householder_qr_q_only_test(ref_ta, tol); -#endif - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(householder_qr) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - double tol = N * N * std::numeric_limits::epsilon(); - householder_qr_test(ref_ta, tol); -#if TILEDARRAY_HAS_SCALAPACK - householder_qr_test(ref_ta, tol); -#endif - - GlobalFixture::world->gop.fence(); -} - -template -void cholesky_qr_q_only_test(const ArrayT& A, double tol) { - using value_type = typename ArrayT::element_type; - - auto Q = TiledArray::math::linalg::cholesky_qr(A); - - // Make sure the Q is orthogonal at least - TA::TArray Iden; - Iden("i,j") = Q("k,i") * Q("k,j"); - Iden.make_replicated(); - auto I_eig = TA::array_to_eigen(Iden); - const auto N = A.trange().dim(1).extent(); - BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); -} - -template -void cholesky_qr_test(const ArrayT& A, double tol) { - auto [Q, R] = TiledArray::math::linalg::cholesky_qr(A); - - // Check reconstruction error - TA::TArray QR_ERROR; - QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); - BOOST_CHECK_SMALL(QR_ERROR("i,j").norm().get(), tol); - - // Check orthonormality of Q - TA::TArray Iden; - Iden("i,j") = Q("k,i") * Q("k,j"); - Iden.make_replicated(); - auto I_eig = TA::array_to_eigen(Iden); - const auto N = A.trange().dim(1).extent(); - BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); -} - -BOOST_AUTO_TEST_CASE(cholesky_qr_q_only) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - double tol = N * N * std::numeric_limits::epsilon(); - cholesky_qr_q_only_test(ref_ta, tol); - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_CASE(cholesky_qr) { - GlobalFixture::world->gop.fence(); - - auto trange = gen_trange(N, {128ul}); - - auto ref_ta = TA::make_array>( - *GlobalFixture::world, trange, - [this](TA::Tensor& t, TA::Range const& range) -> double { - return this->make_ta_reference(t, range); - }); - - double tol = N * N * std::numeric_limits::epsilon(); - cholesky_qr_test(ref_ta, tol); - - GlobalFixture::world->gop.fence(); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/linalg/cholesky_tests.h b/tests/linalg/cholesky_tests.h new file mode 100644 index 0000000000..7612a0a57a --- /dev/null +++ b/tests/linalg/cholesky_tests.h @@ -0,0 +1,138 @@ +#pragma once +#include "linalg_fixture.h" +#include "compare_utilities.h" // Tensor comparison utilities +#include "gen_trange.h" // TiledRange generator +#include "misc_util.h" // Misc utilities + +// Cholesky (POTRF) Test +template +void ReferenceFixture::cholesky_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto L = Derived::cholesky(A); + + BOOST_CHECK(L.trange() == A.trange()); + + decltype(A) A_minus_LLt; + A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj(); + + const double epsilon = N * N * std::numeric_limits::epsilon(); + + BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), epsilon); + + world.gop.fence(); +} + + + +// Cholesky LINV (POTRF + TRTRI) Test +template +void ReferenceFixture::cholesky_linv_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto Linv = Derived::template cholesky_linv(A); + BOOST_CHECK(Linv.trange() == A.trange()); + + TA::TArray tmp(world, trange); + tmp("i,j") = Linv("i,k") * A("k,j"); + A("i,j") = tmp("i,k") * Linv("j,k"); + subtract_identity_inplace(A); // A -= I + + double epsilon = N * N * std::numeric_limits::epsilon(); + double norm = A("i,j").norm().get(); + BOOST_CHECK_SMALL(norm, epsilon); + + world.gop.fence(); +} + + + +// Cholesky LINV (POTRF + TRTRI) + L Return Test +template +void ReferenceFixture::cholesky_linv_retl_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto [L, Linv] = Derived::template cholesky_linv(A); + + BOOST_CHECK(Linv.trange() == A.trange()); + BOOST_CHECK(L.trange() == A.trange()); + + TA::TArray tmp(world, trange); + tmp("i,j") = Linv("i,k") * L("k,j"); + subtract_identity_inplace(tmp); // tmp -= I + + double epsilon = N * N * std::numeric_limits::epsilon(); + double norm = tmp("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, epsilon); + + world.gop.fence(); +} + + + +// Cholesky Solve (POSV) Test +template +void ReferenceFixture::cholesky_solve_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto iden = Derived::cholesky_solve(A, A); + BOOST_CHECK(iden.trange() == A.trange()); + subtract_identity_inplace(iden); // iden -= I + + const auto epsilon = N * N * std::numeric_limits::epsilon(); + double norm = iden("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, epsilon); + + world.gop.fence(); +} + + + +// Cholesky L-Solve (POTRF + TRSM) Test +template +void ReferenceFixture::cholesky_lsolve_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + // Should produce X = L**H + auto [L, X] = Derived::cholesky_lsolve(TA::NoTranspose, A, A); + BOOST_CHECK(X.trange() == A.trange()); + BOOST_CHECK(L.trange() == A.trange()); + + X("i,j") -= L("j,i"); + + const auto epsilon = N * N * std::numeric_limits::epsilon(); + double norm = X("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, epsilon); + + world.gop.fence(); +} diff --git a/tests/linalg/compare_utilities.h b/tests/linalg/compare_utilities.h new file mode 100644 index 0000000000..d16f7d842f --- /dev/null +++ b/tests/linalg/compare_utilities.h @@ -0,0 +1,112 @@ +#pragma once +#include +#include "unit_test_config.h" + + +template +static void compare_replicated_vector(const char* context, const A& S_nd, const A& S, + double e) { + // clang-format off + BOOST_TEST_CONTEXT(context) + ; + // clang-format on + + const size_t n = S.size(); + BOOST_REQUIRE_EQUAL(n, S_nd.size()); + for(size_t i = 0; i < n; ++i) { + BOOST_CHECK_SMALL(std::abs(S[i] - S_nd[i]), e); + } +} + +template +static void compare_subspace(const char* context, const A& non_dist, const A& result, + double e) { + + namespace TA = TiledArray; + // clang-format off + BOOST_TEST_CONTEXT(context) + ; + // clang-format on + + auto nd_eigen = TA::array_to_eigen(non_dist); + auto rs_eigen = TA::array_to_eigen(result); + + Eigen::MatrixXd G; G = nd_eigen.adjoint() * rs_eigen; + Eigen::MatrixXd G2; G2 = G.adjoint() * G; // Accounts for phase-flips + const auto n = G.rows(); + auto G2_mI_nrm = (G2 - Eigen::MatrixXd::Identity(n,n)).norm(); + BOOST_CHECK_SMALL(G2_mI_nrm, e); +} + +template +static void compare_eig(const char* context, const A& non_dist, const A& result, + double e) { + // clang-format off + BOOST_TEST_CONTEXT(context) + ; + // clang-format on + + auto [evals_nd, evecs_nd] = non_dist; + auto [evals, evecs ] = result; + + compare_replicated_vector(context, evals_nd, evals, e); + + // The test problem for the unit tests has a non-degenerate spectrum + // we only need to check for phase-flips in this check + evecs.make_replicated(); // Need to be replicated for Eigen conversion + evecs_nd.make_replicated(); + compare_subspace(context, evecs_nd, evecs, e); +} + +template +static void compare_svd(const char* context, const A& non_dist, const A& result, + double e) { + namespace TA = TiledArray; + + // clang-format off + BOOST_TEST_CONTEXT(context) + ; + // clang-format on + + if constexpr (Vectors == TA::SVD::ValuesOnly) { + compare_replicated_vector(context, non_dist, result, e); + return; + } else { + const auto& S = std::get<0>(result); + const auto& S_nd = std::get<0>(non_dist); + compare_replicated_vector(context, S_nd, S, e); + } + +} +template +static void compare(const char* context, const A& non_dist, const A& result, + double e) { + // clang-format off + BOOST_TEST_CONTEXT(context) + ; + // clang-format on + auto diff_with_non_dist = (non_dist("i,j") - result("i,j")).norm().get(); + BOOST_CHECK_SMALL(diff_with_non_dist, e); +} + +template +static void for_each_pair_of_tuples_impl(T&& t1, T&& t2, F f, + std::integer_sequence) { + auto l = {(f(std::get(t1), std::get(t2)), 0)...}; +} + +template +static void for_each_pair_of_tuples(std::tuple const& t1, + std::tuple const& t2, F f) { + for_each_pair_of_tuples_impl( + t1, t2, f, std::make_integer_sequence()); +} + +template +static void compare(const char* context, const std::tuple& non_dist, + const std::tuple& result, double e) { + for_each_pair_of_tuples(non_dist, result, [&](auto& arg1, auto& arg2) { + compare(context, arg1, arg2, e); + }); +} + diff --git a/tests/linalg/gen_trange.h b/tests/linalg/gen_trange.h new file mode 100644 index 0000000000..41bda9e796 --- /dev/null +++ b/tests/linalg/gen_trange.h @@ -0,0 +1,25 @@ +#pragma once +#include +#include + +inline TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { + TA_ASSERT(TA_NBs.size() > 0); + + std::default_random_engine gen(0); + std::uniform_int_distribution<> dist(0, TA_NBs.size() - 1); + auto rand_indx = [&]() { return dist(gen); }; + auto rand_nb = [&]() { return TA_NBs[rand_indx()]; }; + + std::vector t_boundaries = {0}; + auto TA_NB = rand_nb(); + while (t_boundaries.back() + TA_NB < N) { + t_boundaries.emplace_back(t_boundaries.back() + TA_NB); + TA_NB = rand_nb(); + } + t_boundaries.emplace_back(N); + + std::vector ranges( + 2, TA::TiledRange1(t_boundaries.begin(), t_boundaries.end())); + + return TA::TiledRange(ranges.begin(), ranges.end()); +}; diff --git a/tests/linalg/heig_tests.h b/tests/linalg/heig_tests.h new file mode 100644 index 0000000000..8a96f4d382 --- /dev/null +++ b/tests/linalg/heig_tests.h @@ -0,0 +1,108 @@ +#pragma once +#include "linalg_fixture.h" +#include "compare_utilities.h" // Tensor comparison utilities +#include "gen_trange.h" // TiledRange generator +#include "misc_util.h" // Misc utilities + +// HEIG Test - INPUT/OUTPUT have the same tiling +template +void ReferenceFixture::heig_same_tiling_test(TA::World& world) { + world.gop.fence(); // Start epoch + + auto trange = gen_trange(N, {128ul}); + std::cout << "TRANGE = " << trange << std::endl; + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + // Solve EVP + auto [evals, evecs] = Derived::heig(A); + BOOST_CHECK(evecs.trange() == A.trange()); // Check for correct trange + + // Check eigenvalue correctness + double tol = N * N * std::numeric_limits::epsilon(); + compare_replicated_vector("TiledArray::non_dist", exact_evals, evals, tol); + + // Check eigenvectors + TA::TArray tmp; + tmp("i,j") = A("i,k") * evecs("k,j"); + A("i,j") = evecs("k,i").conj() * tmp("k,j"); + subtract_diagonal_tensor_inplace(A, evals); + + const auto norm = A("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, tol); + + world.gop.fence(); // End epoch +} + + + +// HEIG Test - INPUT/OUTPUT have different tilings +template +void ReferenceFixture::heig_diff_tiling_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + auto new_trange = gen_trange(N, {64ul} ); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(*GlobalFixture::world, trange); + auto A_new = generate_ta_reference(*GlobalFixture::world, new_trange); + + // Solve EVP + auto [evals, evecs] = Derived::heig(A, new_trange); + BOOST_CHECK(evecs.trange() == new_trange); + + // Check eigenvalue correctness + double tol = N * N * std::numeric_limits::epsilon(); + compare_replicated_vector("TiledArray::non_dist", exact_evals, evals, tol); + + // Check eigenvectors + TA::TArray tmp; + tmp("i,j") = A_new("i,k") * evecs("k,j"); + A_new("i,j") = evecs("k,i").conj() * tmp("k,j"); + subtract_diagonal_tensor_inplace(A_new, evals); + + const auto norm = A_new("i,j").norm(*GlobalFixture::world).get(); + BOOST_CHECK_SMALL(norm, tol); + + GlobalFixture::world->gop.fence(); +} + + + +// Generalized HEIG Test +template +void ReferenceFixture::heig_generalized_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + // Generate Identity Tensor in TA + auto dense_iden = generate_ta_identity(world, trange); + + // Solve EVP + auto [evals, evecs] = Derived::heig(A, dense_iden); + BOOST_CHECK(evecs.trange() == A.trange()); + + // Check eigenvalue correctness + double tol = N * N * std::numeric_limits::epsilon(); + compare_replicated_vector("TiledArray::non_dist", exact_evals, evals, tol); + + // Check eigenvectors + TA::TArray tmp; + tmp("i,j") = A("i,k") * evecs("k,j"); + A("i,j") = evecs("k,i").conj() * tmp("k,j"); + subtract_diagonal_tensor_inplace(A, evals); + + const auto norm = A("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, tol); + + world.gop.fence(); +} diff --git a/tests/linalg/linalg.cpp b/tests/linalg/linalg.cpp new file mode 100644 index 0000000000..dbb379037e --- /dev/null +++ b/tests/linalg/linalg.cpp @@ -0,0 +1,147 @@ +#include +#include +#include "TiledArray/config.h" +//#include "range_fixture.h" +#include "unit_test_config.h" + +#include "linalg_fixture.h" // ReferenceFixture +#include "compare_utilities.h" // Tensor comparison utilities +#include "gen_trange.h" // TiledRange generator + +#include "TiledArray/math/linalg/non-distributed/cholesky.h" +#include "TiledArray/math/linalg/non-distributed/heig.h" +#include "TiledArray/math/linalg/non-distributed/lu.h" +#include "TiledArray/math/linalg/non-distributed/svd.h" + +#include "TiledArray/math/linalg/cholesky.h" +#include "TiledArray/math/linalg/heig.h" +#include "TiledArray/math/linalg/lu.h" +#include "TiledArray/math/linalg/svd.h" + +namespace TA = TiledArray; +namespace non_dist = TA::math::linalg::non_distributed; + +#if TILEDARRAY_HAS_TTG +#include "TiledArray/math/linalg/ttg/cholesky.h" +#define TILEDARRAY_TTG_TEST(F, E) \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray::ttg", non_dist::F, TiledArray::math::linalg::ttg::F, E); +#else +#define TILEDARRAY_TTG_TEST(...) +#endif + + +struct LinearAlgebraFixture : ReferenceFixture<> { }; + + +BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture) + +#if TILEDARRAY_HAS_TTG +BOOST_AUTO_TEST_CASE(cholesky) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + using array_type = TA::TArray; + auto A = generate_ta_reference(*GlobalFixture::world, trange); + + const double epsilon = N * N * std::numeric_limits::epsilon(); + TILEDARRAY_TTG_TEST(cholesky(A), epsilon); + GlobalFixture::world->gop.fence(); +} + +BOOST_AUTO_TEST_CASE(cholesky_linv) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + using array_type = TA::TArray; + auto A = generate_ta_reference(*GlobalFixture::world, trange); + + double epsilon = N * N * std::numeric_limits::epsilon(); + TILEDARRAY_TTG_TEST(cholesky_linv(A), epsilon); + GlobalFixture::world->gop.fence(); +} + +BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + using array_type = TA::TArray; + auto A = generate_ta_reference(*GlobalFixture::world, trange); + + double epsilon = N * N * std::numeric_limits::epsilon(); + TILEDARRAY_TTG_TEST(cholesky_linv(A), epsilon); + GlobalFixture::world->gop.fence(); +} +#endif + +template +void cholesky_qr_q_only_test(const ArrayT& A, double tol) { + using value_type = typename ArrayT::element_type; + + auto Q = TiledArray::math::linalg::cholesky_qr(A); + + // Make sure the Q is orthogonal at least + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); +} + +template +void cholesky_qr_test(const ArrayT& A, double tol) { + auto [Q, R] = TiledArray::math::linalg::cholesky_qr(A); + + // Check reconstruction error + TA::TArray QR_ERROR; + QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); + BOOST_CHECK_SMALL(QR_ERROR("i,j").norm().get(), tol); + + // Check orthonormality of Q + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol); +} + +BOOST_AUTO_TEST_CASE(cholesky_qr_q_only) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + double tol = N * N * std::numeric_limits::epsilon(); + cholesky_qr_q_only_test(ref_ta, tol); + + GlobalFixture::world->gop.fence(); +} + +BOOST_AUTO_TEST_CASE(cholesky_qr) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + double tol = N * N * std::numeric_limits::epsilon(); + cholesky_qr_test(ref_ta, tol); + + GlobalFixture::world->gop.fence(); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/linalg/linalg_fixture.h b/tests/linalg/linalg_fixture.h new file mode 100644 index 0000000000..3578783cd8 --- /dev/null +++ b/tests/linalg/linalg_fixture.h @@ -0,0 +1,120 @@ +#pragma once +#include + +template +struct ReferenceFixture { + size_t N; + std::vector htoeplitz_vector; + std::vector exact_evals; + + inline double matrix_element_generator(int64_t i, int64_t j) { +#if 0 + // Generates a Hankel matrix: absurd condition number + return i+j; +#else + // Generates a Circulant matrix: good condition number + return htoeplitz_vector[std::abs(i - j)]; +#endif + } + + template + inline auto make_ta_reference(Tile& t, TA::Range const& range) { + t = Tile(range, 0.0); + auto lo = range.lobound_data(); + auto up = range.upbound_data(); + for (auto m = lo[0]; m < up[0]; ++m) { + for (auto n = lo[1]; n < up[1]; ++n) { + t(m, n) = matrix_element_generator(m, n); + } + } + + return norm(t); + }; + + template + inline auto generate_ta_reference(TA::World& world, TA::TiledRange trange) { + return TA::make_array(world, trange, + [this](auto& t, TA::Range const& range) -> auto { + return this->make_ta_reference(t,range); + }); + } + + template + inline double make_ta_identity(Tile& t, TA::Range const& range) { + t = Tile(range, 0.0); + auto lo = range.lobound_data(); + auto up = range.upbound_data(); + for (auto m = lo[0]; m < up[0]; ++m) + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) t(m, n) = 1.; + + return t.norm(); + } + + template + inline auto generate_ta_identity(TA::World& world, TA::TiledRange trange) { + return TA::make_array(world, trange, + [this](auto& t, TA::Range const& range) -> auto { + return this->make_ta_identity(t,range); + }); + } + + ReferenceFixture(int64_t N = 1000) + : N(N), htoeplitz_vector(N), exact_evals(N) { + // Generate an hermitian Circulant vector + std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); + htoeplitz_vector[0] = 100; + std::default_random_engine gen(0); + std::uniform_real_distribution<> dist(0., 1.); + for (int64_t i = 1; i <= (N / 2); ++i) { + double val = dist(gen); + htoeplitz_vector[i] = val; + htoeplitz_vector[N - i] = val; + } + + // Compute exact eigenvalues + const double ff = 2. * M_PI / N; + for (int64_t j = 0; j < N; ++j) { + double val = htoeplitz_vector[0]; + ; + for (int64_t k = 1; k < N; ++k) + val += htoeplitz_vector[N - k] * std::cos(ff * j * k); + exact_evals[j] = val; + } + + std::sort(exact_evals.begin(), exact_evals.end()); + } + + + void heig_same_tiling_test(TA::World& world); + void heig_diff_tiling_test(TA::World& world); + void heig_generalized_test(TA::World& world); + + void cholesky_test(TA::World& world); + void cholesky_linv_test(TA::World& world); + void cholesky_linv_retl_test(TA::World& world); + void cholesky_solve_test(TA::World& world); + void cholesky_lsolve_test(TA::World& world); + + void lu_solve_test(TA::World& world); + void lu_inv_test(TA::World& world); + + void svd_values_only_test(TA::World& world); + void svd_leftvectors_test(TA::World& world); + void svd_rightvectors_test(TA::World& world); + void svd_allvectors_test(TA::World& world); + + void householder_qr_q_only_test(TA::World& world); + void householder_qr_test(TA::World& world); +}; + +// Macro to generate tests +#define LINALG_TEST_IMPL_MPI_SAFE(NAME, SERIAL_ONLY) \ +BOOST_AUTO_TEST_CASE(NAME) { \ + const auto world_size = GlobalFixture::world->size(); \ + if(SERIAL_ONLY and world_size > 1) return; \ + NAME##_##test(*GlobalFixture::world); \ +} + +#define LINALG_TEST_IMPL(NAME) LINALG_TEST_IMPL_MPI_SAFE(NAME,false) + diff --git a/tests/linalg/lu_tests.h b/tests/linalg/lu_tests.h new file mode 100644 index 0000000000..b05dc701db --- /dev/null +++ b/tests/linalg/lu_tests.h @@ -0,0 +1,56 @@ +#pragma once +#include "linalg_fixture.h" +#include "compare_utilities.h" // Tensor comparison utilities +#include "gen_trange.h" // TiledRange generator +#include "misc_util.h" // Misc utilities + +// LU Solve (GESV) Test +template +void ReferenceFixture::lu_solve_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto iden = Derived::lu_solve(A, A); + BOOST_CHECK(iden.trange() == A.trange()); + subtract_identity_inplace(iden); // iden -= I + + double epsilon = N * N * std::numeric_limits::epsilon(); + double norm = iden("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, epsilon); + + world.gop.fence(); +} + + + +// LU Inverse (GETRF + GETRI) Test +template +void ReferenceFixture::lu_inv_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + TA::TArray iden(world, trange); + + auto Ainv = Derived::lu_inv(A); + iden("i,j") = Ainv("i,k") * A("k,j"); + + BOOST_CHECK(iden.trange() == A.trange()); + subtract_identity_inplace(iden); // iden -= I + + double epsilon = N * N * std::numeric_limits::epsilon(); + double norm = iden("i,j").norm(world).get(); + + BOOST_CHECK_SMALL(norm, epsilon); + + world.gop.fence(); +} diff --git a/tests/linalg/misc_util.h b/tests/linalg/misc_util.h new file mode 100644 index 0000000000..e6d8c673dc --- /dev/null +++ b/tests/linalg/misc_util.h @@ -0,0 +1,52 @@ +#pragma once +#include + +template +void subtract_diagonal_tensor_inplace(Array& A, const ReplicatedDiag& D) { + + TiledArray::foreach_inplace( A, [=](auto& tile) { + auto range = tile.range(); + auto lo = range.lobound_data(); + auto up = range.upbound_data(); + for (auto m = lo[0]; m < up[0]; ++m) + for (auto n = lo[1]; n < up[1]; ++n) + if (m == n) { tile(m, n) -= D[m]; } + }); + +} + +template +void subtract_identity_inplace(Array& A) { + using element_type = typename Array::element_type; + const auto M = A.trange().dim(0).extent(); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK(M == N); + std::vector D(N,1.0); + subtract_diagonal_tensor_inplace(A, D); +} + +template +void multiply_tensor_by_diag_inplace(char SIDE, Array& A, const ReplicatedDiag& D) { + + TiledArray::foreach_inplace( A, [=](auto& tile) { + auto range = tile.range(); + auto lo = range.lobound_data(); + auto up = range.upbound_data(); + // A(i,j) = D(i,i) * A(i,j) + if(SIDE == 'L') { + for (auto m = lo[0]; m < up[0]; ++m) { + const auto d = D[m]; + for (auto n = lo[1]; n < up[1]; ++n) { + tile(m, n) *= d; + } + } + // A(i,j) = A(i,j) * D(j,j) + } else { + for (auto m = lo[0]; m < up[0]; ++m) + for (auto n = lo[1]; n < up[1]; ++n) { + tile(m, n) *= D[n]; + } + } + }); + +} diff --git a/tests/linalg/non_dist.cpp b/tests/linalg/non_dist.cpp new file mode 100644 index 0000000000..1dcb0344fb --- /dev/null +++ b/tests/linalg/non_dist.cpp @@ -0,0 +1,96 @@ +#include "heig_tests.h" // EVP tests +#include "cholesky_tests.h" // Cholesky tests +#include "lu_tests.h" // LU tests +#include "svd_tests.h" // SVD tests +#include "qr_tests.h" // QR tests + +// Non-distributed linear algebra utilities +#include "TiledArray/math/linalg/non-distributed/cholesky.h" +#include "TiledArray/math/linalg/non-distributed/heig.h" +#include "TiledArray/math/linalg/non-distributed/lu.h" +#include "TiledArray/math/linalg/non-distributed/svd.h" + +namespace TA = TiledArray; +namespace non_dist = TA::math::linalg::non_distributed; + +struct NonDistLinearAlgebraFixture : ReferenceFixture { + + NonDistLinearAlgebraFixture(int64_t N = 1000) : + ReferenceFixture(N) {} + + template + static auto heig(Args&&... args) { + return non_dist::heig(std::forward(args)...); + } + + template + static auto cholesky(Args&&... args) { + return non_dist::cholesky(std::forward(args)...); + } + + template + static auto cholesky_linv(Args&&... args) { + return non_dist::cholesky_linv(std::forward(args)...); + } + + template + static auto cholesky_solve(Args&&... args) { + return non_dist::cholesky_solve(std::forward(args)...); + } + + template + static auto cholesky_lsolve(Args&&... args) { + return non_dist::cholesky_lsolve(std::forward(args)...); + } + + template + static auto lu_solve(Args&&... args) { + return non_dist::lu_solve(std::forward(args)...); + } + + template + static auto lu_inv(Args&&... args) { + return non_dist::lu_inv(std::forward(args)...); + } + + template + static auto svd(Args&&... args) { + return non_dist::svd(std::forward(args)...); + } + + template + static auto householder_qr(Args&&... args) { + return non_dist::householder_qr(std::forward(args)...); + } +}; + + +BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite_non_dist, NonDistLinearAlgebraFixture) + +// HEIG tests +LINALG_TEST_IMPL(heig_same_tiling); +LINALG_TEST_IMPL(heig_diff_tiling); +LINALG_TEST_IMPL(heig_generalized); + +// Cholesky tests +LINALG_TEST_IMPL(cholesky); +LINALG_TEST_IMPL(cholesky_linv); +LINALG_TEST_IMPL(cholesky_linv_retl); +LINALG_TEST_IMPL(cholesky_solve); +LINALG_TEST_IMPL(cholesky_lsolve); + +// LU tests +LINALG_TEST_IMPL(lu_solve); +LINALG_TEST_IMPL(lu_inv); + +// SVD tests +LINALG_TEST_IMPL(svd_values_only); +LINALG_TEST_IMPL(svd_leftvectors); +LINALG_TEST_IMPL(svd_rightvectors); +LINALG_TEST_IMPL(svd_allvectors); + +// QR tests +LINALG_TEST_IMPL(householder_qr_q_only); +LINALG_TEST_IMPL(householder_qr); + +BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/linalg/qr_tests.h b/tests/linalg/qr_tests.h new file mode 100644 index 0000000000..7f1adf43b6 --- /dev/null +++ b/tests/linalg/qr_tests.h @@ -0,0 +1,61 @@ + +#pragma once +#include "linalg_fixture.h" +#include "compare_utilities.h" // Tensor comparison utilities +#include "gen_trange.h" // TiledRange generator +#include "misc_util.h" // Misc utilities + + +template +void ReferenceFixture::householder_qr_q_only_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + // Compute Q + auto Q = Derived::template householder_qr(A); + + // Make sure the Q is orthogonal at least + double tol = N * N * std::numeric_limits::epsilon(); + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); + subtract_identity_inplace(Iden); + const auto norm = Iden("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, tol); + + world.gop.fence(); +} + +template +void ReferenceFixture::householder_qr_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + // Compute QR + auto [Q, R] = Derived::template householder_qr(A); + + double tol = N * N * std::numeric_limits::epsilon(); + + // Check reconstruction error + TA::TArray QR_ERROR; + QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); + BOOST_CHECK_SMALL(QR_ERROR("i,j").norm(world).get(), tol); + + // Check orthonormality of Q + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); + subtract_identity_inplace(Iden); + const auto norm = Iden("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, tol); + + world.gop.fence(); +} diff --git a/tests/linalg/scalapack.cpp b/tests/linalg/scalapack.cpp new file mode 100644 index 0000000000..618890771f --- /dev/null +++ b/tests/linalg/scalapack.cpp @@ -0,0 +1,244 @@ +#include "heig_tests.h" // EVP tests +#include "cholesky_tests.h" // Cholesky tests +#include "lu_tests.h" // LU tests +#include "svd_tests.h" // SVD tests +#include "qr_tests.h" // QR tests + +// ScaLAPACK linear algebra utilities +#include "TiledArray/math/linalg/scalapack/all.h" + +namespace TA = TiledArray; +namespace scalapack = TA::math::linalg::scalapack; + +struct ScaLAPACKLinearAlgebraFixture : + ReferenceFixture { + + blacspp::Grid grid; + scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? + + ScaLAPACKLinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) : + ReferenceFixture(N), + grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? + ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) { + + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + if (ref_matrix.dist().i_own(i, j)) { + auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j); + ref_matrix.local_mat()(i_local, j_local) = + matrix_element_generator(i, j); + } + } + } + + } + + template + static auto heig(Args&&... args) { + return scalapack::heig(std::forward(args)...); + } + + template + static auto cholesky(Args&&... args) { + return scalapack::cholesky(std::forward(args)...); + } + + template + static auto cholesky_linv(Args&&... args) { + return scalapack::cholesky_linv(std::forward(args)...); + } + + template + static auto cholesky_solve(Args&&... args) { + return scalapack::cholesky_solve(std::forward(args)...); + } + + template + static auto cholesky_lsolve(Args&&... args) { + return scalapack::cholesky_lsolve(std::forward(args)...); + } + + template + static auto lu_solve(Args&&... args) { + return scalapack::lu_solve(std::forward(args)...); + } + + template + static auto lu_inv(Args&&... args) { + return scalapack::lu_inv(std::forward(args)...); + } + + template + static auto svd(Args&&... args) { + return scalapack::svd(std::forward(args)...); + } + + template + static auto householder_qr(Args&&... args) { + return scalapack::householder_qr(std::forward(args)...); + } + + + + + template + void block_cyclic_to_tiled_array_test(TA::TiledRange& trange, TA::World& world) { + + world.gop.fence(); + + // Generate Reference Tensor + auto ref_ta = generate_ta_reference(world, trange); + world.gop.fence(); + + // Convert reference matrix to Tensor + auto test_ta = + scalapack::block_cyclic_to_array(ref_matrix, trange); + world.gop.fence(); + + auto norm_diff = + (ref_ta("i,j") - test_ta("i,j")).norm(world).get(); + + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); + + world.gop.fence(); + + } + + template + void tiled_array_to_block_cyclic_test(TA::TiledRange& trange, TA::World& world) { + + world.gop.fence(); + + // Generate Reference Tensor + auto ref_ta = generate_ta_reference(world, trange); + world.gop.fence(); + + // Convert reference tensor to matrix + auto NB = ref_matrix.dist().nb(); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); + world.gop.fence(); + + double local_norm_diff = + (test_matrix.local_mat() - ref_matrix.local_mat()).norm(); + local_norm_diff *= local_norm_diff; + + double norm_diff; + MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD); + + norm_diff = std::sqrt(norm_diff); + + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); + + world.gop.fence(); + + } + +}; + + +BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite_scalapack, ScaLAPACKLinearAlgebraFixture) + +using ta_test_types = boost::mpl::list< + TA::DistArray, TA::DensePolicy>, + TA::DistArray, TA::DensePolicy>, + TA::DistArray, TA::SparsePolicy>, + TA::DistArray, TA::SparsePolicy> +>; + +// ScaLAPACK -> TA, tilings equal +BOOST_AUTO_TEST_CASE_TEMPLATE(block_cyclic_to_tiled_array_equal, array_type, ta_test_types) { + auto [M, N] = ref_matrix.dims(); + auto NB = ref_matrix.dist().nb(); + BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square + + auto trange = gen_trange(N, {static_cast(NB)}); + block_cyclic_to_tiled_array_test(trange, *GlobalFixture::world); +}; + +// ScaLAPACK -> TA, tiled range smaller than NB +BOOST_AUTO_TEST_CASE_TEMPLATE(block_cyclic_to_tiled_array_all_small, array_type, ta_test_types) { + auto [M, N] = ref_matrix.dims(); + auto NB = ref_matrix.dist().nb(); + BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square + + auto trange = gen_trange(N, {static_cast(NB/2)}); + block_cyclic_to_tiled_array_test(trange, *GlobalFixture::world); +}; + +// ScaLAPACK -> TA, random tiling +BOOST_AUTO_TEST_CASE_TEMPLATE(block_cyclic_to_tiled_array_random, array_type, ta_test_types) { + auto [M, N] = ref_matrix.dims(); + BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square + + auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); + block_cyclic_to_tiled_array_test(trange, *GlobalFixture::world); +}; + + + +// TA -> ScaLAPACK: tilings equal +BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_block_cyclic_equal, array_type, ta_test_types) { + auto [M, N] = ref_matrix.dims(); + auto NB = ref_matrix.dist().nb(); + BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square + + auto trange = gen_trange(N, {static_cast(NB)}); + tiled_array_to_block_cyclic_test(trange, *GlobalFixture::world); +}; + +// TA -> ScaLAPACK, tiled range smaller than NB +BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_block_cyclic_all_small, array_type, ta_test_types) { + auto [M, N] = ref_matrix.dims(); + auto NB = ref_matrix.dist().nb(); + BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square + + auto trange = gen_trange(N, {static_cast(NB/2)}); + tiled_array_to_block_cyclic_test(trange, *GlobalFixture::world); +}; + +// TA -> ScaLAPACK, random tiling +BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_block_cyclic_random, array_type, ta_test_types) { + auto [M, N] = ref_matrix.dims(); + BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square + + auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); + tiled_array_to_block_cyclic_test(trange, *GlobalFixture::world); +}; + + +BOOST_AUTO_TEST_CASE(const_tiled_array_to_block_cyclic) { + // Just check that it compiles, meat is tested elsewhere + using array_type = const TA::TArray; + using my_t = decltype(scalapack::array_to_block_cyclic(std::declval(), std::declval(), std::declval(), std::declval())); + constexpr auto my_bool = std::is_same_v>; + BOOST_REQUIRE(my_bool); +}; + +// HEIG tests +LINALG_TEST_IMPL(heig_same_tiling); +LINALG_TEST_IMPL(heig_diff_tiling); +LINALG_TEST_IMPL(heig_generalized); + +// Cholesky tests +LINALG_TEST_IMPL(cholesky); +LINALG_TEST_IMPL(cholesky_linv); +LINALG_TEST_IMPL(cholesky_linv_retl); +LINALG_TEST_IMPL(cholesky_solve); +LINALG_TEST_IMPL(cholesky_lsolve); + +// LU tests +LINALG_TEST_IMPL(lu_solve); +LINALG_TEST_IMPL(lu_inv); + +// SVD tests +LINALG_TEST_IMPL(svd_values_only); +LINALG_TEST_IMPL(svd_leftvectors); +LINALG_TEST_IMPL(svd_rightvectors); +LINALG_TEST_IMPL(svd_allvectors); + +// QR tests +LINALG_TEST_IMPL(householder_qr_q_only); +LINALG_TEST_IMPL(householder_qr); + +BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/linalg/slate.cpp b/tests/linalg/slate.cpp new file mode 100644 index 0000000000..d58a81b6cd --- /dev/null +++ b/tests/linalg/slate.cpp @@ -0,0 +1,213 @@ +#include "heig_tests.h" // EVP tests +#include "cholesky_tests.h" // Cholesky tests +#include "lu_tests.h" // LU tests +#include "svd_tests.h" // SVD tests +#include "qr_tests.h" // QR tests + +// SLATE linear algebra utilities +#include +#include +#include +#include +#include +#include + +namespace TA = TiledArray; +namespace slate_la = TA::math::linalg::slate; + +struct SLATELinearAlgebraFixture : + ReferenceFixture { + + SLATELinearAlgebraFixture(int64_t N = 1000) : + ReferenceFixture(N) {} + + slate::Matrix make_ref_slate(int64_t N, TA::SlateFunctors& slate_functors, + MPI_Comm comm) { + + slate::Matrix A(N, N, slate_functors.tileMb(), slate_functors.tileNb(), + slate_functors.tileRank(), slate_functors.tileDevice(), comm); + + A.insertLocalTiles(); + int64_t j_off = 0; + for (int64_t j = 0; j < A.nt(); ++j) { + + int64_t i_off = 0; + for (int64_t i = 0; i < A.mt(); ++i) { + + if(A.tileIsLocal(i,j)) { + auto T = A(i,j); + for(auto jj = 0; jj < T.nb(); ++jj) + for(auto ii = 0; ii < T.mb(); ++ii) { + T.at(ii,jj) = matrix_element_generator(i_off+ii,j_off+jj); + } + } + + i_off += A.tileMbFunc()(i); + } + + j_off += A.tileNbFunc()(j); + } + + return A; + } + + template + static auto heig(Args&&... args) { + return slate_la::heig(std::forward(args)...); + } + + template + static auto cholesky(Args&&... args) { + return slate_la::cholesky(std::forward(args)...); + } + + template + static auto cholesky_linv(Args&&... args) { + return slate_la::cholesky_linv(std::forward(args)...); + } + + template + static auto cholesky_solve(Args&&... args) { + return slate_la::cholesky_solve(std::forward(args)...); + } + + template + static auto cholesky_lsolve(Args&&... args) { + return slate_la::cholesky_lsolve(std::forward(args)...); + } + + template + static auto lu_solve(Args&&... args) { + return slate_la::lu_solve(std::forward(args)...); + } + + template + static auto lu_inv(Args&&... args) { + return slate_la::lu_inv(std::forward(args)...); + } + + template + static auto svd(Args&&... args) { + return slate_la::svd(std::forward(args)...); + } + + template + static auto householder_qr(Args&&... args) { + return slate_la::householder_qr(std::forward(args)...); + } + + + template + void tiled_array_to_slate_test(TA::TiledRange& trange, TA::World& world) { + + world.gop.fence(); + + auto ref_ta = generate_ta_reference(world, trange); + world.gop.fence(); + + auto slate_matrix = TA::array_to_slate(ref_ta); + world.gop.fence(); + BOOST_CHECK( slate_matrix.mt() == trange.dim(0).tile_extent() ); + BOOST_CHECK( slate_matrix.nt() == trange.dim(1).tile_extent() ); + BOOST_CHECK( slate_matrix.m() == N ); + BOOST_CHECK( slate_matrix.n() == N ); + + TA::SlateFunctors slate_functors( trange, ref_ta.pmap() ); + auto ref_slate = this->make_ref_slate(N, slate_functors, MPI_COMM_WORLD); + + slate::add( 1.0, ref_slate, -1.0, slate_matrix ); + auto norm_diff = slate::norm(slate::Norm::Fro, slate_matrix); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); + + world.gop.fence(); + + } + + template + void slate_to_tiled_array_test(TA::TiledRange& trange, TA::World& world) { + + world.gop.fence(); + + auto ref_ta = generate_ta_reference(world, trange); + world.gop.fence(); + + TA::SlateFunctors slate_functors( trange, ref_ta.pmap() ); + auto ref_slate = this->make_ref_slate(N, slate_functors, MPI_COMM_WORLD); + + world.gop.fence(); + auto test_ta = TA::slate_to_array(ref_slate, world); + world.gop.fence(); + + auto norm_diff = (ref_ta("i,j") - test_ta("i,j")).norm(world).get(); + BOOST_CHECK_SMALL(norm_diff, std::numeric_limits::epsilon()); + + world.gop.fence(); + + } +}; + + +BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite_slate, SLATELinearAlgebraFixture) + +using ta_test_types = boost::mpl::list< + TA::DistArray, TA::DensePolicy>, + TA::DistArray, TA::DensePolicy>, + TA::DistArray, TA::SparsePolicy>, + TA::DistArray, TA::SparsePolicy> +>; + +// SLATE -> TA: tilings equal +BOOST_AUTO_TEST_CASE_TEMPLATE(slate_to_tiled_array_equal, array_type, ta_test_types) { + auto trange = gen_trange(N, {static_cast(128)}); + slate_to_tiled_array_test(trange, *GlobalFixture::world); +}; + +// SLATE -> TA, random tiling +BOOST_AUTO_TEST_CASE_TEMPLATE(slate_to_tiled_array_random, array_type, ta_test_types) { + auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); + slate_to_tiled_array_test(trange, *GlobalFixture::world); +}; + + +// TA -> SLATE: tilings equal +BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_slate_equal, array_type, ta_test_types) { + auto trange = gen_trange(N, {static_cast(128)}); + tiled_array_to_slate_test(trange, *GlobalFixture::world); +}; + +// TA -> SLATE, random tiling +BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_slate_random, array_type, ta_test_types) { + auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul}); + tiled_array_to_slate_test(trange, *GlobalFixture::world); +}; + +// HEIG tests (serial only) +// TODO: Can make parallel-capable when the following issue is closed +// https://github.com/icl-utk-edu/slate/issues/102 +LINALG_TEST_IMPL_MPI_SAFE(heig_same_tiling, true); +LINALG_TEST_IMPL_MPI_SAFE(heig_diff_tiling, true); +LINALG_TEST_IMPL_MPI_SAFE(heig_generalized, true); + +// Cholesky tests +LINALG_TEST_IMPL(cholesky); +LINALG_TEST_IMPL(cholesky_linv); +LINALG_TEST_IMPL(cholesky_linv_retl); +LINALG_TEST_IMPL(cholesky_solve); +LINALG_TEST_IMPL(cholesky_lsolve); + +// LU tests +LINALG_TEST_IMPL(lu_solve); +LINALG_TEST_IMPL(lu_inv); + +// SVD tests +LINALG_TEST_IMPL(svd_values_only); +LINALG_TEST_IMPL(svd_leftvectors); +LINALG_TEST_IMPL(svd_rightvectors); +LINALG_TEST_IMPL(svd_allvectors); + +// QR tests +LINALG_TEST_IMPL(householder_qr_q_only); +LINALG_TEST_IMPL(householder_qr); + +BOOST_AUTO_TEST_SUITE_END() + diff --git a/tests/linalg/svd_tests.h b/tests/linalg/svd_tests.h new file mode 100644 index 0000000000..f3f01e6399 --- /dev/null +++ b/tests/linalg/svd_tests.h @@ -0,0 +1,131 @@ +#pragma once +#include "linalg_fixture.h" +#include "compare_utilities.h" // Tensor comparison utilities +#include "gen_trange.h" // TiledRange generator +#include "misc_util.h" // Misc utilities + + +template +void ReferenceFixture::svd_values_only_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto S = Derived::template svd(A, trange, trange); + + std::vector exact_singular_values = exact_evals; + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); + + // Check singular value correctness + double tol = N * N * std::numeric_limits::epsilon(); + compare_replicated_vector("TiledArray::Derived", exact_singular_values, S, tol); + + world.gop.fence(); +} + +template +void ReferenceFixture::svd_leftvectors_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto [S, U] = Derived::template svd(A, trange, trange); + + std::vector exact_singular_values = exact_evals; + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); + + // Check singular value correctness + double tol = N * N * std::numeric_limits::epsilon(); + compare_replicated_vector("TiledArray::Derived", exact_singular_values, S, tol); + + // Since A is Hermitian, U is also A's eigenvectors + // A <- U**H * A * U + TA::TArray tmp; + tmp("i,j") = A("i,k") * U("k,j"); + A("i,j") = U("k,i").conj() * tmp("k,j"); + subtract_diagonal_tensor_inplace(A, S); // A -= SIGMA + + const auto norm = A("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, tol); + + world.gop.fence(); +} + +template +void ReferenceFixture::svd_rightvectors_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto [S, VT] = Derived::template svd(A, trange, trange); + + std::vector exact_singular_values = exact_evals; + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); + + // Check singular value correctness + double tol = N * N * std::numeric_limits::epsilon(); + compare_replicated_vector("TiledArray::Derived", exact_singular_values, S, tol); + + + // Since A is Hermitian, VT is also (the c-transpose) A's eigenvectors + // A <- VT * A * VT**H + TA::TArray tmp; + tmp("i,j") = A("i,k") * VT("j,k").conj(); + A("i,j") = VT("i,k") * tmp("k,j"); + subtract_diagonal_tensor_inplace(A, S); // A -= SIGMA + + const auto norm = A("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, tol); + + + world.gop.fence(); +} + +template +void ReferenceFixture::svd_allvectors_test(TA::World& world) { + world.gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + // Generate Reference Tensor in TA + using array_type = TA::TArray; + auto A = generate_ta_reference(world, trange); + + auto [S, U, VT] = Derived::template svd(A, trange, trange); + + std::vector exact_singular_values = exact_evals; + std::sort(exact_singular_values.begin(), exact_singular_values.end(), + std::greater()); + + // Check singular value correctness + double tol = N * N * std::numeric_limits::epsilon(); + compare_replicated_vector("TiledArray::Derived", exact_singular_values, S, tol); + + // Recreate SVD + // A <- U**H * A * VT**H + TA::TArray tmp; + tmp("i,j") = A("i,k") * VT("j,k").conj(); + A("i,j") = U("k,i").conj() * tmp("k,j"); + subtract_diagonal_tensor_inplace(A, S); // A -= SIGMA + + const auto norm = A("i,j").norm(world).get(); + BOOST_CHECK_SMALL(norm, tol); + + + world.gop.fence(); +}