diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index fd9c49aefa..b6fe18c009 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -7,6 +7,7 @@ default:
variables:
MAD_NUM_THREADS : 2
+ OMP_NUM_THREADS : 1
TA_TARGETS : "tiledarray examples-tiledarray ta_test check-tiledarray"
# Debug builds with ScaLAPACK=ON need increased TA_UT_CTEST_TIMEOUT
TA_CONFIG : >
@@ -18,6 +19,7 @@ variables:
${BLA_VENDOR}
${BLA_THREADS}
${ENABLE_SCALAPACK}
+ ${ENABLE_SLATE}
before_script:
# NB: if CMAKE_BUILD_PARALLEL_LEVEL is not set (i.e. using shared runner), use 1 to ensure we have enough memory
@@ -35,6 +37,7 @@ ubuntu:
variables:
TA_PYTHON : "TA_PYTHON=ON"
ENABLE_SCALAPACK : "ENABLE_SCALAPACK=OFF"
+ ENABLE_SLATE : "ENABLE_SLATE=OFF"
script:
- ./ci/.build-project
--build ./build
@@ -69,6 +72,7 @@ ubuntu:
CXX: [ g++, clang++-13 ]
BUILD_TYPE : [ "Release", "Debug" ]
ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ]
+ ENABLE_SLATE : [ "ENABLE_SLATE=ON", "ENABLE_SLATE=OFF" ]
RUNNER_TAGS: [ linux ]
- IMAGE : [ "ubuntu:22.04", "ubuntu:20.04" ]
CXX: [ g++ ]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a50f0a789f..cd0c2759fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -121,6 +121,9 @@ add_feature_info(MPI ENABLE_MPI "Message-Passing Interface supports distributed-
option(ENABLE_SCALAPACK "Enable ScaLAPACK Bindings in TiledArray" OFF)
add_feature_info(ScaLAPACK ENABLE_SCALAPACK "ScaLAPACK provides distributed linear algebra")
+option(ENABLE_SLATE "Enable SLATE Bindings in TiledArray" OFF)
+add_feature_info(SLATE ENABLE_SLATE "SLATE provides distributed linear algebra on GPU architectures")
+
option(ENABLE_WFN91_LINALG_DISCOVERY_KIT "Use linear algebra discovery kit from github.com/wavefunction91 [recommended]" ON)
add_feature_info(WFN91LinearAlgebraDiscoveryKit ENABLE_WFN91_LINALG_DISCOVERY_KIT "Linear algebra discovery kit from github.com/wavefunction91 supports many more corner cases than the default CMake modules and/or ICL's BLAS++/LAPACK++ modules")
@@ -338,6 +341,9 @@ include(${PROJECT_SOURCE_DIR}/cmake/modules/FindOrFetchBoost.cmake)
if(ENABLE_SCALAPACK)
include(external/scalapackpp.cmake)
endif()
+if(ENABLE_SLATE)
+ include(external/slate.cmake)
+endif()
# optional deps:
# 1. ccache
diff --git a/cmake/modules/FindOrFetchSLATE.cmake b/cmake/modules/FindOrFetchSLATE.cmake
new file mode 100644
index 0000000000..d5a34c2b5a
--- /dev/null
+++ b/cmake/modules/FindOrFetchSLATE.cmake
@@ -0,0 +1,32 @@
+# Try find_package
+if (NOT TARGET slate)
+ find_package(slate QUIET CONFIG)
+ if (TARGET slate)
+ message(STATUS "Found SLATE CONFIG at ${slate_CONFIG}")
+ endif (TARGET slate)
+endif (NOT TARGET slate)
+
+# If not found, build via FetchContent
+if (NOT TARGET slate)
+
+ # Make sure BLAS++/LAPACK++ are already in place
+ # (will typically be loaded from BTAS)
+ include(${vg_cmake_kit_SOURCE_DIR}/modules/FindOrFetchLinalgPP.cmake)
+
+ if (NOT TILEDARRAY_HAS_CUDA)
+ set(gpu_backend none CACHE STRING "Device Backend for ICL Linalg++/SLATE")
+ endif (NOT TILEDARRAY_HAS_CUDA)
+
+ include(FetchContent)
+ FetchContent_Declare(
+ slate
+ GIT_REPOSITORY https://github.com/icl-utk-edu/slate.git
+ GIT_TAG ${TA_TRACKED_SLATE_TAG}
+ )
+ FetchContent_MakeAvailable(slate)
+
+endif (NOT TARGET slate)
+
+if (NOT TARGET slate)
+ message( FATAL_ERROR "FindOrFetchSLATE could not make slate target available")
+endif (NOT TARGET slate)
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index f74d35345a..d670a22015 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -33,6 +33,7 @@ add_subdirectory (cuda)
add_subdirectory (dgemm)
add_subdirectory (demo)
add_subdirectory (scalapack)
+add_subdirectory (slate)
add_subdirectory (fock)
add_subdirectory (mpi_tests)
add_subdirectory (pmap_test)
diff --git a/examples/slate/CMakeLists.txt b/examples/slate/CMakeLists.txt
new file mode 100644
index 0000000000..e704ac94e4
--- /dev/null
+++ b/examples/slate/CMakeLists.txt
@@ -0,0 +1,38 @@
+#
+# This file is a part of TiledArray.
+# Copyright (C) 2016 Virginia Tech
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Drew Lewis
+# Department of Chemistry, Virginia Tech
+#
+# CMakeLists.txt
+# Dec 6th, 2016
+#
+
+# Create example executable
+
+if(ENABLE_SLATE)
+
+foreach(_exec conversion)
+
+ # Add executable
+ add_executable(slate-${_exec} EXCLUDE_FROM_ALL ${_exec}.cpp)
+ target_link_libraries(slate-${_exec} PRIVATE tiledarray)
+ add_dependencies(examples-tiledarray slate-${_exec})
+
+endforeach()
+
+endif(ENABLE_SLATE)
diff --git a/examples/slate/conversion.cpp b/examples/slate/conversion.cpp
new file mode 100644
index 0000000000..851f066743
--- /dev/null
+++ b/examples/slate/conversion.cpp
@@ -0,0 +1,140 @@
+/*
+ * This file is a part of TiledArray.
+ * Copyright (C) 2020 Virginia Tech
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * David Williams-Young
+ * Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ * conversion.cpp
+ * Created: 7 Feb, 2020
+ * Edited: 13 May, 2020
+ *
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+
+
+template <typename Integral1, typename Integral2>
+int64_t div_ceil(Integral1 x, Integral2 y) {
+ int64_t x_ll = x;
+ int64_t y_ll = y;
+
+ auto d = std::div(x_ll, y_ll);
+ return d.quot + !!d.rem;
+}
+
+TA::TiledRange gen_trange(size_t N, const std::vector<size_t>& TA_NBs) {
+ assert(TA_NBs.size() > 0);
+
+ std::default_random_engine gen(0);
+ std::uniform_int_distribution<> dist(0, TA_NBs.size() - 1);
+ auto rand_indx = [&]() { return dist(gen); };
+ auto rand_nb = [&]() { return TA_NBs[rand_indx()]; };
+
+  std::vector<size_t> t_boundaries = {0};
+ auto TA_NB = rand_nb();
+ while (t_boundaries.back() + TA_NB < N) {
+ t_boundaries.emplace_back(t_boundaries.back() + TA_NB);
+ TA_NB = rand_nb();
+ }
+ t_boundaries.emplace_back(N);
+
+  std::vector<TA::TiledRange1> ranges(
+ 2, TA::TiledRange1(t_boundaries.begin(), t_boundaries.end()));
+
+ return TA::TiledRange(ranges.begin(), ranges.end());
+};
+
+
+
+auto make_square_proc_grid(MPI_Comm comm) {
+ int mpi_size; MPI_Comm_size(comm, &mpi_size);
+ int p,q;
+ for(p = int( sqrt( mpi_size ) ); p > 0; --p) {
+ q = int( mpi_size / p );
+ if(p*q == mpi_size) break;
+ }
+ return std::make_pair(p,q);
+}
+
+
+
+
+
+
+
+
+
+int main(int argc, char** argv) {
+ auto& world = TA::initialize(argc, argv);
+ {
+ int64_t N = argc > 1 ? std::stoi(argv[1]) : 1000;
+ size_t NB = argc > 2 ? std::stoi(argv[2]) : 128;
+
+ auto make_ta_reference =
+        [&](TA::Tensor<double>& t, TA::Range const& range) {
+
+          t = TA::Tensor<double>(range, 0.0);
+ auto lo = range.lobound_data();
+ auto up = range.upbound_data();
+ for (int m = lo[0]; m < up[0]; ++m) {
+ for (int n = lo[1]; n < up[1]; ++n) {
+ t(m, n) = m - n;
+ }
+ }
+
+ return t.norm();
+ };
+
+ // Generate Reference TA tensor.
+ auto trange = gen_trange(N, {NB});
+ auto ref_ta =
+        TA::make_array<TA::TArray<double>>(world, trange, make_ta_reference);
+
+ // Do Conversion
+ auto A = TA::array_to_slate( ref_ta );
+    auto A_ta = TA::slate_to_array<TA::TArray<double>>(A, world);
+ world.gop.fence();
+
+ // Slate matrix to eigen
+ Eigen::MatrixXd slate_eigen = Eigen::MatrixXd::Zero(N,N);
+ for (int64_t j = 0; j < A.nt(); ++j)
+ for (int64_t i = 0; i < A.mt(); ++i) {
+ A.tileBcast(i,j, A, slate::Layout::ColMajor);
+ auto T = A(i,j);
+        Eigen::Map<Eigen::MatrixXd> T_map( T.data(), T.mb(), T.nb() );
+ slate_eigen.block(i*NB,j*NB,T.mb(), T.nb()) = T_map;
+ }
+ //if(!world.rank()) {
+ //std::cout << "SLATE\n" << slate_eigen << std::endl;
+ //}
+
+ A_ta.make_replicated();
+ world.gop.fence();
+ auto A_eigen = TA::array_to_eigen(A_ta);
+ //if(!world.rank()) std::cout << "TA\n" << A_eigen << std::endl;
+ std::cout << (A_eigen - slate_eigen).norm() << std::endl;
+
+
+ }
+
+ TA::finalize();
+}
diff --git a/external/slate.cmake b/external/slate.cmake
new file mode 100644
index 0000000000..9c177d32af
--- /dev/null
+++ b/external/slate.cmake
@@ -0,0 +1,21 @@
+if (NOT TARGET slate)
+ set(VGCMAKEKIT_TRACKED_SLATE_TAG ${TA_TRACKED_SLATE_TAG} CACHE STRING "slate tag")
+ include(FindOrFetchSLATE)
+endif()
+
+# built slate as a subproject? install as part of tiledarray export as well
+# to be able to use TiledArray_SLATE from the build tree
+if (TARGET slate)
+ install( TARGETS slate EXPORT tiledarray COMPONENT tiledarray )
+ # Add these dependencies to External
+ add_dependencies(External-tiledarray slate )
+endif()
+
+if (TARGET slate)
+ add_library( TiledArray_SLATE INTERFACE )
+ target_link_libraries( TiledArray_SLATE INTERFACE slate )
+
+ install( TARGETS TiledArray_SLATE EXPORT tiledarray COMPONENT tiledarray )
+
+ set( TILEDARRAY_HAS_SLATE 1 )
+endif()
diff --git a/external/versions.cmake b/external/versions.cmake
index 6f6c05c977..19c50d392b 100644
--- a/external/versions.cmake
+++ b/external/versions.cmake
@@ -36,6 +36,9 @@ set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v6.0.0)
set(TA_TRACKED_SCALAPACKPP_TAG 6397f52cf11c0dfd82a79698ee198a2fce515d81)
set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 711ef363479a90c88788036f9c6c8adb70736cbf )
+set(TA_TRACKED_SLATE_TAG 04d552f12e0d181c27652ecd3ebedb265b7eec07)
+set(TA_TRACKED_SLATE_PREVIOUS_TAG 8651441aa87cd69b560d4dac8c5ceb0e7f8c32a4)
+
set(TA_TRACKED_RANGEV3_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864)
set(TA_TRACKED_RANGEV3_PREVIOUS_TAG dbdaa247a25a0daa24c68f1286a5693c72ea0006)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a8b3d6f4fd..b5ecc25f4b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -274,6 +274,9 @@ endif(CUDA_FOUND)
if( TARGET TiledArray_SCALAPACK )
list(APPEND _TILEDARRAY_DEPENDENCIES TiledArray_SCALAPACK)
endif()
+if( TARGET TiledArray_SLATE )
+ list(APPEND _TILEDARRAY_DEPENDENCIES TiledArray_SLATE)
+endif()
list(APPEND _TILEDARRAY_DEPENDENCIES "${LAPACK_LIBRARIES}")
if( TARGET ttg-parsec )
diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in
index 0c4d5d5cbc..f136ea523c 100644
--- a/src/TiledArray/config.h.in
+++ b/src/TiledArray/config.h.in
@@ -74,6 +74,9 @@
/* Define if TA has enabled ScaLAPACK Bindings */
#cmakedefine TILEDARRAY_HAS_SCALAPACK 1
+/* Define if TA has enabled SLATE Bindings */
+#cmakedefine TILEDARRAY_HAS_SLATE 1
+
/* Define if TiledArray configured with CUDA support */
#cmakedefine TILEDARRAY_HAS_CUDA @TILEDARRAY_HAS_CUDA@
#cmakedefine TILEDARRAY_CHECK_CUDA_ERROR @TILEDARRAY_CHECK_CUDA_ERROR@
diff --git a/src/TiledArray/conversions/slate.h b/src/TiledArray/conversions/slate.h
new file mode 100644
index 0000000000..3212b763f6
--- /dev/null
+++ b/src/TiledArray/conversions/slate.h
@@ -0,0 +1,367 @@
+#ifndef TILEDARRAY_CONVERSIONS_SLATE_H
+#define TILEDARRAY_CONVERSIONS_SLATE_H
+
+#include <TiledArray/config.h>  // TILEDARRAY_HAS_SLATE
+#if TILEDARRAY_HAS_SLATE
+
+#include <slate/slate.hh>                 // slate::Matrix
+#include <TiledArray/type_traits.h>       // TA::numeric_type
+#include <TiledArray/dist_array.h>        // is_dense
+#include <TiledArray/pmap/user_pmap.h>    // {,User}Pmap
+#include <TiledArray/external/eigen.h>    // Eigen::{Matrix,Map}
+#include <TiledArray/external/madness.h>  // MADNESS
+
+namespace TiledArray {
+namespace detail {
+
+/// C++14-esque typename wrapper for `numeric_type<T>::type`
+template <typename T>
+using numeric_type_t = typename numeric_type<T>::type;
+
+/// Deduce SLATE Matrix type from Array type
+template <typename Array>
+using slate_type_from_array_t =
+    slate::Matrix<numeric_type_t<Array>>;
+
+
+template <typename Tile, typename T>
+auto slate_to_ta_tile(slate::Tile<T> slate_tile, TA::Range const& range) {
+
+  using col_major_mat_t = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
+  using row_major_mat_t = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+
+  using col_major_map_t = Eigen::Map<col_major_mat_t>;
+  using row_major_map_t = Eigen::Map<row_major_mat_t>;
+
+ Tile tile(range, 0.0);
+
+ // Create Maps
+ auto local_m = slate_tile.mb();
+ auto local_n = slate_tile.nb();
+ col_major_map_t slate_map(slate_tile.data(), local_m, local_n);
+
+ auto local_m_ta = range.dim(0).extent();
+ auto local_n_ta = range.dim(1).extent();
+ row_major_map_t ta_map(tile.data(), local_m_ta, local_n_ta);
+
+ // Copy data
+ ta_map = slate_map;
+
+ return tile;
+}
+
+template <typename Array, typename Matrix, typename Pmap>
+Array make_array_from_slate_dense(Matrix& matrix, TA::TiledRange const& trange,
+                                  std::shared_ptr<Pmap> pmap, TA::World& world) {
+
+  using value_type = typename Array::value_type; // Tile type
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+
+
+  using col_major_mat_t = Eigen::Matrix<element_type, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
+  using row_major_mat_t = Eigen::Matrix<element_type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+
+  using col_major_map_t = Eigen::Map<col_major_mat_t>;
+  using row_major_map_t = Eigen::Map<row_major_mat_t>;
+
+ Array array(world, trange, pmap);
+ for (int64_t it = 0; it < matrix.mt(); ++it)
+ for (int64_t jt = 0; jt < matrix.nt(); ++jt)
+ if( matrix.tileIsLocal(it,jt) ) {
+ auto local_ordinal = trange.tiles_range().ordinal(it,jt);
+
+      auto tile = world.taskq.add(slate_to_ta_tile<value_type, element_type>,
+ matrix(it,jt), trange.make_tile_range(local_ordinal));
+
+ array.set(local_ordinal, tile);
+ }
+
+ return array;
+}
+
+template <typename Array, typename Matrix, typename Pmap>
+Array make_array_from_slate_sparse(Matrix& matrix, TA::TiledRange const& trange,
+                                   std::shared_ptr<Pmap> pmap, TA::World& world) {
+
+  typedef typename Array::value_type value_type;
+  typedef typename Array::ordinal_type ordinal_type;
+  typedef std::pair<ordinal_type, Future<value_type>> datum_type;
+
+ // Create a vector to hold local tiles
+  std::vector<datum_type> tiles;
+ tiles.reserve(pmap->size());
+
+ // Construct a tensor to hold updated tile norms for the result shape.
+  TA::Tensor<typename Array::shape_type::value_type> tile_norms(
+      trange.tiles_range(), 0);
+
+ // Construct the task function used to construct the result tiles.
+ madness::AtomicInt counter;
+ counter = 0;
+ int task_count = 0;
+ auto task = [&](const ordinal_type index) -> value_type {
+
+ value_type tile;
+ auto coords = trange.tiles_range().idx(index);
+ auto it = coords[0];
+ auto jt = coords[1];
+ if(!matrix.tileIsLocal(it,jt)) {
+ tile_norms[index] = 0.0;
+ } else {
+      tile = slate_to_ta_tile<value_type>(matrix(it,jt), trange.make_tile_range(index));
+ tile_norms[index] = norm(tile);
+ }
+ ++counter;
+ return tile;
+
+ };
+
+ for (const auto index : *pmap) {
+ auto result_tile = world.taskq.add(task, index);
+ ++task_count;
+ tiles.emplace_back(index, std::move(result_tile));
+ }
+
+ // Wait for tile norm data to be collected.
+ if (task_count > 0)
+ world.await(
+ [&counter, task_count]() -> bool { return counter == task_count; });
+
+ // Construct the new array
+ Array result(world, trange,
+ typename Array::shape_type(world, tile_norms, trange), pmap);
+ for (auto& it : tiles) {
+ const auto index = it.first;
+ if (!result.is_zero(index)) result.set(it.first, it.second);
+ }
+
+ return result;
+
+}
+
+} // namespace TiledArray::detail
+
+class SlateFunctors {
+
+public:
+
+ using slate_int = int64_t;
+  using slate_process_idx = std::tuple<slate_int, slate_int>;
+  using dim_functor_t = std::function<slate_int(slate_int)>;
+  using tile_functor_t = std::function<int(slate_process_idx)>;
+
+ SlateFunctors( const dim_functor_t& Mb, const dim_functor_t& Nb,
+ const tile_functor_t& Rank, const tile_functor_t& Dev ) :
+ tileMb_(Mb), tileNb_(Nb), tileRank_(Rank), tileDevice_(Dev) { }
+
+  template <typename TiledRange, typename PMapInterfacePointer>
+  SlateFunctors( TiledRange trange, PMapInterfacePointer pmap_ptr ) {
+ if( trange.rank() != 2 )
+ throw std::runtime_error("Cannot Convert General Tensor to SLATE (RANK != 2)");
+
+ // Tile row dimension (MB)
+ tileMb_ = [trange](slate_int i) { return trange.dim(0).tile(i).extent(); };
+
+ // Tile col dimension (NB)
+ tileNb_ = [trange](slate_int i) { return trange.dim(1).tile(i).extent(); };
+
+ // Tile rank assignment
+ tileRank_ = [pmap_ptr, trange] (slate_process_idx ij) {
+ auto [i,j] = ij;
+ return pmap_ptr->owner(trange.tiles_range().ordinal(i,j));
+ };
+
+ // Tile device assignment
+ // TODO: Needs to be more robust
+ tileDevice_ = [](slate_process_idx) { return 0; };
+
+ }
+
+ auto& tileMb() { return tileMb_; }
+ auto& tileNb() { return tileNb_; }
+ auto& tileRank() { return tileRank_; }
+ auto& tileDevice() { return tileDevice_; }
+
+
+  template <typename SlateMatrixType>
+  SlateMatrixType make_matrix(int64_t M, int64_t N, MPI_Comm comm) {
+ return SlateMatrixType(M, N, tileMb_, tileNb_, tileRank_, tileDevice_, comm);
+ }
+
+private:
+
+ dim_functor_t tileMb_, tileNb_;
+ tile_functor_t tileRank_, tileDevice_;
+};
+
+
+
+
+
+template <typename SlateMatrixType>
+std::shared_ptr<Pmap> make_pmap_from_slate( SlateMatrixType&& matrix, World& world ) {
+
+ // Compute SLATE Tile Statistics
+ size_t total_tiles = matrix.nt() * matrix.mt();
+ size_t local_tiles = 0;
+
+ // Create a map from tile ordinal to rank
+ // to avoid lifetime issues in the internal
+ // TA Pmap
+  std::vector<size_t> tile2rank(total_tiles);
+ for (int64_t it = 0; it < matrix.mt(); ++it)
+ for (int64_t jt = 0; jt < matrix.nt(); ++jt) {
+ size_t ordinal = it*matrix.nt() + jt; // TODO: Use Range
+ tile2rank[ordinal] = matrix.tileRank( it, jt );
+ if(matrix.tileIsLocal(it,jt)) local_tiles++;
+ }
+
+
+ // Create TA PMap
+  std::function<size_t(size_t)> ta_tile_functor =
+ [t2r = std::move(tile2rank)](size_t ordinal) {
+ return t2r[ordinal];
+ };
+
+  return std::make_shared<detail::UserPmap>(world, total_tiles, local_tiles,
+                                            ta_tile_functor);
+}
+
+
+
+
+
+/**
+ * @brief Convert Array to SLATE matrix
+ *
+ * @tparam Array Type of input Array
+ *
+ * @param[in] array Array to convert to SLATE. Must be rank-2.
+ * @returns SLATE representation `array`
+ */
+template <typename Array>
+detail::slate_type_from_array_t<Array>
+array_to_slate( const Array& array ) {
+
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+  using slate_matrix_t = slate::Matrix<element_type>;
+
+  using col_major_mat_t = Eigen::Matrix<element_type, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
+  using row_major_mat_t = Eigen::Matrix<element_type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+
+  using col_major_map_t = Eigen::Map<col_major_mat_t>;
+  using row_major_map_t = Eigen::Map<row_major_mat_t>;
+
+ /*******************************/
+ /*** Generate SLATE Functors ***/
+ /*******************************/
+ auto& world = array.world();
+ const auto& trange = array.trange();
+ auto pmap = array.pmap();
+
+ SlateFunctors slate_functors( trange, pmap );
+
+
+ /*********************************/
+ /*** Create empty slate matrix ***/
+ /*********************************/
+ const auto M = trange.dim(0).extent();
+ const auto N = trange.dim(1).extent();
+  auto matrix = slate_functors.make_matrix<slate_matrix_t>(M, N,
+ world.mpi.comm().Get_mpi_comm());
+
+ /************************/
+ /*** Copy TA -> SLATE ***/
+ /************************/
+ matrix.insertLocalTiles();
+
+ // Loop over local tiles via ordinal
+ // TODO: Make async
+ for( auto local_ordinal : *pmap ) {
+ // Compute coordinate of tile ordinal
+ auto local_coordinate = trange.tiles_range().idx(local_ordinal);
+ const auto it = local_coordinate[0];
+ const auto jt = local_coordinate[1];
+
+ // Sanity Check
+ if(!matrix.tileIsLocal(it,jt))
+ throw std::runtime_error("SLATE PMAP is not valid");
+
+ // Extract shallow copy of local SLATE tile and create
+ // data map
+ auto local_tile_slate = matrix(it,jt);
+ auto local_m = local_tile_slate.mb();
+ auto local_n = local_tile_slate.nb();
+ col_major_map_t slate_map(local_tile_slate.data(), local_m, local_n);
+
+ // Create data map for TA tile
+ // TODO: This should be async in a MADNESS task
+ auto& local_tile = array.find_local(local_ordinal).get();
+ auto local_m_ta = local_tile.range().dim(0).extent();
+ auto local_n_ta = local_tile.range().dim(1).extent();
+ row_major_map_t ta_map(local_tile.data(), local_m_ta, local_n_ta);
+
+ // Copy TA tile to SLATE tile
+ slate_map = ta_map;
+ } // Loop over local tiles
+
+ return matrix;
+} // array_to_slate
+
+/**
+ * @brief Convert a SLATE matrix to an Array
+ */
+template <typename Array>
+auto slate_to_array( /*const*/ detail::slate_type_from_array_t<Array>& matrix, World& world ) {
+ // TODO: SLATE Tile accessor is not const-accessible
+ // https://github.com/icl-utk-edu/slate/issues/59
+
+  using value_type = typename Array::value_type; // Tile type
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+  using slate_matrix_t = slate::Matrix<element_type>;
+
+  using col_major_mat_t = Eigen::Matrix<element_type, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
+  using row_major_mat_t = Eigen::Matrix<element_type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+
+  using col_major_map_t = Eigen::Map<col_major_mat_t>;
+  using row_major_map_t = Eigen::Map<row_major_mat_t>;
+
+ // Create TA PMap from SLATE metadata
+ auto slate_pmap = make_pmap_from_slate(matrix, world);
+
+ // Create TiledRange
+  std::vector<int64_t> row_tiling(matrix.mt()+1), col_tiling(matrix.nt()+1);
+
+ row_tiling[0] = 0;
+ for(auto i = 0; i < matrix.mt(); ++i)
+ row_tiling[i+1] = row_tiling[i] + matrix.tileMb(i);
+
+ col_tiling[0] = 0;
+ for(auto i = 0; i < matrix.nt(); ++i)
+ col_tiling[i+1] = col_tiling[i] + matrix.tileNb(i);
+
+
+  std::vector<TA::TiledRange1> ranges = {
+ TA::TiledRange1(row_tiling.begin(), row_tiling.end()),
+ TA::TiledRange1(col_tiling.begin(), col_tiling.end())
+ };
+ TA::TiledRange trange(ranges.begin(), ranges.end());
+
+ // Create TArray
+ Array array;
+  if constexpr (is_dense<Array>::value) {
+    array = detail::make_array_from_slate_dense<Array>(matrix,
+ trange, slate_pmap, world);
+ } else {
+    array = detail::make_array_from_slate_sparse<Array>(matrix,
+ trange, slate_pmap, world);
+ }
+
+ world.gop.fence();
+ return array;
+}
+
+} // namespace TiledArray
+
+#endif // TILEDARRAY_HAS_SLATE
+#endif // TILEDARRAY_CONVERSIONS_SLATE_H
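For orientation, the intended round trip through the two public conversion functions above might look like the short sketch below (the roundtrip_through_slate helper, the TA::TArrayD array type, and the 128-element tiling are illustrative assumptions, not part of this patch):

    #include <tiledarray.h>
    #include <TiledArray/conversions/slate.h>

    // Build a small dense rank-2 array, push it into SLATE, and pull it back.
    TA::TArrayD roundtrip_through_slate(TA::World& world) {
      TA::TiledRange trange{{0, 128, 256}, {0, 128, 256}};
      TA::TArrayD A(world, trange);
      A.fill(1.0);

      auto A_slate = TA::array_to_slate(A);                           // TA -> slate::Matrix<double>
      auto A_back = TA::slate_to_array<TA::TArrayD>(A_slate, world);  // slate::Matrix -> TA
      world.gop.fence();  // as in the conversion example, fence before the SLATE matrix goes away
      return A_back;
    }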
diff --git a/src/TiledArray/math/linalg/slate/cholesky.h b/src/TiledArray/math/linalg/slate/cholesky.h
new file mode 100644
index 0000000000..6f1cb17b28
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/cholesky.h
@@ -0,0 +1,183 @@
+/*
+ * This file is a part of TiledArray.
+ * Copyright (C) 2023 Virginia Tech
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * David Williams-Young
+ * Applied Mathematics and Computational Research Division,
+ * Lawrence Berkeley National Laboratory
+ *
+ * cholesky.h
+ * Created: 24 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include <TiledArray/conversions/slate.h>
+#include <TiledArray/math/linalg/slate/util.h>
+namespace TiledArray::math::linalg::slate {
+
+/**
+ * @brief Compute the Cholesky factorization of a HPD rank-2 tensor
+ *
+ * A(i,j) = L(i,k) * conj(L(j,k))
+ *
+ * Example Usage:
+ *
+ * auto L = cholesky(A, ...)
+ *
+ * @tparam Array Input array type, must be convertible to a SLATE matrix
+ *
+ * @param[in] A Input array to be factorized. Must be rank-2
+ *
+ * @returns The lower triangular Cholesky factor L in TA format
+ */
+template <typename Array>
+auto cholesky(const Array& A) {
+
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+ auto& world = A.world();
+ // Convert to SLATE
+ auto matrix = array_to_slate(A);
+
+ // Perform POTRF
+ world.gop.fence(); // stage SLATE execution
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, matrix);
+ ::slate::potrf(AH);
+ zero_triangle(::slate::Uplo::Upper, matrix);
+ world.gop.fence(); // stage SLATE execution
+
+ // Convert L to TA and return
+  return slate_to_array<Array>(matrix, world);
+
+}
+
+
+
+template <bool Both, typename Array>
+auto cholesky_linv(const Array& A) {
+
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+ auto& world = A.world();
+ auto matrix = array_to_slate(A);
+
+ // Perform POTRF
+ world.gop.fence(); // stage SLATE execution
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, matrix);
+ ::slate::potrf(AH);
+ zero_triangle(::slate::Uplo::Upper, matrix);
+
+ // Copy L if needed
+  using matrix_type = std::decay_t<decltype(matrix)>;
+  std::shared_ptr<Array> L_ptr = nullptr;
+  if constexpr (Both) {
+    L_ptr = std::make_shared<Array>(slate_to_array<Array>(matrix,world));
+ world.gop.fence(); // Make sure copy is done before inverting L
+ }
+
+ // Perform TRTRI
+  ::slate::TriangularMatrix<element_type> L_slate(::slate::Uplo::Lower,
+      ::slate::Diag::NonUnit, matrix);
+ ::slate::trtri(L_slate);
+
+ // Convert Linv to TA
+  auto Linv = slate_to_array<Array>(matrix, world);
+ world.gop.fence(); // Make sure copy is done before return
+
+ // Return Linv or L + Linv (in that order)
+ if constexpr (Both) {
+ return std::make_tuple( *L_ptr, Linv );
+ } else {
+ return Linv;
+ }
+
+}
+
+
+
+
+template <typename AArray, typename BArray>
+auto cholesky_solve(const AArray& A, const BArray& B) {
+
+  using element_type = typename std::remove_cv_t<AArray>::element_type;
+ auto& world = A.world();
+ /*
+ if( world != B.world() ) {
+ TA_EXCEPTION("A and B must be distributed on same MADWorld context");
+ }
+ */
+
+ // Convert to SLATE
+ auto A_slate = array_to_slate(A);
+ auto B_slate = array_to_slate(B);
+
+ // Solve linear system
+ world.gop.fence(); // stage SLATE execution
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, A_slate);
+ ::slate::posv( AH, B_slate );
+
+ // Convert solution to TA
+  return slate_to_array<BArray>(B_slate, world);
+
+}
+
+
+
+template <typename AArray, typename BArray>
+auto cholesky_lsolve(Op trans, const AArray& A, const BArray& B) {
+
+  using element_type = typename std::remove_cv_t<AArray>::element_type;
+ auto& world = A.world();
+ /*
+ if( world != B.world() ) {
+ TA_EXCEPTION("A and B must be distributed on same MADWorld context");
+ }
+ */
+
+ // Convert to SLATE
+ auto A_slate = array_to_slate(A);
+ auto B_slate = array_to_slate(B);
+ world.gop.fence(); // stage SLATE execution
+
+ // Factorize A
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, A_slate);
+ ::slate::potrf(AH);
+
+ // Solve linear system OP(L) * X = B
+  ::slate::TriangularMatrix<element_type> L_slate(::slate::Uplo::Lower,
+      ::slate::Diag::NonUnit, A_slate);
+ if( trans == Op::Trans ) L_slate = ::slate::transpose(L_slate);
+ if( trans == Op::ConjTrans ) L_slate = ::slate::conj_transpose(L_slate);
+ ::slate::trsm( ::slate::Side::Left, 1.0, L_slate, B_slate );
+
+ // Zero out the upper triangle
+ zero_triangle(::slate::Uplo::Upper, A_slate);
+
+ // Convert solution and L to TA
+  auto L = slate_to_array<AArray>(A_slate, world);
+  auto X = slate_to_array<BArray>(B_slate, world);
+ return std::make_tuple(L, X);
+
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED
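A hypothetical call site for these Cholesky bindings is sketched below; the example_cholesky wrapper and the dense TA::TArrayD operands are assumptions (A must be a Hermitian positive-definite rank-2 array):

    #include <tuple>
    #include <tiledarray.h>
    #include <TiledArray/math/linalg/slate/cholesky.h>

    auto example_cholesky(const TA::TArrayD& A, const TA::TArrayD& B) {
      namespace slate_la = TiledArray::math::linalg::slate;
      auto L = slate_la::cholesky(A);                      // A = L L^H
      auto [L2, Linv] = slate_la::cholesky_linv<true>(A);  // both L and L^{-1}
      auto X = slate_la::cholesky_solve(A, B);             // X = A^{-1} B via POSV
      return std::make_tuple(L, Linv, X);
    }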
diff --git a/src/TiledArray/math/linalg/slate/heig.h b/src/TiledArray/math/linalg/slate/heig.h
new file mode 100644
index 0000000000..d0bbc483f0
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/heig.h
@@ -0,0 +1,115 @@
+/*
+ * This file is a part of TiledArray.
+ * Copyright (C) 2023 Virginia Tech
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * David Williams-Young
+ * Applied Mathematics and Computational Research Division,
+ * Lawrence Berkeley National Laboratory
+ *
+ * heig.h
+ * Created: 24 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include
+#include
+#include
+
+namespace TiledArray::math::linalg::slate {
+
+template <typename Array>
+auto heig(const Array& A, TiledRange evec_trange = TiledRange()) {
+
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+ auto& world = A.world();
+
+
+ // Convert to SLATE
+ auto matrix = array_to_slate(A);
+
+ // Allocate space for singular values
+ const auto M = matrix.m();
+ const auto N = matrix.n();
+ if (M != N) TA_EXCEPTION("Matrix must be square for EVP");
+
+  std::vector<::blas::real_type<element_type>> W(N);
+
+ // Perform Eigenvalue Decomposition
+ world.gop.fence(); // stage SLATE execution
+
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, matrix);
+ auto Z = matrix.emptyLike(); Z.insertLocalTiles();
+ ::slate::eig(AH, W, Z);
+
+
+ // Convert eigenvectors back to TA
+  auto Z_ta = slate_to_array<Array>(Z, world);
+ if(evec_trange.rank() != 0 and evec_trange != A.trange()) {
+ Z_ta = retile(Z_ta, evec_trange);
+ }
+ world.gop.fence(); // Maintain lifetimes of SLATE data
+
+ return std::tuple(W, Z_ta);
+}
+
+
+template <typename ArrayA, typename ArrayB>
+auto heig(const ArrayA& A, const ArrayB& B) {
+
+  using element_type = typename std::remove_cv_t<ArrayA>::element_type;
+ auto& world = A.world();
+
+ // Convert to SLATE
+ auto matrix_A = array_to_slate(A);
+ auto matrix_B = array_to_slate(B);
+
+ // Allocate space for singular values
+ const auto M = matrix_A.m();
+ const auto N = matrix_A.n();
+ if (M != N) TA_EXCEPTION("Matrix must be square for EVP");
+ if (matrix_B.m() != matrix_B.n())
+ TA_EXCEPTION("Metric must be square for EVP");
+ if(matrix_B.m() != M)
+ TA_EXCEPTION("Matrix and Metric must be the same size");
+
+  std::vector<::blas::real_type<element_type>> W(N);
+
+ // Perform Eigenvalue Decomposition
+ world.gop.fence(); // stage SLATE execution
+
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, matrix_A);
+  ::slate::HermitianMatrix<element_type> BH(::slate::Uplo::Lower, matrix_B);
+ auto Z = matrix_A.emptyLike(); Z.insertLocalTiles();
+ ::slate::eig(1, AH, BH, W, Z); // AX = BXE
+
+
+ // Convert eigenvectors back to TA
+  auto Z_ta = slate_to_array<ArrayA>(Z, world);
+ world.gop.fence(); // Maintain lifetimes of SLATE data
+
+ return std::tuple(W, Z_ta);
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED
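The eigensolver overloads above follow the same pattern; a hypothetical caller (example_heig and the TA::TArrayD operands are assumptions, with A Hermitian and S the metric of the generalized problem):

    #include <tiledarray.h>
    #include <TiledArray/math/linalg/slate/heig.h>

    void example_heig(const TA::TArrayD& A, const TA::TArrayD& S) {
      namespace slate_la = TiledArray::math::linalg::slate;
      auto [evals, evecs] = slate_la::heig(A);       // A Z = Z diag(evals)
      auto [gevals, gevecs] = slate_la::heig(A, S);  // A Z = S Z diag(gevals)
    }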
diff --git a/src/TiledArray/math/linalg/slate/lu.h b/src/TiledArray/math/linalg/slate/lu.h
new file mode 100644
index 0000000000..0c3e675d0c
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/lu.h
@@ -0,0 +1,91 @@
+/*
+ * This file is a part of TiledArray.
+ * Copyright (C) 2023 Virginia Tech
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * David Williams-Young
+ * Applied Mathematics and Computational Research Division,
+ * Lawrence Berkeley National Laboratory
+ *
+ * lu.h
+ * Created: 24 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_LU_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_LU_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include
+#include
+namespace TiledArray::math::linalg::slate {
+
+template <typename ArrayA, typename ArrayB>
+auto lu_solve(const ArrayA& A, const ArrayB& B) {
+
+  using element_type = typename std::remove_cv_t<ArrayA>::element_type;
+ auto& world = A.world();
+ /*
+ if( world != B.world() ) {
+ TA_EXCEPTION("A and B must be distributed on same MADWorld context");
+ }
+ */
+
+ // Convert to SLATE
+ world.gop.fence(); // stage SLATE execution
+ auto A_slate = array_to_slate(A);
+ auto B_slate = array_to_slate(B);
+ world.gop.fence(); // stage SLATE execution
+
+ // Solve Linear System
+ ::slate::lu_solve( A_slate, B_slate );
+
+ // Convert solution to TA
+  auto X = slate_to_array<ArrayB>(B_slate, world);
+ world.gop.fence(); // stage SLATE execution
+
+ return X;
+}
+
+template <typename Array>
+auto lu_inv(const Array& A) {
+
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+ auto& world = A.world();
+
+ // Convert to SLATE
+ world.gop.fence(); // stage SLATE execution
+ auto A_slate = array_to_slate(A);
+ world.gop.fence(); // stage SLATE execution
+
+ // Perform LU Factorization
+ ::slate::Pivots pivots;
+ ::slate::lu_factor(A_slate, pivots);
+
+ // Invert from factors
+ ::slate::lu_inverse_using_factor(A_slate, pivots);
+
+ // Convert inverse to TA
+  auto X = slate_to_array<Array>(A_slate, world);
+ world.gop.fence(); // stage SLATE execution
+
+ return X;
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+#endif // TILEDARRAY_MATH_LINALG_SLATE_LU_H__INCLUDED
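A hypothetical caller of the LU bindings (example_lu and the dense TA::TArrayD operands are assumptions):

    #include <tiledarray.h>
    #include <TiledArray/math/linalg/slate/lu.h>

    void example_lu(const TA::TArrayD& A, const TA::TArrayD& B) {
      namespace slate_la = TiledArray::math::linalg::slate;
      auto X = slate_la::lu_solve(A, B);  // X = A^{-1} B via ::slate::lu_solve
      auto Ainv = slate_la::lu_inv(A);    // explicit inverse assembled from the LU factors
    }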
diff --git a/src/TiledArray/math/linalg/slate/qr.h b/src/TiledArray/math/linalg/slate/qr.h
new file mode 100644
index 0000000000..4e8096c3fc
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/qr.h
@@ -0,0 +1,100 @@
+/*
+ * This file is a part of TiledArray.
+ * Copyright (C) 2023 Virginia Tech
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * David Williams-Young
+ * Applied Mathematics and Computational Research Division,
+ * Lawrence Berkeley National Laboratory
+ *
+ * qr.h
+ * Created: 2 August, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include
+#include
+
+namespace TiledArray::math::linalg::slate {
+
+template <bool QOnly, typename Array>
+auto householder_qr( const Array& V, TiledRange q_trange = TiledRange(),
+                     TiledRange r_trange = TiledRange() ) {
+
+ if(q_trange.rank() == 0) {
+ q_trange = V.trange();
+ }
+
+ if(r_trange.rank() == 0) {
+ auto col_tiling = V.trange().dim(1);
+ r_trange = TiledRange( {col_tiling, col_tiling} );
+ }
+
+ // SLATE does not yet have ORGQR/UNGQR
+ // https://github.com/icl-utk-edu/slate/issues/80
+
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+ auto& world = V.world();
+
+ // Convert to SLATE
+ auto matrix = array_to_slate(V);
+
+  // Perform GEQRF
+  ::slate::TriangularFactors<element_type> T;
+ ::slate::geqrf(matrix, T);
+
+ // Form Q
+ auto Q = matrix.emptyLike(); Q.insertLocalTiles();
+ ::slate::set(0.0, 1.0, Q);
+ ::slate::unmqr(::slate::Side::Left, ::slate::Op::NoTrans, matrix, T, Q);
+
+  auto Q_ta = slate_to_array<Array>(Q, world);
+
+ if constexpr (QOnly) {
+ return Q_ta;
+ } else {
+ SlateFunctors r_functors( r_trange, V.pmap() );
+ const auto N = V.trange().dim(1).extent();
+ auto comm = world.mpi.comm().Get_mpi_comm();
+    auto R = r_functors.make_matrix<::slate::Matrix<element_type>>(N,N,comm);
+ R.insertLocalTiles();
+ ::slate::set(0.0, 0.0, R);
+
+ // Triangular views of target operand matrices
+    ::slate::TriangularMatrix<element_type>
+        R_tri(::slate::Uplo::Upper, ::slate::Diag::NonUnit, R);
+    ::slate::TriangularMatrix<element_type>
+        A_tri(::slate::Uplo::Upper, ::slate::Diag::NonUnit, matrix);
+
+ // Copy upper triangle of QR factors into R
+ ::slate::copy(A_tri, R_tri);
+
+ // Convert to TA
+    auto R_ta = slate_to_array<Array>(R, world);
+ return std::tuple(Q_ta, R_ta);
+ }
+
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED
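The QOnly flag selects whether only Q or the (Q, R) pair is returned; a hypothetical caller (example_qr and the tall rank-2 TA::TArrayD input are assumptions):

    #include <tiledarray.h>
    #include <TiledArray/math/linalg/slate/qr.h>

    void example_qr(const TA::TArrayD& V) {
      namespace slate_la = TiledArray::math::linalg::slate;
      auto Q = slate_la::householder_qr<true>(V);         // Q only
      auto [Q2, R] = slate_la::householder_qr<false>(V);  // thin Q plus the square R factor
    }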
diff --git a/src/TiledArray/math/linalg/slate/svd.h b/src/TiledArray/math/linalg/slate/svd.h
new file mode 100644
index 0000000000..652dc67e17
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/svd.h
@@ -0,0 +1,102 @@
+/*
+ * This file is a part of TiledArray.
+ * Copyright (C) 2023 Virginia Tech
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * David Williams-Young
+ * Applied Mathematics and Computational Research Division,
+ * Lawrence Berkeley National Laboratory
+ *
+ * svd.h
+ * Created: 24 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include
+#include
+#include
+
+namespace TiledArray::math::linalg::slate {
+
+template <SVD Vectors, typename Array>
+auto svd(const Array& A, TA::TiledRange u_trange, TA::TiledRange vt_trange) {
+
+ constexpr bool need_uv = (Vectors == SVD::AllVectors);
+ constexpr bool need_u = (Vectors == SVD::LeftVectors) or need_uv;
+ constexpr bool need_vt = (Vectors == SVD::RightVectors) or need_uv;
+ constexpr bool vals_only = not need_u and not need_vt;
+
+  using element_type = typename std::remove_cv_t<Array>::element_type;
+ auto& world = A.world();
+ auto comm = world.mpi.comm().Get_mpi_comm();
+
+ // Convert to SLATE
+ auto matrix = array_to_slate(A);
+  using slate_matrix_t = std::decay_t<decltype(matrix)>;
+
+ // Allocate space for singular values
+ const auto M = matrix.m();
+ const auto N = matrix.n();
+ const auto SVD_SIZE = std::min(M,N);
+  std::vector<::blas::real_type<element_type>> S(SVD_SIZE);
+
+ world.gop.fence(); // stage SLATE execution
+
+ // Generate functors
+ SlateFunctors u_functors(u_trange, A.pmap());
+ SlateFunctors vt_functors(vt_trange, A.pmap());
+
+ slate_matrix_t U, VT;
+
+ // Allocate if required
+ if(need_u) {
+    U = u_functors.make_matrix<slate_matrix_t>(M, SVD_SIZE, comm);
+ U.insertLocalTiles();
+ }
+ if(need_vt) {
+    VT = vt_functors.make_matrix<slate_matrix_t>(SVD_SIZE, N, comm);
+ VT.insertLocalTiles();
+ }
+
+ // Do SVD
+ // If U/VT are default state, they will not be used
+ ::slate::svd(matrix, S, U, VT);
+
+ Array U_ta, VT_ta;
+  if(need_u) { U_ta = slate_to_array<Array>(U, world); }
+  if(need_vt) { VT_ta = slate_to_array<Array>(VT, world); }
+
+ if constexpr (need_uv) {
+ return std::tuple(S, U_ta, VT_ta);
+ } else if constexpr (need_u) {
+ return std::tuple(S, U_ta);
+ } else if constexpr (need_vt) {
+ return std::tuple(S, VT_ta);
+ } else {
+ return S;
+ }
+
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED
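A hypothetical caller of the SVD binding, reusing the input tiling for U and V^T (example_svd is an assumption; the selector values follow the SVD vectors enum already used by the other TiledArray backends):

    #include <tiledarray.h>
    #include <TiledArray/math/linalg/slate/svd.h>

    void example_svd(const TA::TArrayD& A) {
      namespace slate_la = TiledArray::math::linalg::slate;
      auto u_trange = A.trange();   // placeholder tilings; callers choose what U/V^T should look like
      auto vt_trange = A.trange();
      auto S = slate_la::svd<TA::SVD::ValuesOnly>(A, u_trange, vt_trange);           // values only
      auto [S2, U, VT] = slate_la::svd<TA::SVD::AllVectors>(A, u_trange, vt_trange); // values + vectors
    }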
diff --git a/src/TiledArray/math/linalg/slate/util.h b/src/TiledArray/math/linalg/slate/util.h
new file mode 100644
index 0000000000..ca65fe3dce
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/util.h
@@ -0,0 +1,91 @@
+/*
+ * This file is a part of TiledArray.
+ * Copyright (C) 2023 Virginia Tech
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * David Williams-Young
+ * Applied Mathematics and Computational Research Division,
+ * Lawrence Berkeley National Laboratory
+ *
+ * util.h
+ * Created: 25 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include <slate/slate.hh>
+namespace TiledArray::math::linalg::slate {
+
+template <typename SlateMatrixType>
+void zero_triangle(::slate::Uplo tri, SlateMatrixType& A, bool zero_diag = false ) {
+
+ const auto nt = A.nt(); // Number of column tiles
+ const auto mt = A.mt(); // Number of row tiles
+
+ auto zero_block = [&](auto it, auto jt) {
+ if( A.tileIsLocal(it,jt) ) {
+ auto tile = A(it,jt);
+ const auto stride = tile.stride();
+ const auto mb = tile.mb();
+ const auto nb = tile.nb();
+ auto* data = tile.data();
+ for(int j = 0; j < nb; ++j)
+ for(int i = 0; i < mb; ++i) {
+ data[i + j*stride] = 0.0;
+ }
+ }
+ };
+
+ auto zero_tri = [&](auto it, auto jt) {
+ if( A.tileIsLocal(it,jt) ) {
+ auto tile = A(it,jt);
+ const auto stride = tile.stride();
+ const auto mb = tile.mb();
+ const auto nb = tile.nb();
+ auto* data = tile.data();
+ if( tri == ::slate::Uplo::Lower ) {
+ for(int j = 0; j < nb; ++j)
+ for(int i = j+1; i < mb; ++i) {
+ data[i + j*stride] = 0.0;
+ }
+ } else {
+ for(int j = 0; j < nb; ++j)
+ for(int i = 0; i < j; ++i) {
+ data[i + j*stride] = 0.0;
+ }
+ }
+ }
+ };
+
+ // TODO: Should be done in parallel
+ for(auto jt = 0; jt < nt; ++jt) {
+ zero_tri(jt, jt); // Handle diagonal block
+ for(auto it = jt + 1; it < mt; ++it) {
+ if( tri == ::slate::Uplo::Lower ) zero_block(it,jt);
+ else zero_block(jt,it);
+ }
+ }
+
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED
diff --git a/src/TiledArray/pmap/cyclic_pmap.h b/src/TiledArray/pmap/cyclic_pmap.h
index 6d2df0088b..43357e8f30 100644
--- a/src/TiledArray/pmap/cyclic_pmap.h
+++ b/src/TiledArray/pmap/cyclic_pmap.h
@@ -31,6 +31,11 @@
namespace TiledArray {
namespace detail {
+enum class CyclicPmapOrder {
+ RowMajor,
+ ColMajor
+};
+
/// Maps cyclically a sequence of indices onto a 2-d matrix of processes
/// Consider a sequence of indices \f$ \{ k | k \in [0,N) \} \f$,
@@ -45,6 +50,7 @@ namespace detail {
/// col} \} \f$
///
/// \note This class is used to map tile indices to processes.
+template <CyclicPmapOrder Order = CyclicPmapOrder::RowMajor>
class CyclicPmap : public Pmap {
protected:
// Import Pmap protected variables
@@ -63,7 +69,12 @@ class CyclicPmap : public Pmap {
size_type local_cols_ =
0; ///< The number of columns that belong to this rank
+ inline size_type coordinate_to_index(size_type i, size_type j) const noexcept {
+ if constexpr (is_row_major_v) return i*cols_ + j;
+ else return i + j*rows_;
+ }
public:
+ static constexpr bool is_row_major_v = (Order == CyclicPmapOrder::RowMajor);
typedef Pmap::size_type size_type; ///< Size type
/// Construct process map
@@ -96,8 +107,13 @@ class CyclicPmap : public Pmap {
// Compute local size_, if have any
if (rank_ < (proc_rows_ * proc_cols_)) {
// Compute rank coordinates
- rank_row_ = rank_ / proc_cols_;
- rank_col_ = rank_ % proc_cols_;
+ if constexpr (is_row_major_v) {
+ rank_row_ = rank_ / proc_cols_;
+ rank_col_ = rank_ % proc_cols_;
+ } else {
+ rank_row_ = rank_ % proc_rows_;
+ rank_col_ = rank_ / proc_rows_;
+ }
local_rows_ =
(rows_ / proc_rows_) + ((rows_ % proc_rows_) > rank_row_ ? 1ul : 0ul);
@@ -127,13 +143,14 @@ class CyclicPmap : public Pmap {
virtual size_type owner(const size_type tile) const {
TA_ASSERT(tile < size_);
// Compute tile coordinate in tile grid
- const size_type tile_row = tile / cols_;
- const size_type tile_col = tile % cols_;
+ const size_type tile_row = is_row_major_v ? tile / cols_ : tile % rows_;
+ const size_type tile_col = is_row_major_v ? tile % cols_ : tile / rows_;
// Compute process coordinate of tile in the process grid
const size_type proc_row = tile_row % proc_rows_;
const size_type proc_col = tile_col % proc_cols_;
// Compute the process that owns tile
- const size_type proc = proc_row * proc_cols_ + proc_col;
+ const size_type proc = is_row_major_v ?
+ proc_row * proc_cols_ + proc_col : proc_row + proc_col * proc_rows_;
TA_ASSERT(proc < procs_);
@@ -149,48 +166,59 @@ class CyclicPmap : public Pmap {
}
private:
+#if 0
virtual void advance(size_type& value, bool increment) const {
- if (increment) {
- auto row = value / cols_;
- const auto row_end = (row + 1) * cols_;
- value += proc_cols_;
- if (value >= row_end) { // if past the end of row ...
- row += proc_rows_;
- if (row < rows_) { // still have tiles
- value = row * cols_ + rank_col_; // first tile in this row
- } else // done
- value = size_;
- }
- } else { // decrement
- auto row = value / cols_;
- const auto row_begin = row * cols_;
- if (value < proc_cols_) { // protect against unsigned wraparound
- return;
- }
- value -= proc_cols_;
- if (value < row_begin) { // if past the beginning of row ...
- if (row < proc_rows_) // protect against unsigned wraparound
+ if constexpr (is_row_major_v) {
+ if (increment) {
+ auto row = value / cols_;
+ const auto row_end = (row + 1) * cols_;
+ value += proc_cols_;
+ if (value >= row_end) { // if past the end of row ...
+ row += proc_rows_;
+ if (row < rows_) { // still have tiles
+ value = coordinate_to_index(row, rank_col_); // first tile in this row
+ } else // done
+ value = size_;
+ }
+ } else { // decrement
+ auto row = value / cols_;
+ const auto row_begin = row * cols_;
+ if (value < proc_cols_) { // protect against unsigned wraparound
return;
- row -= proc_rows_;
- value = row * cols_ + rank_col_ +
- (local_cols_ - 1) * proc_cols_; // last tile in this row
+ }
+ value -= proc_cols_;
+ if (value < row_begin) { // if past the beginning of row ...
+ if (row < proc_rows_) // protect against unsigned wraparound
+ return;
+ row -= proc_rows_;
+ value = coordinate_to_index(row, rank_col_) +
+ (local_cols_ - 1) * proc_cols_; // last tile in this row
+ }
}
+ } else {
+ Pmap::advance(value, increment);
}
}
+#endif
public:
+
+#if 0
virtual const_iterator begin() const {
+ const auto proc_index = coordinate_to_index(rank_row_, rank_col_);
return this->local_size_ > 0
- ? Iterator(*this, rank_row_ * cols_ + rank_col_, this->size_,
- rank_row_ * cols_ + rank_col_, false, true)
+ ? Iterator(*this, proc_index, this->size_, proc_index,
+ false, is_row_major_v)
: end(); // make end() if empty
}
virtual const_iterator end() const {
+ const auto proc_index = coordinate_to_index(rank_row_, rank_col_);
return this->local_size_ > 0
- ? Iterator(*this, rank_row_ * cols_ + rank_col_, this->size_,
- this->size_, false, true)
- : Iterator(*this, 0, this->size_, this->size_, false, true);
+ ? Iterator(*this, proc_index, this->size_,
+ this->size_, false, is_row_major_v)
+ : Iterator(*this, 0, this->size_, this->size_, false, is_row_major_v);
}
+#endif
}; // class CyclicPmap
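The new ColMajor option only changes how a tile ordinal is unfolded into a (row, column) coordinate before it is wrapped onto the process grid; the owner() arithmetic for the two orderings reduces to the standalone sketch below (hypothetical helpers, not TiledArray code):

    #include <cstddef>

    // Row-major ordinals: tile = row * cols + col.
    std::size_t owner_row_major(std::size_t tile, std::size_t cols,
                                std::size_t proc_rows, std::size_t proc_cols) {
      const std::size_t tile_row = tile / cols, tile_col = tile % cols;
      return (tile_row % proc_rows) * proc_cols + (tile_col % proc_cols);
    }

    // Column-major ordinals: tile = row + col * rows.
    std::size_t owner_col_major(std::size_t tile, std::size_t rows,
                                std::size_t proc_rows, std::size_t proc_cols) {
      const std::size_t tile_row = tile % rows, tile_col = tile / rows;
      return (tile_row % proc_rows) + (tile_col % proc_cols) * proc_rows;
    }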
diff --git a/src/TiledArray/pmap/pmap.h b/src/TiledArray/pmap/pmap.h
index 0682901bab..528321f3f9 100644
--- a/src/TiledArray/pmap/pmap.h
+++ b/src/TiledArray/pmap/pmap.h
@@ -150,7 +150,7 @@ class Pmap {
/// \name Iteration
/// @{
- private:
+ protected:
/// customizes how to iterate over local indices
/// overload this and construct Iterator with `use_pmap_advance=true`
diff --git a/src/TiledArray/pmap/user_pmap.h b/src/TiledArray/pmap/user_pmap.h
index 50966f5744..0a74b660d7 100644
--- a/src/TiledArray/pmap/user_pmap.h
+++ b/src/TiledArray/pmap/user_pmap.h
@@ -87,12 +87,12 @@ class UserPmap : public Pmap {
virtual bool known_local_size() const { return known_local_size_; }
- virtual const_iterator begin() const {
- return Iterator(*this, 0, this->size_, 0, false);
- }
- virtual const_iterator end() const {
- return Iterator(*this, 0, this->size_, this->size_, false);
- }
+ //virtual const_iterator begin() const {
+ // return Iterator(*this, 0, this->size_, 0, false);
+ //}
+ //virtual const_iterator end() const {
+ // return Iterator(*this, 0, this->size_, this->size_, false);
+ //}
private:
bool known_local_size_ = false;
diff --git a/src/TiledArray/proc_grid.h b/src/TiledArray/proc_grid.h
index a401e0ac1e..4594553700 100644
--- a/src/TiledArray/proc_grid.h
+++ b/src/TiledArray/proc_grid.h
@@ -515,7 +515,7 @@ class ProcGrid {
std::shared_ptr make_pmap() const {
TA_ASSERT(world_);
-    return std::make_shared<CyclicPmap>(*world_, rows_, cols_, proc_rows_,
+    return std::make_shared<CyclicPmap<CyclicPmapOrder::RowMajor>>(*world_, rows_, cols_, proc_rows_,
proc_cols_);
}
@@ -528,7 +528,7 @@ class ProcGrid {
std::shared_ptr make_col_phase_pmap(const size_type rows) const {
TA_ASSERT(world_);
-    return std::make_shared<CyclicPmap>(*world_, rows, cols_, proc_rows_,
+    return std::make_shared<CyclicPmap<CyclicPmapOrder::RowMajor>>(*world_, rows, cols_, proc_rows_,
proc_cols_);
}
@@ -541,7 +541,7 @@ class ProcGrid {
std::shared_ptr make_row_phase_pmap(const size_type cols) const {
TA_ASSERT(world_);
-    return std::make_shared<CyclicPmap>(*world_, rows_, cols, proc_rows_,
+    return std::make_shared<CyclicPmap<CyclicPmapOrder::RowMajor>>(*world_, rows_, cols, proc_rows_,
proc_cols_);
}
}; // class Grid
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 217c522018..6360082c6c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -98,9 +98,16 @@ set(ta_test_src_files ta_test.cpp
# t_tot_tot_contract_.cpp
# tot_tot_tot_contract_.cpp
einsum.cpp
- linalg.cpp
+ linalg/linalg.cpp
+ linalg/non_dist.cpp
cp.cpp
)
+if(ENABLE_SCALAPACK)
+ list(APPEND ta_test_src_files linalg/scalapack.cpp)
+endif()
+if(ENABLE_SLATE)
+ list(APPEND ta_test_src_files linalg/slate.cpp)
+endif()
if(CUDA_FOUND)
list(APPEND ta_test_src_files librett.cpp expressions_cuda_um.cpp tensor_um.cpp)
diff --git a/tests/cyclic_pmap.cpp b/tests/cyclic_pmap.cpp
index 509b9f92bf..aa387150a2 100644
--- a/tests/cyclic_pmap.cpp
+++ b/tests/cyclic_pmap.cpp
@@ -32,7 +32,21 @@ struct CyclicPmapFixture {
BOOST_FIXTURE_TEST_SUITE(cyclic_pmap_suite, CyclicPmapFixture)
-BOOST_AUTO_TEST_CASE(constructor) {
+template <TiledArray::detail::CyclicPmapOrder Order>
+struct cyclic_pmap_order_wrapper {
+ static constexpr auto value = Order;
+};
+
+
+using cyclic_pmap_orders = boost::mpl::list<
+    cyclic_pmap_order_wrapper<TiledArray::detail::CyclicPmapOrder::RowMajor>,
+    cyclic_pmap_order_wrapper<TiledArray::detail::CyclicPmapOrder::ColMajor>
+>;
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(constructor, Order, cyclic_pmap_orders) {
+
+  using pmap_type = TiledArray::detail::CyclicPmap<Order::value>;
+
for (ProcessID x = 1ul; x <= GlobalFixture::world->size(); ++x) {
for (ProcessID y = 1ul; y <= GlobalFixture::world->size(); ++y) {
// Compute the limits for process rows
@@ -48,10 +62,9 @@ BOOST_AUTO_TEST_CASE(constructor) {
max_proc_rows));
const std::size_t p_cols = GlobalFixture::world->size() / p_rows;
- BOOST_REQUIRE_NO_THROW(TiledArray::detail::CyclicPmap pmap(
- *GlobalFixture::world, x, y, p_rows, p_cols));
- TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows,
- p_cols);
+ BOOST_REQUIRE_NO_THROW( pmap_type pmap( *GlobalFixture::world, x, y,
+ p_rows, p_cols));
+ pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols);
BOOST_CHECK_EQUAL(pmap.rank(), GlobalFixture::world->rank());
BOOST_CHECK_EQUAL(pmap.procs(), GlobalFixture::world->size());
BOOST_CHECK_EQUAL(pmap.size(), x * y);
@@ -60,32 +73,27 @@ BOOST_AUTO_TEST_CASE(constructor) {
ProcessID size = GlobalFixture::world->size();
- BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world,
- 0ul, 10ul, 1, 1),
+ BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 0ul, 10ul, 1, 1),
TiledArray::Exception);
- BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world,
- 10ul, 0ul, 1, 1),
+ BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 0ul, 1, 1),
TiledArray::Exception);
- BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world,
- 10ul, 10ul, 0, 1),
+ BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 0, 1),
TiledArray::Exception);
- BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world,
- 10ul, 10ul, 1, 0),
+ BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 1, 0),
TiledArray::Exception);
- BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(
- *GlobalFixture::world, 10ul, 10ul, size * 2, 1),
+ BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, size * 2, 1),
TiledArray::Exception);
- BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(
- *GlobalFixture::world, 10ul, 10ul, 1, size * 2),
+ BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 1, size * 2),
TiledArray::Exception);
if (size > 1) {
- BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(
- *GlobalFixture::world, 10ul, 10ul, size, size),
+ BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, size, size),
TiledArray::Exception);
}
}
-BOOST_AUTO_TEST_CASE(owner) {
+BOOST_AUTO_TEST_CASE_TEMPLATE(owner, Order, cyclic_pmap_orders) {
+
+  using pmap_type = TiledArray::detail::CyclicPmap<Order::value>;
const std::size_t rank = GlobalFixture::world->rank();
const std::size_t size = GlobalFixture::world->size();
@@ -108,8 +116,7 @@ BOOST_AUTO_TEST_CASE(owner) {
const std::size_t p_cols = GlobalFixture::world->size() / p_rows;
const std::size_t tiles = x * y;
- TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows,
- p_cols);
+ pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols);
for (std::size_t tile = 0; tile < tiles; ++tile) {
std::fill_n(p_owner, size, 0);
@@ -121,6 +128,18 @@ BOOST_AUTO_TEST_CASE(owner) {
// Make sure everyone agrees on who owns what.
for (std::size_t p = 0ul; p < size; ++p)
BOOST_CHECK_EQUAL(p_owner[p], p_owner[rank]);
+
+ size_t true_owner;
+ if(Order::value == TiledArray::detail::CyclicPmapOrder::RowMajor) {
+ auto proc_row = (tile / pmap.ncols()) % pmap.nrows_proc();
+ auto proc_col = (tile % pmap.ncols()) % pmap.ncols_proc();
+ true_owner = proc_row * pmap.ncols_proc() + proc_col;
+ } else {
+ auto proc_row = (tile % pmap.nrows()) % pmap.nrows_proc();
+ auto proc_col = (tile / pmap.nrows()) % pmap.ncols_proc();
+ true_owner = proc_row + proc_col * pmap.nrows_proc();
+ }
+ BOOST_CHECK_EQUAL(p_owner[rank], true_owner);
}
}
}
@@ -128,7 +147,8 @@ BOOST_AUTO_TEST_CASE(owner) {
delete[] p_owner;
}
-BOOST_AUTO_TEST_CASE(local_size) {
+BOOST_AUTO_TEST_CASE_TEMPLATE(local_size, Order, cyclic_pmap_orders) {
+  using pmap_type = TiledArray::detail::CyclicPmap<Order::value>;
for (std::size_t x = 1ul; x < 10ul; ++x) {
for (std::size_t y = 1ul; y < 10ul; ++y) {
// Compute the limits for process rows
@@ -145,8 +165,7 @@ BOOST_AUTO_TEST_CASE(local_size) {
const std::size_t p_cols = GlobalFixture::world->size() / p_rows;
const std::size_t tiles = x * y;
- TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows,
- p_cols);
+ pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols);
std::size_t total_size = pmap.local_size();
GlobalFixture::world->gop.sum(total_size);
@@ -159,7 +178,8 @@ BOOST_AUTO_TEST_CASE(local_size) {
}
}
-BOOST_AUTO_TEST_CASE(local_group) {
+BOOST_AUTO_TEST_CASE_TEMPLATE(local_group, Order, cyclic_pmap_orders) {
+  using pmap_type = TiledArray::detail::CyclicPmap<Order::value>;
ProcessID tile_owners[100];
for (std::size_t x = 1ul; x < 10ul; ++x) {
@@ -178,18 +198,15 @@ BOOST_AUTO_TEST_CASE(local_group) {
const std::size_t p_cols = GlobalFixture::world->size() / p_rows;
const std::size_t tiles = x * y;
- TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows,
- p_cols);
+ pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols);
// Check that all local elements map to this rank
- for (detail::CyclicPmap::const_iterator it = pmap.begin();
- it != pmap.end(); ++it) {
+ for (auto it = pmap.begin(); it != pmap.end(); ++it) {
BOOST_CHECK_EQUAL(pmap.owner(*it), GlobalFixture::world->rank());
}
std::fill_n(tile_owners, tiles, 0);
- for (detail::CyclicPmap::const_iterator it = pmap.begin();
- it != pmap.end(); ++it) {
+ for (auto it = pmap.begin(); it != pmap.end(); ++it) {
tile_owners[*it] += GlobalFixture::world->rank();
}
diff --git a/tests/linalg.cpp b/tests/linalg.cpp
deleted file mode 100644
index 5c84d0b5e4..0000000000
--- a/tests/linalg.cpp
+++ /dev/null
@@ -1,1079 +0,0 @@
-#include
-#include
-#include "TiledArray/config.h"
-//#include "range_fixture.h"
-#include "unit_test_config.h"
-
-#include "TiledArray/math/linalg/non-distributed/cholesky.h"
-#include "TiledArray/math/linalg/non-distributed/heig.h"
-#include "TiledArray/math/linalg/non-distributed/lu.h"
-#include "TiledArray/math/linalg/non-distributed/svd.h"
-
-#include "TiledArray/math/linalg/cholesky.h"
-#include "TiledArray/math/linalg/heig.h"
-#include "TiledArray/math/linalg/lu.h"
-#include "TiledArray/math/linalg/svd.h"
-
-namespace TA = TiledArray;
-namespace non_dist = TA::math::linalg::non_distributed;
-
-#if TILEDARRAY_HAS_SCALAPACK
-namespace scalapack = TA::math::linalg::scalapack;
-#include "TiledArray/math/linalg/scalapack/all.h"
-#define TILEDARRAY_SCALAPACK_TEST(F, E) \
- GlobalFixture::world->gop.fence(); \
- compare("TiledArray::scalapack", non_dist::F, scalapack::F, E); \
- GlobalFixture::world->gop.fence(); \
- compare("TiledArray", non_dist::F, TiledArray::F, E);
-#else
-#define TILEDARRAY_SCALAPACK_TEST(...)
-#endif
-
-#if TILEDARRAY_HAS_TTG
-#include "TiledArray/math/linalg/ttg/cholesky.h"
-#define TILEDARRAY_TTG_TEST(F, E) \
- GlobalFixture::world->gop.fence(); \
- compare("TiledArray::ttg", non_dist::F, TiledArray::math::linalg::ttg::F, E);
-#else
-#define TILEDARRAY_TTG_TEST(...)
-#endif
-
-struct ReferenceFixture {
- size_t N;
- std::vector<double> htoeplitz_vector;
- std::vector<double> exact_evals;
-
- inline double matrix_element_generator(int64_t i, int64_t j) {
-#if 0
- // Generates a Hankel matrix: absurd condition number
- return i+j;
-#else
- // Generates a Circulant matrix: good condition number
- return htoeplitz_vector[std::abs(i - j)];
-#endif
- }
-
- template <typename Tile>
- inline double make_ta_reference(Tile& t, TA::Range const& range) {
- t = Tile(range, 0.0);
- auto lo = range.lobound_data();
- auto up = range.upbound_data();
- for (auto m = lo[0]; m < up[0]; ++m) {
- for (auto n = lo[1]; n < up[1]; ++n) {
- t(m, n) = matrix_element_generator(m, n);
- }
- }
-
- return norm(t);
- };
-
- ReferenceFixture(int64_t N = 1000)
- : N(N), htoeplitz_vector(N), exact_evals(N) {
- // Generate an hermitian Circulant vector
- std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0);
- htoeplitz_vector[0] = 100;
- std::default_random_engine gen(0);
- std::uniform_real_distribution<> dist(0., 1.);
- for (int64_t i = 1; i <= (N / 2); ++i) {
- double val = dist(gen);
- htoeplitz_vector[i] = val;
- htoeplitz_vector[N - i] = val;
- }
-
- // Compute exact eigenvalues
- const double ff = 2. * M_PI / N;
- for (int64_t j = 0; j < N; ++j) {
- double val = htoeplitz_vector[0];
- ;
- for (int64_t k = 1; k < N; ++k)
- val += htoeplitz_vector[N - k] * std::cos(ff * j * k);
- exact_evals[j] = val;
- }
-
- std::sort(exact_evals.begin(), exact_evals.end());
- }
-};
-
-struct LinearAlgebraFixture : ReferenceFixture {
-#if TILEDARRAY_HAS_SCALAPACK
-
- blacspp::Grid grid;
- scalapack::BlockCyclicMatrix<double> ref_matrix; // XXX: Just double is fine?
-
- LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128)
- : ReferenceFixture(N),
- grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe?
- ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) {
- for (size_t i = 0; i < N; ++i) {
- for (size_t j = 0; j < N; ++j) {
- if (ref_matrix.dist().i_own(i, j)) {
- auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j);
- ref_matrix.local_mat()(i_local, j_local) =
- matrix_element_generator(i, j);
- }
- }
- }
- }
-#endif
-
- template <typename A>
- static void compare(const char* context, const A& non_dist, const A& result,
- double e) {
- // clang-format off
- BOOST_TEST_CONTEXT(context)
- ;
- // clang-format on
- auto diff_with_non_dist = (non_dist("i,j") - result("i,j")).norm().get();
- BOOST_CHECK_SMALL(diff_with_non_dist, e);
- }
-
- template <typename T, typename F, std::size_t... Is>
- static void for_each_pair_of_tuples_impl(T&& t1, T&& t2, F f,
- std::integer_sequence<std::size_t, Is...>) {
- auto l = {(f(std::get<Is>(t1), std::get<Is>(t2)), 0)...};
- }
-
- template <typename... As, typename F>
- static void for_each_pair_of_tuples(std::tuple<As...> const& t1,
- std::tuple<As...> const& t2, F f) {
- for_each_pair_of_tuples_impl(
- t1, t2, f, std::make_integer_sequence<std::size_t, sizeof...(As)>());
- }
-
- template <typename... As>
- static void compare(const char* context, const std::tuple<As...>& non_dist,
- const std::tuple<As...>& result, double e) {
- for_each_pair_of_tuples(non_dist, result, [&](auto& arg1, auto& arg2) {
- compare(context, arg1, arg2, e);
- });
- }
-};
-
-TA::TiledRange gen_trange(size_t N, const std::vector<size_t>& TA_NBs) {
- TA_ASSERT(TA_NBs.size() > 0);
-
- std::default_random_engine gen(0);
- std::uniform_int_distribution<> dist(0, TA_NBs.size() - 1);
- auto rand_indx = [&]() { return dist(gen); };
- auto rand_nb = [&]() { return TA_NBs[rand_indx()]; };
-
- std::vector<size_t> t_boundaries = {0};
- auto TA_NB = rand_nb();
- while (t_boundaries.back() + TA_NB < N) {
- t_boundaries.emplace_back(t_boundaries.back() + TA_NB);
- TA_NB = rand_nb();
- }
- t_boundaries.emplace_back(N);
-
- std::vector<TA::TiledRange1> ranges(
- 2, TA::TiledRange1(t_boundaries.begin(), t_boundaries.end()));
-
- return TA::TiledRange(ranges.begin(), ranges.end());
-};
-
-BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture)
-
-#if TILEDARRAY_HAS_SCALAPACK
-
-BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) {
- GlobalFixture::world->gop.fence();
-
- auto [M, N] = ref_matrix.dims();
- BOOST_REQUIRE_EQUAL(M, N);
-
- auto NB = ref_matrix.dist().nb();
-
- auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
- auto ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- GlobalFixture::world->gop.fence();
- auto test_ta =
- scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
- GlobalFixture::world->gop.fence();
-
- auto norm_diff =
- (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
-
- BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
- GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) {
- GlobalFixture::world->gop.fence();
-
- auto [M, N] = ref_matrix.dims();
- BOOST_REQUIRE_EQUAL(M, N);
-
- auto NB = ref_matrix.dist().nb();
-
- auto trange = gen_trange(N, {static_cast<size_t>(NB / 2)});
-
- auto ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- GlobalFixture::world->gop.fence();
- auto test_ta =
- scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
- GlobalFixture::world->gop.fence();
-
- auto norm_diff =
- (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
-
- BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
- GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) {
- GlobalFixture::world->gop.fence();
-
- auto [M, N] = ref_matrix.dims();
- BOOST_REQUIRE_EQUAL(M, N);
-
- auto NB = ref_matrix.dist().nb();
-
- auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
- auto ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- GlobalFixture::world->gop.fence();
- auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
- GlobalFixture::world->gop.fence();
-
- double local_norm_diff =
- (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
- local_norm_diff *= local_norm_diff;
-
- double norm_diff;
- MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
- MPI_COMM_WORLD);
-
- norm_diff = std::sqrt(norm_diff);
-
- BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
- GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) {
- GlobalFixture::world->gop.fence();
-
- auto [M, N] = ref_matrix.dims();
- BOOST_REQUIRE_EQUAL(M, N);
-
- [[maybe_unused]] auto NB = ref_matrix.dist().nb();
-
- auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
-
- auto ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- GlobalFixture::world->gop.fence();
- auto test_ta =
- scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
- GlobalFixture::world->gop.fence();
-
- auto norm_diff =
- (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
-
- BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
- GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) {
- GlobalFixture::world->gop.fence();
-
- auto [M, N] = ref_matrix.dims();
- BOOST_REQUIRE_EQUAL(M, N);
-
- auto NB = ref_matrix.dist().nb();
-
- auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
-
- auto ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- GlobalFixture::world->gop.fence();
- auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
- GlobalFixture::world->gop.fence();
-
- double local_norm_diff =
- (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
- local_norm_diff *= local_norm_diff;
-
- double norm_diff;
- MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
- MPI_COMM_WORLD);
-
- norm_diff = std::sqrt(norm_diff);
-
- BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
- GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) {
- GlobalFixture::world->gop.fence();
-
- auto [M, N] = ref_matrix.dims();
- BOOST_REQUIRE_EQUAL(M, N);
-
- auto NB = ref_matrix.dist().nb();
-
- auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
- // test with TA and btas tile
- using typelist_t =
- std::tuple<TA::Tensor<double>, btas::Tensor<double, TA::Range>>;
- typelist_t typevals;
-
- auto test = [&](const auto& typeval_ref) {
- using Tile = std::decay_t<decltype(typeval_ref)>;
- using Array = TA::DistArray<Tile, TA::SparsePolicy>;
-
- auto ref_ta = TA::make_array<Array>(
- *GlobalFixture::world, trange,
- [this](Tile& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- GlobalFixture::world->gop.fence();
- auto test_ta = scalapack::block_cyclic_to_array<Array>(ref_matrix, trange);
- GlobalFixture::world->gop.fence();
-
- auto norm_diff =
- (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
-
- BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
- GlobalFixture::world->gop.fence();
- };
-
- test(std::get<0>(typevals));
- test(std::get<1>(typevals));
-};
-
-BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) {
- GlobalFixture::world->gop.fence();
-
- auto [M, N] = ref_matrix.dims();
- BOOST_REQUIRE_EQUAL(M, N);
-
- auto NB = ref_matrix.dist().nb();
-
- auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
- // test with TA and btas tile
- using typelist_t =
- std::tuple<TA::Tensor<double>, btas::Tensor<double, TA::Range>>;
- typelist_t typevals;
-
- auto test = [&](const auto& typeval_ref) {
- using Tile = std::decay_t<decltype(typeval_ref)>;
- using Array = TA::DistArray<Tile, TA::SparsePolicy>;
-
- auto ref_ta = TA::make_array<Array>(
- *GlobalFixture::world, trange,
- [this](Tile& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- GlobalFixture::world->gop.fence();
- auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
- GlobalFixture::world->gop.fence();
-
- double local_norm_diff =
- (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
- local_norm_diff *= local_norm_diff;
-
- double norm_diff;
- MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
- MPI_COMM_WORLD);
-
- norm_diff = std::sqrt(norm_diff);
-
- BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
- GlobalFixture::world->gop.fence();
- };
-
- test(std::get<0>(typevals));
- test(std::get<1>(typevals));
-};
-
-BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) {
- GlobalFixture::world->gop.fence();
-
- auto [M, N] = ref_matrix.dims();
- BOOST_REQUIRE_EQUAL(M, N);
-
- auto NB = ref_matrix.dist().nb();
-
- auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
- const TA::TArray<double> ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- GlobalFixture::world->gop.fence();
- auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
- GlobalFixture::world->gop.fence();
-
- double local_norm_diff =
- (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
- local_norm_diff *= local_norm_diff;
-
- double norm_diff;
- MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
- MPI_COMM_WORLD);
-
- norm_diff = std::sqrt(norm_diff);
-
- BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
- GlobalFixture::world->gop.fence();
-};
-
-#endif // TILEDARRAY_HAS_SCALAPACK
-
-BOOST_AUTO_TEST_CASE(heig_same_tiling) {
- GlobalFixture::world->gop.fence();
-
- auto trange = gen_trange(N, {128ul});
-
- const auto ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- auto [evals, evecs] = non_dist::heig(ref_ta);
- auto [evals_non_dist, evecs_non_dist] = non_dist::heig(ref_ta);
- // auto evals = heig( ref_ta );
-
- BOOST_CHECK(evecs.trange() == ref_ta.trange());
-
- // check eigenvectors against non_dist only, for now ...
- decltype(evecs) evecs_error;
- evecs_error("i,j") = evecs_non_dist("i,j") - evecs("i,j");
- // TODO need to fix phases of the eigenvectors to be able to compare ...
- // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(),
- // N * N * std::numeric_limits<double>::epsilon());
-
- // Check eigenvalue correctness
- double tol = N * N * std::numeric_limits<double>::epsilon();
- for (int64_t i = 0; i < N; ++i) {
- BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
- BOOST_CHECK_SMALL(std::abs(evals_non_dist[i] - exact_evals[i]), tol);
- }
-
- GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(heig_diff_tiling) {
- GlobalFixture::world->gop.fence();
- auto trange = gen_trange(N, {128ul});
-
- auto ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- auto new_trange = gen_trange(N, {64ul});
- auto [evals, evecs] = non_dist::heig(ref_ta, new_trange);
- auto [evals_non_dist, evecs_non_dist] = non_dist::heig(ref_ta, new_trange);
-
- BOOST_CHECK(evecs.trange() == new_trange);
-
- // check eigenvectors against non_dist only, for now ...
- decltype(evecs) evecs_error;
- evecs_error("i,j") = evecs_non_dist("i,j") - evecs("i,j");
- // TODO need to fix phases of the eigenvectors to be able to compare ...
- // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(),
- // N * N * std::numeric_limits<double>::epsilon());
-
- // Check eigenvalue correctness
- double tol = N * N * std::numeric_limits<double>::epsilon();
- for (int64_t i = 0; i < N; ++i) {
- BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
- BOOST_CHECK_SMALL(std::abs(evals_non_dist[i] - exact_evals[i]), tol);
- }
-
- GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(heig_generalized) {
- GlobalFixture::world->gop.fence();
-
- auto trange = gen_trange(N, {128ul});
-
- auto ref_ta = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- auto dense_iden = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [](TA::Tensor<double>& t, TA::Range const& range) -> double {
- t = TA::Tensor<double>(range, 0.0);
- auto lo = range.lobound_data();
- auto up = range.upbound_data();
- for (auto m = lo[0]; m < up[0]; ++m)
- for (auto n = lo[1]; n < up[1]; ++n)
- if (m == n) t(m, n) = 1.;
-
- return t.norm();
- });
-
- GlobalFixture::world->gop.fence();
- auto [evals, evecs] = non_dist::heig(ref_ta, dense_iden);
- // auto evals = heig( ref_ta );
-
- BOOST_CHECK(evecs.trange() == ref_ta.trange());
-
- // TODO: Check validity of eigenvectors, not crucial for the time being
-
- // Check eigenvalue correctness
- double tol = N * N * std::numeric_limits<double>::epsilon();
- for (int64_t i = 0; i < N; ++i)
- BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
-
- GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(cholesky) {
- GlobalFixture::world->gop.fence();
-
- auto trange = gen_trange(N, {128ul});
-
- auto A = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
-
- auto L = non_dist::cholesky(A);
-
- BOOST_CHECK(L.trange() == A.trange());
-
- decltype(A) A_minus_LLt;
- A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj();
-
- const double epsilon = N * N * std::numeric_limits<double>::epsilon();
-
- BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), epsilon);
-
- // check against NON_DIST also
- auto L_ref = non_dist::cholesky(A);
- decltype(L) L_diff;
- L_diff("i,j") = L("i,j") - L_ref("i,j");
-
- BOOST_CHECK_SMALL(L_diff("i,j").norm().get(), epsilon);
-
- TILEDARRAY_SCALAPACK_TEST(cholesky(A), epsilon);
-
- TILEDARRAY_TTG_TEST(cholesky(A), epsilon);
-}
-
-BOOST_AUTO_TEST_CASE(cholesky_linv) {
- GlobalFixture::world->gop.fence();
-
- auto trange = gen_trange(N, {128ul});
-
- auto A = TA::make_array<TA::TArray<double>>(
- *GlobalFixture::world, trange,
- [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
- return this->make_ta_reference(t, range);
- });
- decltype(A) Acopy = A.clone();
-
- auto Linv = TA::cholesky_linv(A);
-
- BOOST_CHECK(Linv.trange() == A.trange());
-
- TA::TArray<double> tmp(*GlobalFixture::world, trange);
- tmp("i,j") = Linv("i,k") * A("k,j");
- A("i,j") = tmp("i,k") * Linv("j,k");
-
- TA::foreach_inplace(A, [](TA::Tensor<double>& tile) {
- auto range = tile.range();
- auto lo = range.lobound_data();
- auto up = range.upbound_data();
- for (auto m = lo[0]; m < up[0]; ++m)
- for (auto n = lo[1]; n < up[1]; ++n)
- if (m == n) {
- tile(m, n) -= 1.;
- }
- });
-
- double epsilon = N * N * std::numeric_limits<double>::epsilon();
- double norm = A("i,j").norm().get();
-
- BOOST_CHECK_SMALL(norm, epsilon);
-
- TILEDARRAY_SCALAPACK_TEST(cholesky_linv