diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index fd9c49aefa..b6fe18c009 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -7,6 +7,7 @@ default:
 
 variables:
   MAD_NUM_THREADS : 2
+  OMP_NUM_THREADS : 1
   TA_TARGETS : "tiledarray examples-tiledarray ta_test check-tiledarray"
   # Debug builds with ScaLAPACK=ON need increased TA_UT_CTEST_TIMEOUT
   TA_CONFIG : >
@@ -18,6 +19,7 @@ variables:
     ${BLA_VENDOR}
     ${BLA_THREADS}
     ${ENABLE_SCALAPACK}
+    ${ENABLE_SLATE}
 
 before_script:
   # NB: if CMAKE_BUILD_PARALLEL_LEVEL is not set (i.e. using shared runner), use 1 to ensure we have enough memory
@@ -35,6 +37,7 @@ ubuntu:
   variables:
     TA_PYTHON : "TA_PYTHON=ON"
     ENABLE_SCALAPACK : "ENABLE_SCALAPACK=OFF"
+    ENABLE_SLATE     : "ENABLE_SLATE=OFF"
   script:
     - ./ci/.build-project
         --build ./build
@@ -69,6 +72,7 @@ ubuntu:
         CXX: [ g++, clang++-13 ]
         BUILD_TYPE : [ "Release", "Debug" ]
         ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ]
+        ENABLE_SLATE : [ "ENABLE_SLATE=ON", "ENABLE_SLATE=OFF" ]
         RUNNER_TAGS: [ linux ]
       - IMAGE : [ "ubuntu:22.04", "ubuntu:20.04" ]
         CXX: [ g++ ]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a50f0a789f..cd0c2759fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -121,6 +121,9 @@ add_feature_info(MPI ENABLE_MPI "Message-Passing Interface supports distributed-
 option(ENABLE_SCALAPACK "Enable ScaLAPACK Bindings in TiledArray" OFF)
 add_feature_info(ScaLAPACK ENABLE_SCALAPACK "ScaLAPACK provides distributed linear algebra")
 
+option(ENABLE_SLATE "Enable SLATE Bindings in TiledArray" OFF)
+add_feature_info(SLATE ENABLE_SLATE "SLATE provides distributed linear algebra on GPU architectures")
+
 option(ENABLE_WFN91_LINALG_DISCOVERY_KIT "Use linear algebra discovery kit from github.com/wavefunction91 [recommended]" ON)
 add_feature_info(WFN91LinearAlgebraDiscoveryKit ENABLE_WFN91_LINALG_DISCOVERY_KIT "Linear algebra discovery kit from github.com/wavefunction91 supports many more corner cases than the default CMake modules and/or ICL's BLAS++/LAPACK++ modules")
 
@@ -338,6 +341,9 @@ include(${PROJECT_SOURCE_DIR}/cmake/modules/FindOrFetchBoost.cmake)
 if(ENABLE_SCALAPACK)
   include(external/scalapackpp.cmake)
 endif()
+if(ENABLE_SLATE)
+  include(external/slate.cmake)
+endif()
 
 # optional deps:
 # 1. ccache
diff --git a/cmake/modules/FindOrFetchSLATE.cmake b/cmake/modules/FindOrFetchSLATE.cmake
new file mode 100644
index 0000000000..d5a34c2b5a
--- /dev/null
+++ b/cmake/modules/FindOrFetchSLATE.cmake
@@ -0,0 +1,32 @@
+# Try find_package
+if (NOT TARGET slate)
+    find_package(slate QUIET CONFIG)
+    if (TARGET slate)
+        message(STATUS "Found SLATE CONFIG at ${slate_CONFIG}")
+    endif (TARGET slate)
+endif (NOT TARGET slate)
+
+# If not found, build via FetchContent
+if (NOT TARGET slate)
+
+    # Make sure BLAS++/LAPACK++ are already in place
+    # (will typically be loaded from BTAS)
+    include(${vg_cmake_kit_SOURCE_DIR}/modules/FindOrFetchLinalgPP.cmake)
+    
+    if (NOT TILEDARRAY_HAS_CUDA)
+        set(gpu_backend none CACHE STRING "Device Backend for ICL Linalg++/SLATE")
+    endif (NOT TILEDARRAY_HAS_CUDA)
+
+    include(FetchContent)
+    FetchContent_Declare(
+        slate
+        GIT_REPOSITORY  https://github.com/icl-utk-edu/slate.git
+        GIT_TAG         ${TA_TRACKED_SLATE_TAG}
+    )
+    FetchContent_MakeAvailable(slate)
+
+endif (NOT TARGET slate)
+
+if (NOT TARGET slate)
+    message( FATAL_ERROR "FindOrFetchSLATE could not make slate target available")
+endif (NOT TARGET slate)
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index f74d35345a..d670a22015 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -33,6 +33,7 @@ add_subdirectory (cuda)
 add_subdirectory (dgemm)
 add_subdirectory (demo)
 add_subdirectory (scalapack)
+add_subdirectory (slate)
 add_subdirectory (fock)
 add_subdirectory (mpi_tests)
 add_subdirectory (pmap_test)
diff --git a/examples/slate/CMakeLists.txt b/examples/slate/CMakeLists.txt
new file mode 100644
index 0000000000..e704ac94e4
--- /dev/null
+++ b/examples/slate/CMakeLists.txt
@@ -0,0 +1,38 @@
+#
+#  This file is a part of TiledArray.
+#  Copyright (C) 2016  Virginia Tech
+#
+#  This program is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#  Drew Lewis
+#  Department of Chemistry, Virginia Tech
+#
+#  CMakeLists.txt
+#  Dec 6th, 2016
+#
+
+# Create example executable
+
+if(ENABLE_SLATE)
+
+foreach(_exec conversion)
+
+  # Add executable
+  add_executable(slate-${_exec} EXCLUDE_FROM_ALL ${_exec}.cpp)
+  target_link_libraries(slate-${_exec} PRIVATE tiledarray)
+  add_dependencies(examples-tiledarray slate-${_exec})
+
+endforeach()
+
+endif(ENABLE_SLATE)
diff --git a/examples/slate/conversion.cpp b/examples/slate/conversion.cpp
new file mode 100644
index 0000000000..851f066743
--- /dev/null
+++ b/examples/slate/conversion.cpp
@@ -0,0 +1,140 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2020  Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Computational Research Division, Lawrence Berkeley National Laboratory
+ *
+ *  conversion.cpp
+ *  Created: 7  Feb, 2020
+ *  Edited:  13 May, 2020
+ *
+ */
+
+#include <TiledArray/conversions/slate.h>
+
+#include <tiledarray.h>
+#include <random>
+#include <slate/slate.hh>
+#include <TiledArray/pmap/user_pmap.h>
+
+
+template <typename Integral1, typename Integral2>
+int64_t div_ceil(Integral1 x, Integral2 y) {
+  int64_t x_ll = x;
+  int64_t y_ll = y;
+
+  auto d = std::div(x_ll, y_ll);
+  return d.quot + !!d.rem;
+}
+
+TA::TiledRange gen_trange(size_t N, const std::vector<size_t>& TA_NBs) {
+  assert(TA_NBs.size() > 0);
+
+  std::default_random_engine gen(0);
+  std::uniform_int_distribution<> dist(0, TA_NBs.size() - 1);
+  auto rand_indx = [&]() { return dist(gen); };
+  auto rand_nb = [&]() { return TA_NBs[rand_indx()]; };
+
+  std::vector<size_t> t_boundaries = {0};
+  auto TA_NB = rand_nb();
+  while (t_boundaries.back() + TA_NB < N) {
+    t_boundaries.emplace_back(t_boundaries.back() + TA_NB);
+    TA_NB = rand_nb();
+  }
+  t_boundaries.emplace_back(N);
+
+  std::vector<TA::TiledRange1> ranges(
+      2, TA::TiledRange1(t_boundaries.begin(), t_boundaries.end()));
+
+  return TA::TiledRange(ranges.begin(), ranges.end());
+};
+
+
+
+auto make_square_proc_grid(MPI_Comm comm) {
+    int mpi_size; MPI_Comm_size(comm, &mpi_size);
+    int p,q;
+    for(p = int( sqrt( mpi_size ) ); p > 0; --p) {
+        q = int( mpi_size / p );
+        if(p*q == mpi_size) break;
+    }
+    return std::make_pair(p,q);
+}
+
+
+
+
+
+
+
+
+
+int main(int argc, char** argv) {
+  auto& world = TA::initialize(argc, argv);
+  {
+    int64_t N = argc > 1 ? std::stoi(argv[1]) : 1000;
+    size_t NB = argc > 2 ? std::stoi(argv[2]) : 128;
+
+    auto make_ta_reference = 
+      [&](TA::Tensor<double>& t, TA::Range const& range) {
+
+        t = TA::Tensor<double>(range, 0.0);
+        auto lo = range.lobound_data();
+        auto up = range.upbound_data();
+        for (int m = lo[0]; m < up[0]; ++m) {
+          for (int n = lo[1]; n < up[1]; ++n) {
+            t(m, n) = m - n;
+          }
+        }
+
+        return t.norm();
+      };
+
+    // Generate Reference TA tensor.
+    auto trange = gen_trange(N, {NB});
+    auto ref_ta =
+        TA::make_array<TA::TArray<double> >(world, trange, make_ta_reference);
+
+    // Do Conversion 
+    auto A = TA::array_to_slate( ref_ta );
+    auto A_ta = TA::slate_to_array<TA::TArray<double>>(A, world);
+    world.gop.fence();
+
+    // Slate matrix to eigen
+    Eigen::MatrixXd slate_eigen = Eigen::MatrixXd::Zero(N,N);
+    for (int64_t j = 0; j < A.nt(); ++j) 
+    for (int64_t i = 0; i < A.mt(); ++i) {
+        A.tileBcast(i,j, A, slate::Layout::ColMajor);
+        auto T = A(i,j);
+        Eigen::Map<Eigen::MatrixXd> T_map( T.data(), T.mb(), T.nb() );
+        slate_eigen.block(i*NB,j*NB,T.mb(), T.nb()) = T_map; 
+    }
+    //if(!world.rank()) {
+    //std::cout << "SLATE\n" << slate_eigen << std::endl;
+    //}
+
+    A_ta.make_replicated();
+    world.gop.fence();
+    auto A_eigen = TA::array_to_eigen(A_ta);
+    //if(!world.rank()) std::cout << "TA\n" << A_eigen << std::endl;
+    std::cout << (A_eigen - slate_eigen).norm() << std::endl;
+
+    
+  }
+
+  TA::finalize();
+}
diff --git a/external/slate.cmake b/external/slate.cmake
new file mode 100644
index 0000000000..9c177d32af
--- /dev/null
+++ b/external/slate.cmake
@@ -0,0 +1,21 @@
+if (NOT TARGET slate)
+  set(VGCMAKEKIT_TRACKED_SLATE_TAG ${TA_TRACKED_SLATE_TAG} CACHE STRING "slate tag")
+  include(FindOrFetchSLATE)
+endif()
+
+# built {blacs,scalapack}pp as a subproject? install as part of tiledarray export as well
+# to be able to use TiledArray_SLATE from the build tree
+if (TARGET slate)
+    install( TARGETS slate EXPORT tiledarray COMPONENT tiledarray )
+    # Add these dependencies to External
+    add_dependencies(External-tiledarray slate )
+endif()
+
+if (TARGET slate)
+  add_library( TiledArray_SLATE INTERFACE )
+  target_link_libraries( TiledArray_SLATE INTERFACE slate )
+
+  install( TARGETS TiledArray_SLATE EXPORT tiledarray COMPONENT tiledarray )
+
+  set( TILEDARRAY_HAS_SLATE 1 )
+endif()
diff --git a/external/versions.cmake b/external/versions.cmake
index 6f6c05c977..19c50d392b 100644
--- a/external/versions.cmake
+++ b/external/versions.cmake
@@ -36,6 +36,9 @@ set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v6.0.0)
 set(TA_TRACKED_SCALAPACKPP_TAG 6397f52cf11c0dfd82a79698ee198a2fce515d81)
 set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 711ef363479a90c88788036f9c6c8adb70736cbf )
 
+set(TA_TRACKED_SLATE_TAG 04d552f12e0d181c27652ecd3ebedb265b7eec07)
+set(TA_TRACKED_SLATE_PREVIOUS_TAG 8651441aa87cd69b560d4dac8c5ceb0e7f8c32a4)
+
 set(TA_TRACKED_RANGEV3_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864)
 set(TA_TRACKED_RANGEV3_PREVIOUS_TAG dbdaa247a25a0daa24c68f1286a5693c72ea0006)
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a8b3d6f4fd..b5ecc25f4b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -274,6 +274,9 @@ endif(CUDA_FOUND)
 if( TARGET TiledArray_SCALAPACK )
   list(APPEND _TILEDARRAY_DEPENDENCIES TiledArray_SCALAPACK)
 endif()
+if( TARGET TiledArray_SLATE )
+  list(APPEND _TILEDARRAY_DEPENDENCIES TiledArray_SLATE)
+endif()
 list(APPEND _TILEDARRAY_DEPENDENCIES "${LAPACK_LIBRARIES}")
 
 if( TARGET ttg-parsec )
diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in
index 0c4d5d5cbc..f136ea523c 100644
--- a/src/TiledArray/config.h.in
+++ b/src/TiledArray/config.h.in
@@ -74,6 +74,9 @@
 /* Define if TA has enabled ScaLAPACK Bindings */
 #cmakedefine TILEDARRAY_HAS_SCALAPACK 1
 
+/* Define if TA has enabled SLATE Bindings */
+#cmakedefine TILEDARRAY_HAS_SLATE 1
+
 /* Define if TiledArray configured with CUDA support */
 #cmakedefine TILEDARRAY_HAS_CUDA @TILEDARRAY_HAS_CUDA@
 #cmakedefine TILEDARRAY_CHECK_CUDA_ERROR @TILEDARRAY_CHECK_CUDA_ERROR@
diff --git a/src/TiledArray/conversions/slate.h b/src/TiledArray/conversions/slate.h
new file mode 100644
index 0000000000..3212b763f6
--- /dev/null
+++ b/src/TiledArray/conversions/slate.h
@@ -0,0 +1,367 @@
+#ifndef TILEDARRAY_CONVERSIONS_SLATE_H
+#define TILEDARRAY_CONVERSIONS_SLATE_H
+
+#include <TiledArray/config.h> // TILEDARRAY_HAS_SLATE
+#if TILEDARRAY_HAS_SLATE
+
+#include <slate/slate.hh>                // slate::Matrix
+#include <TiledArray/type_traits.h>      // TA::numeric_type
+#include <TiledArray/shape.h>            // is_dense
+#include <TiledArray/pmap/user_pmap.h>   // {,User}Pmap
+#include <Eigen/Core>                    // Eigen::{Matrix,Map}
+#include <TiledArray/external/madness.h> // MADNESS
+
+namespace TiledArray {
+namespace detail {
+
+/// C++14-esque typename wrapper for `numeric_type
+template <typename Array>
+using numeric_type_t = typename numeric_type<Array>::type;
+
+/// Deduce SLATE Matrix type from Array type
+template <typename Array>
+using slate_type_from_array_t =
+    typename slate::Matrix<numeric_type_t<Array>>;
+
+
+template <typename Tile, typename T>
+auto slate_to_ta_tile(slate::Tile<T> slate_tile, TA::Range const& range) {
+
+  using col_major_mat_t = Eigen::Matrix<T,-1,-1,Eigen::ColMajor>;
+  using row_major_mat_t = Eigen::Matrix<T,-1,-1,Eigen::RowMajor>;
+
+  using col_major_map_t = Eigen::Map<const col_major_mat_t>;
+  using row_major_map_t = Eigen::Map<row_major_mat_t>;
+
+  Tile tile(range, 0.0);
+
+  // Create Maps                
+  auto local_m = slate_tile.mb();
+  auto local_n = slate_tile.nb();
+  col_major_map_t slate_map(slate_tile.data(), local_m, local_n);
+
+  auto  local_m_ta = range.dim(0).extent();
+  auto  local_n_ta = range.dim(1).extent();
+  row_major_map_t ta_map(tile.data(), local_m_ta, local_n_ta);
+
+  // Copy data
+  ta_map = slate_map;
+
+  return tile;
+}
+
+template <typename Array, typename Matrix>
+Array make_array_from_slate_dense(Matrix& matrix, TA::TiledRange const& trange,
+   std::shared_ptr<TA::Pmap> pmap, TA::World& world) {
+
+  using value_type = typename Array::value_type; // Tile type
+  using element_type   = typename std::remove_cv_t<Array>::element_type;
+
+
+  using col_major_mat_t = Eigen::Matrix<element_type,-1,-1,Eigen::ColMajor>;
+  using row_major_mat_t = Eigen::Matrix<element_type,-1,-1,Eigen::RowMajor>;
+
+  using col_major_map_t = Eigen::Map<const col_major_mat_t>;
+  using row_major_map_t = Eigen::Map<row_major_mat_t>;
+
+  Array array(world, trange, pmap);
+  for (int64_t it = 0; it < matrix.mt(); ++it)
+  for (int64_t jt = 0; jt < matrix.nt(); ++jt) 
+  if( matrix.tileIsLocal(it,jt) ) {
+    auto local_ordinal = trange.tiles_range().ordinal(it,jt);
+
+    auto tile = world.taskq.add(slate_to_ta_tile<value_type,element_type>,
+        matrix(it,jt), trange.make_tile_range(local_ordinal));
+    
+    array.set(local_ordinal, tile);
+  }
+
+  return array;
+}
+
+template <typename Array, typename Matrix>
+Array make_array_from_slate_sparse(Matrix& matrix, TA::TiledRange const& trange,
+   std::shared_ptr<TA::Pmap> pmap, TA::World& world) {
+
+  typedef typename Array::value_type value_type;
+  typedef typename Array::ordinal_type ordinal_type;
+  typedef std::pair<ordinal_type, Future<value_type> > datum_type;
+
+  // Create a vector to hold local tiles
+  std::vector<datum_type> tiles;
+  tiles.reserve(pmap->size());
+
+  // Construct a tensor to hold updated tile norms for the result shape.
+  TA::Tensor<typename detail::shape_t<Array>::value_type> tile_norms(
+      trange.tiles_range(), 0);
+
+  // Construct the task function used to construct the result tiles.
+  madness::AtomicInt counter;
+  counter = 0;
+  int task_count = 0;
+  auto task = [&](const ordinal_type index) -> value_type {
+
+    value_type tile;
+    auto coords = trange.tiles_range().idx(index);
+    auto it = coords[0];
+    auto jt = coords[1];
+    if(!matrix.tileIsLocal(it,jt)) {
+      tile_norms[index] = 0.0;
+    } else {
+      tile = slate_to_ta_tile<value_type>(matrix(it,jt), trange.make_tile_range(index));
+      tile_norms[index] = norm(tile);
+    }
+    ++counter;
+    return tile;
+
+  };
+
+  for (const auto index : *pmap) {
+    auto result_tile = world.taskq.add(task, index);
+    ++task_count;
+    tiles.emplace_back(index, std::move(result_tile));
+  }
+
+  // Wait for tile norm data to be collected.
+  if (task_count > 0)
+    world.await(
+        [&counter, task_count]() -> bool { return counter == task_count; });
+
+  // Construct the new array
+  Array result(world, trange,
+               typename Array::shape_type(world, tile_norms, trange), pmap);
+  for (auto& it : tiles) {
+    const auto index = it.first;
+    if (!result.is_zero(index)) result.set(it.first, it.second);
+  }
+
+  return result;
+
+}
+      
+} // namespace TiledArray::detail
+
+class SlateFunctors {
+
+public:
+
+    using slate_int = int64_t;
+    using slate_process_idx = std::tuple<slate_int, slate_int>;
+    using dim_functor_t  = std::function<slate_int(slate_int)>;
+    using tile_functor_t = std::function<int(slate_process_idx)>;
+
+    SlateFunctors( const dim_functor_t& Mb, const dim_functor_t& Nb,
+      const tile_functor_t& Rank, const tile_functor_t& Dev ) :
+      tileMb_(Mb), tileNb_(Nb), tileRank_(Rank), tileDevice_(Dev) { }
+
+    template <typename PMapInterfacePointer>
+    SlateFunctors( TiledRange trange, PMapInterfacePointer pmap_ptr ) {
+      if( trange.rank() != 2 )
+          throw std::runtime_error("Cannot Convert General Tensor to SLATE (RANK != 2)");
+
+      // Tile row dimension (MB)
+      tileMb_ = [trange](slate_int i) { return trange.dim(0).tile(i).extent(); };
+
+      // Tile col dimension (NB)
+      tileNb_ = [trange](slate_int i) { return trange.dim(1).tile(i).extent(); };
+
+      // Tile rank assignment
+      tileRank_ = [pmap_ptr, trange] (slate_process_idx ij) {
+        auto [i,j] = ij;
+        return pmap_ptr->owner(trange.tiles_range().ordinal(i,j));
+      };
+
+      // Tile device assignment
+      // TODO: Needs to be more robust
+      tileDevice_ = [](slate_process_idx) { return 0; };
+     
+    }
+
+  auto& tileMb() { return tileMb_; }
+  auto& tileNb() { return tileNb_; }
+  auto& tileRank() { return tileRank_; }
+  auto& tileDevice() { return tileDevice_; }
+
+
+  template <typename SlateMatrixType>
+  SlateMatrixType make_matrix(int64_t M, int64_t N, MPI_Comm comm) {
+    return SlateMatrixType(M, N, tileMb_, tileNb_, tileRank_, tileDevice_, comm);
+  }
+
+private:
+
+    dim_functor_t tileMb_, tileNb_;
+    tile_functor_t tileRank_, tileDevice_;
+};
+
+
+
+
+
+template <typename SlateMatrixType>
+std::shared_ptr<TA::Pmap> make_pmap_from_slate( SlateMatrixType&& matrix, World& world ) {
+
+    // Compute SLATE Tile Statistics
+    size_t total_tiles = matrix.nt() * matrix.mt();
+    size_t local_tiles = 0;
+
+    // Create a map from tile ordinal to rank
+    // to avoid lifetime issues in the internal
+    // TA Pmap
+    std::vector<size_t> tile2rank(total_tiles);
+    for (int64_t it = 0; it < matrix.mt(); ++it)
+    for (int64_t jt = 0; jt < matrix.nt(); ++jt) {
+        size_t ordinal = it*matrix.nt() + jt; // TODO: Use Range
+        tile2rank[ordinal] = matrix.tileRank( it, jt );
+        if(matrix.tileIsLocal(it,jt)) local_tiles++;
+    }
+    
+
+    // Create TA PMap
+    std::function<size_t(size_t)> ta_tile_functor = 
+        [t2r = std::move(tile2rank)](size_t ordinal) {
+            return t2r[ordinal];
+        };
+
+    return std::make_shared<TA::detail::UserPmap>(world, total_tiles, local_tiles, 
+            ta_tile_functor);
+}
+
+
+
+
+
+/**
+ * @brief Convert Array to SLATE matrix
+ *
+ * @tparam Array Type of input Array
+ *
+ * @param[in] array Array to convert to SLATE. Must be rank-2.
+ * @returns SLATE representation `array`
+ */
+template <typename Array>
+detail::slate_type_from_array_t<Array>
+array_to_slate( const Array& array ) {
+
+    using element_type   = typename std::remove_cv_t<Array>::element_type;
+    using slate_matrix_t = typename slate::Matrix<element_type>;
+
+    using col_major_mat_t = Eigen::Matrix<element_type,-1,-1,Eigen::ColMajor>;
+    using row_major_mat_t = Eigen::Matrix<element_type,-1,-1,Eigen::RowMajor>;
+
+    using col_major_map_t = Eigen::Map<col_major_mat_t>;
+    using row_major_map_t = Eigen::Map<const row_major_mat_t>;
+
+    /*******************************/
+    /*** Generate SLATE Functors ***/
+    /*******************************/
+    auto&       world  = array.world();
+    const auto& trange = array.trange();
+    auto        pmap   = array.pmap();
+
+    SlateFunctors slate_functors( trange, pmap );
+
+
+    /*********************************/
+    /*** Create empty slate matrix ***/
+    /*********************************/
+    const auto M = trange.dim(0).extent();
+    const auto N = trange.dim(1).extent();
+    auto matrix = slate_functors.make_matrix<slate_matrix_t>(M, N,
+          world.mpi.comm().Get_mpi_comm());
+    
+    /************************/
+    /*** Copy TA -> SLATE ***/
+    /************************/
+    matrix.insertLocalTiles();
+
+    // Loop over local tiles via ordinal
+    // TODO: Make async
+    for( auto local_ordinal : *pmap ) {
+        // Compute coordinate of tile ordinal
+        auto local_coordinate = trange.tiles_range().idx(local_ordinal);
+        const auto it = local_coordinate[0];
+        const auto jt = local_coordinate[1];
+
+        // Sanity Check
+        if(!matrix.tileIsLocal(it,jt))
+            throw std::runtime_error("SLATE PMAP is not valid");
+
+        // Extract shallow copy of local SLATE tile and create
+        // data map
+        auto local_tile_slate = matrix(it,jt);
+        auto local_m = local_tile_slate.mb();
+        auto local_n = local_tile_slate.nb();
+        col_major_map_t slate_map(local_tile_slate.data(), local_m, local_n);
+
+        // Create data map for TA tile
+        // TODO: This should be async in a MADNESS task
+        auto& local_tile = array.find_local(local_ordinal).get();
+        auto  local_m_ta = local_tile.range().dim(0).extent();
+        auto  local_n_ta = local_tile.range().dim(1).extent();
+        row_major_map_t ta_map(local_tile.data(), local_m_ta, local_n_ta);
+
+        // Copy TA tile to SLATE tile
+        slate_map = ta_map;
+    } // Loop over local tiles
+
+    return matrix;
+} // array_to_slate
+
+/**
+ *  @brief Convert a SLATE matrix to an Array
+ */
+template <typename Array>
+auto slate_to_array( /*const*/ detail::slate_type_from_array_t<Array>& matrix, World& world ) {
+    // TODO: SLATE Tile accessor is not const-accessible 
+    // https://github.com/icl-utk-edu/slate/issues/59
+
+    using value_type = typename Array::value_type; // Tile type
+    using element_type   = typename std::remove_cv_t<Array>::element_type;
+    using slate_matrix_t = typename slate::Matrix<element_type>;
+
+    using col_major_mat_t = Eigen::Matrix<element_type,-1,-1,Eigen::ColMajor>;
+    using row_major_mat_t = Eigen::Matrix<element_type,-1,-1,Eigen::RowMajor>;
+
+    using col_major_map_t = Eigen::Map<const col_major_mat_t>;
+    using row_major_map_t = Eigen::Map<row_major_mat_t>;
+
+    // Create TA PMap from SLATE metadata
+    auto slate_pmap = make_pmap_from_slate(matrix, world);
+
+    // Create TiledRange
+    std::vector<size_t> row_tiling(matrix.mt()+1), col_tiling(matrix.nt()+1);
+
+    row_tiling[0] = 0;
+    for(auto i = 0; i < matrix.mt(); ++i) 
+        row_tiling[i+1] = row_tiling[i] + matrix.tileMb(i);
+    
+    col_tiling[0] = 0;
+    for(auto i = 0; i < matrix.nt(); ++i) 
+        col_tiling[i+1] = col_tiling[i] + matrix.tileNb(i);
+
+
+    std::vector<TA::TiledRange1> ranges = {
+        TA::TiledRange1(row_tiling.begin(), row_tiling.end()),
+        TA::TiledRange1(col_tiling.begin(), col_tiling.end())
+    };
+    TA::TiledRange trange(ranges.begin(), ranges.end());
+
+    // Create TArray
+    Array array;
+    if constexpr (is_dense<Array>::value) {
+      array = detail::make_array_from_slate_dense<Array>(matrix, 
+          trange, slate_pmap, world);
+    } else {
+      array = detail::make_array_from_slate_sparse<Array>(matrix, 
+          trange, slate_pmap, world);
+    }
+
+    world.gop.fence();
+    return array;
+}
+
+} // namespace TiledArray
+
+#endif // TILEDARRAY_HAS_SLATE
+#endif // TILEDARRAY_CONVERSIONS_SLATE_H
diff --git a/src/TiledArray/math/linalg/slate/cholesky.h b/src/TiledArray/math/linalg/slate/cholesky.h
new file mode 100644
index 0000000000..6f1cb17b28
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/cholesky.h
@@ -0,0 +1,183 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2023 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Applied Mathematics and Computational Research Division,
+ *  Lawrence Berkeley National Laboratory
+ *
+ *  cholesky.h
+ *  Created:    24 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include <TiledArray/conversions/slate.h>
+#include <TiledArray/math/linalg/slate/util.h>
+namespace TiledArray::math::linalg::slate {
+
+/**
+ *  @brief Compute the Cholesky factorization of a HPD rank-2 tensor
+ *
+ *  A(i,j) = L(i,k) * conj(L(j,k))
+ *
+ *  Example Usage:
+ *
+ *  auto L = cholesky(A, ...)
+ *
+ *  @tparam Array Input array type, must be convertible to BlockCyclicMatrix
+ *
+ *  @param[in] A Input array to be factorized. Must be rank-2
+ *
+ *  @returns The lower triangular Cholesky factor L in TA format
+ */
+template <typename Array>
+auto cholesky(const Array& A) {
+
+  using element_type   = typename std::remove_cv_t<Array>::element_type;
+  auto& world = A.world();
+  // Convert to SLATE
+  auto matrix = array_to_slate(A);
+
+  // Perform POTRF
+  world.gop.fence();  // stage SLATE execution
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, matrix);
+  ::slate::potrf(AH);
+  zero_triangle(::slate::Uplo::Upper, matrix);
+  world.gop.fence();  // stage SLATE execution
+
+  // Convert L to TA and return 
+  return slate_to_array<Array>(matrix, world);
+
+}
+
+
+
+template <bool Both, typename Array>
+auto cholesky_linv(const Array& A) {
+
+  using element_type   = typename std::remove_cv_t<Array>::element_type;
+  auto& world = A.world();
+  auto matrix = array_to_slate(A);
+
+  // Perform POTRF
+  world.gop.fence();  // stage SLATE execution
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, matrix);
+  ::slate::potrf(AH);
+  zero_triangle(::slate::Uplo::Upper, matrix);
+
+  // Copy L if needed
+  using matrix_type = std::decay_t<decltype(matrix)>;
+  std::shared_ptr<Array> L_ptr = nullptr;
+  if constexpr (Both) {
+    L_ptr = std::make_shared<Array>(slate_to_array<Array>(matrix,world));
+    world.gop.fence();  // Make sure copy is done before inverting L 
+  }
+
+  // Perform TRTRI
+  ::slate::TriangularMatrix<element_type> L_slate(::slate::Uplo::Lower, 
+    ::slate::Diag::NonUnit, matrix);
+  ::slate::trtri(L_slate);
+
+  // Convert Linv to TA
+  auto Linv = slate_to_array<Array>(matrix, world);
+  world.gop.fence();  // Make sure copy is done before return
+
+  // Return Linv or L + Linv (in that order)
+  if constexpr (Both) {
+    return std::make_tuple( *L_ptr, Linv );
+  } else {
+    return Linv;
+  }
+
+}
+
+
+
+
+template <typename AArray, typename BArray>
+auto cholesky_solve(const AArray& A, const BArray& B) {
+
+  using element_type   = typename std::remove_cv_t<AArray>::element_type;
+  auto& world = A.world();
+  /*
+  if( world != B.world() ) {
+    TA_EXCEPTION("A and B must be distributed on same MADWorld context");
+  }
+  */
+
+  // Convert to SLATE
+  auto A_slate = array_to_slate(A);
+  auto B_slate = array_to_slate(B);
+
+  // Solve linear system
+  world.gop.fence();  // stage SLATE execution
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, A_slate);
+  ::slate::posv( AH, B_slate );
+
+  // Convert solution to TA
+  return slate_to_array<BArray>(B_slate, world);
+
+}
+
+
+
+template <typename AArray, typename BArray>
+auto cholesky_lsolve(Op trans, const AArray& A, const BArray& B) {
+
+  using element_type   = typename std::remove_cv_t<AArray>::element_type;
+  auto& world = A.world();
+  /*
+  if( world != B.world() ) {
+    TA_EXCEPTION("A and B must be distributed on same MADWorld context");
+  }
+  */
+
+  // Convert to SLATE
+  auto A_slate = array_to_slate(A);
+  auto B_slate = array_to_slate(B);
+  world.gop.fence();  // stage SLATE execution
+
+  // Factorize A
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, A_slate);
+  ::slate::potrf(AH);
+
+  // Solve linear system OP(L) * X = B
+  ::slate::TriangularMatrix<element_type> L_slate(::slate::Uplo::Lower, 
+    ::slate::Diag::NonUnit, A_slate);
+  if( trans == Op::Trans )     L_slate = ::slate::transpose(L_slate);
+  if( trans == Op::ConjTrans ) L_slate = ::slate::conj_transpose(L_slate);
+  ::slate::trsm( ::slate::Side::Left, 1.0, L_slate, B_slate );
+    
+  // Zero out the upper triangle
+  zero_triangle(::slate::Uplo::Upper, A_slate);
+  
+  // Convert solution and L to TA
+  auto L = slate_to_array<AArray>(A_slate, world);
+  auto X = slate_to_array<BArray>(B_slate, world);
+  return std::make_tuple(L, X);
+
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_CHOL_H__INCLUDED
diff --git a/src/TiledArray/math/linalg/slate/heig.h b/src/TiledArray/math/linalg/slate/heig.h
new file mode 100644
index 0000000000..d0bbc483f0
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/heig.h
@@ -0,0 +1,115 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2023 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Applied Mathematics and Computational Research Division,
+ *  Lawrence Berkeley National Laboratory
+ *
+ *  cholesky.h
+ *  Created:    24 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include <TiledArray/conversions/slate.h>
+#include <TiledArray/math/linalg/slate/util.h>
+#include <TiledArray/math/linalg/forward.h>
+
+namespace TiledArray::math::linalg::slate {
+
+template <typename Array>
+auto heig(const Array& A, TiledRange evec_trange = TiledRange()) {
+
+  using element_type   = typename std::remove_cv_t<Array>::element_type;
+  auto& world = A.world();
+
+
+  // Convert to SLATE
+  auto matrix = array_to_slate(A);
+
+  // Allocate space for singular values
+  const auto M = matrix.m();
+  const auto N = matrix.n();
+  if (M != N) TA_EXCEPTION("Matrix must be square for EVP");
+
+  std::vector<::blas::real_type<element_type>> W(N);
+
+  // Perform Eigenvalue Decomposition
+  world.gop.fence();  // stage SLATE execution
+
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, matrix);
+  auto Z = matrix.emptyLike(); Z.insertLocalTiles();
+  ::slate::eig(AH, W, Z); 
+
+
+  // Convert eigenvectors back to TA
+  auto Z_ta = slate_to_array<Array>(Z, world);
+  if(evec_trange.rank() != 0 and evec_trange != A.trange()) {
+    Z_ta = retile(Z_ta, evec_trange);
+  }
+  world.gop.fence(); // Maintain lifetimes of SLATE data
+
+  return std::tuple(W, Z_ta);
+}
+
+
+template <typename ArrayA, typename ArrayB>
+auto heig(const ArrayA& A, const ArrayB& B) {
+
+  using element_type   = typename std::remove_cv_t<ArrayB>::element_type;
+  auto& world = A.world();
+
+  // Convert to SLATE
+  auto matrix_A = array_to_slate(A);
+  auto matrix_B = array_to_slate(B);
+
+  // Allocate space for singular values
+  const auto M = matrix_A.m();
+  const auto N = matrix_A.n();
+  if (M != N) TA_EXCEPTION("Matrix must be square for EVP");
+  if (matrix_B.m() != matrix_B.n())
+    TA_EXCEPTION("Metric must be square for EVP");
+  if(matrix_B.m() != M)
+    TA_EXCEPTION("Matrix and Metric must be the same size");
+
+  std::vector<::blas::real_type<element_type>> W(N);
+
+  // Perform Eigenvalue Decomposition
+  world.gop.fence();  // stage SLATE execution
+
+  ::slate::HermitianMatrix<element_type> AH(::slate::Uplo::Lower, matrix_A);
+  ::slate::HermitianMatrix<element_type> BH(::slate::Uplo::Lower, matrix_B);
+  auto Z = matrix_A.emptyLike(); Z.insertLocalTiles();
+  ::slate::eig(1, AH, BH, W, Z); // AX = BXE 
+
+
+  // Convert eigenvectors back to TA
+  auto Z_ta = slate_to_array<ArrayA>(Z, world);
+  world.gop.fence(); // Maintain lifetimes of SLATE data
+
+  return std::tuple(W, Z_ta);
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_HEIG_H__INCLUDED
diff --git a/src/TiledArray/math/linalg/slate/lu.h b/src/TiledArray/math/linalg/slate/lu.h
new file mode 100644
index 0000000000..0c3e675d0c
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/lu.h
@@ -0,0 +1,91 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2023 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Applied Mathematics and Computational Research Division,
+ *  Lawrence Berkeley National Laboratory
+ *
+ *  cholesky.h
+ *  Created:    24 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_LU_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_LU_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include <TiledArray/conversions/slate.h>
+#include <TiledArray/math/linalg/slate/util.h>
+namespace TiledArray::math::linalg::slate {
+
+template <typename ArrayA, typename ArrayB>
+auto lu_solve(const ArrayA& A, const ArrayB& B) {
+
+  using element_type   = typename std::remove_cv_t<ArrayA>::element_type;
+  auto& world = A.world();
+  /*
+  if( world != B.world() ) {
+    TA_EXCEPTION("A and B must be distributed on same MADWorld context");
+  }
+  */
+
+  // Convert to SLATE
+  world.gop.fence();  // stage SLATE execution
+  auto A_slate = array_to_slate(A);
+  auto B_slate = array_to_slate(B);
+  world.gop.fence();  // stage SLATE execution
+
+  // Solve Linear System
+  ::slate::lu_solve( A_slate, B_slate );
+
+  // Convert solution to TA
+  auto X = slate_to_array<ArrayB>(B_slate, world);
+  world.gop.fence();  // stage SLATE execution
+
+  return X;
+}
+
+template <typename Array>
+auto lu_inv(const Array& A) {
+
+  using element_type   = typename std::remove_cv_t<Array>::element_type;
+  auto& world = A.world();
+
+  // Convert to SLATE
+  world.gop.fence();  // stage SLATE execution
+  auto A_slate = array_to_slate(A);
+  world.gop.fence();  // stage SLATE execution
+
+  // Perform LU Factorization 
+  ::slate::Pivots pivots;
+  ::slate::lu_factor(A_slate, pivots);
+
+  // Invert from factors
+  ::slate::lu_inverse_using_factor(A_slate, pivots);
+
+  // Convert inverse to TA
+  auto X = slate_to_array<Array>(A_slate, world);
+  world.gop.fence();  // stage SLATE execution
+
+  return X;
+}
+
+}  // namespace TiledArray::math::linalg::scalapack
+
+#endif  // TILEDARRAY_HAS_SCALAPACK
+#endif  // TILEDARRAY_MATH_LINALG_SCALAPACK_LU_H__INCLUDED
diff --git a/src/TiledArray/math/linalg/slate/qr.h b/src/TiledArray/math/linalg/slate/qr.h
new file mode 100644
index 0000000000..4e8096c3fc
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/qr.h
@@ -0,0 +1,100 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2023 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Applied Mathematics and Computational Research Division,
+ *  Lawrence Berkeley National Laboratory
+ *
+ *  qr.h
+ *  Created:    2 August, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include <TiledArray/conversions/slate.h>
+#include <TiledArray/math/linalg/slate/util.h>
+
+namespace TiledArray::math::linalg::slate {
+
+template <bool QOnly, typename Array>
+auto householder_qr( const Array& V, TiledRange q_trange = TiledRange(),
+                     TiledRange r_trange = TiledRange() ) {
+
+  if(q_trange.rank() == 0) {
+    q_trange = V.trange();
+  }
+
+  if(r_trange.rank() == 0) {
+    auto col_tiling = V.trange().dim(1);
+    r_trange = TiledRange( {col_tiling, col_tiling} );
+  }
+
+  // SLATE does not yet have ORGQR/UNGQR
+  // https://github.com/icl-utk-edu/slate/issues/80
+
+  using element_type   = typename std::remove_cv_t<Array>::element_type;
+  auto& world = V.world();
+
+  // Convert to SLATE
+  auto matrix = array_to_slate(V);
+
+  // Perform GETRF
+  ::slate::TriangularFactors<element_type> T;
+  ::slate::geqrf(matrix, T);
+
+  // Form Q
+  auto Q = matrix.emptyLike(); Q.insertLocalTiles();
+  ::slate::set(0.0, 1.0, Q);
+  ::slate::unmqr(::slate::Side::Left, ::slate::Op::NoTrans, matrix, T, Q);
+
+  auto Q_ta = slate_to_array<Array>(Q, world);
+
+  if constexpr (QOnly) {
+    return Q_ta;
+  } else {
+    SlateFunctors r_functors( r_trange, V.pmap() );
+    const auto N = V.trange().dim(1).extent();
+    auto comm = world.mpi.comm().Get_mpi_comm();
+    auto R = r_functors.make_matrix<::slate::Matrix<element_type>>(N,N,comm);
+    R.insertLocalTiles();
+    ::slate::set(0.0, 0.0, R);
+
+    // Triangular views of target operand matrices
+    ::slate::TriangularMatrix<element_type> 
+      R_tri(::slate::Uplo::Upper, ::slate::Diag::NonUnit, R);
+    ::slate::TriangularMatrix<element_type> 
+      A_tri(::slate::Uplo::Upper, ::slate::Diag::NonUnit, matrix);
+
+    // Copy upper triangle of QR factors into R
+    ::slate::copy(A_tri, R_tri);
+
+    // Convert to TA
+    auto R_ta = slate_to_array<Array>(R, world);
+    return std::tuple(Q_ta, R_ta);
+  }
+
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_QR_H__INCLUDED
diff --git a/src/TiledArray/math/linalg/slate/svd.h b/src/TiledArray/math/linalg/slate/svd.h
new file mode 100644
index 0000000000..652dc67e17
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/svd.h
@@ -0,0 +1,102 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2023 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Applied Mathematics and Computational Research Division,
+ *  Lawrence Berkeley National Laboratory
+ *
+ *  cholesky.h
+ *  Created:    24 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include <TiledArray/conversions/slate.h>
+#include <TiledArray/math/linalg/slate/util.h>
+#include <TiledArray/math/linalg/forward.h>
+
+namespace TiledArray::math::linalg::slate {
+
+template <SVD::Vectors Vectors, typename Array>
+auto svd(const Array& A, TA::TiledRange u_trange, TA::TiledRange vt_trange) {
+
+  constexpr bool need_uv = (Vectors == SVD::AllVectors);
+  constexpr bool need_u = (Vectors == SVD::LeftVectors) or need_uv;
+  constexpr bool need_vt = (Vectors == SVD::RightVectors) or need_uv;
+  constexpr bool vals_only = not need_u and not need_vt;
+
+  using element_type   = typename std::remove_cv_t<Array>::element_type;
+  auto& world = A.world();
+  auto comm   = world.mpi.comm().Get_mpi_comm();
+
+  // Convert to SLATE
+  auto matrix = array_to_slate(A);
+  using slate_matrix_t = std::decay_t<decltype(matrix)>;
+
+  // Allocate space for singular values
+  const auto M = matrix.m();
+  const auto N = matrix.n();
+  const auto SVD_SIZE = std::min(M,N);
+  std::vector<::blas::real_type<element_type>> S(SVD_SIZE);
+
+  world.gop.fence();  // stage SLATE execution
+
+  // Generate functors
+  SlateFunctors u_functors(u_trange, A.pmap());
+  SlateFunctors vt_functors(vt_trange, A.pmap());
+
+  slate_matrix_t U, VT;
+
+  // Allocate if required 
+  if(need_u) {
+    U = u_functors.make_matrix<slate_matrix_t>(M, SVD_SIZE, comm);
+    U.insertLocalTiles();
+  }
+  if(need_vt) {
+    VT = vt_functors.make_matrix<slate_matrix_t>(SVD_SIZE, N, comm);
+    VT.insertLocalTiles();
+  }
+
+  // Do SVD
+  // If U/VT are default state, they will not be used
+  ::slate::svd(matrix, S, U, VT); 
+
+  Array U_ta, VT_ta;
+  if(need_u)  { U_ta  = slate_to_array<Array>(U,  world); }
+  if(need_vt) { VT_ta = slate_to_array<Array>(VT, world); } 
+
+  if constexpr (need_uv) {
+    return std::tuple(S, U_ta, VT_ta);
+  } else if constexpr (need_u) {
+    return std::tuple(S, U_ta);
+  } else if constexpr (need_vt) {
+    return std::tuple(S, VT_ta);
+  } else { 
+    return S;
+  }
+
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_SVD_H__INCLUDED
diff --git a/src/TiledArray/math/linalg/slate/util.h b/src/TiledArray/math/linalg/slate/util.h
new file mode 100644
index 0000000000..ca65fe3dce
--- /dev/null
+++ b/src/TiledArray/math/linalg/slate/util.h
@@ -0,0 +1,91 @@
+/*
+ *  This file is a part of TiledArray.
+ *  Copyright (C) 2023 Virginia Tech
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  David Williams-Young
+ *  Applied Mathematics and Computational Research Division,
+ *  Lawrence Berkeley National Laboratory
+ *
+ *  util.h
+ *  Created:    25 July, 2023
+ *
+ */
+#ifndef TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED
+#define TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED
+
+#include <TiledArray/config.h>
+#if TILEDARRAY_HAS_SLATE
+
+#include <slate/slate.hh>
+namespace TiledArray::math::linalg::slate {
+
+template <typename SlateMatrixType>
+void zero_triangle(::slate::Uplo tri, SlateMatrixType& A, bool zero_diag = false ) {
+
+  const auto nt = A.nt(); // Number of column tiles
+  const auto mt = A.mt(); // Number of row tiles
+
+  auto zero_block = [&](auto it, auto jt) {
+    if( A.tileIsLocal(it,jt) ) {
+      auto tile = A(it,jt);
+      const auto stride = tile.stride();
+      const auto mb = tile.mb();
+      const auto nb = tile.nb();
+      auto* data = tile.data();
+      for(int j = 0; j < nb; ++j)
+      for(int i = 0; i < mb; ++i) {
+        data[i + j*stride] = 0.0;
+      }
+    }
+  };
+
+  auto zero_tri = [&](auto it, auto jt) {
+    if( A.tileIsLocal(it,jt) ) {
+      auto tile = A(it,jt);
+      const auto stride = tile.stride();
+      const auto mb = tile.mb();
+      const auto nb = tile.nb();
+      auto* data = tile.data();
+      if( tri == ::slate::Uplo::Lower ) {
+        for(int j = 0;   j < nb; ++j)
+        for(int i = j+1; i < mb; ++i) {
+          data[i + j*stride] = 0.0;
+        }
+      } else {
+        for(int j = 0; j < nb; ++j)
+        for(int i = 0; i < j;  ++i) {
+          data[i + j*stride] = 0.0;
+        }
+      }
+    }
+  };
+
+  // TODO: Should be done in parallel
+  for(auto jt = 0; jt < nt; ++jt) {
+    zero_tri(jt, jt); // Handle diagonal block 
+    for(auto it = jt + 1; it < mt; ++it) {
+      if( tri == ::slate::Uplo::Lower ) zero_block(it,jt);
+      else                              zero_block(jt,it);
+    }
+  }
+
+}
+
+} // namespace TiledArray::math::linalg::slate
+
+#endif // TILEDARRAY_HAS_SLATE
+
+#endif // TILEDARRAY_MATH_LINALG_SLATE_UTIL_H__INCLUDED
diff --git a/src/TiledArray/pmap/cyclic_pmap.h b/src/TiledArray/pmap/cyclic_pmap.h
index 6d2df0088b..43357e8f30 100644
--- a/src/TiledArray/pmap/cyclic_pmap.h
+++ b/src/TiledArray/pmap/cyclic_pmap.h
@@ -31,6 +31,11 @@
 namespace TiledArray {
 namespace detail {
 
+enum class CyclicPmapOrder {
+  RowMajor,
+  ColMajor
+};
+
 /// Maps cyclically a sequence of indices onto a 2-d matrix of processes
 
 /// Consider a sequence of indices \f$ \{ k | k \in [0,N) \} \f$,
@@ -45,6 +50,7 @@ namespace detail {
 /// col} \} \f$
 ///
 /// \note This class is used to map <em>tile</em> indices to processes.
+template <CyclicPmapOrder Order = CyclicPmapOrder::RowMajor>
 class CyclicPmap : public Pmap {
  protected:
   // Import Pmap protected variables
@@ -63,7 +69,12 @@ class CyclicPmap : public Pmap {
   size_type local_cols_ =
       0;  ///< The number of columns that belong to this rank
 
+  inline size_type coordinate_to_index(size_type i, size_type j) const noexcept {
+    if constexpr (is_row_major_v) return i*cols_ + j;
+    else                          return i + j*rows_;
+  }
  public:
+  static constexpr bool is_row_major_v = (Order == CyclicPmapOrder::RowMajor);
   typedef Pmap::size_type size_type;  ///< Size type
 
   /// Construct process map
@@ -96,8 +107,13 @@ class CyclicPmap : public Pmap {
     // Compute local size_, if have any
     if (rank_ < (proc_rows_ * proc_cols_)) {
       // Compute rank coordinates
-      rank_row_ = rank_ / proc_cols_;
-      rank_col_ = rank_ % proc_cols_;
+      if constexpr (is_row_major_v) {
+        rank_row_ = rank_ / proc_cols_;
+        rank_col_ = rank_ % proc_cols_;
+      } else {
+        rank_row_ = rank_ % proc_rows_;
+        rank_col_ = rank_ / proc_rows_;
+      }
 
       local_rows_ =
           (rows_ / proc_rows_) + ((rows_ % proc_rows_) > rank_row_ ? 1ul : 0ul);
@@ -127,13 +143,14 @@ class CyclicPmap : public Pmap {
   virtual size_type owner(const size_type tile) const {
     TA_ASSERT(tile < size_);
     // Compute tile coordinate in tile grid
-    const size_type tile_row = tile / cols_;
-    const size_type tile_col = tile % cols_;
+    const size_type tile_row = is_row_major_v ? tile / cols_ : tile % rows_;
+    const size_type tile_col = is_row_major_v ? tile % cols_ : tile / rows_;
     // Compute process coordinate of tile in the process grid
     const size_type proc_row = tile_row % proc_rows_;
     const size_type proc_col = tile_col % proc_cols_;
     // Compute the process that owns tile
-    const size_type proc = proc_row * proc_cols_ + proc_col;
+    const size_type proc = is_row_major_v ? 
+        proc_row * proc_cols_ + proc_col : proc_row + proc_col * proc_rows_;
 
     TA_ASSERT(proc < procs_);
 
@@ -149,48 +166,59 @@ class CyclicPmap : public Pmap {
   }
 
  private:
+#if 0
   virtual void advance(size_type& value, bool increment) const {
-    if (increment) {
-      auto row = value / cols_;
-      const auto row_end = (row + 1) * cols_;
-      value += proc_cols_;
-      if (value >= row_end) {  // if past the end of row ...
-        row += proc_rows_;
-        if (row < rows_) {                  // still have tiles
-          value = row * cols_ + rank_col_;  // first tile in this row
-        } else                              // done
-          value = size_;
-      }
-    } else {  // decrement
-      auto row = value / cols_;
-      const auto row_begin = row * cols_;
-      if (value < proc_cols_) {  // protect against unsigned wraparound
-        return;
-      }
-      value -= proc_cols_;
-      if (value < row_begin) {  // if past the beginning of row ...
-        if (row < proc_rows_)   // protect against unsigned wraparound
+    if constexpr (is_row_major_v) {
+      if (increment) {
+        auto row = value / cols_;
+        const auto row_end = (row + 1) * cols_;
+        value += proc_cols_;
+        if (value >= row_end) {  // if past the end of row ...
+          row += proc_rows_;
+          if (row < rows_) {                              // still have tiles
+            value = coordinate_to_index(row, rank_col_);  // first tile in this row
+          } else                                          // done
+            value = size_;
+        }
+      } else {  // decrement
+        auto row = value / cols_;
+        const auto row_begin = row * cols_;
+        if (value < proc_cols_) {  // protect against unsigned wraparound
           return;
-        row -= proc_rows_;
-        value = row * cols_ + rank_col_ +
-                (local_cols_ - 1) * proc_cols_;  // last tile in this row
+        }
+        value -= proc_cols_;
+        if (value < row_begin) {  // if past the beginning of row ...
+          if (row < proc_rows_)   // protect against unsigned wraparound
+            return;
+          row -= proc_rows_;
+          value = coordinate_to_index(row, rank_col_) +
+                  (local_cols_ - 1) * proc_cols_;  // last tile in this row
+        }
       }
+    } else {
+      Pmap::advance(value, increment);
     }
   }
+#endif
 
  public:
+
+#if 0
   virtual const_iterator begin() const {
+    const auto proc_index = coordinate_to_index(rank_row_, rank_col_);
     return this->local_size_ > 0
-               ? Iterator(*this, rank_row_ * cols_ + rank_col_, this->size_,
-                          rank_row_ * cols_ + rank_col_, false, true)
+               ? Iterator(*this, proc_index, this->size_, proc_index, 
+                          false, is_row_major_v)
                : end();  // make end() if empty
   }
   virtual const_iterator end() const {
+    const auto proc_index = coordinate_to_index(rank_row_, rank_col_);
     return this->local_size_ > 0
-               ? Iterator(*this, rank_row_ * cols_ + rank_col_, this->size_,
-                          this->size_, false, true)
-               : Iterator(*this, 0, this->size_, this->size_, false, true);
+               ? Iterator(*this, proc_index, this->size_,
+                          this->size_, false, is_row_major_v)
+               : Iterator(*this, 0, this->size_, this->size_, false, is_row_major_v);
   }
+#endif
 
 };  // class CyclicPmap
 
diff --git a/src/TiledArray/pmap/pmap.h b/src/TiledArray/pmap/pmap.h
index 0682901bab..528321f3f9 100644
--- a/src/TiledArray/pmap/pmap.h
+++ b/src/TiledArray/pmap/pmap.h
@@ -150,7 +150,7 @@ class Pmap {
   /// \name Iteration
   /// @{
 
- private:
+ protected:
   /// customizes how to iterate over local indices
 
   /// overload this and construct Iterator with `use_pmap_advance=true`
diff --git a/src/TiledArray/pmap/user_pmap.h b/src/TiledArray/pmap/user_pmap.h
index 50966f5744..0a74b660d7 100644
--- a/src/TiledArray/pmap/user_pmap.h
+++ b/src/TiledArray/pmap/user_pmap.h
@@ -87,12 +87,12 @@ class UserPmap : public Pmap {
 
   virtual bool known_local_size() const { return known_local_size_; }
 
-  virtual const_iterator begin() const {
-    return Iterator(*this, 0, this->size_, 0, false);
-  }
-  virtual const_iterator end() const {
-    return Iterator(*this, 0, this->size_, this->size_, false);
-  }
+  //virtual const_iterator begin() const {
+  //  return Iterator(*this, 0, this->size_, 0, false);
+  //}
+  //virtual const_iterator end() const {
+  //  return Iterator(*this, 0, this->size_, this->size_, false);
+  //}
 
  private:
   bool known_local_size_ = false;
diff --git a/src/TiledArray/proc_grid.h b/src/TiledArray/proc_grid.h
index a401e0ac1e..4594553700 100644
--- a/src/TiledArray/proc_grid.h
+++ b/src/TiledArray/proc_grid.h
@@ -515,7 +515,7 @@ class ProcGrid {
   std::shared_ptr<Pmap> make_pmap() const {
     TA_ASSERT(world_);
 
-    return std::make_shared<CyclicPmap>(*world_, rows_, cols_, proc_rows_,
+    return std::make_shared<CyclicPmap<>>(*world_, rows_, cols_, proc_rows_,
                                         proc_cols_);
   }
 
@@ -528,7 +528,7 @@ class ProcGrid {
   std::shared_ptr<Pmap> make_col_phase_pmap(const size_type rows) const {
     TA_ASSERT(world_);
 
-    return std::make_shared<CyclicPmap>(*world_, rows, cols_, proc_rows_,
+    return std::make_shared<CyclicPmap<>>(*world_, rows, cols_, proc_rows_,
                                         proc_cols_);
   }
 
@@ -541,7 +541,7 @@ class ProcGrid {
   std::shared_ptr<Pmap> make_row_phase_pmap(const size_type cols) const {
     TA_ASSERT(world_);
 
-    return std::make_shared<CyclicPmap>(*world_, rows_, cols, proc_rows_,
+    return std::make_shared<CyclicPmap<>>(*world_, rows_, cols, proc_rows_,
                                         proc_cols_);
   }
 };  // class Grid
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 217c522018..6360082c6c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -98,9 +98,16 @@ set(ta_test_src_files  ta_test.cpp
   # t_tot_tot_contract_.cpp
   # tot_tot_tot_contract_.cpp
     einsum.cpp
-    linalg.cpp
+    linalg/linalg.cpp
+    linalg/non_dist.cpp
     cp.cpp
 )
+if(ENABLE_SCALAPACK)
+    list(APPEND ta_test_src_files linalg/scalapack.cpp)
+endif()
+if(ENABLE_SLATE)
+    list(APPEND ta_test_src_files linalg/slate.cpp)
+endif()
 
 if(CUDA_FOUND)
     list(APPEND ta_test_src_files librett.cpp expressions_cuda_um.cpp tensor_um.cpp)
diff --git a/tests/cyclic_pmap.cpp b/tests/cyclic_pmap.cpp
index 509b9f92bf..aa387150a2 100644
--- a/tests/cyclic_pmap.cpp
+++ b/tests/cyclic_pmap.cpp
@@ -32,7 +32,21 @@ struct CyclicPmapFixture {
 
 BOOST_FIXTURE_TEST_SUITE(cyclic_pmap_suite, CyclicPmapFixture)
 
-BOOST_AUTO_TEST_CASE(constructor) {
+template <TiledArray::detail::CyclicPmapOrder Order>
+struct cyclic_pmap_order_wrapper {
+  static constexpr auto value = Order;
+};
+
+
+using cyclic_pmap_orders = boost::mpl::list<
+    cyclic_pmap_order_wrapper<TiledArray::detail::CyclicPmapOrder::RowMajor>,
+    cyclic_pmap_order_wrapper<TiledArray::detail::CyclicPmapOrder::ColMajor>
+>;
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(constructor, Order, cyclic_pmap_orders) {
+
+  using pmap_type = TiledArray::detail::CyclicPmap<Order::value>;
+
   for (ProcessID x = 1ul; x <= GlobalFixture::world->size(); ++x) {
     for (ProcessID y = 1ul; y <= GlobalFixture::world->size(); ++y) {
       // Compute the limits for process rows
@@ -48,10 +62,9 @@ BOOST_AUTO_TEST_CASE(constructor) {
                                 max_proc_rows));
       const std::size_t p_cols = GlobalFixture::world->size() / p_rows;
 
-      BOOST_REQUIRE_NO_THROW(TiledArray::detail::CyclicPmap pmap(
-          *GlobalFixture::world, x, y, p_rows, p_cols));
-      TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows,
-                                          p_cols);
+      BOOST_REQUIRE_NO_THROW( pmap_type pmap( *GlobalFixture::world, x, y, 
+          p_rows, p_cols));
+      pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols);
       BOOST_CHECK_EQUAL(pmap.rank(), GlobalFixture::world->rank());
       BOOST_CHECK_EQUAL(pmap.procs(), GlobalFixture::world->size());
       BOOST_CHECK_EQUAL(pmap.size(), x * y);
@@ -60,32 +73,27 @@ BOOST_AUTO_TEST_CASE(constructor) {
 
   ProcessID size = GlobalFixture::world->size();
 
-  BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world,
-                                                        0ul, 10ul, 1, 1),
+  BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 0ul, 10ul, 1, 1),
                     TiledArray::Exception);
-  BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world,
-                                                        10ul, 0ul, 1, 1),
+  BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 0ul, 1, 1),
                     TiledArray::Exception);
-  BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world,
-                                                        10ul, 10ul, 0, 1),
+  BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 0, 1),
                     TiledArray::Exception);
-  BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world,
-                                                        10ul, 10ul, 1, 0),
+  BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 1, 0),
                     TiledArray::Exception);
-  BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(
-                        *GlobalFixture::world, 10ul, 10ul, size * 2, 1),
+  BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, size * 2, 1),
                     TiledArray::Exception);
-  BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(
-                        *GlobalFixture::world, 10ul, 10ul, 1, size * 2),
+  BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, 1, size * 2),
                     TiledArray::Exception);
   if (size > 1) {
-    BOOST_CHECK_THROW(TiledArray::detail::CyclicPmap pmap(
-                          *GlobalFixture::world, 10ul, 10ul, size, size),
+    BOOST_CHECK_THROW(pmap_type pmap(*GlobalFixture::world, 10ul, 10ul, size, size),
                       TiledArray::Exception);
   }
 }
 
-BOOST_AUTO_TEST_CASE(owner) {
+BOOST_AUTO_TEST_CASE_TEMPLATE(owner, Order, cyclic_pmap_orders) {
+
+  using pmap_type = TiledArray::detail::CyclicPmap<Order::value>;
   const std::size_t rank = GlobalFixture::world->rank();
   const std::size_t size = GlobalFixture::world->size();
 
@@ -108,8 +116,7 @@ BOOST_AUTO_TEST_CASE(owner) {
       const std::size_t p_cols = GlobalFixture::world->size() / p_rows;
 
       const std::size_t tiles = x * y;
-      TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows,
-                                          p_cols);
+      pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols);
 
       for (std::size_t tile = 0; tile < tiles; ++tile) {
         std::fill_n(p_owner, size, 0);
@@ -121,6 +128,18 @@ BOOST_AUTO_TEST_CASE(owner) {
         // Make sure everyone agrees on who owns what.
         for (std::size_t p = 0ul; p < size; ++p)
           BOOST_CHECK_EQUAL(p_owner[p], p_owner[rank]);
+
+        size_t true_owner;
+        if(Order::value == TiledArray::detail::CyclicPmapOrder::RowMajor) {
+          auto proc_row = (tile / pmap.ncols()) % pmap.nrows_proc();
+          auto proc_col = (tile % pmap.ncols()) % pmap.ncols_proc();
+          true_owner = proc_row * pmap.ncols_proc() + proc_col;
+        } else {
+          auto proc_row = (tile % pmap.nrows()) % pmap.nrows_proc();
+          auto proc_col = (tile / pmap.nrows()) % pmap.ncols_proc();
+          true_owner = proc_row + proc_col * pmap.nrows_proc();
+        }
+        BOOST_CHECK_EQUAL(p_owner[rank], true_owner);
       }
     }
   }
@@ -128,7 +147,8 @@ BOOST_AUTO_TEST_CASE(owner) {
   delete[] p_owner;
 }
 
-BOOST_AUTO_TEST_CASE(local_size) {
+BOOST_AUTO_TEST_CASE_TEMPLATE(local_size, Order, cyclic_pmap_orders) {
+  using pmap_type = TiledArray::detail::CyclicPmap<Order::value>;
   for (std::size_t x = 1ul; x < 10ul; ++x) {
     for (std::size_t y = 1ul; y < 10ul; ++y) {
       // Compute the limits for process rows
@@ -145,8 +165,7 @@ BOOST_AUTO_TEST_CASE(local_size) {
       const std::size_t p_cols = GlobalFixture::world->size() / p_rows;
 
       const std::size_t tiles = x * y;
-      TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows,
-                                          p_cols);
+      pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols);
 
       std::size_t total_size = pmap.local_size();
       GlobalFixture::world->gop.sum(total_size);
@@ -159,7 +178,8 @@ BOOST_AUTO_TEST_CASE(local_size) {
   }
 }
 
-BOOST_AUTO_TEST_CASE(local_group) {
+BOOST_AUTO_TEST_CASE_TEMPLATE(local_group, Order, cyclic_pmap_orders) {
+  using pmap_type = TiledArray::detail::CyclicPmap<Order::value>;
   ProcessID tile_owners[100];
 
   for (std::size_t x = 1ul; x < 10ul; ++x) {
@@ -178,18 +198,15 @@ BOOST_AUTO_TEST_CASE(local_group) {
       const std::size_t p_cols = GlobalFixture::world->size() / p_rows;
 
       const std::size_t tiles = x * y;
-      TiledArray::detail::CyclicPmap pmap(*GlobalFixture::world, x, y, p_rows,
-                                          p_cols);
+      pmap_type pmap(*GlobalFixture::world, x, y, p_rows, p_cols);
 
       // Check that all local elements map to this rank
-      for (detail::CyclicPmap::const_iterator it = pmap.begin();
-           it != pmap.end(); ++it) {
+      for (auto it = pmap.begin(); it != pmap.end(); ++it) {
         BOOST_CHECK_EQUAL(pmap.owner(*it), GlobalFixture::world->rank());
       }
 
       std::fill_n(tile_owners, tiles, 0);
-      for (detail::CyclicPmap::const_iterator it = pmap.begin();
-           it != pmap.end(); ++it) {
+      for (auto it = pmap.begin(); it != pmap.end(); ++it) {
         tile_owners[*it] += GlobalFixture::world->rank();
       }
 
diff --git a/tests/linalg.cpp b/tests/linalg.cpp
deleted file mode 100644
index 5c84d0b5e4..0000000000
--- a/tests/linalg.cpp
+++ /dev/null
@@ -1,1079 +0,0 @@
-#include <tiledarray.h>
-#include <random>
-#include "TiledArray/config.h"
-//#include "range_fixture.h"
-#include "unit_test_config.h"
-
-#include "TiledArray/math/linalg/non-distributed/cholesky.h"
-#include "TiledArray/math/linalg/non-distributed/heig.h"
-#include "TiledArray/math/linalg/non-distributed/lu.h"
-#include "TiledArray/math/linalg/non-distributed/svd.h"
-
-#include "TiledArray/math/linalg/cholesky.h"
-#include "TiledArray/math/linalg/heig.h"
-#include "TiledArray/math/linalg/lu.h"
-#include "TiledArray/math/linalg/svd.h"
-
-namespace TA = TiledArray;
-namespace non_dist = TA::math::linalg::non_distributed;
-
-#if TILEDARRAY_HAS_SCALAPACK
-namespace scalapack = TA::math::linalg::scalapack;
-#include "TiledArray/math/linalg/scalapack/all.h"
-#define TILEDARRAY_SCALAPACK_TEST(F, E)                           \
-  GlobalFixture::world->gop.fence();                              \
-  compare("TiledArray::scalapack", non_dist::F, scalapack::F, E); \
-  GlobalFixture::world->gop.fence();                              \
-  compare("TiledArray", non_dist::F, TiledArray::F, E);
-#else
-#define TILEDARRAY_SCALAPACK_TEST(...)
-#endif
-
-#if TILEDARRAY_HAS_TTG
-#include "TiledArray/math/linalg/ttg/cholesky.h"
-#define TILEDARRAY_TTG_TEST(F, E)    \
-  GlobalFixture::world->gop.fence(); \
-  compare("TiledArray::ttg", non_dist::F, TiledArray::math::linalg::ttg::F, E);
-#else
-#define TILEDARRAY_TTG_TEST(...)
-#endif
-
-struct ReferenceFixture {
-  size_t N;
-  std::vector<double> htoeplitz_vector;
-  std::vector<double> exact_evals;
-
-  inline double matrix_element_generator(int64_t i, int64_t j) {
-#if 0
-    // Generates a Hankel matrix: absurd condition number
-    return i+j;
-#else
-    // Generates a Circulant matrix: good condition number
-    return htoeplitz_vector[std::abs(i - j)];
-#endif
-  }
-
-  template <typename Tile>
-  inline double make_ta_reference(Tile& t, TA::Range const& range) {
-    t = Tile(range, 0.0);
-    auto lo = range.lobound_data();
-    auto up = range.upbound_data();
-    for (auto m = lo[0]; m < up[0]; ++m) {
-      for (auto n = lo[1]; n < up[1]; ++n) {
-        t(m, n) = matrix_element_generator(m, n);
-      }
-    }
-
-    return norm(t);
-  };
-
-  ReferenceFixture(int64_t N = 1000)
-      : N(N), htoeplitz_vector(N), exact_evals(N) {
-    // Generate an hermitian Circulant vector
-    std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0);
-    htoeplitz_vector[0] = 100;
-    std::default_random_engine gen(0);
-    std::uniform_real_distribution<> dist(0., 1.);
-    for (int64_t i = 1; i <= (N / 2); ++i) {
-      double val = dist(gen);
-      htoeplitz_vector[i] = val;
-      htoeplitz_vector[N - i] = val;
-    }
-
-    // Compute exact eigenvalues
-    const double ff = 2. * M_PI / N;
-    for (int64_t j = 0; j < N; ++j) {
-      double val = htoeplitz_vector[0];
-      ;
-      for (int64_t k = 1; k < N; ++k)
-        val += htoeplitz_vector[N - k] * std::cos(ff * j * k);
-      exact_evals[j] = val;
-    }
-
-    std::sort(exact_evals.begin(), exact_evals.end());
-  }
-};
-
-struct LinearAlgebraFixture : ReferenceFixture {
-#if TILEDARRAY_HAS_SCALAPACK
-
-  blacspp::Grid grid;
-  scalapack::BlockCyclicMatrix<double> ref_matrix;  // XXX: Just double is fine?
-
-  LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128)
-      : ReferenceFixture(N),
-        grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)),  // XXX: Is this safe?
-        ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) {
-    for (size_t i = 0; i < N; ++i) {
-      for (size_t j = 0; j < N; ++j) {
-        if (ref_matrix.dist().i_own(i, j)) {
-          auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j);
-          ref_matrix.local_mat()(i_local, j_local) =
-              matrix_element_generator(i, j);
-        }
-      }
-    }
-  }
-#endif
-
-  template <class A>
-  static void compare(const char* context, const A& non_dist, const A& result,
-                      double e) {
-    // clang-format off
-    BOOST_TEST_CONTEXT(context)
-    ;
-    // clang-format on
-    auto diff_with_non_dist = (non_dist("i,j") - result("i,j")).norm().get();
-    BOOST_CHECK_SMALL(diff_with_non_dist, e);
-  }
-
-  template <typename T, typename F, int... Is>
-  static void for_each_pair_of_tuples_impl(T&& t1, T&& t2, F f,
-                                           std::integer_sequence<int, Is...>) {
-    auto l = {(f(std::get<Is>(t1), std::get<Is>(t2)), 0)...};
-  }
-
-  template <typename... Ts, typename F>
-  static void for_each_pair_of_tuples(std::tuple<Ts...> const& t1,
-                                      std::tuple<Ts...> const& t2, F f) {
-    for_each_pair_of_tuples_impl(
-        t1, t2, f, std::make_integer_sequence<int, sizeof...(Ts)>());
-  }
-
-  template <class... As>
-  static void compare(const char* context, const std::tuple<As...>& non_dist,
-                      const std::tuple<As...>& result, double e) {
-    for_each_pair_of_tuples(non_dist, result, [&](auto& arg1, auto& arg2) {
-      compare(context, arg1, arg2, e);
-    });
-  }
-};
-
-TA::TiledRange gen_trange(size_t N, const std::vector<size_t>& TA_NBs) {
-  TA_ASSERT(TA_NBs.size() > 0);
-
-  std::default_random_engine gen(0);
-  std::uniform_int_distribution<> dist(0, TA_NBs.size() - 1);
-  auto rand_indx = [&]() { return dist(gen); };
-  auto rand_nb = [&]() { return TA_NBs[rand_indx()]; };
-
-  std::vector<size_t> t_boundaries = {0};
-  auto TA_NB = rand_nb();
-  while (t_boundaries.back() + TA_NB < N) {
-    t_boundaries.emplace_back(t_boundaries.back() + TA_NB);
-    TA_NB = rand_nb();
-  }
-  t_boundaries.emplace_back(N);
-
-  std::vector<TA::TiledRange1> ranges(
-      2, TA::TiledRange1(t_boundaries.begin(), t_boundaries.end()));
-
-  return TA::TiledRange(ranges.begin(), ranges.end());
-};
-
-BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture)
-
-#if TILEDARRAY_HAS_SCALAPACK
-
-BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) {
-  GlobalFixture::world->gop.fence();
-
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
-
-  auto NB = ref_matrix.dist().nb();
-
-  auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  GlobalFixture::world->gop.fence();
-  auto test_ta =
-      scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
-  GlobalFixture::world->gop.fence();
-
-  auto norm_diff =
-      (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
-
-  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
-  GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) {
-  GlobalFixture::world->gop.fence();
-
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
-
-  auto NB = ref_matrix.dist().nb();
-
-  auto trange = gen_trange(N, {static_cast<size_t>(NB / 2)});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  GlobalFixture::world->gop.fence();
-  auto test_ta =
-      scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
-  GlobalFixture::world->gop.fence();
-
-  auto norm_diff =
-      (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
-
-  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
-  GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) {
-  GlobalFixture::world->gop.fence();
-
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
-
-  auto NB = ref_matrix.dist().nb();
-
-  auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  GlobalFixture::world->gop.fence();
-  auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
-  GlobalFixture::world->gop.fence();
-
-  double local_norm_diff =
-      (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
-  local_norm_diff *= local_norm_diff;
-
-  double norm_diff;
-  MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
-                MPI_COMM_WORLD);
-
-  norm_diff = std::sqrt(norm_diff);
-
-  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
-  GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) {
-  GlobalFixture::world->gop.fence();
-
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
-
-  [[maybe_unused]] auto NB = ref_matrix.dist().nb();
-
-  auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  GlobalFixture::world->gop.fence();
-  auto test_ta =
-      scalapack::block_cyclic_to_array<TA::TArray<double>>(ref_matrix, trange);
-  GlobalFixture::world->gop.fence();
-
-  auto norm_diff =
-      (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
-
-  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
-  GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) {
-  GlobalFixture::world->gop.fence();
-
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
-
-  auto NB = ref_matrix.dist().nb();
-
-  auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  GlobalFixture::world->gop.fence();
-  auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
-  GlobalFixture::world->gop.fence();
-
-  double local_norm_diff =
-      (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
-  local_norm_diff *= local_norm_diff;
-
-  double norm_diff;
-  MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
-                MPI_COMM_WORLD);
-
-  norm_diff = std::sqrt(norm_diff);
-
-  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
-  GlobalFixture::world->gop.fence();
-};
-
-BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) {
-  GlobalFixture::world->gop.fence();
-
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
-
-  auto NB = ref_matrix.dist().nb();
-
-  auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
-  // test with TA and btas tile
-  using typelist_t =
-      std::tuple<TA::Tensor<double>, btas::Tensor<double, TA::Range>>;
-  typelist_t typevals;
-
-  auto test = [&](const auto& typeval_ref) {
-    using Tile = std::decay_t<decltype(typeval_ref)>;
-    using Array = TA::DistArray<Tile, TA::SparsePolicy>;
-
-    auto ref_ta = TA::make_array<Array>(
-        *GlobalFixture::world, trange,
-        [this](Tile& t, TA::Range const& range) -> double {
-          return this->make_ta_reference(t, range);
-        });
-
-    GlobalFixture::world->gop.fence();
-    auto test_ta = scalapack::block_cyclic_to_array<Array>(ref_matrix, trange);
-    GlobalFixture::world->gop.fence();
-
-    auto norm_diff =
-        (ref_ta("i,j") - test_ta("i,j")).norm(*GlobalFixture::world).get();
-
-    BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
-    GlobalFixture::world->gop.fence();
-  };
-
-  test(std::get<0>(typevals));
-  test(std::get<1>(typevals));
-};
-
-BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) {
-  GlobalFixture::world->gop.fence();
-
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
-
-  auto NB = ref_matrix.dist().nb();
-
-  auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
-  // test with TA and btas tile
-  using typelist_t =
-      std::tuple<TA::Tensor<double>, btas::Tensor<double, TA::Range>>;
-  typelist_t typevals;
-
-  auto test = [&](const auto& typeval_ref) {
-    using Tile = std::decay_t<decltype(typeval_ref)>;
-    using Array = TA::DistArray<Tile, TA::SparsePolicy>;
-
-    auto ref_ta = TA::make_array<Array>(
-        *GlobalFixture::world, trange,
-        [this](Tile& t, TA::Range const& range) -> double {
-          return this->make_ta_reference(t, range);
-        });
-
-    GlobalFixture::world->gop.fence();
-    auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
-    GlobalFixture::world->gop.fence();
-
-    double local_norm_diff =
-        (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
-    local_norm_diff *= local_norm_diff;
-
-    double norm_diff;
-    MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
-                  MPI_COMM_WORLD);
-
-    norm_diff = std::sqrt(norm_diff);
-
-    BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
-    GlobalFixture::world->gop.fence();
-  };
-
-  test(std::get<0>(typevals));
-  test(std::get<1>(typevals));
-};
-
-BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) {
-  GlobalFixture::world->gop.fence();
-
-  auto [M, N] = ref_matrix.dims();
-  BOOST_REQUIRE_EQUAL(M, N);
-
-  auto NB = ref_matrix.dist().nb();
-
-  auto trange = gen_trange(N, {static_cast<size_t>(NB)});
-
-  const TA::TArray<double> ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  GlobalFixture::world->gop.fence();
-  auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
-  GlobalFixture::world->gop.fence();
-
-  double local_norm_diff =
-      (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
-  local_norm_diff *= local_norm_diff;
-
-  double norm_diff;
-  MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
-                MPI_COMM_WORLD);
-
-  norm_diff = std::sqrt(norm_diff);
-
-  BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
-
-  GlobalFixture::world->gop.fence();
-};
-
-#endif  // TILEDARRAY_HAS_SCALAPACK
-
-BOOST_AUTO_TEST_CASE(heig_same_tiling) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  const auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto [evals, evecs] = non_dist::heig(ref_ta);
-  auto [evals_non_dist, evecs_non_dist] = non_dist::heig(ref_ta);
-  // auto evals = heig( ref_ta );
-
-  BOOST_CHECK(evecs.trange() == ref_ta.trange());
-
-  // check eigenvectors against non_dist only, for now ...
-  decltype(evecs) evecs_error;
-  evecs_error("i,j") = evecs_non_dist("i,j") - evecs("i,j");
-  // TODO need to fix phases of the eigenvectors to be able to compare ...
-  // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(),
-  //                  N * N * std::numeric_limits<double>::epsilon());
-
-  // Check eigenvalue correctness
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  for (int64_t i = 0; i < N; ++i) {
-    BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
-    BOOST_CHECK_SMALL(std::abs(evals_non_dist[i] - exact_evals[i]), tol);
-  }
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(heig_diff_tiling) {
-  GlobalFixture::world->gop.fence();
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto new_trange = gen_trange(N, {64ul});
-  auto [evals, evecs] = non_dist::heig(ref_ta, new_trange);
-  auto [evals_non_dist, evecs_non_dist] = non_dist::heig(ref_ta, new_trange);
-
-  BOOST_CHECK(evecs.trange() == new_trange);
-
-  // check eigenvectors against non_dist only, for now ...
-  decltype(evecs) evecs_error;
-  evecs_error("i,j") = evecs_non_dist("i,j") - evecs("i,j");
-  // TODO need to fix phases of the eigenvectors to be able to compare ...
-  // BOOST_CHECK_SMALL(evecs_error("i,j").norm().get(),
-  //                  N * N * std::numeric_limits<double>::epsilon());
-
-  // Check eigenvalue correctness
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  for (int64_t i = 0; i < N; ++i) {
-    BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
-    BOOST_CHECK_SMALL(std::abs(evals_non_dist[i] - exact_evals[i]), tol);
-  }
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(heig_generalized) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TSpArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto dense_iden = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        t = TA::Tensor<double>(range, 0.0);
-        auto lo = range.lobound_data();
-        auto up = range.upbound_data();
-        for (auto m = lo[0]; m < up[0]; ++m)
-          for (auto n = lo[1]; n < up[1]; ++n)
-            if (m == n) t(m, n) = 1.;
-
-        return t.norm();
-      });
-
-  GlobalFixture::world->gop.fence();
-  auto [evals, evecs] = non_dist::heig(ref_ta, dense_iden);
-  // auto evals = heig( ref_ta );
-
-  BOOST_CHECK(evecs.trange() == ref_ta.trange());
-
-  // TODO: Check validity of eigenvectors, not crucial for the time being
-
-  // Check eigenvalue correctness
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  for (int64_t i = 0; i < N; ++i)
-    BOOST_CHECK_SMALL(std::abs(evals[i] - exact_evals[i]), tol);
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(cholesky) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto A = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto L = non_dist::cholesky(A);
-
-  BOOST_CHECK(L.trange() == A.trange());
-
-  decltype(A) A_minus_LLt;
-  A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj();
-
-  const double epsilon = N * N * std::numeric_limits<double>::epsilon();
-
-  BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), epsilon);
-
-  // check against NON_DIST also
-  auto L_ref = non_dist::cholesky(A);
-  decltype(L) L_diff;
-  L_diff("i,j") = L("i,j") - L_ref("i,j");
-
-  BOOST_CHECK_SMALL(L_diff("i,j").norm().get(), epsilon);
-
-  TILEDARRAY_SCALAPACK_TEST(cholesky(A), epsilon);
-
-  TILEDARRAY_TTG_TEST(cholesky(A), epsilon);
-}
-
-BOOST_AUTO_TEST_CASE(cholesky_linv) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto A = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-  decltype(A) Acopy = A.clone();
-
-  auto Linv = TA::cholesky_linv(A);
-
-  BOOST_CHECK(Linv.trange() == A.trange());
-
-  TA::TArray<double> tmp(*GlobalFixture::world, trange);
-  tmp("i,j") = Linv("i,k") * A("k,j");
-  A("i,j") = tmp("i,k") * Linv("j,k");
-
-  TA::foreach_inplace(A, [](TA::Tensor<double>& tile) {
-    auto range = tile.range();
-    auto lo = range.lobound_data();
-    auto up = range.upbound_data();
-    for (auto m = lo[0]; m < up[0]; ++m)
-      for (auto n = lo[1]; n < up[1]; ++n)
-        if (m == n) {
-          tile(m, n) -= 1.;
-        }
-  });
-
-  double epsilon = N * N * std::numeric_limits<double>::epsilon();
-  double norm = A("i,j").norm().get();
-
-  BOOST_CHECK_SMALL(norm, epsilon);
-
-  TILEDARRAY_SCALAPACK_TEST(cholesky_linv<false>(Acopy), epsilon);
-
-  TILEDARRAY_TTG_TEST(cholesky_linv<false>(Acopy), epsilon);
-}
-
-BOOST_AUTO_TEST_CASE(cholesky_linv_retl) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto A = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto [L, Linv] = TA::cholesky_linv<true>(A);
-
-  BOOST_CHECK(Linv.trange() == A.trange());
-  BOOST_CHECK(L.trange() == A.trange());
-
-  TA::TArray<double> tmp(*GlobalFixture::world, trange);
-  tmp("i,j") = Linv("i,k") * L("k,j");
-
-  TA::foreach_inplace(tmp, [](TA::Tensor<double>& tile) {
-    auto range = tile.range();
-    auto lo = range.lobound_data();
-    auto up = range.upbound_data();
-    for (auto m = lo[0]; m < up[0]; ++m)
-      for (auto n = lo[1]; n < up[1]; ++n)
-        if (m == n) {
-          tile(m, n) -= 1.;
-        }
-  });
-
-  double epsilon = N * N * std::numeric_limits<double>::epsilon();
-  double norm = tmp("i,j").norm(*GlobalFixture::world).get();
-
-  BOOST_CHECK_SMALL(norm, epsilon);
-
-  TILEDARRAY_SCALAPACK_TEST(cholesky_linv<true>(A), epsilon);
-
-  TILEDARRAY_TTG_TEST(cholesky_linv<true>(A), epsilon);
-}
-
-BOOST_AUTO_TEST_CASE(cholesky_solve) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto A = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto iden = non_dist::cholesky_solve(A, A);
-  BOOST_CHECK(iden.trange() == A.trange());
-
-  auto iden_non_dist = non_dist::cholesky_solve(A, A);
-  decltype(iden) iden_error;
-  iden_error("i,j") = iden("i,j") - iden_non_dist("i,j");
-  BOOST_CHECK_SMALL(iden_error("i,j").norm().get(),
-                    N * N * std::numeric_limits<double>::epsilon());
-
-  TA::foreach_inplace(iden, [](TA::Tensor<double>& tile) {
-    auto range = tile.range();
-    auto lo = range.lobound_data();
-    auto up = range.upbound_data();
-    for (auto m = lo[0]; m < up[0]; ++m)
-      for (auto n = lo[1]; n < up[1]; ++n)
-        if (m == n) {
-          tile(m, n) -= 1.;
-        }
-  });
-
-  double norm = iden("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits<double>::epsilon());
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(cholesky_lsolve) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto A = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  // Should produce X = L**H
-  auto [L, X] = non_dist::cholesky_lsolve(TA::NoTranspose, A, A);
-  BOOST_CHECK(X.trange() == A.trange());
-  BOOST_CHECK(L.trange() == A.trange());
-
-  // first, test against NON_DIST
-  auto [L_non_dist, X_non_dist] =
-      non_dist::cholesky_lsolve(TA::NoTranspose, A, A);
-  decltype(L) L_error;
-  L_error("i,j") = L("i,j") - L_non_dist("i,j");
-  BOOST_CHECK_SMALL(L_error("i,j").norm().get(),
-                    N * N * std::numeric_limits<double>::epsilon());
-  decltype(X) X_error;
-  X_error("i,j") = X("i,j") - X_non_dist("i,j");
-  BOOST_CHECK_SMALL(X_error("i,j").norm().get(),
-                    N * N * std::numeric_limits<double>::epsilon());
-
-  X("i,j") -= L("j,i");
-
-  double norm = X("i,j").norm(*GlobalFixture::world).get();
-  BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits<double>::epsilon());
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(lu_solve) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto iden = non_dist::lu_solve(ref_ta, ref_ta);
-
-  BOOST_CHECK(iden.trange() == ref_ta.trange());
-
-  TA::foreach_inplace(iden, [](TA::Tensor<double>& tile) {
-    auto range = tile.range();
-    auto lo = range.lobound_data();
-    auto up = range.upbound_data();
-    for (auto m = lo[0]; m < up[0]; ++m)
-      for (auto n = lo[1]; n < up[1]; ++n)
-        if (m == n) {
-          tile(m, n) -= 1.;
-        }
-  });
-
-  double epsilon = N * N * std::numeric_limits<double>::epsilon();
-  double norm = iden("i,j").norm(*GlobalFixture::world).get();
-
-  BOOST_CHECK_SMALL(norm, epsilon);
-  TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon);
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(lu_inv) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  TA::TArray<double> iden(*GlobalFixture::world, trange);
-
-  auto Ainv = non_dist::lu_inv(ref_ta);
-  iden("i,j") = Ainv("i,k") * ref_ta("k,j");
-
-  BOOST_CHECK(iden.trange() == ref_ta.trange());
-
-  TA::foreach_inplace(iden, [](TA::Tensor<double>& tile) {
-    auto range = tile.range();
-    auto lo = range.lobound_data();
-    auto up = range.upbound_data();
-    for (auto m = lo[0]; m < up[0]; ++m)
-      for (auto n = lo[1]; n < up[1]; ++n)
-        if (m == n) {
-          tile(m, n) -= 1.;
-        }
-  });
-
-  double epsilon = N * N * std::numeric_limits<double>::epsilon();
-  double norm = iden("i,j").norm(*GlobalFixture::world).get();
-
-  BOOST_CHECK_SMALL(norm, epsilon);
-  TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon);
-
-  GlobalFixture::world->gop.fence();
-}
-
-#if 1
-BOOST_AUTO_TEST_CASE(svd_values_only) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto S = non_dist::svd<TA::SVD::ValuesOnly>(ref_ta, trange, trange);
-
-  std::vector exact_singular_values = exact_evals;
-  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
-            std::greater<double>());
-
-  // Check singular value correctness
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  for (int64_t i = 0; i < N; ++i)
-    BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol);
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(svd_leftvectors) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto [S, U] = non_dist::svd<TA::SVD::LeftVectors>(ref_ta, trange, trange);
-
-  std::vector exact_singular_values = exact_evals;
-  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
-            std::greater<double>());
-
-  // Check singular value correctness
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  for (int64_t i = 0; i < N; ++i)
-    BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol);
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(svd_rightvectors) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto [S, VT] = non_dist::svd<TA::SVD::RightVectors>(ref_ta, trange, trange);
-
-  std::vector exact_singular_values = exact_evals;
-  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
-            std::greater<double>());
-
-  // Check singular value correctness
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  for (int64_t i = 0; i < N; ++i)
-    BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol);
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(svd_allvectors) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  auto [S, U, VT] = non_dist::svd<TA::SVD::AllVectors>(ref_ta, trange, trange);
-
-  std::vector exact_singular_values = exact_evals;
-  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
-            std::greater<double>());
-
-  // Check singular value correctness
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  for (int64_t i = 0; i < N; ++i)
-    BOOST_CHECK_SMALL(std::abs(S[i] - exact_singular_values[i]), tol);
-  GlobalFixture::world->gop.fence();
-}
-#endif
-
-template <bool use_scalapack, typename ArrayT>
-void householder_qr_q_only_test(const ArrayT& A, double tol) {
-  using value_type = typename ArrayT::element_type;
-
-#if TILEDARRAY_HAS_SCALAPACK
-  auto Q = use_scalapack ? scalapack::householder_qr<true>(A)
-                         : non_dist::householder_qr<true>(A);
-#else
-  static_assert(not use_scalapack);
-  auto Q = non_dist::householder_qr<true>(A);
-#endif
-
-  // Make sure the Q is orthogonal at least
-  TA::TArray<double> Iden;
-  Iden("i,j") = Q("k,i") * Q("k,j");
-  Iden.make_replicated();
-  auto I_eig = TA::array_to_eigen(Iden);
-  const auto N = A.trange().dim(1).extent();
-  BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol);
-}
-
-template <bool use_scalapack, typename ArrayT>
-void householder_qr_test(const ArrayT& A, double tol) {
-#if TILEDARRAY_HAS_SCALAPACK
-  auto [Q, R] = use_scalapack ? scalapack::householder_qr<false>(A)
-                              : non_dist::householder_qr<false>(A);
-#else
-  static_assert(not use_scalapack);
-  auto [Q, R] = non_dist::householder_qr<false>(A);
-#endif
-
-  // Check reconstruction error
-  TA::TArray<double> QR_ERROR;
-  QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j");
-  BOOST_CHECK_SMALL(QR_ERROR("i,j").norm().get(), tol);
-
-  // Check orthonormality of Q
-  TA::TArray<double> Iden;
-  Iden("i,j") = Q("k,i") * Q("k,j");
-  Iden.make_replicated();
-  auto I_eig = TA::array_to_eigen(Iden);
-  const auto N = A.trange().dim(1).extent();
-  BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol);
-}
-
-BOOST_AUTO_TEST_CASE(householder_qr_q_only) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  householder_qr_q_only_test<false>(ref_ta, tol);
-#if TILEDARRAY_HAS_SCALAPACK
-  householder_qr_q_only_test<true>(ref_ta, tol);
-#endif
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(householder_qr) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  householder_qr_test<false>(ref_ta, tol);
-#if TILEDARRAY_HAS_SCALAPACK
-  householder_qr_test<true>(ref_ta, tol);
-#endif
-
-  GlobalFixture::world->gop.fence();
-}
-
-template <typename ArrayT>
-void cholesky_qr_q_only_test(const ArrayT& A, double tol) {
-  using value_type = typename ArrayT::element_type;
-
-  auto Q = TiledArray::math::linalg::cholesky_qr<true>(A);
-
-  // Make sure the Q is orthogonal at least
-  TA::TArray<double> Iden;
-  Iden("i,j") = Q("k,i") * Q("k,j");
-  Iden.make_replicated();
-  auto I_eig = TA::array_to_eigen(Iden);
-  const auto N = A.trange().dim(1).extent();
-  BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol);
-}
-
-template <typename ArrayT>
-void cholesky_qr_test(const ArrayT& A, double tol) {
-  auto [Q, R] = TiledArray::math::linalg::cholesky_qr<false>(A);
-
-  // Check reconstruction error
-  TA::TArray<double> QR_ERROR;
-  QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j");
-  BOOST_CHECK_SMALL(QR_ERROR("i,j").norm().get(), tol);
-
-  // Check orthonormality of Q
-  TA::TArray<double> Iden;
-  Iden("i,j") = Q("k,i") * Q("k,j");
-  Iden.make_replicated();
-  auto I_eig = TA::array_to_eigen(Iden);
-  const auto N = A.trange().dim(1).extent();
-  BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol);
-}
-
-BOOST_AUTO_TEST_CASE(cholesky_qr_q_only) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  cholesky_qr_q_only_test(ref_ta, tol);
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_CASE(cholesky_qr) {
-  GlobalFixture::world->gop.fence();
-
-  auto trange = gen_trange(N, {128ul});
-
-  auto ref_ta = TA::make_array<TA::TArray<double>>(
-      *GlobalFixture::world, trange,
-      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
-        return this->make_ta_reference(t, range);
-      });
-
-  double tol = N * N * std::numeric_limits<double>::epsilon();
-  cholesky_qr_test(ref_ta, tol);
-
-  GlobalFixture::world->gop.fence();
-}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/linalg/cholesky_tests.h b/tests/linalg/cholesky_tests.h
new file mode 100644
index 0000000000..7612a0a57a
--- /dev/null
+++ b/tests/linalg/cholesky_tests.h
@@ -0,0 +1,138 @@
+#pragma once
+#include "linalg_fixture.h"
+#include "compare_utilities.h" // Tensor comparison utilities
+#include "gen_trange.h"        // TiledRange generator
+#include "misc_util.h"         // Misc utilities
+
+// Cholesky (POTRF) Test 
+template <typename Derived>
+void ReferenceFixture<Derived>::cholesky_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto L = Derived::cholesky(A);
+
+  BOOST_CHECK(L.trange() == A.trange());
+
+  decltype(A) A_minus_LLt;
+  A_minus_LLt("i,j") = A("i,j") - L("i,k") * L("j,k").conj();
+
+  const double epsilon = N * N * std::numeric_limits<double>::epsilon();
+
+  BOOST_CHECK_SMALL(A_minus_LLt("i,j").norm().get(), epsilon);
+
+  world.gop.fence();
+}
+
+
+
+// Cholesky LINV (POTRF + TRTRI) Test 
+template <typename Derived>
+void ReferenceFixture<Derived>::cholesky_linv_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto Linv = Derived::template cholesky_linv<false>(A);
+  BOOST_CHECK(Linv.trange() == A.trange());
+
+  TA::TArray<double> tmp(world, trange);
+  tmp("i,j") = Linv("i,k") * A("k,j");
+  A("i,j") = tmp("i,k") * Linv("j,k");
+  subtract_identity_inplace(A); // A -= I
+
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
+  double norm = A("i,j").norm().get();
+  BOOST_CHECK_SMALL(norm, epsilon);
+
+  world.gop.fence();
+}
+
+
+
+// Cholesky LINV (POTRF + TRTRI) + L Return Test 
+template <typename Derived>
+void ReferenceFixture<Derived>::cholesky_linv_retl_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto [L, Linv] = Derived::template cholesky_linv<true>(A);
+
+  BOOST_CHECK(Linv.trange() == A.trange());
+  BOOST_CHECK(L.trange() == A.trange());
+
+  TA::TArray<double> tmp(world, trange);
+  tmp("i,j") = Linv("i,k") * L("k,j");
+  subtract_identity_inplace(tmp); // tmp -= I
+
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
+  double norm = tmp("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, epsilon);
+
+  world.gop.fence();
+}
+
+
+
+// Cholesky Solve (POSV) Test 
+template <typename Derived>
+void ReferenceFixture<Derived>::cholesky_solve_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto iden = Derived::cholesky_solve(A, A);
+  BOOST_CHECK(iden.trange() == A.trange());
+  subtract_identity_inplace(iden); // iden -= I
+
+  const auto epsilon = N * N * std::numeric_limits<double>::epsilon();
+  double norm = iden("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, epsilon);
+
+  world.gop.fence();
+}
+
+
+
+// Cholesky L-Solve (POTRF + TRSM) Test 
+template <typename Derived>
+void ReferenceFixture<Derived>::cholesky_lsolve_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  // Should produce X = L**H
+  auto [L, X] = Derived::cholesky_lsolve(TA::NoTranspose, A, A);
+  BOOST_CHECK(X.trange() == A.trange());
+  BOOST_CHECK(L.trange() == A.trange());
+
+  X("i,j") -= L("j,i");
+
+  const auto epsilon = N * N * std::numeric_limits<double>::epsilon();
+  double norm = X("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, epsilon);
+
+  world.gop.fence();
+}
diff --git a/tests/linalg/compare_utilities.h b/tests/linalg/compare_utilities.h
new file mode 100644
index 0000000000..d16f7d842f
--- /dev/null
+++ b/tests/linalg/compare_utilities.h
@@ -0,0 +1,112 @@
+#pragma once
+#include <tiledarray.h>
+#include "unit_test_config.h"
+
+
+template <class A>
+static void compare_replicated_vector(const char* context, const A& S_nd, const A& S,
+                        double e) {
+  // clang-format off
+  BOOST_TEST_CONTEXT(context)
+  ;
+  // clang-format on
+
+  const size_t n = S.size();
+  BOOST_REQUIRE_EQUAL(n, S_nd.size());
+  for(size_t i = 0; i < n; ++i) {
+    BOOST_CHECK_SMALL(std::abs(S[i] - S_nd[i]), e);
+  }
+}
+
+template <class A>
+static void compare_subspace(const char* context, const A& non_dist, const A& result,
+                             double e) {
+
+  namespace TA = TiledArray;
+  // clang-format off
+  BOOST_TEST_CONTEXT(context)
+  ;
+  // clang-format on
+
+  auto nd_eigen = TA::array_to_eigen(non_dist);
+  auto rs_eigen = TA::array_to_eigen(result);
+
+  Eigen::MatrixXd G; G = nd_eigen.adjoint() * rs_eigen;
+  Eigen::MatrixXd G2; G2 = G.adjoint() * G; // Accounts for phase-flips
+  const auto n = G.rows();
+  auto G2_mI_nrm = (G2 - Eigen::MatrixXd::Identity(n,n)).norm();
+  BOOST_CHECK_SMALL(G2_mI_nrm, e);
+}
+
+template <class A>
+static void compare_eig(const char* context, const A& non_dist, const A& result,
+                        double e) {
+  // clang-format off
+  BOOST_TEST_CONTEXT(context)
+  ;
+  // clang-format on
+
+  auto [evals_nd, evecs_nd] = non_dist;
+  auto [evals,    evecs   ] = result;
+
+  compare_replicated_vector(context, evals_nd, evals, e);
+
+  // The test problem for the unit tests has a non-degenerate spectrum
+  // we only need to check for phase-flips in this check
+  evecs.make_replicated(); // Need to be replicated for Eigen conversion
+  evecs_nd.make_replicated();
+  compare_subspace(context, evecs_nd, evecs, e);
+}
+
+template <TA::SVD::Vectors Vectors, class A>
+static void compare_svd(const char* context, const A& non_dist, const A& result,
+                        double e) {
+  namespace TA = TiledArray;
+
+  // clang-format off
+  BOOST_TEST_CONTEXT(context)
+  ;
+  // clang-format on
+
+  if constexpr (Vectors == TA::SVD::ValuesOnly) {
+    compare_replicated_vector(context, non_dist, result, e);
+    return;
+  } else {
+    const auto& S = std::get<0>(result);
+    const auto& S_nd = std::get<0>(non_dist);
+    compare_replicated_vector(context, S_nd, S, e);
+  }
+
+}
+template <class A>
+static void compare(const char* context, const A& non_dist, const A& result,
+                    double e) {
+  // clang-format off
+  BOOST_TEST_CONTEXT(context)
+  ;
+  // clang-format on
+  auto diff_with_non_dist = (non_dist("i,j") - result("i,j")).norm().get();
+  BOOST_CHECK_SMALL(diff_with_non_dist, e);
+}
+
+template <typename T, typename F, int... Is>
+static void for_each_pair_of_tuples_impl(T&& t1, T&& t2, F f,
+                                         std::integer_sequence<int, Is...>) {
+  auto l = {(f(std::get<Is>(t1), std::get<Is>(t2)), 0)...};
+}
+
+template <typename... Ts, typename F>
+static void for_each_pair_of_tuples(std::tuple<Ts...> const& t1,
+                                    std::tuple<Ts...> const& t2, F f) {
+  for_each_pair_of_tuples_impl(
+      t1, t2, f, std::make_integer_sequence<int, sizeof...(Ts)>());
+}
+
+template <class... As>
+static void compare(const char* context, const std::tuple<As...>& non_dist,
+                    const std::tuple<As...>& result, double e) {
+  for_each_pair_of_tuples(non_dist, result, [&](auto& arg1, auto& arg2) {
+    compare(context, arg1, arg2, e);
+  });
+}
+
diff --git a/tests/linalg/gen_trange.h b/tests/linalg/gen_trange.h
new file mode 100644
index 0000000000..41bda9e796
--- /dev/null
+++ b/tests/linalg/gen_trange.h
@@ -0,0 +1,25 @@
+#pragma once
+#include <tiledarray.h>
+#include <random>
+
+inline TA::TiledRange gen_trange(size_t N, const std::vector<size_t>& TA_NBs) {
+  TA_ASSERT(TA_NBs.size() > 0);
+
+  std::default_random_engine gen(0);
+  std::uniform_int_distribution<> dist(0, TA_NBs.size() - 1);
+  auto rand_indx = [&]() { return dist(gen); };
+  auto rand_nb = [&]() { return TA_NBs[rand_indx()]; };
+
+  std::vector<size_t> t_boundaries = {0};
+  auto TA_NB = rand_nb();
+  while (t_boundaries.back() + TA_NB < N) {
+    t_boundaries.emplace_back(t_boundaries.back() + TA_NB);
+    TA_NB = rand_nb();
+  }
+  t_boundaries.emplace_back(N);
+
+  std::vector<TA::TiledRange1> ranges(
+      2, TA::TiledRange1(t_boundaries.begin(), t_boundaries.end()));
+
+  return TA::TiledRange(ranges.begin(), ranges.end());
+};
diff --git a/tests/linalg/heig_tests.h b/tests/linalg/heig_tests.h
new file mode 100644
index 0000000000..8a96f4d382
--- /dev/null
+++ b/tests/linalg/heig_tests.h
@@ -0,0 +1,108 @@
+#pragma once
+#include "linalg_fixture.h"
+#include "compare_utilities.h" // Tensor comparison utilities
+#include "gen_trange.h"        // TiledRange generator
+#include "misc_util.h"         // Misc utilities
+
+// HEIG Test - INPUT/OUTPUT have the same tiling
+template <typename Derived>
+void ReferenceFixture<Derived>::heig_same_tiling_test(TA::World& world) {
+  world.gop.fence(); // Start epoch
+
+  auto trange = gen_trange(N, {128ul});
+  std::cout << "TRANGE = " << trange << std::endl;
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  // Solve EVP
+  auto [evals, evecs] = Derived::heig(A);
+  BOOST_CHECK(evecs.trange() == A.trange()); // Check for correct trange
+
+  // Check eigenvalue correctness
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  compare_replicated_vector("TiledArray::non_dist", exact_evals, evals, tol);
+
+  // Check eigenvectors
+  TA::TArray<double> tmp;
+  tmp("i,j") = A("i,k") * evecs("k,j");
+  A("i,j")   = evecs("k,i").conj() * tmp("k,j");
+  subtract_diagonal_tensor_inplace(A, evals);
+
+  const auto norm = A("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, tol);
+
+  world.gop.fence(); // End epoch
+}
+
+
+
+// HEIG Test - INPUT/OUTPUT have different tilings
+template <typename Derived>
+void ReferenceFixture<Derived>::heig_diff_tiling_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange     = gen_trange(N, {128ul});
+  auto new_trange = gen_trange(N, {64ul} );
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(*GlobalFixture::world, trange);
+  auto A_new = generate_ta_reference<array_type>(*GlobalFixture::world, new_trange);
+
+  // Solve EVP
+  auto [evals, evecs] = Derived::heig(A, new_trange);
+  BOOST_CHECK(evecs.trange() == new_trange);
+
+  // Check eigenvalue correctness
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  compare_replicated_vector("TiledArray::non_dist", exact_evals, evals, tol);
+
+  // Check eigenvectors
+  TA::TArray<double> tmp;
+  tmp("i,j")   = A_new("i,k") * evecs("k,j");
+  A_new("i,j") = evecs("k,i").conj() * tmp("k,j");
+  subtract_diagonal_tensor_inplace(A_new, evals);
+
+  const auto norm = A_new("i,j").norm(*GlobalFixture::world).get();
+  BOOST_CHECK_SMALL(norm, tol);
+  
+  GlobalFixture::world->gop.fence();
+}
+
+
+
+// Generalized HEIG Test
+template <typename Derived>
+void ReferenceFixture<Derived>::heig_generalized_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  // Generate Identity Tensor in TA 
+  auto dense_iden = generate_ta_identity<array_type>(world, trange);
+
+  // Solve EVP
+  auto [evals, evecs] = Derived::heig(A, dense_iden);
+  BOOST_CHECK(evecs.trange() == A.trange());
+
+  // Check eigenvalue correctness
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  compare_replicated_vector("TiledArray::non_dist", exact_evals, evals, tol);
+
+  // Check eigenvectors
+  TA::TArray<double> tmp;
+  tmp("i,j") = A("i,k") * evecs("k,j");
+  A("i,j")   = evecs("k,i").conj() * tmp("k,j");
+  subtract_diagonal_tensor_inplace(A, evals);
+
+  const auto norm = A("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, tol);
+
+  world.gop.fence();
+}
diff --git a/tests/linalg/linalg.cpp b/tests/linalg/linalg.cpp
new file mode 100644
index 0000000000..dbb379037e
--- /dev/null
+++ b/tests/linalg/linalg.cpp
@@ -0,0 +1,147 @@
+#include <tiledarray.h>
+#include <random>
+#include "TiledArray/config.h"
+//#include "range_fixture.h"
+#include "unit_test_config.h"
+
+#include "linalg_fixture.h"    // ReferenceFixture
+#include "compare_utilities.h" // Tensor comparison utilities
+#include "gen_trange.h"        // TiledRange generator
+
+#include "TiledArray/math/linalg/non-distributed/cholesky.h"
+#include "TiledArray/math/linalg/non-distributed/heig.h"
+#include "TiledArray/math/linalg/non-distributed/lu.h"
+#include "TiledArray/math/linalg/non-distributed/svd.h"
+
+#include "TiledArray/math/linalg/cholesky.h"
+#include "TiledArray/math/linalg/heig.h"
+#include "TiledArray/math/linalg/lu.h"
+#include "TiledArray/math/linalg/svd.h"
+
+namespace TA = TiledArray;
+namespace non_dist = TA::math::linalg::non_distributed;
+
+#if TILEDARRAY_HAS_TTG
+#include "TiledArray/math/linalg/ttg/cholesky.h"
+#define TILEDARRAY_TTG_TEST(F, E)    \
+  GlobalFixture::world->gop.fence(); \
+  compare("TiledArray::ttg", non_dist::F, TiledArray::math::linalg::ttg::F, E);
+#else
+#define TILEDARRAY_TTG_TEST(...)
+#endif
+
+
+struct LinearAlgebraFixture : ReferenceFixture<> { };
+
+
+BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture)
+
+#if TILEDARRAY_HAS_TTG
+BOOST_AUTO_TEST_CASE(cholesky) {
+  GlobalFixture::world->gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(*GlobalFixture::world, trange);
+
+  const double epsilon = N * N * std::numeric_limits<double>::epsilon();
+  TILEDARRAY_TTG_TEST(cholesky(A), epsilon);
+  GlobalFixture::world->gop.fence();
+}
+
+BOOST_AUTO_TEST_CASE(cholesky_linv) {
+  GlobalFixture::world->gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(*GlobalFixture::world, trange);
+
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
+  TILEDARRAY_TTG_TEST(cholesky_linv<false>(A), epsilon);
+  GlobalFixture::world->gop.fence();
+}
+
+BOOST_AUTO_TEST_CASE(cholesky_linv_retl) {
+  GlobalFixture::world->gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(*GlobalFixture::world, trange);
+
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
+  TILEDARRAY_TTG_TEST(cholesky_linv<true>(A), epsilon);
+  GlobalFixture::world->gop.fence();
+}
+#endif
+
+template <typename ArrayT>
+void cholesky_qr_q_only_test(const ArrayT& A, double tol) {
+  using value_type = typename ArrayT::element_type;
+
+  auto Q = TiledArray::math::linalg::cholesky_qr<true>(A);
+
+  // Make sure the Q is orthogonal at least
+  TA::TArray<double> Iden;
+  Iden("i,j") = Q("k,i") * Q("k,j");
+  Iden.make_replicated();
+  auto I_eig = TA::array_to_eigen(Iden);
+  const auto N = A.trange().dim(1).extent();
+  BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol);
+}
+
+template <typename ArrayT>
+void cholesky_qr_test(const ArrayT& A, double tol) {
+  auto [Q, R] = TiledArray::math::linalg::cholesky_qr<false>(A);
+
+  // Check reconstruction error
+  TA::TArray<double> QR_ERROR;
+  QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j");
+  BOOST_CHECK_SMALL(QR_ERROR("i,j").norm().get(), tol);
+
+  // Check orthonormality of Q
+  TA::TArray<double> Iden;
+  Iden("i,j") = Q("k,i") * Q("k,j");
+  Iden.make_replicated();
+  auto I_eig = TA::array_to_eigen(Iden);
+  const auto N = A.trange().dim(1).extent();
+  BOOST_CHECK_SMALL((I_eig - decltype(I_eig)::Identity(N, N)).norm(), tol);
+}
+
+BOOST_AUTO_TEST_CASE(cholesky_qr_q_only) {
+  GlobalFixture::world->gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
+      *GlobalFixture::world, trange,
+      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
+        return this->make_ta_reference(t, range);
+      });
+
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  cholesky_qr_q_only_test(ref_ta, tol);
+
+  GlobalFixture::world->gop.fence();
+}
+
+BOOST_AUTO_TEST_CASE(cholesky_qr) {
+  GlobalFixture::world->gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  auto ref_ta = TA::make_array<TA::TArray<double>>(
+      *GlobalFixture::world, trange,
+      [this](TA::Tensor<double>& t, TA::Range const& range) -> double {
+        return this->make_ta_reference(t, range);
+      });
+
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  cholesky_qr_test(ref_ta, tol);
+
+  GlobalFixture::world->gop.fence();
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/linalg/linalg_fixture.h b/tests/linalg/linalg_fixture.h
new file mode 100644
index 0000000000..3578783cd8
--- /dev/null
+++ b/tests/linalg/linalg_fixture.h
@@ -0,0 +1,120 @@
+#pragma once
+#include <tiledarray.h>
+
+template <typename Derived = void>
+struct ReferenceFixture {
+  size_t N;
+  std::vector<double> htoeplitz_vector;
+  std::vector<double> exact_evals;
+
+  inline double matrix_element_generator(int64_t i, int64_t j) {
+#if 0
+    // Generates a Hankel matrix: absurd condition number
+    return i+j;
+#else
+    // Generates a Circulant matrix: good condition number
+    return htoeplitz_vector[std::abs(i - j)];
+#endif
+  }
+
+  template <typename Tile>
+  inline auto make_ta_reference(Tile& t, TA::Range const& range) {
+    t = Tile(range, 0.0);
+    auto lo = range.lobound_data();
+    auto up = range.upbound_data();
+    for (auto m = lo[0]; m < up[0]; ++m) {
+      for (auto n = lo[1]; n < up[1]; ++n) {
+        t(m, n) = matrix_element_generator(m, n);
+      }
+    }
+
+    return norm(t);
+  };
+
+  template <typename Array>
+  inline auto generate_ta_reference(TA::World& world, TA::TiledRange trange) {
+    return TA::make_array<Array>(world, trange,
+      [this](auto& t, TA::Range const& range) -> auto { 
+        return this->make_ta_reference(t,range);
+      });
+  }
+
+  template <typename Tile>
+  inline double make_ta_identity(Tile& t, TA::Range const& range) {
+    t = Tile(range, 0.0);
+    auto lo = range.lobound_data();
+    auto up = range.upbound_data();
+    for (auto m = lo[0]; m < up[0]; ++m)
+      for (auto n = lo[1]; n < up[1]; ++n)
+        if (m == n) t(m, n) = 1.;
+
+    return t.norm();
+  }
+
+  template <typename Array>
+  inline auto generate_ta_identity(TA::World& world, TA::TiledRange trange) {
+    return TA::make_array<Array>(world, trange,
+      [this](auto& t, TA::Range const& range) -> auto { 
+        return this->make_ta_identity(t,range);
+      });
+  }
+
+  ReferenceFixture(int64_t N = 1000)
+      : N(N), htoeplitz_vector(N), exact_evals(N) {
+    // Generate an hermitian Circulant vector
+    std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0);
+    htoeplitz_vector[0] = 100;
+    std::default_random_engine gen(0);
+    std::uniform_real_distribution<> dist(0., 1.);
+    for (int64_t i = 1; i <= (N / 2); ++i) {
+      double val = dist(gen);
+      htoeplitz_vector[i] = val;
+      htoeplitz_vector[N - i] = val;
+    }
+
+    // Compute exact eigenvalues
+    const double ff = 2. * M_PI / N;
+    for (int64_t j = 0; j < N; ++j) {
+      double val = htoeplitz_vector[0];
+      ;
+      for (int64_t k = 1; k < N; ++k)
+        val += htoeplitz_vector[N - k] * std::cos(ff * j * k);
+      exact_evals[j] = val;
+    }
+
+    std::sort(exact_evals.begin(), exact_evals.end());
+  }
+
+
+  void heig_same_tiling_test(TA::World& world); 
+  void heig_diff_tiling_test(TA::World& world); 
+  void heig_generalized_test(TA::World& world); 
+
+  void cholesky_test(TA::World& world); 
+  void cholesky_linv_test(TA::World& world); 
+  void cholesky_linv_retl_test(TA::World& world); 
+  void cholesky_solve_test(TA::World& world); 
+  void cholesky_lsolve_test(TA::World& world); 
+
+  void lu_solve_test(TA::World& world); 
+  void lu_inv_test(TA::World& world); 
+
+  void svd_values_only_test(TA::World& world);
+  void svd_leftvectors_test(TA::World& world);
+  void svd_rightvectors_test(TA::World& world);
+  void svd_allvectors_test(TA::World& world);
+
+  void householder_qr_q_only_test(TA::World& world);
+  void householder_qr_test(TA::World& world);
+};
+
+// Macro to generate tests
+#define LINALG_TEST_IMPL_MPI_SAFE(NAME, SERIAL_ONLY)    \
+BOOST_AUTO_TEST_CASE(NAME) {                            \
+  const auto world_size = GlobalFixture::world->size(); \
+  if(SERIAL_ONLY and  world_size > 1) return;           \
+  NAME##_##test(*GlobalFixture::world);                 \
+}
+
+#define LINALG_TEST_IMPL(NAME) LINALG_TEST_IMPL_MPI_SAFE(NAME,false)
+
diff --git a/tests/linalg/lu_tests.h b/tests/linalg/lu_tests.h
new file mode 100644
index 0000000000..b05dc701db
--- /dev/null
+++ b/tests/linalg/lu_tests.h
@@ -0,0 +1,56 @@
+#pragma once
+#include "linalg_fixture.h"
+#include "compare_utilities.h" // Tensor comparison utilities
+#include "gen_trange.h"        // TiledRange generator
+#include "misc_util.h"         // Misc utilities
+
+// LU Solve (GESV) Test 
+template <typename Derived>
+void ReferenceFixture<Derived>::lu_solve_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto iden = Derived::lu_solve(A, A);
+  BOOST_CHECK(iden.trange() == A.trange());
+  subtract_identity_inplace(iden); // iden -= I
+
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
+  double norm = iden("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, epsilon);
+
+  world.gop.fence();
+}
+
+
+
+// LU Inverse (GETRF + GETRI) Test 
+template <typename Derived>
+void ReferenceFixture<Derived>::lu_inv_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  TA::TArray<double> iden(world, trange);
+
+  auto Ainv = Derived::lu_inv(A);
+  iden("i,j") = Ainv("i,k") * A("k,j");
+
+  BOOST_CHECK(iden.trange() == A.trange());
+  subtract_identity_inplace(iden); // iden -= I
+
+  double epsilon = N * N * std::numeric_limits<double>::epsilon();
+  double norm = iden("i,j").norm(world).get();
+
+  BOOST_CHECK_SMALL(norm, epsilon);
+
+  world.gop.fence();
+}
diff --git a/tests/linalg/misc_util.h b/tests/linalg/misc_util.h
new file mode 100644
index 0000000000..e6d8c673dc
--- /dev/null
+++ b/tests/linalg/misc_util.h
@@ -0,0 +1,52 @@
+#pragma once
+#include <tiledarray.h>
+
+template <typename Array, typename ReplicatedDiag>
+void subtract_diagonal_tensor_inplace(Array& A, const ReplicatedDiag& D) {
+
+  TiledArray::foreach_inplace( A, [=](auto& tile) {
+    auto range = tile.range();
+    auto lo = range.lobound_data();
+    auto up = range.upbound_data();
+    for (auto m = lo[0]; m < up[0]; ++m)
+      for (auto n = lo[1]; n < up[1]; ++n)
+        if (m == n) { tile(m, n) -= D[m]; }
+  });
+
+}
+
+template <typename Array>
+void subtract_identity_inplace(Array& A) {
+  using element_type = typename Array::element_type;
+  const auto M = A.trange().dim(0).extent();
+  const auto N = A.trange().dim(1).extent();
+  BOOST_CHECK(M == N);
+  std::vector<element_type> D(N,1.0);
+  subtract_diagonal_tensor_inplace(A, D);
+}
+
+template <typename Array, typename ReplicatedDiag>
+void multiply_tensor_by_diag_inplace(char SIDE, Array& A, const ReplicatedDiag& D) {
+
+  TiledArray::foreach_inplace( A, [=](auto& tile) {
+    auto range = tile.range();
+    auto lo = range.lobound_data();
+    auto up = range.upbound_data();
+    // A(i,j) = D(i,i) * A(i,j)
+    if(SIDE == 'L') {
+      for (auto m = lo[0]; m < up[0]; ++m) {
+        const auto d = D[m];
+        for (auto n = lo[1]; n < up[1]; ++n) {
+          tile(m, n) *= d;
+        }
+      }
+    // A(i,j) = A(i,j) * D(j,j)
+    } else {
+      for (auto m = lo[0]; m < up[0]; ++m)
+        for (auto n = lo[1]; n < up[1]; ++n) {
+          tile(m, n) *= D[n];
+        }
+    }
+  });
+
+}
diff --git a/tests/linalg/non_dist.cpp b/tests/linalg/non_dist.cpp
new file mode 100644
index 0000000000..1dcb0344fb
--- /dev/null
+++ b/tests/linalg/non_dist.cpp
@@ -0,0 +1,96 @@
+#include "heig_tests.h"     // EVP tests
+#include "cholesky_tests.h" // Cholesky tests
+#include "lu_tests.h"       // LU tests
+#include "svd_tests.h"      // SVD tests
+#include "qr_tests.h"       // QR tests
+
+// Non-distributed linear algebra utilities
+#include "TiledArray/math/linalg/non-distributed/cholesky.h"
+#include "TiledArray/math/linalg/non-distributed/heig.h"
+#include "TiledArray/math/linalg/non-distributed/lu.h"
+#include "TiledArray/math/linalg/non-distributed/svd.h"
+
+namespace TA = TiledArray;
+namespace non_dist = TA::math::linalg::non_distributed;
+
+struct NonDistLinearAlgebraFixture : ReferenceFixture<NonDistLinearAlgebraFixture> {
+
+  NonDistLinearAlgebraFixture(int64_t N = 1000) : 
+    ReferenceFixture<NonDistLinearAlgebraFixture>(N) {}
+
+  template <typename... Args>
+  static auto heig(Args&&... args) { 
+    return non_dist::heig(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky(Args&&... args) { 
+    return non_dist::cholesky(std::forward<Args>(args)...); 
+  }
+
+  template <bool RetL, typename... Args>
+  static auto cholesky_linv(Args&&... args) { 
+    return non_dist::cholesky_linv<RetL>(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky_solve(Args&&... args) { 
+    return non_dist::cholesky_solve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky_lsolve(Args&&... args) { 
+    return non_dist::cholesky_lsolve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto lu_solve(Args&&... args) { 
+    return non_dist::lu_solve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto lu_inv(Args&&... args) { 
+    return non_dist::lu_inv(std::forward<Args>(args)...); 
+  }
+
+  template<TA::SVD::Vectors Vectors, typename... Args>
+  static auto svd(Args&&... args) {
+    return non_dist::svd<Vectors>(std::forward<Args>(args)...);
+  }
+
+  template <bool QOnly, typename... Args>
+  static auto householder_qr(Args&&... args) { 
+    return non_dist::householder_qr<QOnly>(std::forward<Args>(args)...); 
+  }
+};
+
+
+BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite_non_dist, NonDistLinearAlgebraFixture)
+
+// HEIG tests
+LINALG_TEST_IMPL(heig_same_tiling);
+LINALG_TEST_IMPL(heig_diff_tiling);
+LINALG_TEST_IMPL(heig_generalized);
+
+// Cholesky tests
+LINALG_TEST_IMPL(cholesky);
+LINALG_TEST_IMPL(cholesky_linv);
+LINALG_TEST_IMPL(cholesky_linv_retl);
+LINALG_TEST_IMPL(cholesky_solve);
+LINALG_TEST_IMPL(cholesky_lsolve);
+
+// LU tests
+LINALG_TEST_IMPL(lu_solve);
+LINALG_TEST_IMPL(lu_inv);
+
+// SVD tests
+LINALG_TEST_IMPL(svd_values_only);
+LINALG_TEST_IMPL(svd_leftvectors);
+LINALG_TEST_IMPL(svd_rightvectors);
+LINALG_TEST_IMPL(svd_allvectors);
+
+// QR tests
+LINALG_TEST_IMPL(householder_qr_q_only);
+LINALG_TEST_IMPL(householder_qr);
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/linalg/qr_tests.h b/tests/linalg/qr_tests.h
new file mode 100644
index 0000000000..7f1adf43b6
--- /dev/null
+++ b/tests/linalg/qr_tests.h
@@ -0,0 +1,61 @@
+
+#pragma once
+#include "linalg_fixture.h"
+#include "compare_utilities.h" // Tensor comparison utilities
+#include "gen_trange.h"        // TiledRange generator
+#include "misc_util.h"         // Misc utilities
+
+
+template <typename Derived>
+void ReferenceFixture<Derived>::householder_qr_q_only_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+  
+  // Compute Q
+  auto Q = Derived::template householder_qr<true>(A);
+
+  // Make sure the Q is orthogonal at least
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  TA::TArray<double> Iden;
+  Iden("i,j") = Q("k,i") * Q("k,j");
+  subtract_identity_inplace(Iden);
+  const auto norm = Iden("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, tol);
+
+  world.gop.fence();
+}
+
+template <typename Derived>
+void ReferenceFixture<Derived>::householder_qr_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+  
+  // Compute QR
+  auto [Q, R] = Derived::template householder_qr<false>(A);
+
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+
+  // Check reconstruction error
+  TA::TArray<double> QR_ERROR;
+  QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j");
+  BOOST_CHECK_SMALL(QR_ERROR("i,j").norm(world).get(), tol);
+
+  // Check orthonormality of Q
+  TA::TArray<double> Iden;
+  Iden("i,j") = Q("k,i") * Q("k,j");
+  subtract_identity_inplace(Iden);
+  const auto norm = Iden("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, tol);
+
+  world.gop.fence();
+}
diff --git a/tests/linalg/scalapack.cpp b/tests/linalg/scalapack.cpp
new file mode 100644
index 0000000000..618890771f
--- /dev/null
+++ b/tests/linalg/scalapack.cpp
@@ -0,0 +1,244 @@
+#include "heig_tests.h"     // EVP tests
+#include "cholesky_tests.h" // Cholesky tests
+#include "lu_tests.h"       // LU tests
+#include "svd_tests.h"      // SVD tests
+#include "qr_tests.h"       // QR tests
+
+// ScaLAPACK linear algebra utilities
+#include "TiledArray/math/linalg/scalapack/all.h"
+
+namespace TA = TiledArray;
+namespace scalapack = TA::math::linalg::scalapack;
+
+struct ScaLAPACKLinearAlgebraFixture : 
+  ReferenceFixture<ScaLAPACKLinearAlgebraFixture> {
+
+  blacspp::Grid grid;
+  scalapack::BlockCyclicMatrix<double> ref_matrix;  // XXX: Just double is fine?
+
+  ScaLAPACKLinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) : 
+    ReferenceFixture<ScaLAPACKLinearAlgebraFixture>(N),
+    grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)),  // XXX: Is this safe?
+    ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) {
+
+    for (size_t i = 0; i < N; ++i) {
+      for (size_t j = 0; j < N; ++j) {
+        if (ref_matrix.dist().i_own(i, j)) {
+          auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j);
+          ref_matrix.local_mat()(i_local, j_local) =
+              matrix_element_generator(i, j);
+        }
+      }
+    }
+
+  }
+
+  template <typename... Args>
+  static auto heig(Args&&... args) { 
+    return scalapack::heig(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky(Args&&... args) { 
+    return scalapack::cholesky(std::forward<Args>(args)...); 
+  }
+
+  template <bool RetL, typename... Args>
+  static auto cholesky_linv(Args&&... args) { 
+    return scalapack::cholesky_linv<RetL>(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky_solve(Args&&... args) { 
+    return scalapack::cholesky_solve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky_lsolve(Args&&... args) { 
+    return scalapack::cholesky_lsolve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto lu_solve(Args&&... args) { 
+    return scalapack::lu_solve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto lu_inv(Args&&... args) { 
+    return scalapack::lu_inv(std::forward<Args>(args)...); 
+  }
+
+  template<TA::SVD::Vectors Vectors, typename... Args>
+  static auto svd(Args&&... args) {
+    return scalapack::svd<Vectors>(std::forward<Args>(args)...);
+  }
+
+  template <bool QOnly, typename... Args>
+  static auto householder_qr(Args&&... args) { 
+    return scalapack::householder_qr<QOnly>(std::forward<Args>(args)...); 
+  }
+
+
+
+
+  template <typename Array>
+  void block_cyclic_to_tiled_array_test(TA::TiledRange& trange, TA::World& world) {
+
+    world.gop.fence();
+
+    // Generate Reference Tensor
+    auto ref_ta = generate_ta_reference<Array>(world, trange);
+    world.gop.fence();
+
+    // Convert reference matrix to Tensor
+    auto test_ta =
+        scalapack::block_cyclic_to_array<Array>(ref_matrix, trange);
+    world.gop.fence();
+
+    auto norm_diff =
+        (ref_ta("i,j") - test_ta("i,j")).norm(world).get();
+
+    BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
+
+    world.gop.fence();
+
+  }
+
+  template <typename Array>
+  void tiled_array_to_block_cyclic_test(TA::TiledRange& trange, TA::World& world) {
+
+    world.gop.fence();
+
+    // Generate Reference Tensor
+    auto ref_ta = generate_ta_reference<Array>(world, trange);
+    world.gop.fence();
+
+    // Convert reference tensor to matrix
+    auto NB = ref_matrix.dist().nb();
+    auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB);
+    world.gop.fence();
+
+    double local_norm_diff =
+        (test_matrix.local_mat() - ref_matrix.local_mat()).norm();
+    local_norm_diff *= local_norm_diff;
+
+    double norm_diff;
+    MPI_Allreduce(&local_norm_diff, &norm_diff, 1, MPI_DOUBLE, MPI_SUM,
+                  MPI_COMM_WORLD);
+
+    norm_diff = std::sqrt(norm_diff);
+
+    BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
+
+    world.gop.fence();
+
+  }
+
+};
+
+
+BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite_scalapack, ScaLAPACKLinearAlgebraFixture)
+
+using ta_test_types = boost::mpl::list<
+    TA::DistArray<TA::Tensor<double>,              TA::DensePolicy>,
+    TA::DistArray<btas::Tensor<double, TA::Range>, TA::DensePolicy>,
+    TA::DistArray<TA::Tensor<double>,              TA::SparsePolicy>,
+    TA::DistArray<btas::Tensor<double, TA::Range>, TA::SparsePolicy>
+>;
+
+// ScaLAPACK -> TA, tilings equal
+BOOST_AUTO_TEST_CASE_TEMPLATE(block_cyclic_to_tiled_array_equal, array_type, ta_test_types) {
+  auto [M, N] = ref_matrix.dims(); 
+  auto NB     = ref_matrix.dist().nb();
+  BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square
+
+  auto trange = gen_trange(N, {static_cast<size_t>(NB)});
+  block_cyclic_to_tiled_array_test<array_type>(trange, *GlobalFixture::world);
+};
+
+// ScaLAPACK -> TA, tiled range smaller than NB
+BOOST_AUTO_TEST_CASE_TEMPLATE(block_cyclic_to_tiled_array_all_small, array_type, ta_test_types) {
+  auto [M, N] = ref_matrix.dims(); 
+  auto NB     = ref_matrix.dist().nb();
+  BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square
+
+  auto trange = gen_trange(N, {static_cast<size_t>(NB/2)});
+  block_cyclic_to_tiled_array_test<array_type>(trange, *GlobalFixture::world);
+};
+
+// ScaLAPACK -> TA, random tiling
+BOOST_AUTO_TEST_CASE_TEMPLATE(block_cyclic_to_tiled_array_random, array_type, ta_test_types) {
+  auto [M, N] = ref_matrix.dims(); 
+  BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square
+
+  auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
+  block_cyclic_to_tiled_array_test<array_type>(trange, *GlobalFixture::world);
+};
+
+
+
+// TA -> ScaLAPACK: tilings equal
+BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_block_cyclic_equal, array_type, ta_test_types) {
+  auto [M, N] = ref_matrix.dims(); 
+  auto NB     = ref_matrix.dist().nb();
+  BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square
+
+  auto trange = gen_trange(N, {static_cast<size_t>(NB)});
+  tiled_array_to_block_cyclic_test<array_type>(trange, *GlobalFixture::world);
+};
+
+// TA -> ScaLAPACK, tiled range smaller than NB
+BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_block_cyclic_all_small, array_type, ta_test_types) {
+  auto [M, N] = ref_matrix.dims(); 
+  auto NB     = ref_matrix.dist().nb();
+  BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square
+
+  auto trange = gen_trange(N, {static_cast<size_t>(NB/2)});
+  tiled_array_to_block_cyclic_test<array_type>(trange, *GlobalFixture::world);
+};
+
+// TA -> ScaLAPACK, random tiling
+BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_block_cyclic_random, array_type, ta_test_types) {
+  auto [M, N] = ref_matrix.dims(); 
+  BOOST_REQUIRE_EQUAL(M,N); // TiledRangeRange only for square
+
+  auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
+  tiled_array_to_block_cyclic_test<array_type>(trange, *GlobalFixture::world);
+};
+
+
+BOOST_AUTO_TEST_CASE(const_tiled_array_to_block_cyclic) {
+  // Just check that it compiles, meat is tested elsewhere
+  using array_type = const TA::TArray<double>;
+  using my_t = decltype(scalapack::array_to_block_cyclic(std::declval<array_type>(), std::declval<blacspp::Grid>(), std::declval<int64_t>(), std::declval<int64_t>()));
+  constexpr auto my_bool = std::is_same_v<my_t,scalapack::BlockCyclicMatrix<double>>;
+  BOOST_REQUIRE(my_bool);
+};
+
+// HEIG tests
+LINALG_TEST_IMPL(heig_same_tiling);
+LINALG_TEST_IMPL(heig_diff_tiling);
+LINALG_TEST_IMPL(heig_generalized);
+
+// Cholesky tests
+LINALG_TEST_IMPL(cholesky);
+LINALG_TEST_IMPL(cholesky_linv);
+LINALG_TEST_IMPL(cholesky_linv_retl);
+LINALG_TEST_IMPL(cholesky_solve);
+LINALG_TEST_IMPL(cholesky_lsolve);
+
+// LU tests
+LINALG_TEST_IMPL(lu_solve);
+LINALG_TEST_IMPL(lu_inv);
+
+// SVD tests
+LINALG_TEST_IMPL(svd_values_only);
+LINALG_TEST_IMPL(svd_leftvectors);
+LINALG_TEST_IMPL(svd_rightvectors);
+LINALG_TEST_IMPL(svd_allvectors);
+
+// QR tests
+LINALG_TEST_IMPL(householder_qr_q_only);
+LINALG_TEST_IMPL(householder_qr);
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/linalg/slate.cpp b/tests/linalg/slate.cpp
new file mode 100644
index 0000000000..d58a81b6cd
--- /dev/null
+++ b/tests/linalg/slate.cpp
@@ -0,0 +1,213 @@
+#include "heig_tests.h"     // EVP tests
+#include "cholesky_tests.h" // Cholesky tests
+#include "lu_tests.h"       // LU tests
+#include "svd_tests.h"      // SVD tests
+#include "qr_tests.h"       // QR tests
+
+// SLATE linear algebra utilities
+#include <TiledArray/conversions/slate.h>
+#include <TiledArray/math/linalg/slate/cholesky.h>
+#include <TiledArray/math/linalg/slate/lu.h>
+#include <TiledArray/math/linalg/slate/heig.h>
+#include <TiledArray/math/linalg/slate/svd.h>
+#include <TiledArray/math/linalg/slate/qr.h>
+
+namespace TA = TiledArray;
+namespace slate_la = TA::math::linalg::slate;
+
+struct SLATELinearAlgebraFixture : 
+  ReferenceFixture<SLATELinearAlgebraFixture> {
+
+  SLATELinearAlgebraFixture(int64_t N = 1000) : 
+    ReferenceFixture<SLATELinearAlgebraFixture>(N) {}
+
+  slate::Matrix<double> make_ref_slate(int64_t N, TA::SlateFunctors& slate_functors,
+    MPI_Comm comm) {
+
+    slate::Matrix<double> A(N, N, slate_functors.tileMb(), slate_functors.tileNb(), 
+      slate_functors.tileRank(), slate_functors.tileDevice(), comm);
+    
+    A.insertLocalTiles();
+    int64_t j_off = 0;
+    for (int64_t j = 0; j < A.nt(); ++j) {
+
+      int64_t i_off = 0;
+      for (int64_t i = 0; i < A.mt(); ++i) {
+
+        if(A.tileIsLocal(i,j)) {
+          auto T = A(i,j);
+          for(auto jj = 0; jj < T.nb(); ++jj)
+          for(auto ii = 0; ii < T.mb(); ++ii) {
+            T.at(ii,jj) = matrix_element_generator(i_off+ii,j_off+jj);
+          }
+        }
+
+        i_off += A.tileMbFunc()(i);
+      }
+
+      j_off += A.tileNbFunc()(j);
+    }
+
+    return A;
+  }
+
+  template <typename... Args>
+  static auto heig(Args&&... args) { 
+    return slate_la::heig(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky(Args&&... args) { 
+    return slate_la::cholesky(std::forward<Args>(args)...); 
+  }
+
+  template <bool RetL, typename... Args>
+  static auto cholesky_linv(Args&&... args) { 
+    return slate_la::cholesky_linv<RetL>(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky_solve(Args&&... args) { 
+    return slate_la::cholesky_solve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto cholesky_lsolve(Args&&... args) { 
+    return slate_la::cholesky_lsolve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto lu_solve(Args&&... args) { 
+    return slate_la::lu_solve(std::forward<Args>(args)...); 
+  }
+
+  template <typename... Args>
+  static auto lu_inv(Args&&... args) { 
+    return slate_la::lu_inv(std::forward<Args>(args)...); 
+  }
+
+  template<TA::SVD::Vectors Vectors, typename... Args>
+  static auto svd(Args&&... args) {
+    return slate_la::svd<Vectors>(std::forward<Args>(args)...);
+  }
+
+  template <bool QOnly, typename... Args>
+  static auto householder_qr(Args&&... args) { 
+    return slate_la::householder_qr<QOnly>(std::forward<Args>(args)...); 
+  }
+
+
+  template <typename Array>
+  void tiled_array_to_slate_test(TA::TiledRange& trange, TA::World& world) {
+  
+    world.gop.fence();
+
+    auto ref_ta = generate_ta_reference<Array>(world, trange);
+    world.gop.fence();
+
+    auto slate_matrix = TA::array_to_slate(ref_ta);
+    world.gop.fence();
+    BOOST_CHECK( slate_matrix.mt() == trange.dim(0).tile_extent() );
+    BOOST_CHECK( slate_matrix.nt() == trange.dim(1).tile_extent() );
+    BOOST_CHECK( slate_matrix.m()  == N );
+    BOOST_CHECK( slate_matrix.n()  == N );
+
+    TA::SlateFunctors slate_functors( trange, ref_ta.pmap() );
+    auto ref_slate = this->make_ref_slate(N, slate_functors, MPI_COMM_WORLD);
+
+    slate::add( 1.0, ref_slate, -1.0, slate_matrix );
+    auto norm_diff = slate::norm(slate::Norm::Fro, slate_matrix);
+    BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
+
+    world.gop.fence();
+
+  }
+
+  template <typename Array>
+  void slate_to_tiled_array_test(TA::TiledRange& trange, TA::World& world) {
+  
+    world.gop.fence();
+
+    auto ref_ta = generate_ta_reference<Array>(world, trange);
+    world.gop.fence();
+
+    TA::SlateFunctors slate_functors( trange, ref_ta.pmap() );
+    auto ref_slate = this->make_ref_slate(N, slate_functors, MPI_COMM_WORLD);
+
+    world.gop.fence();
+    auto test_ta = TA::slate_to_array<Array>(ref_slate, world);
+    world.gop.fence();
+
+    auto norm_diff = (ref_ta("i,j") - test_ta("i,j")).norm(world).get();
+    BOOST_CHECK_SMALL(norm_diff, std::numeric_limits<double>::epsilon());
+
+    world.gop.fence();
+
+  }
+};
+
+
+BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite_slate, SLATELinearAlgebraFixture)
+
+using ta_test_types = boost::mpl::list<
+    TA::DistArray<TA::Tensor<double>,              TA::DensePolicy>,
+    TA::DistArray<btas::Tensor<double, TA::Range>, TA::DensePolicy>,
+    TA::DistArray<TA::Tensor<double>,              TA::SparsePolicy>,
+    TA::DistArray<btas::Tensor<double, TA::Range>, TA::SparsePolicy>
+>;
+
+// SLATE -> TA: tilings equal
+BOOST_AUTO_TEST_CASE_TEMPLATE(slate_to_tiled_array_equal, array_type, ta_test_types) {
+  auto trange = gen_trange(N, {static_cast<size_t>(128)});
+  slate_to_tiled_array_test<array_type>(trange, *GlobalFixture::world);
+};
+
+// SLATE -> TA, random tiling
+BOOST_AUTO_TEST_CASE_TEMPLATE(slate_to_tiled_array_random, array_type, ta_test_types) {
+  auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
+  slate_to_tiled_array_test<array_type>(trange, *GlobalFixture::world);
+};
+
+
+// TA -> SLATE: tilings equal
+BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_slate_equal, array_type, ta_test_types) {
+  auto trange = gen_trange(N, {static_cast<size_t>(128)});
+  tiled_array_to_slate_test<array_type>(trange, *GlobalFixture::world);
+};
+
+// TA -> SLATE, random tiling
+BOOST_AUTO_TEST_CASE_TEMPLATE(tiled_array_to_slate_random, array_type, ta_test_types) {
+  auto trange = gen_trange(N, {107ul, 113ul, 211ul, 151ul});
+  tiled_array_to_slate_test<array_type>(trange, *GlobalFixture::world);
+};
+
+// HEIG tests (serial only)
+// TODO: Can make parallel-capable when the following issue is closed
+// https://github.com/icl-utk-edu/slate/issues/102
+LINALG_TEST_IMPL_MPI_SAFE(heig_same_tiling, true);
+LINALG_TEST_IMPL_MPI_SAFE(heig_diff_tiling, true);
+LINALG_TEST_IMPL_MPI_SAFE(heig_generalized, true);
+
+// Cholesky tests
+LINALG_TEST_IMPL(cholesky);
+LINALG_TEST_IMPL(cholesky_linv);
+LINALG_TEST_IMPL(cholesky_linv_retl);
+LINALG_TEST_IMPL(cholesky_solve);
+LINALG_TEST_IMPL(cholesky_lsolve);
+
+// LU tests
+LINALG_TEST_IMPL(lu_solve);
+LINALG_TEST_IMPL(lu_inv);
+
+// SVD tests
+LINALG_TEST_IMPL(svd_values_only);
+LINALG_TEST_IMPL(svd_leftvectors);
+LINALG_TEST_IMPL(svd_rightvectors);
+LINALG_TEST_IMPL(svd_allvectors);
+
+// QR tests
+LINALG_TEST_IMPL(householder_qr_q_only);
+LINALG_TEST_IMPL(householder_qr);
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/tests/linalg/svd_tests.h b/tests/linalg/svd_tests.h
new file mode 100644
index 0000000000..f3f01e6399
--- /dev/null
+++ b/tests/linalg/svd_tests.h
@@ -0,0 +1,131 @@
+#pragma once
+#include "linalg_fixture.h"
+#include "compare_utilities.h" // Tensor comparison utilities
+#include "gen_trange.h"        // TiledRange generator
+#include "misc_util.h"         // Misc utilities
+
+
+template <typename Derived>
+void ReferenceFixture<Derived>::svd_values_only_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto S = Derived::template svd<TA::SVD::ValuesOnly>(A, trange, trange);
+
+  std::vector exact_singular_values = exact_evals;
+  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
+            std::greater<double>());
+
+  // Check singular value correctness
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  compare_replicated_vector("TiledArray::Derived", exact_singular_values, S, tol);
+
+  world.gop.fence();
+}
+
+template <typename Derived>
+void ReferenceFixture<Derived>::svd_leftvectors_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto [S, U] = Derived::template svd<TA::SVD::LeftVectors>(A, trange, trange);
+
+  std::vector exact_singular_values = exact_evals;
+  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
+            std::greater<double>());
+
+  // Check singular value correctness
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  compare_replicated_vector("TiledArray::Derived", exact_singular_values, S, tol);
+
+  // Since A is Hermitian, U is also A's eigenvectors
+  // A <- U**H * A * U
+  TA::TArray<double> tmp;
+  tmp("i,j") = A("i,k") * U("k,j");
+  A("i,j")   = U("k,i").conj() * tmp("k,j");
+  subtract_diagonal_tensor_inplace(A, S); // A -= SIGMA
+
+  const auto norm = A("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, tol);
+
+  world.gop.fence();
+}
+
+template <typename Derived>
+void ReferenceFixture<Derived>::svd_rightvectors_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto [S, VT] = Derived::template svd<TA::SVD::RightVectors>(A, trange, trange);
+
+  std::vector exact_singular_values = exact_evals;
+  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
+            std::greater<double>());
+
+  // Check singular value correctness
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  compare_replicated_vector("TiledArray::Derived", exact_singular_values, S, tol);
+
+
+  // Since A is Hermitian, VT is also (the c-transpose) A's eigenvectors
+  // A <- VT * A * VT**H
+  TA::TArray<double> tmp;
+  tmp("i,j") = A("i,k") * VT("j,k").conj();
+  A("i,j")   = VT("i,k") * tmp("k,j");
+  subtract_diagonal_tensor_inplace(A, S); // A -= SIGMA
+
+  const auto norm = A("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, tol);
+
+
+  world.gop.fence();
+}
+
+template <typename Derived>
+void ReferenceFixture<Derived>::svd_allvectors_test(TA::World& world) {
+  world.gop.fence();
+
+  auto trange = gen_trange(N, {128ul});
+
+  // Generate Reference Tensor in TA
+  using array_type = TA::TArray<double>;
+  auto A = generate_ta_reference<array_type>(world, trange);
+
+  auto [S, U, VT] = Derived::template svd<TA::SVD::AllVectors>(A, trange, trange);
+
+  std::vector exact_singular_values = exact_evals;
+  std::sort(exact_singular_values.begin(), exact_singular_values.end(),
+            std::greater<double>());
+
+  // Check singular value correctness
+  double tol = N * N * std::numeric_limits<double>::epsilon();
+  compare_replicated_vector("TiledArray::Derived", exact_singular_values, S, tol);
+  
+  // Recreate SVD
+  // A <- U**H * A * VT**H
+  TA::TArray<double> tmp;
+  tmp("i,j") = A("i,k") * VT("j,k").conj();
+  A("i,j")   = U("k,i").conj() * tmp("k,j");
+  subtract_diagonal_tensor_inplace(A, S); // A -= SIGMA
+
+  const auto norm = A("i,j").norm(world).get();
+  BOOST_CHECK_SMALL(norm, tol);
+
+
+  world.gop.fence();
+}