diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bfbd06ce6d..c147526452 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -32,6 +32,10 @@ jobs:
     steps:
     - uses: actions/checkout@v2
 
+    - uses: maxim-lobanov/setup-xcode@v1
+      with:
+        xcode-version: '<14'
+
     - name: Host system info
       shell: bash
      run: cmake -P ${{github.workspace}}/ci/host_system_info.cmake
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0e8fce70cf..a43b767e03 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -235,12 +235,15 @@ endif()
 set(INTEGER4 TRUE CACHE BOOL "If TRUE, use integer*4 Fortran integers in BLAS calls. Otherwise use integer*8.")
 mark_as_advanced(INTEGER4)
 
-# Set the CPU L1 cache line size.
-set(VECTOR_ALIGNMENT "16" CACHE STRING "Set the vector alignment in memory (DO NOT CHANGE THIS VALUE UNLESS YOU KNOW WHAT YOU ARE DOING)")
-mark_as_advanced(VECTOR_ALIGNMENT)
-set(TILEDARRAY_ALIGNMENT ${VECTOR_ALIGNMENT})
+# Set the align size
+include(DetectAlignSize)
+if (NOT DEFINED CACHE{TA_ALIGN_SIZE})
+  set(TA_ALIGN_SIZE "${TA_ALIGN_SIZE_DETECTED}" CACHE STRING "Set the default alignment of data buffers used by array tiles (DO NOT CHANGE THIS VALUE UNLESS YOU KNOW WHAT YOU ARE DOING)")
+endif()
+mark_as_advanced(TA_ALIGN_SIZE)
+set(TILEDARRAY_ALIGN_SIZE ${TA_ALIGN_SIZE})
 
-# Set the vectory.
+# Set the CPU L1 cache line size.
 set(CACHE_LINE_SIZE "64" CACHE STRING "Set the CPU L1 cache line size in bytes (DO NOT CHANGE THIS VALUE UNLESS YOU KNOW WHAT YOU ARE DOING)")
 mark_as_advanced(CACHE_LINE_SIZE)
 set(TILEDARRAY_CACHELINE_SIZE ${CACHE_LINE_SIZE})
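Note: `TA_ALIGN_SIZE` ends up as the `TILEDARRAY_ALIGN_SIZE` macro consumed by the sources further down. As a rough illustration only (a standalone sketch, not TiledArray code; the 64-byte fallback and the use of `std::aligned_alloc` are assumptions), this is the kind of over-aligned allocation such a compile-time constant enables:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdlib>

#ifndef TILEDARRAY_ALIGN_SIZE
#define TILEDARRAY_ALIGN_SIZE 64  // stand-in only; the real value comes from the generated config header
#endif

int main() {
  constexpr std::size_t n = 1024;
  // std::aligned_alloc requires the total size to be a multiple of the alignment
  void* p = std::aligned_alloc(TILEDARRAY_ALIGN_SIZE, n * sizeof(double));
  assert(reinterpret_cast<std::uintptr_t>(p) % TILEDARRAY_ALIGN_SIZE == 0);
  std::free(p);
}
```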
diff --git a/INSTALL.md b/INSTALL.md
index 8a1e2f81e2..230cb643fb 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -42,7 +42,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b
 - Boost.Range: header-only, *only used for unit testing*
 - [BTAS](http://github.com/ValeevGroup/BTAS), tag fba66ad9881ab29ea8df49ac6a6006cab3fb3ce5 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*.
-- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 66b199a08bf5f33b1565811fc202a051ec1b0fbb .
+- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 40d2e38414179a8ebce508c7339fcee21244ffc6 .
   Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*.
@@ -393,13 +393,13 @@ directory with:
 
 ## Advanced configure options:
 
-The following CMake cache variables are tuning parameters. You should only
-modify these values if you know the values for your patricular system.
+The following CMake cache variables are for performance tuning. You should only
+modify these values if you know the values for your particular system.
 
-* `VECTOR_ALIGNMENT` -- The alignment of memory for Tensor in bytes [Default=16]
-* `CACHE_LINE_SIZE` -- The cache line size in bytes [Default=64]
+* `TA_ALIGN_SIZE` -- The alignment of memory allocated by TA::Tensor (and other artifacts like TA::host_allocator), in bytes. [Default is platform-specific; 64 if no platform-specific value is found]
+* `TA_CACHE_LINE_SIZE` -- The cache line size in bytes [Default=64]
 
-`VECTOR_ALIGNMENT` controls the alignment of Tensor data, and `CACHE_LINE_SIZE`
+`TA_ALIGN_SIZE` controls the alignment of memory allocated for tiles, and `TA_CACHE_LINE_SIZE`
 controls the size of automatic loop unrolling for tensor operations. TiledArray
 does not currently use explicit vector instructions (i.e. intrinsics), but the
 code is written in such a way that compilers can more easily autovectorize
@@ -416,7 +416,7 @@ support may be added.
 * `TA_TTG` -- Set to `ON` to find or fetch the TTG library. [Default=OFF].
 * `TA_SIGNED_1INDEX_TYPE` -- Set to `OFF` to use unsigned 1-index coordinate type (default for TiledArray 1.0.0-alpha.2 and older). The default is `ON`, which enables the use of negative indices in coordinates.
 * `TA_MAX_SOO_RANK_METADATA` -- Specifies the maximum rank for which to use Small Object Optimization (hence, avoid the use of the heap) for metadata. The default is `8`.
-* `TA_TENSOR_MEM_PROFILE` -- Set to `ON` to profile memory allocations in TA::Tensor.
+* `TA_TENSOR_MEM_PROFILE` -- Set to `ON` to profile host memory allocations used by TA::Tensor. This causes the use of Umpire for host memory allocation. This also enables additional tracing facilities provided by Umpire; these can be controlled via [environment variable `UMPIRE_LOG_LEVEL`](https://umpire.readthedocs.io/en/develop/sphinx/features/logging_and_replay.html), but note that the default is to log Umpire info into a file rather than stdout.
 * `TA_UT_CTEST_TIMEOUT` -- The value (in seconds) of the timeout to use for running the TA unit tests via CTest when building the `check`/`check-tiledarray` targets. The default timeout is 1500s.
 
 # Build TiledArray
diff --git a/cmake/modules/DetectAlignSize.cmake b/cmake/modules/DetectAlignSize.cmake
new file mode 100644
index 0000000000..6830035fd6
--- /dev/null
+++ b/cmake/modules/DetectAlignSize.cmake
@@ -0,0 +1,17 @@
+# see https://stackoverflow.com/a/69952705 and https://gitlab.kitware.com/cmake/cmake/-/blob/master/Modules/CMakeDetermineCompilerABI.cmake
+
+set(BIN "${CMAKE_PLATFORM_INFO_DIR}/cmake/modules/DetectAlignSize.bin")
+try_compile(DETECT_ALIGN_SIZE_COMPILED
+  ${CMAKE_BINARY_DIR}
+  SOURCES ${PROJECT_SOURCE_DIR}/cmake/modules/DetectAlignSize.cpp
+  CMAKE_FLAGS ${CMAKE_CXX_FLAGS}
+  COPY_FILE "${BIN}"
+  COPY_FILE_ERROR copy_error
+  OUTPUT_VARIABLE OUTPUT
+  )
+if (DETECT_ALIGN_SIZE_COMPILED AND NOT copy_error)
+  file(STRINGS "${BIN}" data REGEX "INFO:align_size\\[[^]]*\\]")
+  if (data MATCHES "INFO:align_size\\[0*([^]]*)\\]")
+    set(TA_ALIGN_SIZE_DETECTED "${CMAKE_MATCH_1}" CACHE INTERNAL "")
+  endif()
+endif()
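DetectAlignSize.cmake follows the CMakeDetermineCompilerABI pattern: compile a probe whose binary embeds an `INFO:align_size[NN]` marker, then scan the binary for it. A minimal sketch of that extraction step redone in plain C++, purely for illustration (the file name is a placeholder; the module itself does this with `file(STRINGS ... REGEX ...)`):

```cpp
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <string>

int main() {
  // placeholder path; DetectAlignSize.cmake points this at the try_compile output
  std::ifstream bin("DetectAlignSize.bin", std::ios::binary);
  const std::string contents{std::istreambuf_iterator<char>(bin),
                             std::istreambuf_iterator<char>()};
  std::smatch m;
  if (std::regex_search(contents, m,
                        std::regex{R"(INFO:align_size\[0*(\d+)\])"}))
    std::cout << "detected align size = " << m[1] << " bytes\n";
  else
    std::cout << "marker not found\n";
}
```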
diff --git a/cmake/modules/DetectAlignSize.cpp b/cmake/modules/DetectAlignSize.cpp
new file mode 100644
index 0000000000..7af404e4e8
--- /dev/null
+++ b/cmake/modules/DetectAlignSize.cpp
@@ -0,0 +1,39 @@
+//
+// Created by Eduard Valeyev on 10/18/22.
+//
+
+#if defined(__x86_64__)
+#if defined(__AVX512F__)  // test AVX-512 first since it implies __AVX__
+#define PREFERRED_ALIGN_SIZE 64
+#elif defined(__AVX__)
+#define PREFERRED_ALIGN_SIZE 32
+#else  // 64-bit x86 should have SSE
+#define PREFERRED_ALIGN_SIZE 16
+#endif
+#elif (defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM) || \
+       defined(_M_ARM64))
+#define PREFERRED_ALIGN_SIZE 16
+#elif defined(__VECTOR4DOUBLE__)
+#define PREFERRED_ALIGN_SIZE 32
+#endif
+
+// else: default to typical cache line size
+#ifndef PREFERRED_ALIGN_SIZE
+#define PREFERRED_ALIGN_SIZE 64
+#endif
+
+/* Preferred align size, in bytes. */
+const char info_align_size[] = {
+    /* clang-format off */
+    'I', 'N', 'F', 'O', ':', 'a', 'l', 'i', 'g', 'n', '_', 's', 'i', 'z',
+    'e', '[', ('0' + ((PREFERRED_ALIGN_SIZE / 10) % 10)), ('0' + (PREFERRED_ALIGN_SIZE % 10)), ']',
+    '\0'
+    /* clang-format on */
+};
+
+int main(int argc, char* argv[]) {
+  int require = 0;
+  require += info_align_size[argc];
+  (void)argv;
+  return require;
+}
diff --git a/examples/dgemm/ta_dense.cpp b/examples/dgemm/ta_dense.cpp
index 4f05b662ca..85716cf05f 100644
--- a/examples/dgemm/ta_dense.cpp
+++ b/examples/dgemm/ta_dense.cpp
@@ -139,6 +139,18 @@ void gemm_(TiledArray::World& world, const TiledArray::TiledRange& trange,
       world.gop.fence();
       madness::print_meminfo(world.rank(), str);
     }
+#ifdef TA_TENSOR_MEM_PROFILE
+    {
+      world.gop.fence();
+      std::cout << str << ": TA::Tensor allocated "
+                << umpire::ResourceManager::getInstance()
+                       .getAllocator("HOST")
+                       .getHighWatermark()
+                << " bytes and used "
+                << TA::hostEnv::instance()->host_allocator().getHighWatermark()
+                << " bytes" << std::endl;
+    }
+#endif
   };
 
   memtrace("start");
diff --git a/external/versions.cmake b/external/versions.cmake
index f159d92321..e8cf72846a 100644
--- a/external/versions.cmake
+++ b/external/versions.cmake
@@ -19,8 +19,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7)
 set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626)
 set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496)
 
-set(TA_TRACKED_MADNESS_TAG 66b199a08bf5f33b1565811fc202a051ec1b0fbb)
-set(TA_TRACKED_MADNESS_PREVIOUS_TAG c0df7338779d06df7eaff31644d508940a7cfd90)
+set(TA_TRACKED_MADNESS_TAG 40d2e38414179a8ebce508c7339fcee21244ffc6)
+set(TA_TRACKED_MADNESS_PREVIOUS_TAG 66b199a08bf5f33b1565811fc202a051ec1b0fbb)
 set(TA_TRACKED_MADNESS_VERSION 0.10.1)
 set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)
 
@@ -39,6 +39,6 @@ set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG bf17a7246af38d34523bd0099b01d9961d06d311
 set(TA_TRACKED_RANGEV3_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864)
 set(TA_TRACKED_RANGEV3_PREVIOUS_TAG dbdaa247a25a0daa24c68f1286a5693c72ea0006)
 
-set(TA_TRACKED_TTG_URL https://github.com/therault/ttg.git)
-set(TA_TRACKED_TTG_TAG bb5309a5224e2546a5316daf7fc5c143f450f17b)
-set(TA_TRACKED_TTG_PREVIOUS_TAG 5107143b418384c44587c2776a9e87065d33d670)
+set(TA_TRACKED_TTG_URL https://github.com/TESSEorg/ttg)
+set(TA_TRACKED_TTG_TAG 1251bec25e07a74a05e5cd4cdec181a95a9baa66)
+set(TA_TRACKED_TTG_PREVIOUS_TAG bb5309a5224e2546a5316daf7fc5c143f450f17b)
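The ta_dense.cpp hunk above prints the Umpire "HOST" allocator's high watermark alongside the TiledArray host allocator's. A minimal standalone sketch of querying those Umpire statistics (assumes an Umpire installation; the pool name `example_pool` is made up, and `getHighWatermark()`/`getCurrentSize()` rely on Umpire's introspection, which is on by default):

```cpp
#include <iostream>
#include "umpire/Allocator.hpp"
#include "umpire/ResourceManager.hpp"
#include "umpire/strategy/QuickPool.hpp"

int main() {
  auto& rm = umpire::ResourceManager::getInstance();
  auto host = rm.getAllocator("HOST");
  auto pool = rm.makeAllocator<umpire::strategy::QuickPool>("example_pool", host);

  void* p = pool.allocate(1 << 20);  // 1 MiB
  pool.deallocate(p);

  // high watermark = peak bytes ever held; current size = bytes held right now
  std::cout << "HOST high watermark: " << host.getHighWatermark() << " bytes\n"
            << "pool high watermark: " << pool.getHighWatermark() << " bytes\n"
            << "pool current size:   " << pool.getCurrentSize() << " bytes\n";
}
```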
diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in
index a2e4f06ce4..68f5dc1374 100644
--- a/src/TiledArray/config.h.in
+++ b/src/TiledArray/config.h.in
@@ -66,7 +66,7 @@
 #cmakedefine TILEDARRAY_HAS_LONG_LONG 1
 
 /* Define the default alignment for arrays required by vector operations. */
-#cmakedefine TILEDARRAY_ALIGNMENT @TILEDARRAY_ALIGNMENT@
+#cmakedefine TILEDARRAY_ALIGN_SIZE @TILEDARRAY_ALIGN_SIZE@
 
 /* Define the size of the CPU L1 cache lines. */
 #cmakedefine TILEDARRAY_CACHELINE_SIZE @TILEDARRAY_CACHELINE_SIZE@
@@ -125,11 +125,11 @@
 
 /* Add macro TILEDARRAY_ALIGNED_STORAGE which forces alignment of variables */
 #if defined(__clang__) || defined(__GNUC__) || defined(__PGI) || defined(__IBMCPP__) || defined(__ARMCC_VERSION)
-#define TILEDARRAY_ALIGNED_STORAGE __attribute__((aligned(TILEDARRAY_ALIGNMENT)))
+#define TILEDARRAY_ALIGNED_STORAGE __attribute__((aligned(TILEDARRAY_ALIGN_SIZE)))
 
 #elif (defined _MSC_VER)
-#define TILEDARRAY_ALIGNED_STORAGE __declspec(align(TILEDARRAY_ALIGNMENT))
+#define TILEDARRAY_ALIGNED_STORAGE __declspec(align(TILEDARRAY_ALIGN_SIZE))
 
 #else
diff --git a/src/TiledArray/external/umpire.h b/src/TiledArray/external/umpire.h
index 2aefa3c6d4..9419968585 100644
--- a/src/TiledArray/external/umpire.h
+++ b/src/TiledArray/external/umpire.h
@@ -74,15 +74,20 @@ class umpire_allocator_impl {
 
     TA_ASSERT(umpalloc_);
 
-    result = static_cast<pointer>(umpalloc_->allocate(n * sizeof(T)));
+    // this, instead of umpalloc_->allocate(n*sizeof(T)), profiles memory use
+    // even if introspection is off
+    result = static_cast<pointer>(
+        umpalloc_->getAllocationStrategy()->allocate_internal(n * sizeof(T)));
 
     return result;
   }
 
   /// deallocate um memory using umpire dynamic pool
-  void deallocate(pointer ptr, size_t) {
+  void deallocate(pointer ptr, size_t size) {
     TA_ASSERT(umpalloc_);
-    umpalloc_->deallocate(ptr);
+    // this, instead of umpalloc_->deallocate(ptr, size), profiles memory use
+    // even if introspection is off
+    umpalloc_->getAllocationStrategy()->deallocate_internal(ptr, size);
   }
 
   const umpire::Allocator* umpire_allocator() const { return umpalloc_; }
diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h
index 94ef77ebda..51e43d9bee 100644
--- a/src/TiledArray/fwd.h
+++ b/src/TiledArray/fwd.h
@@ -64,8 +64,14 @@ class DensePolicy;
 class SparsePolicy;
 
 // TiledArray Tensors
-// can also use host_allocator and std::allocator for A
-template <typename T, typename A = host_allocator<T>>
+// can use any standard-compliant allocator such as std::allocator
+template <typename T, typename A =
+#ifndef TA_TENSOR_MEM_PROFILE
+                          std::allocator<T>
+#else
+                          host_allocator<T>
+#endif
+          >
 class Tensor;
 
 typedef Tensor<double> TensorD;
diff --git a/src/TiledArray/host/allocator.h b/src/TiledArray/host/allocator.h
index efaaeff9c4..6a0eebde30 100644
--- a/src/TiledArray/host/allocator.h
+++ b/src/TiledArray/host/allocator.h
@@ -58,7 +58,7 @@ class host_allocator_impl : public umpire_allocator_impl<T> {
   template <typename T1, typename T2>
   friend bool operator==(const host_allocator_impl<T1>& lhs,
                          const host_allocator_impl<T2>& rhs) noexcept;
-};  // class host_allocator
+};  // class host_allocator_impl
 
 template <typename T1, typename T2>
 bool operator==(const host_allocator_impl<T1>& lhs,
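The umpire.h change routes allocations through `getAllocationStrategy()->allocate_internal()` so that sizes are tracked even with introspection off, and fwd.h now lets `TA::Tensor` default to a standard allocator. A minimal sketch (not TiledArray's `host_allocator`; the class name and the choice of the "HOST" allocator are illustrative assumptions) of a standard-style allocator that forwards element counts, converted to bytes, to an `umpire::Allocator`:

```cpp
#include <cstddef>
#include "umpire/Allocator.hpp"
#include "umpire/ResourceManager.hpp"

template <typename T>
class umpire_std_allocator {
 public:
  using value_type = T;

  umpire_std_allocator() noexcept
      : alloc_(umpire::ResourceManager::getInstance().getAllocator("HOST")) {}

  template <typename U>
  umpire_std_allocator(const umpire_std_allocator<U>&) noexcept
      : umpire_std_allocator() {}

  // the standard allocator contract passes element counts, so convert to bytes
  T* allocate(std::size_t n) {
    return static_cast<T*>(alloc_.allocate(n * sizeof(T)));
  }
  void deallocate(T* p, std::size_t /* n */) { alloc_.deallocate(p); }

 private:
  umpire::Allocator alloc_;  // lightweight handle, copyable
};

template <typename T, typename U>
bool operator==(const umpire_std_allocator<T>&,
                const umpire_std_allocator<U>&) noexcept {
  return true;  // all instances share the same "HOST" allocator
}
template <typename T, typename U>
bool operator!=(const umpire_std_allocator<T>& a,
                const umpire_std_allocator<U>& b) noexcept {
  return !(a == b);
}
```

Such a wrapper can be dropped into `std::vector<double, umpire_std_allocator<double>>` for quick experiments with pool-backed host storage.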
diff --git a/src/TiledArray/host/env.h b/src/TiledArray/host/env.h
index 2ae0bf6930..21c487222d 100644
--- a/src/TiledArray/host/env.h
+++ b/src/TiledArray/host/env.h
@@ -28,6 +28,7 @@
 // for memory management
 #include
+#include <mutex>
 #include
 #include
 #include
 
@@ -42,11 +43,11 @@ namespace TiledArray {
 
 /**
- * hostEnv set up global environment
+ * hostEnv maintains the (host-side, as opposed to device-side) environment,
+ * such as memory allocators
  *
- * Singleton class
+ * \note this is a Singleton
  */
-
 class hostEnv {
  public:
   ~hostEnv() = default;
@@ -56,20 +57,26 @@ class hostEnv {
   hostEnv& operator=(const hostEnv&) = delete;
   hostEnv& operator=(hostEnv&&) = delete;
 
-  /// access the instance, if not initialized will be initialized using default
-  /// params
+  /// access the singleton instance; if not initialized will be
+  /// initialized via hostEnv::initialize() with the default params
   static std::unique_ptr<hostEnv>& instance() {
     if (!instance_accessor()) {
-      initialize(TiledArray::get_default_world());
+      initialize();
     }
     return instance_accessor();
   }
 
   /// initialize the instance using explicit params
-  static void initialize(World& world,
-                         const std::uint64_t max_memory_size = (1ul << 40),
-                         const std::uint64_t page_size = (1ul << 22)) {
-    // initialize only when not initialized
+  /// \param max_memory_size max amount of memory (bytes) that TiledArray
+  ///        can use for storage of TA::Tensor objects (these by default
+  ///        store DistArray tile data and (if sparse) shape) [default=2^40]
+  /// \param page_size memory added to the pool in chunks of at least
+  ///        this size (bytes) [default=2^25]
+  static void initialize(const std::uint64_t max_memory_size = (1ul << 40),
+                         const std::uint64_t page_size = (1ul << 25)) {
+    static std::mutex mtx;  // to make initialize() reentrant
+    std::scoped_lock lock{mtx};
+    // only the winner of the lock race gets to initialize
     if (instance_accessor() == nullptr) {
       // uncomment to debug umpire ops
       //
@@ -80,26 +87,24 @@ class hostEnv {
       auto& rm = umpire::ResourceManager::getInstance();
 
-      // turn off Umpire introspection for non-Debug builds
-#ifndef NDEBUG
-      constexpr auto introspect = true;
-#else
+      // N.B. we don't rely on Umpire introspection (even for profiling)
       constexpr auto introspect = false;
-#endif
 
-      // allocate zero memory for device pool, same grain for subsequent allocs
+      // use QuickPool for host memory allocation, with min grain of 1 page
       auto host_size_limited_alloc =
          rm.makeAllocator<umpire::strategy::SizeLimiter, introspect>(
-              "size_limited_alloc", rm.getAllocator("HOST"), max_memory_size);
+              "SizeLimited_HOST", rm.getAllocator("HOST"), max_memory_size);
       auto host_dynamic_pool =
          rm.makeAllocator<umpire::strategy::QuickPool, introspect>(
-              "HostDynamicPool", host_size_limited_alloc, 0, page_size);
-      auto thread_safe_host_dynamic_pool =
+              "QuickPool_SizeLimited_HOST", host_size_limited_alloc, page_size,
+              page_size, /* alignment */ TILEDARRAY_ALIGN_SIZE);
+      auto thread_safe_host_aligned_dynamic_pool =
          rm.makeAllocator<umpire::strategy::ThreadSafeAllocator, introspect>(
-              "ThreadSafeHostDynamicPool", host_dynamic_pool);
+              "ThreadSafe_QuickPool_SizeLimited_HOST", host_dynamic_pool);
 
       auto host_env = std::unique_ptr<hostEnv>(
-          new hostEnv(world, thread_safe_host_dynamic_pool));
+          new hostEnv(TiledArray::get_default_world(),
+                      thread_safe_host_aligned_dynamic_pool));
       instance_accessor() = std::move(host_env);
     }
   }
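Based on the `initialize()` signature shown in the hunk above, a hypothetical usage sketch (the umbrella header, pool sizes, and placement of the call are assumptions, not prescribed by the PR) that pre-sizes the host pool before any `TA::Tensor` is created and reads back its high watermark at the end:

```cpp
#include <iostream>
#include <tiledarray.h>  // assumed umbrella header

int main(int argc, char** argv) {
  auto& world = TA::initialize(argc, argv);

  // cap the host pool at 8 GiB and grow it in 64 MiB chunks;
  // must run before the first TA::Tensor allocation
  TA::hostEnv::initialize(/* max_memory_size = */ 8ul << 30,
                          /* page_size = */ 64ul << 20);

  // ... create and use DistArrays ...

  if (world.rank() == 0)
    std::cout << "host pool high watermark: "
              << TA::hostEnv::instance()->host_allocator().getHighWatermark()
              << " bytes\n";

  TA::finalize();
  return 0;
}
```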
diff --git a/src/TiledArray/math/linalg/ttg/util.h b/src/TiledArray/math/linalg/ttg/util.h
index 1a9e60b7ba..76e3c023ec 100644
--- a/src/TiledArray/math/linalg/ttg/util.h
+++ b/src/TiledArray/math/linalg/ttg/util.h
@@ -229,7 +229,7 @@ auto make_writer_ttg(
   auto keymap2 = [pmap = A.pmap_shared(),
                   range = A.trange().tiles_range()](const Key2& key) {
-    const auto IJ = range.ordinal({key.I, key.J});
+    const auto IJ = range.ordinal({key[0], key[1]});
     return pmap->owner(IJ);
   };
 
@@ -239,8 +239,8 @@
                    (Layout == lapack::Layout::ColMajor ? tile.rows()
                                                        : tile.cols()));
     // the code below only works if tile's LD == rows
-    const int I = key.I;
-    const int J = key.J;
+    const int I = key[0];
+    const int J = key[1];
     auto rng = A.trange().make_tile_range({I, J});
     if constexpr (Uplo != lapack::Uplo::General) {
       if (I != J &&
diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h
index 5fe2a3cb21..ed0500d7e7 100644
--- a/src/TiledArray/tensor/tensor.h
+++ b/src/TiledArray/tensor/tensor.h
@@ -40,13 +40,6 @@ void gemm(Alpha alpha, const Tensor& A, const Tensor& B,
 
 namespace detail {
 
-#ifdef TA_TENSOR_MEM_PROFILE
-inline static std::mutex
-    ta_tensor_mem_profile_mtx;  // protects the following statics
-inline static std::uint64_t nbytes_allocated = 0;
-inline static std::uint64_t max_nbytes_allocated = 0;
-#endif  // TA_TENSOR_MEM_PROFILE
-
 /// Signals that we can take the trace of a Tensor (for numeric \c T)
 template <typename T, typename A>
 struct TraceIsDefined<Tensor<T, A>, enable_if_numeric_t<T>> : std::true_type {};
@@ -97,45 +90,6 @@ class Tensor {
   template <typename X>
   using numeric_t = typename TiledArray::detail::numeric_type<X>::type;
 
-#ifdef TA_TENSOR_MEM_PROFILE
-  enum class MemOp { Alloc, Dealloc };
-  void alloc_record(std::uint64_t n, MemOp action) {
-    const double to_MiB =
-        1 / (1024.0 * 1024.0); /* Convert from bytes to MiB */
-    const auto nbytes = n * sizeof(value_type);
-    {
-      std::scoped_lock lock(detail::ta_tensor_mem_profile_mtx);
-      if (action == MemOp::Alloc) {
-        detail::nbytes_allocated += nbytes;
-        detail::max_nbytes_allocated =
-            std::max(detail::nbytes_allocated, detail::max_nbytes_allocated);
-      } else
-        detail::nbytes_allocated -= nbytes;
-    }
-    char buf[1024];
-    auto value_type_str = []() {
-      if constexpr (std::is_same_v<value_type, double>)
-        return "double";
-      else if constexpr (std::is_same_v<value_type, float>)
-        return "float";
-      else if constexpr (std::is_same_v<value_type, std::complex<double>>)
-        return "zdouble";
-      else if constexpr (std::is_same_v<value_type, std::complex<float>>)
-        return "zfloat";
-      else
-        return "";
-    };
-    std::snprintf(
-        buf, 1023,
-        "TA::Tensor<%s>: %sallocated %lf MiB [wm = %lf MiB hwm = %lf MiB]\n",
-        value_type_str(), (action == MemOp::Dealloc ? "de" : " "),
-        nbytes * to_MiB, detail::nbytes_allocated * to_MiB,
-        detail::max_nbytes_allocated * to_MiB);
-    auto& os = madness::print_meminfo_ostream();
-    os << buf;
-    os.flush();
-  }
-#endif
-
   template <typename... Ts>
   struct is_tensor {
     static constexpr bool value = detail::is_tensor<Ts...>::value ||
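The keymap change above addresses `Key2` by index and maps a tile's row-major ordinal to an owning rank through the process map. A self-contained toy sketch of that ordinal-to-owner pattern (hypothetical types and a round-robin "pmap"; not the TTG `Key2` or TiledArray `Pmap`):

```cpp
#include <array>
#include <cstddef>
#include <iostream>

int main() {
  const std::array<std::size_t, 2> tiles_per_dim{4, 5};  // a 4x5 tile grid
  const std::size_t nproc = 6;

  // row-major ordinal, analogous to Range::ordinal({i, j})
  auto ordinal = [&](std::array<std::size_t, 2> key) {
    return key[0] * tiles_per_dim[1] + key[1];
  };
  // round-robin ownership, standing in for pmap->owner(ordinal)
  auto owner = [&](std::size_t ord) { return ord % nproc; };

  const std::array<std::size_t, 2> key{2, 3};
  std::cout << "tile {2,3} -> ordinal " << ordinal(key) << " -> owner rank "
            << owner(ordinal(key)) << "\n";
}
```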
"de" : " "), - nbytes * to_MiB, detail::nbytes_allocated * to_MiB, - detail::max_nbytes_allocated * to_MiB); - auto& os = madness::print_meminfo_ostream(); - os << buf; - os.flush(); - } -#endif - template struct is_tensor { static constexpr bool value = detail::is_tensor::value || @@ -149,9 +103,6 @@ class Tensor { size_t size = range_.volume() * batch_size; allocator_type allocator; auto* ptr = allocator.allocate(size); -#ifdef TA_TENSOR_MEM_PROFILE - alloc_record(size, MemOp::Alloc); -#endif if (default_construct) { std::uninitialized_default_construct_n(ptr, size); // std::uninitialized_value_construct_n(ptr, size); @@ -160,9 +111,6 @@ class Tensor { size](auto&& ptr) mutable { std::destroy_n(ptr, size); allocator.deallocate(ptr, size); -#ifdef TA_TENSOR_MEM_PROFILE - alloc_record(size, MemOp::Dealloc); -#endif }; this->data_ = std::shared_ptr(ptr, std::move(deleter)); } @@ -172,9 +120,6 @@ class Tensor { size_t size = range_.volume() * batch_size; allocator_type allocator; auto* ptr = allocator.allocate(size); -#ifdef TA_TENSOR_MEM_PROFILE - alloc_record(size, MemOp::Alloc); -#endif if (default_construct) { std::uninitialized_default_construct_n(ptr, size); // std::uninitialized_value_construct_n(ptr, size); @@ -182,10 +127,7 @@ class Tensor { auto deleter = [this, allocator = std::move(allocator), size](auto&& ptr) mutable { std::destroy_n(ptr, size); - allocator.deallocate(ptr, size); -#ifdef TA_TENSOR_MEM_PROFILE - alloc_record(size, MemOp::Dealloc); -#endif + allocator.deallocate(ptr, size * sizeof(T)); }; this->data_ = std::shared_ptr(ptr, std::move(deleter)); } @@ -2055,8 +1997,8 @@ class Tensor { }; // class Tensor -template -Tensor operator*(const Permutation &p, const Tensor &t) { +template +Tensor operator*(const Permutation& p, const Tensor& t) { return t.permute(p); } diff --git a/src/TiledArray/tile.h b/src/TiledArray/tile.h index 99be71851c..53bb56f444 100644 --- a/src/TiledArray/tile.h +++ b/src/TiledArray/tile.h @@ -557,8 +557,8 @@ class Tile { // Serialization ----------------------------------------------------------- template >::type* = nullptr> + typename std::enable_if< + madness::is_output_archive_v>::type* = nullptr> void serialize(Archive& ar) const { // Serialize data for empty tile check bool empty = !static_cast(pimpl_); @@ -570,8 +570,8 @@ class Tile { } template >::type* = nullptr> + typename std::enable_if< + madness::is_input_archive_v>::type* = nullptr> void serialize(Archive& ar) { // Check for empty tile bool empty = false; @@ -1150,7 +1150,7 @@ inline Tile& inplace_binary(Tile& left, const Tile& right, // Scaling operations -------------------------------------------------------- -/// Scalar the tile argument +/// Scale the tile argument /// \tparam Arg The tile argument type /// \param arg The left-hand argument to be scaled @@ -1614,10 +1614,10 @@ inline std::ostream& operator<<(std::ostream& os, const Tile& tile) { template struct Cast< TiledArray::Tensor, Tile, - std::void_t, T>>()( - std::declval()))>> { + std::void_t< + decltype(std::declval, + T>>()(std::declval()))>> { auto operator()(const Tile& arg) const { return TiledArray::Cast< TiledArray::Tensor, T>{}( diff --git a/src/TiledArray/val_array.h b/src/TiledArray/val_array.h index 65c2b72785..9a8620443d 100644 --- a/src/TiledArray/val_array.h +++ b/src/TiledArray/val_array.h @@ -26,12 +26,9 @@ #ifndef TILEDARRAY_SHARED_BUFFER_H__INCLUDED #define TILEDARRAY_SHARED_BUFFER_H__INCLUDED +#include #include -#ifndef TILEDARRAY_DEFAULT_ALIGNMENT -#define TILEDARRAY_DEFAULT_ALIGNMENT 16 -#endif // 
diff --git a/src/TiledArray/val_array.h b/src/TiledArray/val_array.h
index 65c2b72785..9a8620443d 100644
--- a/src/TiledArray/val_array.h
+++ b/src/TiledArray/val_array.h
@@ -26,12 +26,9 @@
 #ifndef TILEDARRAY_SHARED_BUFFER_H__INCLUDED
 #define TILEDARRAY_SHARED_BUFFER_H__INCLUDED
 
+#include <TiledArray/config.h>
 #include <TiledArray/size_array.h>
 
-#ifndef TILEDARRAY_DEFAULT_ALIGNMENT
-#define TILEDARRAY_DEFAULT_ALIGNMENT 16
-#endif  // TILEDARRAY_ALIGNMENT
-
 namespace TiledArray {
 namespace detail {
@@ -58,7 +55,7 @@ class ValArray : private SizeArray<T> {
   typedef typename SizeArray<T>::const_iterator
       const_iterator;  ///< Const iterator type
 
-  static const std::size_t alignment = TILEDARRAY_DEFAULT_ALIGNMENT;
+  static const std::size_t alignment = TILEDARRAY_ALIGN_SIZE;
 
  private:
  /// The pointer to reference counter
@@ -453,8 +450,7 @@ class ValArray : private SizeArray<T> {
  /// \tparam Archive An output archive type
  /// \param[out] ar an Archive object
  template <typename Archive,
-            typename = std::enable_if_t<
-                madness::archive::is_output_archive<Archive>::value>>
+            typename = std::enable_if_t<madness::is_output_archive_v<Archive>>>
  void serialize(Archive& ar) const {
    // need to write size first to be able to init when deserializing
    ar& size() & madness::archive::wrap(data(), size());
@@ -465,8 +461,7 @@ class ValArray : private SizeArray<T> {
  /// \tparam Archive An input archive type
  /// \param[out] ar an Archive object
  template <typename Archive,
-            typename = std::enable_if_t<
-                madness::archive::is_input_archive<Archive>::value>>
+            typename = std::enable_if_t<madness::is_input_archive_v<Archive>>>
  void serialize(Archive& ar) {
    size_t sz = 0;
    ar& sz;
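The serialization changes here and in tile.h swap `madness::archive::is_*_archive<A>::value` for the `madness::is_*_archive_v<A>` shortcuts. A generic sketch of that trait/`_v`/SFINAE pattern with stand-in types (not MADNESS's actual definitions):

```cpp
#include <iostream>
#include <type_traits>

struct BinaryOutputArchive {};  // stand-ins for real archive types
struct BinaryInputArchive {};

template <typename A>
struct is_output_archive : std::is_same<A, BinaryOutputArchive> {};

// the _v variable template is just shorthand for ...::value
template <typename A>
inline constexpr bool is_output_archive_v = is_output_archive<A>::value;

// overload participates only for output archives
template <typename Archive,
          typename = std::enable_if_t<is_output_archive_v<Archive>>>
void serialize(Archive&) {
  std::cout << "serializing to an output archive\n";
}

int main() {
  BinaryOutputArchive ar;
  serialize(ar);  // OK
  // BinaryInputArchive in; serialize(in);  // would not compile: SFINAE removes the overload
}
```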