Skip to content

Commit

Permalink
Merge branch 'refs/heads/master' into gaudel/feature/retile_tot_arrays
Browse files Browse the repository at this point in the history
# Conflicts:
#	.github/workflows/ci.yml
#	INSTALL.md
#	external/versions.cmake
  • Loading branch information
bimalgaudel committed Oct 15, 2024
2 parents c12bc45 + 1986ccf commit 4bf23ad
Show file tree
Hide file tree
Showing 16 changed files with 173 additions and 126 deletions.
19 changes: 4 additions & 15 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,10 @@ jobs:
sudo ln -s /usr/lib/x86_64-linux-gnu/libscalapack-openmpi.so /usr/lib/x86_64-linux-gnu/libscalapack.so
echo "MPIEXEC=/usr/bin/mpiexec" >> $GITHUB_ENV
- name: Prepare ccache timestamp
id: ccache_cache_timestamp
shell: cmake -P {0}
run: |
string(TIMESTAMP current_date "%Y-%m-%d-%H;%M;%S" UTC)
message("::set-output name=timestamp::${current_date}")
- name: Setup ccache cache files
uses: actions/[email protected]
- name: Setup ccache
uses: hendrikmuhs/[email protected]
with:
path: ${{github.workspace}}/build/.ccache
key: ${{ matrix.config.name }}-ccache-${{ steps.ccache_cache_timestamp.outputs.timestamp }}
restore-keys: |
${{ matrix.config.name }}-ccache-
key: ccache-${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.task_backend }}

- name: "Configure build: ${{ env.BUILD_CONFIG }}"
shell: bash
Expand All @@ -89,13 +79,12 @@ jobs:
working-directory: ${{github.workspace}}/build
shell: bash
run: |
ccache -p && ccache -z && cmake --build . --target tiledarray && cmake --build . --target examples && ccache -s
ccache -p && ccache -z && cmake --build . --target tiledarray ta_test examples && ccache -s
- name: Test
working-directory: ${{github.workspace}}/build
shell: bash
#run: ctest -C $${{matrix.build_type}}
run: |
source ${{github.workspace}}/ci/openmpi.env
cmake --build . --target ta_test
cmake --build . --target check-tiledarray
13 changes: 13 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ before_script:
# TODO optimize ta_test build memory consumption
- export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:=1}
- echo "CMAKE_BUILD_PARALLEL_LEVEL=$CMAKE_BUILD_PARALLEL_LEVEL"
# configure ccache
- export CCACHE_DIR=/root/.ccache
- export CCACHE_COMPRESS=true
- export CCACHE_COMPRESSLEVEL=6
# print out the ccache configuration
- ccache -p
# zero out the ccache statistics
- ccache -z

ubuntu:
stage: build
Expand Down Expand Up @@ -64,3 +72,8 @@ ubuntu:
ENABLE_CUDA : [ "ENABLE_CUDA=ON" ]
TA_TARGETS : [ "tiledarray examples-tiledarray check_serial-tiledarray" ]
RUNNER_TAGS: [ cuda ]


after_script:
# print out the ccache statistics
- ccache -s
20 changes: 11 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,16 @@ include_directories(${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src)
##########################
add_custom_target(External-tiledarray)

# ccache is an optional dependency, but it must be found first so that the rest of the dependencies can use it
find_program(CCACHE ccache)
if(CCACHE)
mark_as_advanced(CCACHE)
message (STATUS "Found ccache: ${CCACHE}")
set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C++")
set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C")
set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling CUDA")
endif(CCACHE)

# required deps:
# 1. derive runtime (CUDA/HIP/...) first since others may depend on it
if(ENABLE_CUDA)
Expand Down Expand Up @@ -336,15 +346,7 @@ if(ENABLE_SCALAPACK)
include(external/scalapackpp.cmake)
endif()

# optional deps:
# 1. ccache
find_program(CCACHE ccache)
if(CCACHE)
mark_as_advanced(CCACHE)
message (STATUS "Found ccache: ${CCACHE}")
set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C++")
set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C")
endif(CCACHE)
# other optional deps:
# 2. TTG
# N.B. make sure TA configures MADNESS correctly
#if (TA_TTG)
Expand Down
6 changes: 3 additions & 3 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ Both methods are supported. However, for most users we _strongly_ recommend to b
- Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing*
- Boost.Range: header-only, *only used for unit testing*
- [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20 and later.
- [BTAS](http://github.com/ValeevGroup/BTAS), tag 4b3757cc2b5862f93589afc1e37523e543779c7a . If usable BTAS installation is not found, TiledArray will download and compile
- [BTAS](http://github.com/ValeevGroup/BTAS), tag 1cfcb12647c768ccd83b098c64cda723e1275e49 . If usable BTAS installation is not found, TiledArray will download and compile
BTAS from source. *This is the recommended way to compile BTAS for all users*.
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 95589b0d020a076f93d02eead6da654b23dd3d91 .
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 93a9a5cec2a8fa87fba3afe8056607e6062a9058 .
Only the MADworld runtime and the BLAS/LAPACK C API components of MADNESS are used by TiledArray.
If usable MADNESS installation is not found, TiledArray will download and compile
MADNESS from source. *This is the recommended way to compile MADNESS for all users*.
Expand All @@ -69,7 +69,7 @@ Optional prerequisites:
- [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on NVIDIA's CUDA-enabled accelerators. CUDA 11 or later is required.
- [HIP/ROCm compiler and runtime](https://rocm.docs.amd.com/) -- for execution on AMD's ROCm-enabled accelerators. Note that TiledArray does not use ROCm directly but its C++ Heterogeneous-Compute Interface for Portability, `HIP`; although HIP can also be used to program CUDA-enabled devices, in TiledArray it is used only to program ROCm devices, hence ROCm and HIP will be used interchangeably.
- [LibreTT](https://github.com/victor-anisimov/LibreTT) -- free tensor transpose library for CUDA, ROCm, and SYCL platforms that is based on the [original cuTT library](https://github.com/ap-hynninen/cutt) extended to provide thread-safety improvements (via https://github.com/ValeevGroup/cutt) and extended to non-CUDA platforms by [@victor-anisimov](https://github.com/victor-anisimov) (tag 6eed30d4dd2a5aa58840fe895dcffd80be7fbece).
- [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag v2024.02.1).
- [Umpire](https://github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag 8c85866107f78a58403e20a2ae8e1f24c9852287).
- [Doxygen](http://www.doxygen.nl/) -- for building documentation (version 1.8.12 or later).
- [ScaLAPACK](http://www.netlib.org/scalapack/) -- a distributed-memory linear algebra package. If detected, the following C++ components will also be sought and downloaded, if missing:
- [scalapackpp](https://github.com/wavefunction91/scalapackpp.git) -- a modern C++ (C++17) wrapper for ScaLAPACK (tag 6397f52cf11c0dfd82a79698ee198a2fce515d81); pulls and builds the following additional prerequisite
Expand Down
7 changes: 7 additions & 0 deletions external/librett.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ else()
"-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
endif(CMAKE_TOOLCHAIN_FILE)

foreach(lang C CXX CUDA)
if (DEFINED CMAKE_${lang}_COMPILER_LAUNCHER)
list(APPEND LIBRETT_CMAKE_ARGS
"-DCMAKE_${lang}_COMPILER_LAUNCHER=${CMAKE_${lang}_COMPILER_LAUNCHER}")
endif()
endforeach()

if (BUILD_SHARED_LIBS)
set(LIBRETT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
else(BUILD_SHARED_LIBS)
Expand Down
11 changes: 9 additions & 2 deletions external/umpire.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ else()
)
endif(CMAKE_TOOLCHAIN_FILE)

foreach(lang C CXX CUDA)
if (DEFINED CMAKE_${lang}_COMPILER_LAUNCHER)
list(APPEND UMPIRE_CMAKE_ARGS
"-DCMAKE_${lang}_COMPILER_LAUNCHER=${CMAKE_${lang}_COMPILER_LAUNCHER}")
endif()
endforeach()

if (BUILD_SHARED_LIBS)
set(UMPIRE_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
else(BUILD_SHARED_LIBS)
Expand All @@ -170,8 +177,6 @@ else()
DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR}
GIT_REPOSITORY ${UMPIRE_URL}
GIT_TAG ${UMPIRE_TAG}
#--Patch step-----------------
PATCH_COMMAND patch -p1 -i ${CMAKE_CURRENT_SOURCE_DIR}/external/umpire.finalize_io.patch
#--Configure step-------------
SOURCE_DIR ${EXTERNAL_SOURCE_DIR}
LIST_SEPARATOR ::
Expand Down Expand Up @@ -218,6 +223,8 @@ else()
"$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src>;$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src/tpl>;$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src/tpl/umpire/camp/include>;$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src/tpl/umpire/fmt/include>;$<BUILD_INTERFACE:${EXTERNAL_BUILD_DIR}/src/tpl/umpire/camp/include>;$<BUILD_INTERFACE:${EXTERNAL_BUILD_DIR}/include>;$<INSTALL_INTERFACE:${_UMPIRE_INSTALL_DIR}/include>"
INTERFACE_LINK_LIBRARIES
"$<BUILD_INTERFACE:${UMPIRE_BUILD_BYPRODUCTS}>;$<INSTALL_INTERFACE:${_UMPIRE_INSTALL_DIR}/lib/libumpire${UMPIRE_DEFAULT_LIBRARY_SUFFIX}>"
INTERFACE_COMPILE_DEFINITIONS
FMT_HEADER_ONLY=1
)

install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray)
Expand Down
47 changes: 0 additions & 47 deletions external/umpire.finalize_io.patch

This file was deleted.

12 changes: 6 additions & 6 deletions external/versions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,19 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7)
set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626)
set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496)

set(TA_TRACKED_MADNESS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91)
set(TA_TRACKED_MADNESS_PREVIOUS_TAG 96ac90e8f193ccfaf16f346b4652927d2d362e75)
set(TA_TRACKED_MADNESS_TAG 93a9a5cec2a8fa87fba3afe8056607e6062a9058)
set(TA_TRACKED_MADNESS_PREVIOUS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91)
set(TA_TRACKED_MADNESS_VERSION 0.10.1)
set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)

set(TA_TRACKED_BTAS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a)
set(TA_TRACKED_BTAS_PREVIOUS_TAG 4e8f5233aa7881dccdfcc37ce07128833926d3c2)
set(TA_TRACKED_BTAS_TAG 1cfcb12647c768ccd83b098c64cda723e1275e49)
set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a)

set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece)
set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83)

set(TA_TRACKED_UMPIRE_TAG v2024.02.1)
set(TA_TRACKED_UMPIRE_PREVIOUS_TAG 20839b2e8e8972070dd8f75c7f00d50d6c399716)
set(TA_TRACKED_UMPIRE_TAG 8c85866107f78a58403e20a2ae8e1f24c9852287)
set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v2024.02.1)

set(TA_TRACKED_SCALAPACKPP_TAG 6397f52cf11c0dfd82a79698ee198a2fce515d81)
set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 711ef363479a90c88788036f9c6c8adb70736cbf )
Expand Down
2 changes: 1 addition & 1 deletion python/src/TiledArray/python/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ void make_array_class(py::object m, const char *name) {
py::return_value_policy::reference)
.def_property_readonly("trange", &array::trange<Array>)
.def_property_readonly("shape", &array::shape<Array, py::tuple>)
.def("fill", &Array::fill, py::arg("value"),
.def("fill", &Array::template fill<>, py::arg("value"),
py::arg("skip_set") = false)
.def("init", &array::init_tiles<Array>)
// Array object needs to be alive while the iterator is used
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ if(HIP_FOUND OR CUDA_FOUND)
TiledArray/external/cuda.h
TiledArray/device/cpu_cuda_vector.h)
endif(CUDA_FOUND)
endif(CUDA_FOUND OR HIP_FOUND)
endif(HIP_FOUND OR CUDA_FOUND)

set(TILEDARRAY_SOURCE_FILES
TiledArray/tiledarray.cpp
Expand Down
45 changes: 40 additions & 5 deletions src/TiledArray/array_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,17 @@ std::ostream& operator<<(std::ostream& os, const TileConstReference<Impl>& a) {
return os;
}

/// Callback used to update a counter (typically, a task counter)

/// Each notification increments the referenced counter by one, after which
/// the callback object destroys itself. Consequently every instance must be
/// allocated with `new` and must not be touched once notify() has run.
/// \tparam AtomicInt the counter type; must support pre-increment
///         (e.g. `std::atomic<std::int64_t>`)
/// \note the counter is held by reference, so it must outlive this callback
template <typename AtomicInt>
struct IncrementCounter : public madness::CallbackInterface {
  AtomicInt& counter;
  /// \param[in,out] counter the counter to increment upon notification
  // `explicit` prevents an AtomicInt from silently converting into a
  // self-deleting callback object
  explicit IncrementCounter(AtomicInt& counter) : counter(counter) {}
  /// Increments the counter, then deletes this object
  void notify() override {
    ++counter;
    delete this;  // the callback releases itself after firing
  }
};

} // namespace detail
} // namespace TiledArray

Expand Down Expand Up @@ -773,20 +784,24 @@ class ArrayImpl : public TensorImpl<Policy>,
/// \tparam Op The type of the functor/function
/// \param[in] op The operation used to generate tiles
/// \param[in] skip_set If false, will throw if any tiles are already set
/// \return the total number of tiles that have been (or will be) initialized
/// \throw TiledArray::Exception if the PIMPL is not set. Strong throw
/// guarantee.
/// \throw TiledArray::Exception if a tile is already set and skip_set is
/// false. Weak throw guarantee.
template <HostExecutor Exec = HostExecutor::Default, typename Op>
void init_tiles(Op&& op, bool skip_set = false) {
template <HostExecutor Exec = HostExecutor::Default, Fence fence = Fence::No,
typename Op>
std::int64_t init_tiles(Op&& op, bool skip_set = false) {
// lifetime management of op depends on whether it is a lvalue ref (i.e. has
// an external owner) or an rvalue ref
// - if op is an lvalue ref: pass op to tasks
// - if op is an rvalue ref pass make_shared_function(op) to tasks
auto op_shared_handle = make_op_shared_handle(std::forward<Op>(op));

std::int64_t ntiles_initialized{0};
auto it = this->pmap()->begin();
const auto end = this->pmap()->end();
std::atomic<std::int64_t> ntask_completed{0};
for (; it != end; ++it) {
const auto& index = *it;
if (!this->is_zero(index)) {
Expand All @@ -795,19 +810,39 @@ class ArrayImpl : public TensorImpl<Policy>,
if (fut.probe()) continue;
}
if constexpr (Exec == HostExecutor::MADWorld) {
Future<value_type> tile = this->world().taskq.add(
[this_sptr = this->shared_from_this(),
index = ordinal_type(index), op_shared_handle]() -> value_type {
Future<value_type> tile =
this->world().taskq.add([this_sptr = this->shared_from_this(),
index = ordinal_type(index),
op_shared_handle, this]() -> value_type {
return op_shared_handle(
this_sptr->trange().make_tile_range(index));
});
++ntiles_initialized;
if constexpr (fence == Fence::Local) {
tile.register_callback(
new IncrementCounter<decltype(ntask_completed)>(
ntask_completed));
}
set(index, std::move(tile));
} else {
static_assert(Exec == HostExecutor::Thread);
set(index, op_shared_handle(this->trange().make_tile_range(index)));
++ntiles_initialized;
}
}
}

if constexpr (fence == Fence::Local) {
if constexpr (Exec == HostExecutor::MADWorld) {
if (ntiles_initialized > 0)
this->world().await([&ntask_completed, ntiles_initialized]() {
return ntask_completed == ntiles_initialized;
});
}
} else if constexpr (fence == Fence::Global) {
this->world().gop.fence();
}
return ntiles_initialized;
}

}; // class ArrayImpl
Expand Down
Loading

0 comments on commit 4bf23ad

Please sign in to comment.