Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LibreTT integration into TiledArray #352

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Compiling BTAS requires the following prerequisites:

Optional prerequisites:
- [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on CUDA-enabled accelerators. CUDA 11 or later is required. Support for CUDA also requires the following additional prerequisites, both of which will be built and installed automatically if missing:
- [cuTT](github.com/ValeevGroup/cutt) -- CUDA transpose library; note that our fork of the [original cuTT repo](github.com/ap-hynninen/cutt) is required to provide thread-safety (tag 0e8685bf82910bc7435835f846e88f1b39f47f09).
- [LibreTT](https://github.com/victor-anisimov/LibreTT) -- free tensor transpose library for CUDA, HIP, and SYCL platforms that is based on the [original cuTT library](https://github.com/ap-hynninen/cutt) with our additional thread-safety improvements (tag 68abe31a9ec6fd2fd9ffbcd874daa80457f947da).
- [Umpire](https://github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag f9640e0fa4245691cdd434e4f719ac5f7d455f82).
- [Doxygen](http://www.doxygen.nl/) -- for building documentation (version 1.8.12 or later).
- [ScaLAPACK](http://www.netlib.org/scalapack/) -- a distributed-memory linear algebra package. If detected, the following C++ components will also be sought and downloaded, if missing:
Expand Down Expand Up @@ -329,7 +329,7 @@ Support for execution on CUDA-enabled hardware is controlled by the following va
* `ENABLE_CUDA` -- Set to `ON` to turn on CUDA support. [Default=OFF].
* `CMAKE_CUDA_HOST_COMPILER` -- Set to the path to the host C++ compiler to be used by CUDA compiler. CUDA compilers used to be notorious for only being able to use specific C++ host compilers, but support for more recent C++ host compilers has improved. The default is determined by the CUDA compiler and the user environment variables (`PATH` etc.).
* `ENABLE_CUDA_ERROR_CHECK` -- Set to `ON` to turn on assertions for successful completion of calls to CUDA runtime and libraries. [Default=OFF].
* `CUTT_INSTALL_DIR` -- the installation prefix of the pre-installed cuTT library. This should not be normally needed; it is strongly recommended to let TiledArray build and install cuTT.
* `LIBRETT_INSTALL_DIR` -- the installation prefix of the pre-installed LibreTT library. This should not normally be needed; it is strongly recommended to let TiledArray build and install LibreTT.
* `UMPIRE_INSTALL_DIR` -- the installation prefix of the pre-installed Umpire library. This should not normally be needed; it is strongly recommended to let TiledArray build and install Umpire.

For the CUDA compiler and toolkit to be discoverable the CUDA compiler (`nvcc`) should be in the `PATH` environment variable. Refer to the [FindCUDAToolkit module](https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html) for more info.
Expand Down
10 changes: 5 additions & 5 deletions bin/admin/dependency-versions-update-hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,11 @@ def replace_dep_id(topsrc, file_ext, dep_name, old_id, new_id, search_prefix = '
btas_old_tag = tokens[2]
else:
btas_new_tag = tokens[2]
elif tokens[1].find('CUTT') != -1:
elif tokens[1].find('LIBRETT') != -1:
if tokens[1].find('PREVIOUS') != -1:
cutt_old_tag = tokens[2]
librett_old_tag = tokens[2]
else:
cutt_new_tag = tokens[2]
librett_new_tag = tokens[2]
elif tokens[1].find('UMPIRE') != -1:
if tokens[1].find('PREVIOUS') != -1:
umpire_old_tag = tokens[2]
Expand Down Expand Up @@ -146,8 +146,8 @@ def replace_dep_id(topsrc, file_ext, dep_name, old_id, new_id, search_prefix = '
# BTAS tag in INSTALL.md
any_files_changed |= replace_dep_id(topsrc, 'md', 'BTAS', btas_old_tag, btas_new_tag, 'ValeevGroup/BTAS), tag ', '')

# cuTT tag in INSTALL.md
any_files_changed |= replace_dep_id(topsrc, 'md', 'cuTT', cutt_old_tag, cutt_new_tag, '', '')
# LibreTT tag in INSTALL.md
any_files_changed |= replace_dep_id(topsrc, 'md', 'LibreTT', librett_old_tag, librett_new_tag, '', '')

# Umpire tag in INSTALL.md
any_files_changed |= replace_dep_id(topsrc, 'md', 'Umpire', umpire_old_tag, umpire_new_tag, '', '')
Expand Down
2 changes: 1 addition & 1 deletion examples/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

if(CUDA_FOUND)

foreach(_exec cuda_cutt cuda_task ta_dense_cuda ta_cc_abcd_cuda ta_vector_cuda ta_reduce_cuda)
foreach(_exec cuda_librett cuda_task ta_dense_cuda ta_cc_abcd_cuda ta_vector_cuda ta_reduce_cuda)

# Add executable
add_ta_executable(${_exec} "${_exec}.cpp" "tiledarray")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#include <iostream>

/**
* Test cuTT
* Test LibreTT
*/

const std::size_t N = 100;
Expand Down
4 changes: 2 additions & 2 deletions external/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,6 @@ message(STATUS "CMAKE Implicit Link Directories: ${CMAKE_CUDA_IMPLICIT_LINK_DIRE
include(external/umpire.cmake)

##
## cuTT
## LibreTT
##
include(external/cutt.cmake)
include(external/librett.cmake)
97 changes: 49 additions & 48 deletions external/cutt.cmake → external/librett.cmake
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
##
## find cuTT
## find LibreTT
##

find_path(_CUTT_INSTALL_DIR NAMES include/cutt.h lib/libcutt.a HINTS ${CUTT_INSTALL_DIR})
find_path(_LIBRETT_INSTALL_DIR NAMES include/librett.h lib/librett.a HINTS ${LIBRETT_INSTALL_DIR})

if( _CUTT_INSTALL_DIR )
if( _LIBRETT_INSTALL_DIR )

message(STATUS "cuTT found at ${_CUTT_INSTALL_DIR}")
message(STATUS "LibreTT found at ${_LIBRETT_INSTALL_DIR}")

elseif(TA_EXPERT)

message("** cuTT was not found")
message(STATUS "** Downloading and building cuTT is explicitly disabled in EXPERT mode")
message("** LibreTT was not found")
message(STATUS "** Downloading and building LibreTT is explicitly disabled in EXPERT mode")

else()

# TODO need to fix the auto installation of cuTT
# TODO need to fix the auto installation of LibreTT

include(ExternalProject)

# to pass CMAKE_C_* vars to external project
enable_language(C)

# set source and build path for cuTT in the TiledArray project
set(EXTERNAL_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/cutt-src)
# cutt only supports in source build
set(EXTERNAL_BUILD_DIR ${FETCHCONTENT_BASE_DIR}/cutt-build)
# set source and build path for LibreTT in the TiledArray project
set(EXTERNAL_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/librett-src)
# librett only supports in source build
set(EXTERNAL_BUILD_DIR ${FETCHCONTENT_BASE_DIR}/librett-build)
set(EXTERNAL_INSTALL_DIR ${CMAKE_INSTALL_PREFIX})

if (NOT CUTT_URL)
set(CUTT_URL https://github.com/ValeevGroup/cutt.git)
endif (NOT CUTT_URL)
if (NOT CUTT_TAG)
set(CUTT_TAG ${TA_TRACKED_CUTT_TAG})
endif (NOT CUTT_TAG)
if (NOT LIBRETT_URL)
set(LIBRETT_URL https://github.com/victor-anisimov/librett.git)
endif (NOT LIBRETT_URL)
if (NOT LIBRETT_TAG)
set(LIBRETT_TAG ${TA_TRACKED_LIBRETT_TAG})
endif (NOT LIBRETT_TAG)

message("** Will clone cuTT from ${CUTT_URL}")
message("** Will clone LibreTT from ${LIBRETT_URL}")

# need to change the separator of list to avoid issues with ExternalProject parsing
# set(CUDA_FLAGS "${CUDA_NVCC_FLAGS}")
# string(REPLACE ";" "::" CUDA_FLAGS "${CUDA_NVCC_FLAGS}")
#message(STATUS "CUDA_FLAGS: " "${CUDA_FLAGS}")

set(CUTT_CMAKE_ARGS
set(LIBRETT_CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_DIR}
-DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
-DCMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE}
Expand All @@ -66,87 +66,88 @@ else()
-DCMAKE_CUDA_STANDARD=${CMAKE_CUDA_STANDARD}
-DCMAKE_CUDA_EXTENSIONS=${CMAKE_CUDA_EXTENSIONS}
-DENABLE_UMPIRE=OFF
-DCUTT_USES_THIS_UMPIRE_ALLOCATOR=ThreadSafeUMDynamicPool
-DLIBRETT_USES_THIS_UMPIRE_ALLOCATOR=ThreadSafeUMDynamicPool
-DCMAKE_PREFIX_PATH=${_UMPIRE_INSTALL_DIR}
-DENABLE_NO_ALIGNED_ALLOC=ON
-DCMAKE_CUDA_HOST_COMPILER=${CMAKE_CUDA_HOST_COMPILER}
-DCUDA_TOOLKIT_ROOT_DIR=${CUDAToolkit_ROOT}
-DENABLE_CUDA=ON
)
if (DEFINED CMAKE_CUDA_ARCHITECTURES)
list(APPEND CUTT_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES})
list(APPEND LIBRETT_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES})
endif(DEFINED CMAKE_CUDA_ARCHITECTURES)
if (CMAKE_TOOLCHAIN_FILE)
set(CUTT_CMAKE_ARGS "${CUTT_CMAKE_ARGS}"
set(LIBRETT_CMAKE_ARGS "${LIBRETT_CMAKE_ARGS}"
"-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
endif(CMAKE_TOOLCHAIN_FILE)

if (BUILD_SHARED_LIBS)
set(CUTT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
set(LIBRETT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
else(BUILD_SHARED_LIBS)
set(CUTT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
set(LIBRETT_DEFAULT_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
endif(BUILD_SHARED_LIBS)

# N.B. Ninja needs spelling out the byproducts of custom targets, see https://cmake.org/cmake/help/v3.3/policy/CMP0058.html
set(CUTT_BUILD_BYPRODUCTS "${EXTERNAL_BUILD_DIR}/src/libcutt${CUTT_DEFAULT_LIBRARY_SUFFIX}")
message(STATUS "custom target cutt is expected to build these byproducts: ${CUTT_BUILD_BYPRODUCTS}")
set(LIBRETT_BUILD_BYPRODUCTS "${EXTERNAL_BUILD_DIR}/src/librett${LIBRETT_DEFAULT_LIBRARY_SUFFIX}")
message(STATUS "custom target librett is expected to build these byproducts: ${LIBRETT_BUILD_BYPRODUCTS}")

ExternalProject_Add(cutt
ExternalProject_Add(librett
PREFIX ${CMAKE_INSTALL_PREFIX}
STAMP_DIR ${FETCHCONTENT_BASE_DIR}/cutt-ep-artifacts
TMP_DIR ${FETCHCONTENT_BASE_DIR}/cutt-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable
STAMP_DIR ${FETCHCONTENT_BASE_DIR}/librett-ep-artifacts
TMP_DIR ${FETCHCONTENT_BASE_DIR}/librett-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable
#--Download step--------------
DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR}
GIT_REPOSITORY ${CUTT_URL}
GIT_TAG ${CUTT_TAG}
GIT_REPOSITORY ${LIBRETT_URL}
GIT_TAG ${LIBRETT_TAG}
#--Configure step-------------
SOURCE_DIR ${EXTERNAL_SOURCE_DIR}
LIST_SEPARATOR ::
UPDATE_DISCONNECTED 1
CMAKE_ARGS
${CUTT_CMAKE_ARGS}
${LIBRETT_CMAKE_ARGS}
${EXTERNAL_SOURCE_DIR}
#--Build step-----------------
BINARY_DIR ${EXTERNAL_BUILD_DIR}
BUILD_COMMAND ${CMAKE_COMMAND} --build . --target cutt -v
BUILD_BYPRODUCTS ${CUTT_BUILD_BYPRODUCTS}
BUILD_COMMAND ${CMAKE_COMMAND} --build . --target librett -v
BUILD_BYPRODUCTS ${LIBRETT_BUILD_BYPRODUCTS}
#--Install step---------------
INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "cuTT will be installed during TiledArray's installation."
INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "LibreTT will be installed during TiledArray's installation."
#--Custom targets-------------
STEP_TARGETS build
)

# TiledArray_CUTT target depends on existence of this directory to be usable from the build tree at configure time
# TiledArray_LIBRETT target depends on existence of this directory to be usable from the build tree at configure time
execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${EXTERNAL_SOURCE_DIR}/src")

# do install of cuTT as part of building TiledArray's install target
# do install of LibreTT as part of building TiledArray's install target
install(CODE
"execute_process(
COMMAND \"${CMAKE_COMMAND}\" \"--build\" \".\" \"--target\" \"install\"
WORKING_DIRECTORY \"${EXTERNAL_BUILD_DIR}\"
RESULT_VARIABLE error_code)
if(error_code)
message(FATAL_ERROR \"Failed to install cuTT\")
message(FATAL_ERROR \"Failed to install LibreTT\")
endif()
")

# Add cuTT dependency to External
add_dependencies(External-tiledarray cutt-build)
# Add LibreTT dependency to External
add_dependencies(External-tiledarray librett-build)

set(_CUTT_INSTALL_DIR ${EXTERNAL_INSTALL_DIR})
set(_LIBRETT_INSTALL_DIR ${EXTERNAL_INSTALL_DIR})

endif(_CUTT_INSTALL_DIR)
endif(_LIBRETT_INSTALL_DIR)

add_library(TiledArray_CUTT INTERFACE)
add_library(TiledArray_LIBRETT INTERFACE)

set_target_properties(TiledArray_CUTT
set_target_properties(TiledArray_LIBRETT
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES
"$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src>;$<INSTALL_INTERFACE:${_CUTT_INSTALL_DIR}/include>"
"$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src>;$<INSTALL_INTERFACE:${_LIBRETT_INSTALL_DIR}/include>"
INTERFACE_LINK_LIBRARIES
"$<BUILD_INTERFACE:${CUTT_BUILD_BYPRODUCTS}>;$<INSTALL_INTERFACE:${_CUTT_INSTALL_DIR}/lib/libcutt.${CUTT_DEFAULT_LIBRARY_SUFFIX}>"
"$<BUILD_INTERFACE:${LIBRETT_BUILD_BYPRODUCTS}>;$<INSTALL_INTERFACE:${_LIBRETT_INSTALL_DIR}/lib/librett.${LIBRETT_DEFAULT_LIBRARY_SUFFIX}>"
)

install(TARGETS TiledArray_CUTT EXPORT tiledarray COMPONENT tiledarray)
install(TARGETS TiledArray_LIBRETT EXPORT tiledarray COMPONENT tiledarray)


#TODO test cuTT
#TODO test LibreTT
4 changes: 2 additions & 2 deletions external/versions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)
set(TA_TRACKED_BTAS_TAG 242871710dabd5ef337e5253000d3e38c1d977ba)
set(TA_TRACKED_BTAS_PREVIOUS_TAG db884b020b5c13c312c07df9d5c03cea2d65afb2)

set(TA_TRACKED_CUTT_TAG 0e8685bf82910bc7435835f846e88f1b39f47f09)
set(TA_TRACKED_CUTT_PREVIOUS_TAG 592198b93c93b7ca79e7900b9a9f2e79f9dafec3)
set(TA_TRACKED_LIBRETT_TAG 68abe31a9ec6fd2fd9ffbcd874daa80457f947da)
set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 7e27ac766a9038df6aa05613784a54a036c4b796)

set(TA_TRACKED_UMPIRE_TAG f9640e0fa4245691cdd434e4f719ac5f7d455f82)
set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v6.0.0)
Expand Down
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ if(CUDA_FOUND)

list(APPEND TILEDARRAY_HEADER_FILES
TiledArray/external/cuda.h
TiledArray/external/cutt.h
TiledArray/external/ta-librett.h
TiledArray/cuda/cublas.h
TiledArray/cuda/btas_cublas.h
TiledArray/cuda/btas_um_tensor.h
Expand Down Expand Up @@ -245,7 +245,7 @@ if(CUDA_FOUND)
LANGUAGE CUDA)

# the list of libraries on which TiledArray depends on
list(APPEND _TILEDARRAY_DEPENDENCIES CUDA::cublas CUDA::nvToolsExt TiledArray_CUTT)
list(APPEND _TILEDARRAY_DEPENDENCIES CUDA::cublas CUDA::nvToolsExt TiledArray_LIBRETT)

endif(CUDA_FOUND)

Expand Down
4 changes: 2 additions & 2 deletions src/TiledArray/cuda/btas_um_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

#include <TiledArray/cuda/btas_cublas.h>
#include <TiledArray/cuda/um_storage.h>
#include <TiledArray/external/cutt.h>
#include <TiledArray/external/ta-librett.h>
#include <TiledArray/tile.h>

namespace TiledArray {
Expand Down Expand Up @@ -187,7 +187,7 @@ btasUMTensorVarray<T, Range> permute(const btasUMTensorVarray<T, Range> &arg,
std::move(storage));

// invoke the permute function
cutt_permute(const_cast<T *>(device_data(arg.storage())),
librett_permute(const_cast<T *>(device_data(arg.storage())),
device_data(result.storage()), arg.range(), perm, stream);

synchronize_stream(&stream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
*
*/

#ifndef TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED
#define TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED
#ifndef TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED
#define TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED

#include <TiledArray/config.h>

Expand All @@ -31,7 +31,7 @@
#include <algorithm>
#include <vector>

#include <cutt.h>
#include <librett.h>

#include <TiledArray/permutation.h>
#include <TiledArray/range.h>
Expand Down Expand Up @@ -77,38 +77,39 @@ inline void permutation_to_col_major(std::vector<int>& perm) {
* @param stream the CUDA stream this permutation will be submitted to
*/
template <typename T>
void cutt_permute(T* inData, T* outData, const TiledArray::Range& range,
void librett_permute(T* inData, T* outData, const TiledArray::Range& range,
const TiledArray::Permutation& perm, cudaStream_t stream) {
auto extent = range.extent();
std::vector<int> extent_int(extent.begin(), extent.end());

// cuTT uses FROM notation
// LibreTT uses FROM notation
auto perm_inv = perm.inv();
std::vector<int> perm_int(perm_inv.begin(), perm_inv.end());

// cuTT uses ColMajor
// LibreTT uses ColMajor
TiledArray::extent_to_col_major(extent_int);
TiledArray::permutation_to_col_major(perm_int);

cuttResult_t status;
//librettResult_t status;
librettResult status;

cuttHandle plan;
status = cuttPlan(&plan, range.rank(), extent_int.data(), perm_int.data(),
librettHandle plan;
status = librettPlan(&plan, range.rank(), extent_int.data(), perm_int.data(),
sizeof(T), stream);

TA_ASSERT(status == CUTT_SUCCESS);
TA_ASSERT(status == LIBRETT_SUCCESS);

status = cuttExecute(plan, inData, outData);
status = librettExecute(plan, inData, outData);

TA_ASSERT(status == CUTT_SUCCESS);
TA_ASSERT(status == LIBRETT_SUCCESS);

status = cuttDestroy(plan);
status = librettDestroy(plan);

TA_ASSERT(status == CUTT_SUCCESS);
TA_ASSERT(status == LIBRETT_SUCCESS);
}

} // namespace TiledArray

#endif // TILEDARRAY_HAS_CUDA

#endif // TILEDARRAY_EXTERNAL_CUTT_H__INCLUDED
#endif // TILEDARRAY_EXTERNAL_LIBRETT_H__INCLUDED
Loading