Skip to content

Commit

Permalink
Write usm_host_allocator that wraps call to free in try/catch
Browse files Browse the repository at this point in the history
Wrote dpctl::tensor::offset_utils::usm_host_allocator<T>, which allocates
USM-host memory for use as the storage of std::vector.

Replaced uses of sycl::usm_allocator<T, sycl::usm::alloc::host>. The
new class derives from it, but redefines the deallocate method to
wrap the call to base::deallocate in try/catch. The exception, if
caught, is printed but otherwise ignored, consistent with how this is
done in the USMDeleter class used in dpctl.memory.

This is to work around sporadic crashes due to unhandled exception
thrown by openCL::CPU driver, which appears to be benign.

The issue was reported to CPU driver team, with native reproducer
(compiler LLVM jira ticket 58387).
  • Loading branch information
oleksandr-pavlyk committed Aug 8, 2024
1 parent 89144e8 commit ccbd886
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 5 deletions.
28 changes: 26 additions & 2 deletions dpctl/tensor/libtensor/include/utils/offset_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#pragma once

#include <algorithm>
#include <cstddef>
#include <exception>
#include <iostream>
#include <sycl/sycl.hpp>
#include <tuple>
#include <vector>
Expand Down Expand Up @@ -81,6 +82,30 @@ std::vector<T, A> concat(std::vector<T, A> lhs, Vs &&...vs)

} // namespace detail

/*! @brief USM-host allocator whose `deallocate` does not propagate
 *  exceptions derived from `std::exception`.
 *
 *  Derives from `sycl::usm_allocator<T, sycl::usm::alloc::host>` and inherits
 *  its constructors, so it is constructed the same way as the base allocator
 *  (e.g. from a `sycl::queue`). Only `deallocate` is redefined; every other
 *  member comes from the base class unchanged.
 *
 *  @tparam T  Element type of the allocation.
 */
template <typename T>
class usm_host_allocator : public sycl::usm_allocator<T, sycl::usm::alloc::host>
{
public:
    using baseT = sycl::usm_allocator<T, sycl::usm::alloc::host>;
    using baseT::baseT;

    /*! @brief Rebinds to `usm_host_allocator<U>` so that a rebound allocator
     *  is this wrapper type rather than the base allocator type.
     */
    template <typename U> struct rebind
    {
        using other = usm_host_allocator<U>;
    };

    /*! @brief Releases memory through the base allocator, reporting instead
     *  of propagating a `std::exception` raised while doing so.
     *
     *  @param ptr  Pointer forwarded to `baseT::deallocate`.
     *  @param n    Element count forwarded to `baseT::deallocate`.
     *
     *  An exception derived from `std::exception` is written to `std::cerr`
     *  and then dropped. Exceptions of any other type still propagate.
     */
    void deallocate(T *ptr, std::size_t n)
    {
        try {
            baseT::deallocate(ptr, n);
        } catch (const std::exception &e) {
            // Best-effort release: log the failure and carry on rather than
            // letting the exception escape to the caller.
            std::cerr
                << "Exception caught in `usm_host_allocator::deallocate`: "
                << e.what() << std::endl;
        }
    }
};

template <typename indT, typename... Vs>
std::tuple<indT *, size_t, sycl::event>
device_allocate_and_pack(sycl::queue &q,
Expand All @@ -90,8 +115,7 @@ device_allocate_and_pack(sycl::queue &q,

// memory transfer optimization, use USM-host for temporary speeds up
// transfer to device, especially on dGPUs
using usm_host_allocatorT =
sycl::usm_allocator<indT, sycl::usm::alloc::host>;
using usm_host_allocatorT = usm_host_allocator<indT>;
using shT = std::vector<indT, usm_host_allocatorT>;

usm_host_allocatorT usm_host_allocator(q);
Expand Down
5 changes: 3 additions & 2 deletions dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "dpctl4pybind11.hpp"
#include "kernels/integer_advanced_indexing.hpp"
#include "utils/memory_overlap.hpp"
#include "utils/offset_utils.hpp"
#include "utils/output_validation.hpp"
#include "utils/type_dispatch.hpp"
#include "utils/type_utils.hpp"
Expand Down Expand Up @@ -91,15 +92,15 @@ _populate_kernel_params(sycl::queue &exec_q,
{

using usm_host_allocator_T =
sycl::usm_allocator<char *, sycl::usm::alloc::host>;
dpctl::tensor::offset_utils::usm_host_allocator<char *>;
using ptrT = std::vector<char *, usm_host_allocator_T>;

usm_host_allocator_T ptr_allocator(exec_q);
std::shared_ptr<ptrT> host_ind_ptrs_shp =
std::make_shared<ptrT>(k, ptr_allocator);

using usm_host_allocatorT =
sycl::usm_allocator<py::ssize_t, sycl::usm::alloc::host>;
dpctl::tensor::offset_utils::usm_host_allocator<py::ssize_t>;
using shT = std::vector<py::ssize_t, usm_host_allocatorT>;

usm_host_allocatorT sz_allocator(exec_q);
Expand Down
3 changes: 2 additions & 1 deletion dpctl/tensor/libtensor/source/triul_ctor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "kernels/constructors.hpp"
#include "simplify_iteration_space.hpp"
#include "utils/memory_overlap.hpp"
#include "utils/offset_utils.hpp"
#include "utils/output_validation.hpp"
#include "utils/type_dispatch.hpp"

Expand Down Expand Up @@ -150,7 +151,7 @@ usm_ndarray_triul(sycl::queue &exec_q,
nd += 2;

using usm_host_allocatorT =
sycl::usm_allocator<py::ssize_t, sycl::usm::alloc::host>;
dpctl::tensor::offset_utils::usm_host_allocator<py::ssize_t>;
using usmshT = std::vector<py::ssize_t, usm_host_allocatorT>;

usm_host_allocatorT allocator(exec_q);
Expand Down

0 comments on commit ccbd886

Please sign in to comment.