Skip to content

Commit

Permalink
Enable device caching for kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
ZzEeKkAa committed Dec 12, 2023
1 parent 774d543 commit a8178c6
Show file tree
Hide file tree
Showing 9 changed files with 343 additions and 49 deletions.
8 changes: 8 additions & 0 deletions numba_dpex/core/runtime/_dpexrt_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "_queuestruct.h"
#include "_usmarraystruct.h"

#include "experimental/kernel_caching.h"
#include "experimental/nrt_reserve_meminfo.h"
#include "numba/core/runtime/nrt_external.h"

Expand Down Expand Up @@ -1493,6 +1494,8 @@ static PyObject *build_c_helpers_dict(void)
_declpointer("DPEXRT_sycl_event_init", &DPEXRT_sycl_event_init);
_declpointer("DPEXRT_nrt_acquire_meminfo_and_schedule_release",
&DPEXRT_nrt_acquire_meminfo_and_schedule_release);
_declpointer("DPEXRT_build_or_get_kernel", &DPEXRT_build_or_get_kernel);
_declpointer("DPEXRT_kernel_cache_size", &DPEXRT_kernel_cache_size);

#undef _declpointer
return dct;
Expand Down Expand Up @@ -1563,6 +1566,11 @@ MOD_INIT(_dpexrt_python)
PyModule_AddObject(
m, "DPEXRT_nrt_acquire_meminfo_and_schedule_release",
PyLong_FromVoidPtr(&DPEXRT_nrt_acquire_meminfo_and_schedule_release));
PyModule_AddObject(m, "DPEXRT_build_or_get_kernel",
PyLong_FromVoidPtr(&DPEXRT_build_or_get_kernel));
PyModule_AddObject(m, "DPEXRT_kernel_cache_size",
PyLong_FromVoidPtr(&DPEXRT_kernel_cache_size));

PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
return MOD_SUCCESS_VAL(m);
}
53 changes: 53 additions & 0 deletions numba_dpex/core/runtime/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,3 +471,56 @@ def acquire_meminfo_and_schedule_release(
ret = builder.call(fn, args)

return ret

def build_or_get_kernel(self, builder: llvmir.IRBuilder, args):
"""Inserts LLVM IR to call build_or_get_kernel.
DPCTLSyclKernelRef
DPEXRT_build_or_get_kernel(
const DPCTLSyclContextRef ctx,
const DPCTLSyclDeviceRef dev,
size_t il_hash,
const char *il,
size_t il_length,
const char *compile_opts,
const char *kernel_name,
);
"""
mod = builder.module

func_ty = llvmir.FunctionType(
cgutils.voidptr_t,
[
cgutils.voidptr_t,
cgutils.voidptr_t,
llvmir.IntType(64),
cgutils.voidptr_t,
llvmir.IntType(64),
cgutils.voidptr_t,
cgutils.voidptr_t,
],
)
fn = cgutils.get_or_insert_function(
mod, func_ty, "DPEXRT_build_or_get_kernel"
)
ret = builder.call(fn, args)

return ret

def kernel_cache_size(self, builder: llvmir.IRBuilder):
"""Inserts LLVM IR to call kernel_cache_size.
size_t DPEXRT_kernel_cache_size();
"""
fn = cgutils.get_or_insert_function(
builder.module,
llvmir.FunctionType(
llvmir.IntType(64),
[],
),
"DPEXRT_kernel_cache_size",
)

return builder.call(fn, [])
111 changes: 111 additions & 0 deletions numba_dpex/core/runtime/experimental/kernel_caching.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

#include "kernel_caching.h"
#include <unordered_map>

extern "C"
{
#include "dpctl_capi.h"
#include "dpctl_sycl_interface.h"

#include "_dbg_printer.h"

#include "numba/core/runtime/nrt_external.h"
}

#include "syclinterface/dpctl_sycl_type_casters.hpp"
#include "tools/boost_hash.hpp"
#include "tools/dpctl.hpp"

using CacheKey = std::tuple<DPCTLSyclContextRef, DPCTLSyclDeviceRef, size_t>;

namespace std
{
template <> struct hash<CacheKey>
{
size_t operator()(const CacheKey &ck) const
{
std::size_t seed = 0;
boost::hash_combine(seed, std::get<DPCTLSyclDeviceRef>(ck));
boost::hash_combine(seed, std::get<DPCTLSyclContextRef>(ck));
boost::hash_detail::hash_combine_impl(seed, std::get<size_t>(ck));
return seed;
}
};
template <> struct equal_to<CacheKey>
{
constexpr bool operator()(const CacheKey &lhs, const CacheKey &rhs) const
{
return DPCTLDevice_AreEq(std::get<DPCTLSyclDeviceRef>(lhs),
std::get<DPCTLSyclDeviceRef>(rhs)) &&
DPCTLContext_AreEq(std::get<DPCTLSyclContextRef>(lhs),
std::get<DPCTLSyclContextRef>(rhs)) &&
std::get<size_t>(lhs) == std::get<size_t>(rhs);
}
};
} // namespace std

// TODO: add cache cleaning
// https://github.com/IntelPython/numba-dpex/issues/1240
std::unordered_map<CacheKey, DPCTLSyclKernelRef> sycl_kernel_cache =
std::unordered_map<CacheKey, DPCTLSyclKernelRef>();

template <class M, class Key, class F>
typename M::mapped_type &get_else_compute(M &m, Key const &k, F f)
{
typedef typename M::mapped_type V;
std::pair<typename M::iterator, bool> r =
m.insert(typename M::value_type(k, V()));
V &v = r.first->second;
if (r.second) {
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: building kernel.\n"););
f(v);
}
else {
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: using cached kernel.\n"););
DPCTLDevice_Delete(std::get<DPCTLSyclDeviceRef>(k));
DPCTLContext_Delete(std::get<DPCTLSyclContextRef>(k));
}
return v;
}

extern "C"
{
DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
const DPCTLSyclDeviceRef dev,
size_t il_hash,
const char *il,
size_t il_length,
const char *compile_opts,
const char *kernel_name)
{
DPEXRT_DEBUG(
drt_debug_print("DPEXRT-DEBUG: in build or get kernel.\n"););

CacheKey key = std::make_tuple(ctx, dev, il_hash);

DPEXRT_DEBUG(auto ctx_hash = std::hash<DPCTLSyclContextRef>{}(ctx);
auto dev_hash = std::hash<DPCTLSyclDeviceRef>{}(dev);
drt_debug_print("DPEXRT-DEBUG: key hashes: %d %d %d.\n",
ctx_hash, dev_hash, il_hash););

auto k_ref = get_else_compute(
sycl_kernel_cache, key,
[ctx, dev, il, il_length, compile_opts,
kernel_name](DPCTLSyclKernelRef &k_ref) {
auto kb_ref = DPCTLKernelBundle_CreateFromSpirv(
ctx, dev, il, il_length, compile_opts);
k_ref = DPCTLKernelBundle_GetKernel(kb_ref, kernel_name);
DPCTLKernelBundle_Delete(kb_ref);
});

DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: kernel hash size: %d.\n",
sycl_kernel_cache.size()););

return DPCTLKernel_Copy(k_ref);
}

size_t DPEXRT_kernel_cache_size() { return sycl_kernel_cache.size(); }
}
54 changes: 54 additions & 0 deletions numba_dpex/core/runtime/experimental/kernel_caching.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// SPDX-FileCopyrightText: 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

//===----------------------------------------------------------------------===//
///
/// \file
/// Defines dpex run time function(s) that cache kernel on device.
///
//===----------------------------------------------------------------------===//

#pragma once

#include "dpctl_capi.h"
#include "dpctl_sycl_interface.h"

#ifdef __cplusplus
extern "C"
{
#endif
/*!
* @brief returns dpctl kernel reference for the SPIRV file on particular
* device. Compiles only first time, all others will use cache for the same
* input. It steals reference to context and device because we need to keep
* it alive for cache keys.
*
* @param ctx Context reference,
* @param dev Device reference,
* @param il_hash Hash of the SPIRV binary data,
* @param il SPIRV binary data,
* @param il_length SPIRV binary data size,
* @param compile_opts compile options,
* @param kernel_name kernel name inside SPIRV binary data to return
* reference to.
*
* @return {return} Kernel reference to the compiled SPIR-V.
*/
DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
const DPCTLSyclDeviceRef dev,
size_t il_hash,
const char *il,
size_t il_length,
const char *compile_opts,
const char *kernel_name);

/*!
* @brief returns cache size. Intended for test purposes only
*
* @return {return} Kernel cache size.
*/
size_t DPEXRT_kernel_cache_size();
#ifdef __cplusplus
}
#endif
24 changes: 24 additions & 0 deletions numba_dpex/core/runtime/experimental/tools/dpctl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

#include "dpctl.hpp"
#include <CL/sycl.hpp>

namespace std
{

size_t
hash<DPCTLSyclDeviceRef>::operator()(const DPCTLSyclDeviceRef &DRef) const
{
using dpctl::syclinterface::unwrap;
return hash<sycl::device>()(*unwrap<sycl::device>(DRef));
}

size_t
hash<DPCTLSyclContextRef>::operator()(const DPCTLSyclContextRef &CRef) const
{
using dpctl::syclinterface::unwrap;
return hash<sycl::context>()(*unwrap<sycl::context>(CRef));
}
} // namespace std
26 changes: 26 additions & 0 deletions numba_dpex/core/runtime/experimental/tools/dpctl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

//===----------------------------------------------------------------------===//
///
/// \file
/// Defines overloads to dpctl library that eventually must be ported there.
///
//===----------------------------------------------------------------------===//

#pragma once
#include "syclinterface/dpctl_sycl_type_casters.hpp"

namespace std
{
template <> struct hash<DPCTLSyclDeviceRef>
{
size_t operator()(const DPCTLSyclDeviceRef &DRef) const;
};

template <> struct hash<DPCTLSyclContextRef>
{
size_t operator()(const DPCTLSyclContextRef &CRef) const;
};
} // namespace std
Loading

0 comments on commit a8178c6

Please sign in to comment.