-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
284 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 106 additions & 0 deletions
106
numba_dpex/core/runtime/experimental/kernel_caching.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//===----------------------------------------------------------------------===// | ||
/// | ||
/// \file | ||
/// A Python module that provides constructors to create a Numba MemInfo | ||
/// PyObject using a sycl USM allocator as the external memory allocator. | ||
/// The Module also provides the Numba box and unbox implementations for a | ||
/// dpnp.ndarray object. | ||
/// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "kernel_caching.h" | ||
#include <unordered_map> | ||
|
||
extern "C" | ||
{ | ||
#include "dpctl_capi.h" | ||
#include "dpctl_sycl_interface.h" | ||
|
||
#include "_dbg_printer.h" | ||
|
||
#include "numba/core/runtime/nrt_external.h" | ||
} | ||
|
||
#include "syclinterface/dpctl_sycl_type_casters.hpp" | ||
#include "tools/dpctl.hpp" | ||
#include "tools/hash_tuple.hpp" | ||
|
||
using CacheKey = std::tuple<DPCTLSyclContextRef, DPCTLSyclDeviceRef, size_t>; | ||
|
||
class CacheKeysAreEqual | ||
{ | ||
public: | ||
bool operator()(CacheKey const &lhs, CacheKey const &rhs) const | ||
{ | ||
// TODO: implement full comparison | ||
return DPCTLDevice_AreEq(std::get<DPCTLSyclDeviceRef>(lhs), | ||
std::get<DPCTLSyclDeviceRef>(rhs)) && | ||
DPCTLContext_AreEq(std::get<DPCTLSyclContextRef>(lhs), | ||
std::get<DPCTLSyclContextRef>(rhs)) && | ||
std::get<size_t>(lhs) == std::get<size_t>(rhs); | ||
} | ||
}; | ||
|
||
// TODO: add cache cleaning | ||
std::unordered_map<CacheKey, | ||
DPCTLSyclKernelRef, | ||
std::hash<CacheKey>, | ||
CacheKeysAreEqual> | ||
sycl_kernel_cache = std::unordered_map<CacheKey, | ||
DPCTLSyclKernelRef, | ||
std::hash<CacheKey>, | ||
CacheKeysAreEqual>(); | ||
|
||
template <class M, class Key, class F> | ||
typename M::mapped_type &get_else_compute(M &m, Key const &k, F f) | ||
{ | ||
typedef typename M::mapped_type V; | ||
std::pair<typename M::iterator, bool> r = | ||
m.insert(typename M::value_type(k, V())); | ||
V &v = r.first->second; | ||
if (r.second) { | ||
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: building kernel.\n");); | ||
f(v); | ||
} | ||
else { | ||
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: using cached kernel.\n");); | ||
} | ||
return v; | ||
} | ||
|
||
extern "C" | ||
{ | ||
DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx, | ||
const DPCTLSyclDeviceRef dev, | ||
size_t il_hash, | ||
const char *il, | ||
size_t il_length, | ||
const char *compile_opts, | ||
const char *kernel_name) | ||
{ | ||
DPEXRT_DEBUG( | ||
drt_debug_print("DPEXRT-DEBUG: in build or get kernel.\n");); | ||
|
||
CacheKey key = std::make_tuple(ctx, dev, il_hash); | ||
|
||
DPEXRT_DEBUG(auto ctx_hash = std::hash<DPCTLSyclContextRef>{}(ctx); | ||
auto dev_hash = std::hash<DPCTLSyclDeviceRef>{}(dev); | ||
drt_debug_print("DPEXRT-DEBUG: key hashes: %d %d %d.\n", | ||
ctx_hash, dev_hash, il_hash);); | ||
|
||
auto k_ref = get_else_compute( | ||
sycl_kernel_cache, key, | ||
[ctx, dev, il, il_length, compile_opts, | ||
kernel_name](DPCTLSyclKernelRef &k_ref) { | ||
auto kb_ref = DPCTLKernelBundle_CreateFromSpirv( | ||
ctx, dev, il, il_length, compile_opts); | ||
k_ref = DPCTLKernelBundle_GetKernel(kb_ref, kernel_name); | ||
DPCTLKernelBundle_Delete(kb_ref); | ||
}); | ||
return DPCTLKernel_Copy(k_ref); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//===----------------------------------------------------------------------===// | ||
/// | ||
/// \file | ||
/// A Python module that provides constructors to create a Numba MemInfo | ||
/// PyObject using a sycl USM allocator as the external memory allocator. | ||
/// The Module also provides the Numba box and unbox implementations for a | ||
/// dpnp.ndarray object. | ||
/// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifdef __cplusplus | ||
extern "C" | ||
{ | ||
#endif | ||
|
||
#include "dpctl_capi.h" | ||
#include "dpctl_sycl_interface.h" | ||
|
||
/// Returns a kernel for the given context, device and IL hash. The kernel is
/// built from the supplied IL the first time a (ctx, dev, il_hash) key is
/// seen and looked up in a cache on later calls.
///
/// \param ctx          SYCL context the kernel is built for.
/// \param dev          SYCL device the kernel is built for.
/// \param il_hash      Hash of the IL, used as part of the cache key.
/// \param il           IL from which the kernel bundle is created.
/// \param il_length    Length of \p il.
/// \param compile_opts Compile options for creating the kernel bundle.
/// \param kernel_name  Name of the kernel to retrieve from the bundle.
/// \return A copy of the cached kernel reference.
DPCTLSyclKernelRef
DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
                           const DPCTLSyclDeviceRef dev,
                           size_t il_hash,
                           const char *il,
                           size_t il_length,
                           const char *compile_opts,
                           const char *kernel_name);
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#pragma once | ||
|
||
#include "syclinterface/dpctl_sycl_type_casters.hpp" | ||
#include <CL/sycl.hpp> | ||
|
||
namespace std | ||
{ | ||
template <> struct hash<DPCTLSyclDeviceRef> | ||
{ | ||
size_t operator()(const DPCTLSyclDeviceRef &DRef) const | ||
{ | ||
using dpctl::syclinterface::unwrap; | ||
return hash<sycl::device>()(*unwrap<sycl::device>(DRef)); | ||
} | ||
}; | ||
|
||
template <> struct hash<DPCTLSyclContextRef> | ||
{ | ||
size_t operator()(const DPCTLSyclContextRef &CRef) const | ||
{ | ||
using dpctl::syclinterface::unwrap; | ||
return hash<sycl::context>()(*unwrap<sycl::context>(CRef)); | ||
} | ||
}; | ||
} // namespace std |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#pragma once | ||
|
||
#include <tuple> | ||
// The helpers live in a named, project-owned namespace rather than in an
// unnamed namespace inside std. An unnamed namespace in a header gives every
// translation unit its own copy with internal linkage, and declaring new
// names inside namespace std is not permitted.
namespace dpex_hash_tuple_detail
{

// Code from boost
// Reciprocal of the golden ratio helps spread entropy
// and handles duplicates.
// See Mike Seymour in magic-numbers-in-boosthash-combine:
// http://stackoverflow.com/questions/4948780

/// Folds the std::hash of \p v into the running hash \p seed.
template <class T> inline void hash_combine(std::size_t &seed, T const &v)
{
    seed ^= std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Recursive template code derived from Matthieu M.
/// Combines the first \p Count elements of a tuple into a seed, in index
/// order (element 0 first). Counting elements instead of indexing from
/// `tuple_size - 1` lets the empty tuple terminate at the base case.
template <class Tuple, std::size_t Count = std::tuple_size<Tuple>::value>
struct HashValueImpl
{
    static void apply(std::size_t &seed, Tuple const &tuple)
    {
        HashValueImpl<Tuple, Count - 1>::apply(seed, tuple);
        hash_combine(seed, std::get<Count - 1>(tuple));
    }
};

/// Base case: no elements left to combine, so the seed is left unchanged.
template <class Tuple> struct HashValueImpl<Tuple, 0>
{
    static void apply(std::size_t &, Tuple const &) {}
};
} // namespace dpex_hash_tuple_detail

namespace std
{
/// std::hash for std::tuple: starts from seed 0 and combines the hash of
/// each element in index order. An empty tuple hashes to 0.
template <typename... TT> struct hash<std::tuple<TT...>>
{
    size_t operator()(std::tuple<TT...> const &tt) const
    {
        size_t seed = 0;
        dpex_hash_tuple_detail::HashValueImpl<std::tuple<TT...>>::apply(seed,
                                                                        tt);
        return seed;
    }
};
} // namespace std
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.