-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
sporadic inaccurate results relative to numpy if atomic add is used #711
Comments
This problem reproduces only on Gen9 with dppy 17.4 and 18.0.
After updating the script to catch up with all the changes, it just freezes:
import argparse
import math
import time
import dpctl
import dpnp
import numba
import numpy as np
import numpy.random as rnd
import numba_dpex as dppy
import numba_dpex as numba_dppy
from numba_dpex import kernel, atomic, DEFAULT_LOCAL_SIZE
# Short alias for the numba_dpex atomic-add primitive called by the kernel.
atomic_add = atomic.add
# Fixed seed handed to numpy.random.seed() by gen_data() so inputs repeat.
SEED = 777777
# Default element type of the arrays produced by gen_data().
DTYPE = np.float32
#@kernel(access_types={"read_only": ["a", "b"], "write_only": ["c"]})
@kernel
def l2_distance_kernel(a, b, c):
    """Add the squared difference of one element pair to ``c[0]``.

    Each work-item looks up its global ids in dimensions 0 and 1, squares
    ``a[i, j] - b[i, j]`` and folds the result into element 0 of ``c`` with
    an atomic add.
    """
    row = numba_dppy.get_global_id(0)
    col = numba_dppy.get_global_id(1)
    diff = a[row, col] - b[row, col]
    atomic_add(c, 0, diff**2)
def gen_data(nopt, dims, dtype=DTYPE):
    """Return two random ``(nopt, dims)`` arrays cast to ``dtype``.

    The NumPy global generator is re-seeded with ``SEED`` on every call, so
    calling again with the same arguments yields the same pair of arrays.
    """
    rnd.seed(SEED)
    shape = (nopt, dims)
    # Draw order matters for reproducibility: first array, then second.
    first = rnd.random(shape).astype(dtype)
    second = rnd.random(shape).astype(dtype)
    return first, second
def l2_distance_python(a, b):
    """Return the NumPy reference value: the norm of ``a - b``."""
    difference = a - b
    return np.linalg.norm(difference)
def run(sizes=3, step=2, nopt=2**20):
    """Check the device L2 distance against NumPy for growing problem sizes.

    Args:
        sizes: Number of problem sizes to try.
        step: Factor applied to ``nopt`` after each size.
        nopt: Row count of the first problem; inputs have shape ``(nopt, 1)``.

    Prints the selected device once, then one "Test succeeded"/"Test failed"
    line per problem size, and finally "Done...".
    """
    parser = argparse.ArgumentParser(description="L2 distance")
    parser.add_argument("--iter", dest="iter", type=int, default=10)
    # --iter is still accepted on the command line, but nothing reads it.
    parser.parse_args()

    dims = 1

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    # The device does not depend on the problem size, so select it once.
    device = dpctl.select_default_device()
    print("Using device ...", device)
    device.print_device_info()

    for _ in range(sizes):
        X, Y = gen_data(nopt, dims, np.float32)
        distance = np.asarray([0.0]).astype(np.float32)
        p_dis = l2_distance_python(X, Y)
        X, Y, distance = dpnp.array(X), dpnp.array(Y), dpnp.array(distance)

        with dpctl.device_context(device):
            l2_distance_kernel[numba_dppy.Range(X.shape[0], X.shape[1])](
                X, Y, distance
            )

        # Convert the one-element accumulator to a Python float once and test
        # that value directly; truncating with int() would let values in
        # (-1, 0) through to math.sqrt, which raises ValueError.
        sum_sq = float(distance)
        n_dis = math.sqrt(sum_sq) if sum_sq >= 0 else 0

        # The tolerance is scaled by sqrt(nopt), as in the original check.
        if np.allclose(n_dis, p_dis, rtol=1e-05 * np.sqrt(nopt)):
            verdict = "Test succeeded for size"
        else:
            verdict = "Test failed for size"
        print(verdict, nopt, ". Python dis: ", p_dis, " Numba dis: ", n_dis, "\n")

        nopt *= step

    print("Done...")
# Run the reproducer only when executed as a script.
if __name__ == "__main__":
    run()
I updated the reproducer to the latest API and can reproduce the freeze/deadlock reported previously:
import argparse
import math
import dpctl
import dpnp
import numpy as np
import numpy.random as rnd
from numba_dpex import kernel_api as kapi
from numba_dpex import kernel, call_kernel
# Fixed seed handed to numpy.random.seed() by gen_data() so inputs repeat.
SEED = 777777
# Default element type of the arrays produced by gen_data().
DTYPE = np.float32
@kernel
def l2_distance_kernel(item, a, b, c):
    """Add the squared difference of one element pair to ``c[0]``.

    ``item`` supplies the work-item ids for dimensions 0 and 1. The squared
    value of ``a[i, j] - b[i, j]`` is accumulated into element 0 of ``c``
    through an ``AtomicRef`` fetch-add.
    """
    row = item.get_id(0)
    col = item.get_id(1)
    diff = a[row, col] - b[row, col]
    kapi.AtomicRef(c, 0).fetch_add(diff**2)
def gen_data(nopt, dims, dtype=DTYPE):
    """Return a pair of random ``(nopt, dims)`` arrays cast to ``dtype``.

    ``SEED`` is applied to the NumPy global generator before drawing, so the
    same arguments always give the same pair.
    """
    rnd.seed(SEED)
    shape = (nopt, dims)
    # Two consecutive draws from the freshly seeded generator.
    return tuple(rnd.random(shape).astype(dtype) for _ in range(2))
def l2_distance_python(a, b):
    """Return the NumPy reference value: the norm of ``a - b``."""
    delta = a - b
    return np.linalg.norm(delta)
def run(sizes=3, step=2, nopt=2**20):
    """Check the device L2 distance against NumPy for growing problem sizes.

    Args:
        sizes: Number of problem sizes to try.
        step: Factor applied to ``nopt`` after each size.
        nopt: Row count of the first problem; inputs have shape ``(nopt, 1)``.

    Prints the selected device once. For every size it prints the zeroed
    accumulator, a marker before and after the kernel launch, and a
    "Test succeeded"/"Test failed" line. Ends with "Done...".
    """
    parser = argparse.ArgumentParser(description="L2 distance")
    parser.add_argument("--iter", dest="iter", type=int, default=10)
    # --iter is still accepted on the command line, but nothing reads it.
    parser.parse_args()

    dims = 1

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    # The device does not depend on the problem size, so select it once.
    device = dpctl.select_default_device()
    print("Using device ...", device)
    device.print_device_info()

    for _ in range(sizes):
        X, Y = gen_data(nopt, dims, np.float32)
        distance = np.asarray([0.0]).astype(np.float32)
        p_dis = l2_distance_python(X, Y)
        X, Y, distance = dpnp.array(X), dpnp.array(Y), dpnp.array(distance)

        # The markers bracket the kernel launch. They are flushed so they
        # reach the terminal even if the launch never returns.
        print(distance, flush=True)
        print("0000000000000000000000000000", flush=True)
        call_kernel(
            l2_distance_kernel,
            kapi.Range(X.shape[0], X.shape[1]),
            X,
            Y,
            distance,
        )
        print("10000000000000000000000000000", flush=True)

        # Convert the one-element accumulator to a Python float once and test
        # that value directly; truncating with int() would let values in
        # (-1, 0) through to math.sqrt, which raises ValueError.
        sum_sq = float(distance)
        n_dis = math.sqrt(sum_sq) if sum_sq >= 0 else 0

        # The tolerance is scaled by sqrt(nopt), as in the original check.
        if np.allclose(n_dis, p_dis, rtol=1e-05 * np.sqrt(nopt)):
            verdict = "Test succeeded for size"
        else:
            verdict = "Test failed for size"
        print(
            verdict,
            nopt,
            ". Python dis: ",
            p_dis,
            " Numba dis: ",
            n_dis,
            "\n",
        )

        nopt *= step

    print("Done...")
# Run the reproducer only when executed as a script.
if __name__ == "__main__":
    run()
I experience the issue on Gen9 integrated graphics only at problem size [value missing from this copy]. I think the issue right now is that for [text missing from this copy]. I will next write a DPC++ example to verify what happens when we run similar code in C++.
I'm running on Gen9 with dppy 17.4 and get sporadically inaccurate results relative to NumPy for the following code:
The results are the following:
The full code of the benchmark can be found here.
The text was updated successfully, but these errors were encountered: