diff --git a/numba_dpex/examples/kernel/atomic_op.py b/numba_dpex/examples/kernel/atomic_op.py index 2e10f7cc18..653fbc15fe 100644 --- a/numba_dpex/examples/kernel/atomic_op.py +++ b/numba_dpex/examples/kernel/atomic_op.py @@ -5,6 +5,7 @@ import dpnp as np import numba_dpex as ndpex +from numba_dpex.core.kernel_interface.utils import Range @ndpex.kernel @@ -20,7 +21,7 @@ def main(): print("Using device ...") print(a.device) - atomic_reduction[N](a) + atomic_reduction[Range(N)](a) print("Reduction sum =", a[0]) print("Done...") diff --git a/numba_dpex/examples/kernel/black_scholes.py b/numba_dpex/examples/kernel/black_scholes.py index 3f6e9c5bd6..75be6d3b0d 100644 --- a/numba_dpex/examples/kernel/black_scholes.py +++ b/numba_dpex/examples/kernel/black_scholes.py @@ -8,6 +8,7 @@ import dpnp as np import numba_dpex as ndpx +from numba_dpex.core.kernel_interface.utils import Range # Stock price range S0L = 10.0 @@ -94,7 +95,9 @@ def main(): print("Using device ...") print(price.device) - kernel_black_scholes[NOPT](price, strike, t, rate, volatility, call, put) + kernel_black_scholes[Range(NOPT)]( + price, strike, t, rate, volatility, call, put + ) print("Call:", call) print("Put:", put) diff --git a/numba_dpex/examples/kernel/device_func.py b/numba_dpex/examples/kernel/device_func.py index 1c6fe52d39..80089a70fb 100644 --- a/numba_dpex/examples/kernel/device_func.py +++ b/numba_dpex/examples/kernel/device_func.py @@ -6,6 +6,7 @@ import numba_dpex as ndpex from numba_dpex import float32, int32, int64 +from numba_dpex.core.kernel_interface.utils import Range # Array size N = 10 @@ -69,7 +70,7 @@ def test1(): print("A=", a) try: - a_kernel_function[N](a, b) + a_kernel_function[Range(N)](a, b) except Exception as err: print(err) print("B=", b) @@ -87,7 +88,7 @@ def test2(): print("A=", a) try: - a_kernel_function_int32[N](a, b) + a_kernel_function_int32[Range(N)](a, b) except Exception as err: print(err) print("B=", b) @@ -105,7 +106,7 @@ def test3(): print("A=", a) try: - a_kernel_function_int32_float32[N](a, b) + a_kernel_function_int32_float32[Range(N)](a, b) except Exception as err: print(err) print("B=", b) @@ -119,7 +120,7 @@ def test3(): print("A=", a) try: - a_kernel_function_int32_float32[N](a, b) + a_kernel_function_int32_float32[Range(N)](a, b) except Exception as err: print(err) print("B=", b) @@ -134,7 +135,7 @@ def test3(): print("A=", a) try: - a_kernel_function_int32_float32[N](a, b) + a_kernel_function_int32_float32[Range(N)](a, b) except Exception as err: print(err) print("B=", b) diff --git a/numba_dpex/examples/kernel/interpolation.py b/numba_dpex/examples/kernel/interpolation.py index 7568ad60e7..3aa3c91765 100644 --- a/numba_dpex/examples/kernel/interpolation.py +++ b/numba_dpex/examples/kernel/interpolation.py @@ -7,6 +7,7 @@ from numpy.testing import assert_almost_equal import numba_dpex as ndpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range # Interpolation domain XLO = 10.0 @@ -114,9 +115,13 @@ def main(): print("Using device ...") print(xp.device) - global_range = (N_POINTS // N_POINTS_PER_WORK_ITEM,) - local_range = (LOCAL_SIZE,) - kernel_polynomial[global_range, local_range](xp, yp, COEFFICIENTS) + global_range = Range( + N_POINTS // N_POINTS_PER_WORK_ITEM, + ) + local_range = Range( + LOCAL_SIZE, + ) + kernel_polynomial[NdRange(global_range, local_range)](xp, yp, COEFFICIENTS) # Copy results back to the host nyp = np.asnumpy(yp) diff --git a/numba_dpex/examples/kernel/kernel_private_memory.py b/numba_dpex/examples/kernel/kernel_private_memory.py index 089f8b41d4..3219281f7c 100644 --- a/numba_dpex/examples/kernel/kernel_private_memory.py +++ b/numba_dpex/examples/kernel/kernel_private_memory.py @@ -8,6 +8,7 @@ from numba import float32 import numba_dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range def private_memory(): @@ -39,9 +40,9 @@ def private_memory_kernel(A): print("Using device ...") device.print_device_info() - global_range = (N,) - local_range = (N,) - private_memory_kernel[global_range, local_range](arr) + global_range = Range(N) + local_range = Range(N) + private_memory_kernel[NdRange(global_range, local_range)](arr) arr_out = dpt.asnumpy(arr) np.testing.assert_allclose(orig * 2, arr_out) diff --git a/numba_dpex/examples/kernel/kernel_specialization.py b/numba_dpex/examples/kernel/kernel_specialization.py index 8e46949bd9..e1aff12c23 100644 --- a/numba_dpex/examples/kernel/kernel_specialization.py +++ b/numba_dpex/examples/kernel/kernel_specialization.py @@ -11,6 +11,7 @@ InvalidKernelSpecializationError, MissingSpecializationError, ) +from numba_dpex.core.kernel_interface.utils import Range # Similar to Numba, numba-dpex supports eager compilation of functions. The # following examples demonstrate the feature for numba_dpex.kernel and presents @@ -38,7 +39,7 @@ def data_parallel_sum(a, b, c): b = dpt.ones(1024, dtype=dpt.int64) c = dpt.zeros(1024, dtype=dpt.int64) -data_parallel_sum[1024](a, b, c) +data_parallel_sum[Range(1024)](a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.int64) @@ -65,7 +66,7 @@ def data_parallel_sum2(a, b, c): b = dpt.ones(1024, dtype=dpt.int64) c = dpt.zeros(1024, dtype=dpt.int64) -data_parallel_sum2[1024](a, b, c) +data_parallel_sum2[Range(1024)](a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.int64) @@ -76,7 +77,7 @@ def data_parallel_sum2(a, b, c): b = dpt.ones(1024, dtype=dpt.float32) c = dpt.zeros(1024, dtype=dpt.float32) -data_parallel_sum2[1024](a, b, c) +data_parallel_sum2[Range(1024)](a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.float32) @@ -94,7 +95,7 @@ def data_parallel_sum2(a, b, c): c = dpt.zeros(1024, dtype=dpt.int32) try: - data_parallel_sum[1024](a, b, c) + data_parallel_sum[Range(1024)](a, b, c) except MissingSpecializationError as mse: print(mse) diff --git a/numba_dpex/examples/kernel/matmul.py b/numba_dpex/examples/kernel/matmul.py index a40ccc207b..5fd8e44832 100644 --- a/numba_dpex/examples/kernel/matmul.py +++ b/numba_dpex/examples/kernel/matmul.py @@ -9,6 +9,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range @dpex.kernel @@ -30,13 +31,13 @@ def gemm(a, b, c): Y = 16 global_size = X, X -griddim = X, X -blockdim = Y, Y +griddim = Range(X, X) +blockdim = Range(Y, Y) def driver(a, b, c): # Invoke the kernel - gemm[griddim, blockdim](a, b, c) + gemm[NdRange(griddim, blockdim)](a, b, c) def main(): diff --git a/numba_dpex/examples/kernel/pairwise_distance.py b/numba_dpex/examples/kernel/pairwise_distance.py index 30d940a871..da4822c64f 100644 --- a/numba_dpex/examples/kernel/pairwise_distance.py +++ b/numba_dpex/examples/kernel/pairwise_distance.py @@ -12,6 +12,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range parser = argparse.ArgumentParser( description="Program to compute pairwise distance" @@ -25,9 +26,9 @@ args = parser.parse_args() # Global work size is equal to the number of points -global_size = (args.n,) +global_size = Range(args.n) # Local Work size is optional -local_size = (args.l,) +local_size = Range(args.l) X = np.random.random((args.n, args.d)).astype(np.single) D = np.empty((args.n, args.n), dtype=np.single) @@ -65,7 +66,7 @@ def driver(): for repeat in range(args.r): start = time() - pairwise_distance[global_size, local_size]( + pairwise_distance[NdRange(global_size, local_size)]( x_ndarray, d_ndarray, X.shape[0], X.shape[1] ) end = time() diff --git a/numba_dpex/examples/kernel/scan.py b/numba_dpex/examples/kernel/scan.py index 6ee4056fbb..13374bbf4b 100644 --- a/numba_dpex/examples/kernel/scan.py +++ b/numba_dpex/examples/kernel/scan.py @@ -7,6 +7,7 @@ import dpnp as np import numba_dpex as ndpx +from numba_dpex.core.kernel_interface.utils import Range # 1D array size N = 64 @@ -56,7 +57,7 @@ def main(): print("Using device ...") print(arr.device) - kernel_hillis_steele_scan[N](arr) + kernel_hillis_steele_scan[Range(N)](arr) # the output should be [0, 1, 3, 6, ...] arr_np = np.asnumpy(arr) diff --git a/numba_dpex/examples/kernel/select_device_for_kernel.py b/numba_dpex/examples/kernel/select_device_for_kernel.py index 7c08d7e9eb..fbe1f27bd1 100644 --- a/numba_dpex/examples/kernel/select_device_for_kernel.py +++ b/numba_dpex/examples/kernel/select_device_for_kernel.py @@ -9,6 +9,7 @@ import numpy as np import numba_dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range """ We support passing arrays of two types to a @numba_dpex.kernel decorated @@ -86,7 +87,7 @@ def select_device_ndarray(N): default_device = dpctl.select_default_device() with numba_dpex.offload_to_sycl_device(default_device.filter_string): - sum_kernel[(N,), (1,)](a, b, got) + sum_kernel[NdRange(Range(N), Range(1))](a, b, got) expected = a + b @@ -110,7 +111,7 @@ def select_device_SUAI(N): # Users don't need to specify where the computation will # take place. It will be inferred from data. - sum_kernel[(N,), (1,)](da, db, dc) + sum_kernel[NdRange(Range(N), Range(1))](da, db, dc) dc.usm_data.copy_to_host(got.reshape((-1)).view("|u1")) diff --git a/numba_dpex/examples/kernel/sum_reduction_ocl.py b/numba_dpex/examples/kernel/sum_reduction_ocl.py index 9ab19bdebd..03ddecdd0f 100644 --- a/numba_dpex/examples/kernel/sum_reduction_ocl.py +++ b/numba_dpex/examples/kernel/sum_reduction_ocl.py @@ -7,6 +7,7 @@ from numba import int32 import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range @dpex.kernel @@ -49,9 +50,9 @@ def sum_reduce(A): partial_sums = dpt.zeros(nb_work_groups, dtype=A.dtype, device=A.device) - gs = (global_size,) - ls = (work_group_size,) - sum_reduction_kernel[gs, ls](A, partial_sums) + gs = Range(global_size) + ls = Range(work_group_size) + sum_reduction_kernel[NdRange(gs, ls)](A, partial_sums) final_sum = 0 # calculate the final sum in HOST diff --git a/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py b/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py index 40183c5931..b90a985df0 100644 --- a/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py +++ b/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py @@ -13,6 +13,7 @@ from numba import int32 import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range @dpex.kernel @@ -58,13 +59,15 @@ def sum_recursive_reduction(size, group_size, Dinp, Dpartial_sums): nb_work_groups += 1 passed_size = nb_work_groups * group_size - gr = (passed_size,) - lr = (group_size,) + gr = Range(passed_size) + lr = Range(group_size) - sum_reduction_kernel[gr, lr](Dinp, size, Dpartial_sums) + sum_reduction_kernel[NdRange(gr, lr)](Dinp, size, Dpartial_sums) if nb_work_groups <= group_size: - sum_reduction_kernel[lr, lr](Dpartial_sums, nb_work_groups, Dinp) + sum_reduction_kernel[NdRange(lr, lr)]( + Dpartial_sums, nb_work_groups, Dinp + ) result = int(Dinp[0]) else: result = sum_recursive_reduction( diff --git a/numba_dpex/examples/kernel/vector_sum.py b/numba_dpex/examples/kernel/vector_sum.py index cb1b9fa2bb..40ccc268ba 100644 --- a/numba_dpex/examples/kernel/vector_sum.py +++ b/numba_dpex/examples/kernel/vector_sum.py @@ -6,6 +6,7 @@ import numpy.testing as testing import numba_dpex as ndpx +from numba_dpex.core.kernel_interface.utils import Range # Data parallel kernel implementing vector sum @@ -18,7 +19,7 @@ def kernel_vector_sum(a, b, c): # Utility function for printing and testing def driver(a, b, c, global_size): - kernel_vector_sum[global_size](a, b, c) + kernel_vector_sum[Range(global_size)](a, b, c) a_np = dpnp.asnumpy(a) # Copy dpnp array a to NumPy array a_np b_np = dpnp.asnumpy(b) # Copy dpnp array b to NumPy array b_np diff --git a/numba_dpex/examples/kernel/vector_sum2D.py b/numba_dpex/examples/kernel/vector_sum2D.py index 4e38f0bc4b..5547698df8 100644 --- a/numba_dpex/examples/kernel/vector_sum2D.py +++ b/numba_dpex/examples/kernel/vector_sum2D.py @@ -23,19 +23,14 @@ def data_parallel_sum(a, b, c): def driver(a, b, c, global_size): - data_parallel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) - - -def driver_with_range(a, b, c, global_size): - ranges = Range(*global_size) - data_parallel_sum[ranges](a, b, c) + data_parallel_sum[global_size](a, b, c) def main(): # Array dimensions X = 8 Y = 8 - global_size = X, Y + global_size = Range(X, Y) a = np.arange(X * Y, dtype=np.float32).reshape(X, Y) b = np.arange(X * Y, dtype=np.float32).reshape(X, Y) @@ -59,11 +54,6 @@ def main(): c_out = dpt.asnumpy(c_dpt) assert np.allclose(c, c_out) - print("Running kernel with the new lanuch parameter syntax ...") - driver_with_range(a_dpt, b_dpt, c_dpt, global_size) - c_out = dpt.asnumpy(c_dpt) - assert np.allclose(c, c_out) - print("Done...")