From 88f5cfb5dfa74f32d634389196fd759640c400eb Mon Sep 17 00:00:00 2001
From: Diptorup Deb <diptorup.deb@intel.com>
Date: Tue, 6 Jun 2023 11:32:31 -0500
Subject: [PATCH 1/3] Add fveclib=none to compile with latest icx.

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index b81806266e..e2422f6094 100644
--- a/setup.py
+++ b/setup.py
@@ -99,6 +99,7 @@ def spirv_compile():
     clang_args = [
         compiler,
         "-flto",
+        "-fveclib=none",
         "-target",
         "spir64-unknown-unknown",
         "-c",

From 4b9aacfcbc90a1d36c37f2a721663b294b3b0d7b Mon Sep 17 00:00:00 2001
From: Diptorup Deb <diptorup.deb@intel.com>
Date: Tue, 6 Jun 2023 13:15:16 -0500
Subject: [PATCH 2/3] Remove filter string parameterization from tests.

---
 .../DpctlSyclQueue/test_queue_ref_attr.py     |  2 +-
 .../USMNdArray/test_array_creation_errors.py  |  1 -
 .../parfors/test_dpnp_bitwise_ops.py          |  7 +--
 .../parfors/test_dpnp_logic_ops.py            |  7 +--
 .../test_dpnp_transcedental_functions.py      | 11 ++--
 .../test_dpnp_trigonometric_functions.py      | 18 ++-----
 .../tests/kernel_tests/test_atomic_op.py      | 21 ++++----
 numba_dpex/tests/kernel_tests/test_barrier.py | 10 ++--
 numba_dpex/tests/kernel_tests/test_caching.py | 14 ++---
 numba_dpex/tests/test_array_utils.py          | 54 +++++++++----------
 numba_dpex/tests/test_vectorize.py            | 19 +++----
 11 files changed, 61 insertions(+), 103 deletions(-)

diff --git a/numba_dpex/tests/core/types/DpctlSyclQueue/test_queue_ref_attr.py b/numba_dpex/tests/core/types/DpctlSyclQueue/test_queue_ref_attr.py
index 08a1315b76..57cd6329b6 100644
--- a/numba_dpex/tests/core/types/DpctlSyclQueue/test_queue_ref_attr.py
+++ b/numba_dpex/tests/core/types/DpctlSyclQueue/test_queue_ref_attr.py
@@ -72,7 +72,7 @@ def test_queue_equality(queue1, queue2):
     cq1 = dpctl._sycl_queue_manager.get_device_cached_queue(d)
     cq2 = dpctl._sycl_queue_manager.get_device_cached_queue(d)
 
-    expected = cq1 == cq2
     actual = test_queue_equality(cq1, cq2)
+    expected = cq1 == cq2
 
     assert expected == actual
diff --git a/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py b/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py
index 6d2293e81c..2236860e17 100644
--- a/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py
+++ b/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py
@@ -1,5 +1,4 @@
 import dpctl
-from numba.core.types.scalars import Float
 
 from numba_dpex.core.types import USMNdArray
 
diff --git a/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_bitwise_ops.py b/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_bitwise_ops.py
index e5ecf7cbbf..6740a5da00 100644
--- a/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_bitwise_ops.py
+++ b/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_bitwise_ops.py
@@ -9,7 +9,6 @@
 import pytest
 
 from numba_dpex import dpjit
-from numba_dpex.tests._helper import filter_strings
 
 list_of_binary_ops = [
     "bitwise_and",
@@ -51,8 +50,7 @@ def input_arrays(request):
     return a, b
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_binary_ops(filter_str, binary_op, input_arrays):
+def test_binary_ops(binary_op, input_arrays):
     a, b = input_arrays
     binop = getattr(dpnp, binary_op)
     actual = dpnp.empty(shape=a.shape, dtype=a.dtype)
@@ -73,8 +71,7 @@ def f(a, b):
     )
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_unary_ops(filter_str, unary_op, input_arrays):
+def test_unary_ops(unary_op, input_arrays):
     a = input_arrays[0]
     uop = getattr(dpnp, unary_op)
     actual = np.empty(shape=a.shape, dtype=a.dtype)
diff --git a/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_logic_ops.py b/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_logic_ops.py
index df899124a1..1d617553d1 100644
--- a/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_logic_ops.py
+++ b/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_logic_ops.py
@@ -9,7 +9,6 @@
 import pytest
 
 from numba_dpex import dpjit
-from numba_dpex.tests._helper import filter_strings
 
 """ Following cases, dpnp raises NotImplementedError"""
 
@@ -61,8 +60,7 @@ def input_arrays(request):
 
 
 @pytest.mark.xfail
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_binary_ops(filter_str, binary_op, input_arrays):
+def test_binary_ops(binary_op, input_arrays):
     a, b = input_arrays
     binop = getattr(dpnp, binary_op)
     actual = dpnp.empty(shape=a.shape, dtype=a.dtype)
@@ -84,8 +82,7 @@ def f(a, b):
 
 
 @pytest.mark.xfail
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_unary_ops(filter_str, unary_op, input_arrays):
+def test_unary_ops(unary_op, input_arrays):
     a = input_arrays[0]
     uop = getattr(dpnp, unary_op)
     actual = dpnp.empty(shape=a.shape, dtype=a.dtype)
diff --git a/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_transcedental_functions.py b/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_transcedental_functions.py
index a2a2d65f46..992939fdb8 100644
--- a/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_transcedental_functions.py
+++ b/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_transcedental_functions.py
@@ -9,7 +9,7 @@
 import pytest
 
 from numba_dpex import dpjit
-from numba_dpex.tests._helper import filter_strings, is_gen12
+from numba_dpex.tests._helper import is_gen12
 
 """dpnp raise error on : mod, abs and remainder(float32)"""
 list_of_binary_ops = [
@@ -78,8 +78,7 @@ def input_arrays(request):
     return a, b
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_binary_ops(filter_str, binary_op, input_arrays):
+def test_binary_ops(binary_op, input_arrays):
     a, b = input_arrays
     binop = getattr(dpnp, binary_op)
     actual = dpnp.empty(shape=a.shape, dtype=a.dtype)
@@ -101,10 +100,10 @@ def f(a, b):
     )
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_unary_ops(filter_str, unary_op, input_arrays):
+def test_unary_ops(unary_op, input_arrays):
     skip_ops = ["abs", "sign", "log", "log2", "log10", "expm1"]
-    if unary_op in skip_ops and is_gen12(filter_str):
+    device = dpctl.SyclDevice()
+    if unary_op in skip_ops and is_gen12(device.filter_string):
         pytest.skip()
 
     a = input_arrays[0]
diff --git a/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_trigonometric_functions.py b/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_trigonometric_functions.py
index 6c4e877f54..850b646293 100644
--- a/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_trigonometric_functions.py
+++ b/numba_dpex/tests/dpjit_tests/parfors/test_dpnp_trigonometric_functions.py
@@ -9,19 +9,7 @@
 import pytest
 
 from numba_dpex import dpjit
-from numba_dpex.tests._helper import filter_strings, is_gen12
-
-list_of_filter_strs = [
-    "opencl:gpu:0",
-    "level_zero:gpu:0",
-    "opencl:cpu:0",
-]
-
-
-@pytest.fixture(params=list_of_filter_strs)
-def filter_str(request):
-    return request.param
-
+from numba_dpex.tests._helper import is_gen12
 
 list_of_trig_ops = [
     "sin",
@@ -70,8 +58,8 @@ def input_arrays(request):
     return a, b
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_trigonometric_fn(filter_str, trig_op, input_arrays):
+def test_trigonometric_fn(trig_op, input_arrays):
+    filter_str = dpctl.SyclDevice().filter_string
     # FIXME: Why does archcosh fail on Gen12 discrete graphics card?
     if trig_op == "arccosh" and is_gen12(filter_str):
         pytest.skip()
diff --git a/numba_dpex/tests/kernel_tests/test_atomic_op.py b/numba_dpex/tests/kernel_tests/test_atomic_op.py
index d4aacb9ce1..3f18b20e57 100644
--- a/numba_dpex/tests/kernel_tests/test_atomic_op.py
+++ b/numba_dpex/tests/kernel_tests/test_atomic_op.py
@@ -9,7 +9,7 @@
 import numba_dpex as dpex
 from numba_dpex import config
 from numba_dpex.core.descriptor import dpex_kernel_target
-from numba_dpex.tests._helper import filter_strings, override_config
+from numba_dpex.tests._helper import override_config
 
 global_size = 100
 N = global_size
@@ -38,8 +38,8 @@ def fdtype(request):
 
 @pytest.fixture(params=list_of_i_dtypes + list_of_f_dtypes)
 def input_arrays(request):
-    def _inpute_arrays(filter_str):
-        a = np.array([0], request.param, device=filter_str)
+    def _inpute_arrays():
+        a = np.array([0], request.param)
         return a, request.param
 
     return _inpute_arrays
@@ -72,10 +72,9 @@ def f(a):
 )
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
 @skip_no_atomic_support
-def test_kernel_atomic_simple(filter_str, input_arrays, kernel_result_pair):
-    a, dtype = input_arrays(filter_str)
+def test_kernel_atomic_simple(input_arrays, kernel_result_pair):
+    a, dtype = input_arrays()
     kernel, expected = kernel_result_pair
     kernel[dpex.Range(global_size)](a)
     assert a[0] == expected
@@ -112,10 +111,9 @@ def f(a):
     return f
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
 @skip_no_atomic_support
-def test_kernel_atomic_local(filter_str, input_arrays, return_list_of_op):
-    a, dtype = input_arrays(filter_str)
+def test_kernel_atomic_local(input_arrays, return_list_of_op):
+    a, dtype = input_arrays()
     op_type, expected = return_list_of_op
     f = get_func_local(op_type, dtype)
     kernel = dpex.kernel(f)
@@ -150,15 +148,14 @@ def f(a):
     return dpex.kernel(f)
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
 @skip_no_atomic_support
 def test_kernel_atomic_multi_dim(
-    filter_str, return_list_of_op, return_list_of_dim, return_dtype
+    return_list_of_op, return_list_of_dim, return_dtype
 ):
     op_type, expected = return_list_of_op
     dim = return_list_of_dim
     kernel = get_kernel_multi_dim(op_type, len(dim))
-    a = np.zeros(dim, dtype=return_dtype, device=filter_str)
+    a = np.zeros(dim, dtype=return_dtype)
     kernel[dpex.Range(global_size)](a)
     assert a[0] == expected
 
diff --git a/numba_dpex/tests/kernel_tests/test_barrier.py b/numba_dpex/tests/kernel_tests/test_barrier.py
index c9cf85536f..92b8c604ed 100644
--- a/numba_dpex/tests/kernel_tests/test_barrier.py
+++ b/numba_dpex/tests/kernel_tests/test_barrier.py
@@ -9,13 +9,11 @@
 
 import numba_dpex as dpex
 from numba_dpex import float32, usm_ndarray, void
-from numba_dpex.tests._helper import filter_strings
 
 f32arrty = usm_ndarray(ndim=1, dtype=float32, layout="C")
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_proper_lowering(filter_str):
+def test_proper_lowering():
     # This will trigger eager compilation
     @dpex.kernel(void(f32arrty))
     def twice(A):
@@ -35,8 +33,7 @@ def twice(A):
     np.testing.assert_allclose(orig * 2, after)
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_no_arg_barrier_support(filter_str):
+def test_no_arg_barrier_support():
     @dpex.kernel(void(f32arrty))
     def twice(A):
         i = dpex.get_global_id(0)
@@ -54,8 +51,7 @@ def twice(A):
     np.testing.assert_allclose(orig * 2, after)
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_local_memory(filter_str):
+def test_local_memory():
     blocksize = 10
 
     @dpex.kernel(void(f32arrty))
diff --git a/numba_dpex/tests/kernel_tests/test_caching.py b/numba_dpex/tests/kernel_tests/test_caching.py
index a298b1cdbd..4e1c6e31a4 100644
--- a/numba_dpex/tests/kernel_tests/test_caching.py
+++ b/numba_dpex/tests/kernel_tests/test_caching.py
@@ -11,7 +11,6 @@
 import numba_dpex as dpex
 from numba_dpex.core.caching import LRUCache
 from numba_dpex.core.kernel_interface.dispatcher import JitKernel
-from numba_dpex.tests._helper import filter_strings
 
 
 def test_LRUcache_operations():
@@ -106,17 +105,14 @@ def test_LRUcache_operations():
     assert str(cache.evicted) == "{5: 'f', 7: 'h', 8: 'i', 9: 'j', 2: 'c'}"
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_caching_hit_counts(filter_str):
+def test_caching_hit_counts():
     """Tests the correct number of cache hits.
+
     If a Dispatcher is invoked 10 times and if the caching is enabled,
     then the total number of cache hits will be 9. Given the fact that
     the first time the kernel will be compiled and it will be loaded
     off the cache for the next time on.
 
-    Args:
-        filter_str (str): The device name coming from filter_strings in
-        ._helper.py
     """
 
     def data_parallel_sum(x, y, z):
@@ -126,9 +122,9 @@ def data_parallel_sum(x, y, z):
         i = dpex.get_global_id(0)
         z[i] = x[i] + y[i]
 
-    a = dpt.arange(0, 100, device=filter_str)
-    b = dpt.arange(0, 100, device=filter_str)
-    c = dpt.zeros_like(a, device=filter_str)
+    a = dpt.arange(0, 100)
+    b = dpt.arange(0, 100)
+    c = dpt.zeros_like(a)
 
     expected = dpt.asnumpy(a) + dpt.asnumpy(b)
 
diff --git a/numba_dpex/tests/test_array_utils.py b/numba_dpex/tests/test_array_utils.py
index 5c7b9f1852..bf8590a585 100644
--- a/numba_dpex/tests/test_array_utils.py
+++ b/numba_dpex/tests/test_array_utils.py
@@ -8,9 +8,7 @@
 import dpctl.memory as dpctl_mem
 import dpctl.tensor as dpt
 import numpy as np
-import pytest
 
-from numba_dpex.tests._helper import filter_strings
 from numba_dpex.utils import (
     as_usm_obj,
     copy_to_numpy_from_usm_obj,
@@ -20,38 +18,34 @@
 from . import _helper
 
 
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_has_usm_memory(filter_str):
+def test_has_usm_memory():
     a = np.ones(1023, dtype=np.float32)
+    q = dpctl.SyclQueue()
+    # test usm_ndarray
+    da = dpt.usm_ndarray(a.shape, dtype=a.dtype, buffer="shared")
+    usm_mem = has_usm_memory(da)
+    assert da.usm_data._pointer == usm_mem._pointer
 
-    with dpctl.device_context(filter_str) as q:
-        # test usm_ndarray
-        da = dpt.usm_ndarray(a.shape, dtype=a.dtype, buffer="shared")
-        usm_mem = has_usm_memory(da)
-        assert da.usm_data._pointer == usm_mem._pointer
+    # test usm allocated numpy.ndarray
+    buf = dpctl_mem.MemoryUSMShared(a.size * a.dtype.itemsize, queue=q)
+    ary_buf = np.ndarray(a.shape, buffer=buf, dtype=a.dtype)
+    usm_mem = has_usm_memory(ary_buf)
+    assert buf._pointer == usm_mem._pointer
 
-        # test usm allocated numpy.ndarray
-        buf = dpctl_mem.MemoryUSMShared(a.size * a.dtype.itemsize, queue=q)
-        ary_buf = np.ndarray(a.shape, buffer=buf, dtype=a.dtype)
-        usm_mem = has_usm_memory(ary_buf)
-        assert buf._pointer == usm_mem._pointer
+    usm_mem = has_usm_memory(a)
+    assert usm_mem is None
 
-        usm_mem = has_usm_memory(a)
-        assert usm_mem is None
 
-
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_as_usm_obj(filter_str):
+def test_as_usm_obj():
     a = np.ones(1023, dtype=np.float32)
     b = a * 3
-
-    with dpctl.device_context(filter_str) as queue:
-        a_copy = np.empty_like(a)
-        usm_mem = as_usm_obj(a, queue=queue)
-        copy_to_numpy_from_usm_obj(usm_mem, a_copy)
-        assert np.all(a == a_copy)
-
-        b_copy = np.empty_like(b)
-        usm_mem = as_usm_obj(b, queue=queue, copy=False)
-        copy_to_numpy_from_usm_obj(usm_mem, b_copy)
-        assert np.any(np.not_equal(b, b_copy))
+    queue = dpctl.SyclQueue()
+    a_copy = np.empty_like(a)
+    usm_mem = as_usm_obj(a, queue=queue)
+    copy_to_numpy_from_usm_obj(usm_mem, a_copy)
+    assert np.all(a == a_copy)
+
+    b_copy = np.empty_like(b)
+    usm_mem = as_usm_obj(b, queue=queue, copy=False)
+    copy_to_numpy_from_usm_obj(usm_mem, b_copy)
+    assert np.any(np.not_equal(b, b_copy))
diff --git a/numba_dpex/tests/test_vectorize.py b/numba_dpex/tests/test_vectorize.py
index 49cb358c53..f33593a919 100644
--- a/numba_dpex/tests/test_vectorize.py
+++ b/numba_dpex/tests/test_vectorize.py
@@ -4,12 +4,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import dpctl
 import numpy as np
 import pytest
-from numba import float32, float64, int32, int64, njit, vectorize
-
-from numba_dpex.tests._helper import filter_strings
+from numba import float32, float64, int32, int64, vectorize
 
 list_of_shape = [
     (100, 100),
@@ -45,8 +42,7 @@ def input_type(request):
 
 
 @pytest.mark.xfail
-@pytest.mark.parametrize("filter_str", filter_strings)
-def test_vectorize(filter_str, shape, dtypes, input_type):
+def test_vectorize(shape, dtypes, input_type):
     def vector_add(a, b):
         return a + b
 
@@ -61,10 +57,9 @@ def vector_add(a, b):
         A = dtype(1.2)
         B = dtype(2.3)
 
-    with dpctl.device_context(filter_str):
-        f = vectorize(sig, target="dpex")(vector_add)
-        expected = f(A, B)
-        actual = vector_add(A, B)
+    f = vectorize(sig, target="dpex")(vector_add)
+    expected = f(A, B)
+    actual = vector_add(A, B)
 
-        max_abs_err = np.sum(expected) - np.sum(actual)
-        assert max_abs_err < 1e-5
+    max_abs_err = np.sum(expected) - np.sum(actual)
+    assert max_abs_err < 1e-5

From bbd61320fd73d6e39ae16787554d9ca5e27c9cad Mon Sep 17 00:00:00 2001
From: Diptorup Deb <diptorup.deb@intel.com>
Date: Thu, 8 Jun 2023 20:13:11 -0500
Subject: [PATCH 3/3] Changed the DpctlSyclQueue and USMNdArray types.

   - Storing the Python dpctl.SyclQueue inside any instance of
     the DpctlSyclQueue type was causing segfaults due to
     the Python object getting garbage collected prematurely.

     The changes in the PR update the DpctlSyclQueue type to
     only store the filter string associated with the
     dpctl.SyclQueue and not the actual Python object. In addition,
     the USMNdArray type now stores an instance of a
     DpctlSyclQueue in its queue parameter instead of a Python
     dpctl.SyclQueue object.

     Due to these changes, all places where the Python
     dpctl.SyclQueue was getting extracted and used from a
     UsmNdArray instance or DpctlSyclQueue instance have been
     updated.

     All test cases were also updated.
---
 .../core/kernel_interface/dispatcher.py       |   7 +-
 numba_dpex/core/parfors/kernel_builder.py     |   6 +-
 .../core/parfors/reduction_kernel_builder.py  |  22 +-
 numba_dpex/core/runtime/_dpexrt_python.c      |   2 +-
 numba_dpex/core/typeconv/array_conversion.py  |   5 +-
 numba_dpex/core/types/dpctl_types.py          |  31 ++-
 numba_dpex/core/types/usm_ndarray_type.py     |  46 ++--
 numba_dpex/core/typing/typeof.py              |   7 +-
 numba_dpex/dpnp_iface/_intrinsic.py           | 258 ++++++++++++++----
 numba_dpex/dpnp_iface/arrayobj.py             | 154 +++--------
 .../USMNdArray/test_array_creation_errors.py  | 100 +++----
 .../USMNdArray/test_usm_ndarray_creation.py   |  16 +-
 .../tests/dpjit_tests/dpnp/test_dpnp_empty.py |   5 +-
 .../dpjit_tests/dpnp/test_dpnp_empty_like.py  |   8 +-
 .../tests/dpjit_tests/test_dpjit_reduction.py |   1 -
 .../kernel_tests/test_scalar_arg_types.py     |   3 +-
 16 files changed, 368 insertions(+), 303 deletions(-)

diff --git a/numba_dpex/core/kernel_interface/dispatcher.py b/numba_dpex/core/kernel_interface/dispatcher.py
index 627e082fc1..847e1424cf 100644
--- a/numba_dpex/core/kernel_interface/dispatcher.py
+++ b/numba_dpex/core/kernel_interface/dispatcher.py
@@ -409,9 +409,14 @@ def __call__(self, *args):
         # FIXME: For specialized and ahead of time compiled and cached kernels,
         # the CFD check was already done statically. The run-time check is
         # redundant. We should avoid these checks for the specialized case.
-        exec_queue = determine_kernel_launch_queue(
+        ty_queue = determine_kernel_launch_queue(
             args, argtypes, self.kernel_name
         )
+
+        # FIXME: We need a better way than having to create a queue every time.
+        device = ty_queue.sycl_device
+        exec_queue = dpctl.get_device_cached_queue(device)
+
         backend = exec_queue.backend
 
         if exec_queue.backend not in [
diff --git a/numba_dpex/core/parfors/kernel_builder.py b/numba_dpex/core/parfors/kernel_builder.py
index 01c2bd4c0a..7200a6e62a 100644
--- a/numba_dpex/core/parfors/kernel_builder.py
+++ b/numba_dpex/core/parfors/kernel_builder.py
@@ -6,6 +6,7 @@
 import sys
 import warnings
 
+import dpctl
 import dpctl.program as dpctl_prog
 from numba.core import ir, types
 from numba.core.errors import NumbaParallelSafetyWarning
@@ -426,7 +427,10 @@ def create_kernel_for_parfor(
     for arg in parfor_args:
         obj = typemap[arg]
         if isinstance(obj, DpnpNdArray):
-            exec_queue = obj.queue
+            filter_string = obj.queue.sycl_device
+            # FIXME: A better design is required so that we do not have to
+            # create a queue every time.
+            exec_queue = dpctl.get_device_cached_queue(filter_string)
 
     if not exec_queue:
         raise AssertionError(
diff --git a/numba_dpex/core/parfors/reduction_kernel_builder.py b/numba_dpex/core/parfors/reduction_kernel_builder.py
index 3e2d7d0f51..15ad116288 100644
--- a/numba_dpex/core/parfors/reduction_kernel_builder.py
+++ b/numba_dpex/core/parfors/reduction_kernel_builder.py
@@ -4,6 +4,7 @@
 
 import warnings
 
+import dpctl
 from numba.core import types
 from numba.core.errors import NumbaParallelSafetyWarning
 from numba.core.ir_utils import (
@@ -18,6 +19,8 @@
 )
 from numba.core.typing import signature
 
+from numba_dpex.core.types import DpctlSyclQueue
+
 from ..utils.kernel_templates.reduction_template import (
     RemainderReduceIntermediateKernelTemplate,
     TreeReduceIntermediateKernelTemplate,
@@ -134,7 +137,13 @@ def create_reduction_main_kernel_for_parfor(
         flags.noalias = True
 
     kernel_sig = signature(types.none, *kernel_param_types)
-    exec_queue = typemap[reductionKernelVar.parfor_params[0]].queue
+
+    # FIXME: A better design is required so that we do not have to create a
+    # queue every time.
+    ty_queue: DpctlSyclQueue = typemap[
+        reductionKernelVar.parfor_params[0]
+    ].queue
+    exec_queue = dpctl.get_device_cached_queue(ty_queue.sycl_device)
 
     sycl_kernel = _compile_kernel_parfor(
         exec_queue,
@@ -331,11 +340,12 @@ def create_reduction_remainder_kernel_for_parfor(
 
     kernel_sig = signature(types.none, *kernel_param_types)
 
-    # FIXME: Enable check after CFD pass has been added
-    # exec_queue = determine_kernel_launch_queue(
-    #     args=parfor_args, argtypes=kernel_param_types, kernel_name=kernel_name
-    # )
-    exec_queue = typemap[reductionKernelVar.parfor_params[0]].queue
+    # FIXME: A better design is required so that we do not have to create a
+    # queue every time.
+    ty_queue: DpctlSyclQueue = typemap[
+        reductionKernelVar.parfor_params[0]
+    ].queue
+    exec_queue = dpctl.get_device_cached_queue(ty_queue.sycl_device)
 
     sycl_kernel = _compile_kernel_parfor(
         exec_queue,
diff --git a/numba_dpex/core/runtime/_dpexrt_python.c b/numba_dpex/core/runtime/_dpexrt_python.c
index 22cddef3e0..981f1a617d 100644
--- a/numba_dpex/core/runtime/_dpexrt_python.c
+++ b/numba_dpex/core/runtime/_dpexrt_python.c
@@ -1180,7 +1180,7 @@ static int DPEXRT_sycl_queue_from_python(PyObject *obj,
     PyGILState_STATE gstate;
 
     // Increment the ref count on obj to prevent CPython from garbage
-    // collecting the array.
+    // collecting the dpctl.SyclQueue object
     Py_IncRef(obj);
 
     // We are unconditionally casting obj to a struct PySyclQueueObject*. If
diff --git a/numba_dpex/core/typeconv/array_conversion.py b/numba_dpex/core/typeconv/array_conversion.py
index 228265b6ec..5b7472c59b 100644
--- a/numba_dpex/core/typeconv/array_conversion.py
+++ b/numba_dpex/core/typeconv/array_conversion.py
@@ -4,8 +4,7 @@
 
 from numba.np import numpy_support
 
-from numba_dpex.core.types import USMNdArray
-from numba_dpex.core.utils import get_info_from_suai
+from numba_dpex.core.types import DpctlSyclQueue, USMNdArray
 from numba_dpex.utils.constants import address_space
 
 
@@ -37,7 +36,7 @@ def to_usm_ndarray(suai_attrs, addrspace=address_space.GLOBAL):
         ndim=suai_attrs.dimensions,
         layout=layout,
         usm_type=suai_attrs.usm_type,
-        queue=suai_attrs.queue,
+        queue=DpctlSyclQueue(suai_attrs.queue),
         readonly=not suai_attrs.is_writable,
         name=None,
         aligned=True,
diff --git a/numba_dpex/core/types/dpctl_types.py b/numba_dpex/core/types/dpctl_types.py
index 2dff5b1406..f7d03ba04a 100644
--- a/numba_dpex/core/types/dpctl_types.py
+++ b/numba_dpex/core/types/dpctl_types.py
@@ -14,20 +14,18 @@
 
 
 class DpctlSyclQueue(types.Type):
-    """A Numba type to represent a dpctl.SyclQueue PyObject.
-
-    For now, a dpctl.SyclQueue is represented as a Numba opaque type that allows
-    passing in and using a SyclQueue object as an opaque pointer type inside
-    Numba.
-    """
+    """A Numba type to represent a dpctl.SyclQueue PyObject."""
 
     def __init__(self, sycl_queue):
         if not isinstance(sycl_queue, SyclQueue):
             raise TypeError("The argument sycl_queue is not of type SyclQueue.")
 
-        self._sycl_queue = sycl_queue
+        # XXX: Storing the device filter string is a temporary workaround till
+        # the compute follows data inference pass is fixed to use SyclQueue
+        self._device = sycl_queue.sycl_device.filter_string
+
         try:
-            self._unique_id = hash(self._sycl_queue)
+            self._unique_id = hash(sycl_queue)
         except Exception:
             self._unique_id = self.rand_digit_str(16)
         super(DpctlSyclQueue, self).__init__(name="DpctlSyclQueue")
@@ -38,8 +36,14 @@ def rand_digit_str(self, n):
         )
 
     @property
-    def sycl_queue(self):
-        return self._sycl_queue
+    def sycl_device(self):
+        """Returns the SYCL oneAPI extension filter string associated with the
+        queue.
+
+        Returns:
+            str: A SYCL oneAPI extension filter string
+        """
+        return self._device
 
     @property
     def key(self):
@@ -69,11 +73,8 @@ def unbox_sycl_queue(typ, obj, c):
     qptr = qstruct._getpointer()
     ptr = c.builder.bitcast(qptr, c.pyapi.voidptr)
 
-    if c.context.enable_nrt:
-        dpexrtCtx = dpexrt.DpexRTContext(c.context)
-        errcode = dpexrtCtx.queuestruct_from_python(c.pyapi, obj, ptr)
-    else:
-        raise UnreachableError
+    dpexrtCtx = dpexrt.DpexRTContext(c.context)
+    errcode = dpexrtCtx.queuestruct_from_python(c.pyapi, obj, ptr)
     is_error = cgutils.is_not_null(c.builder, errcode)
 
     # Handle error
diff --git a/numba_dpex/core/types/usm_ndarray_type.py b/numba_dpex/core/types/usm_ndarray_type.py
index 1ef0216dca..f6eb08564f 100644
--- a/numba_dpex/core/types/usm_ndarray_type.py
+++ b/numba_dpex/core/types/usm_ndarray_type.py
@@ -12,6 +12,7 @@
 from numba.core.types.npytypes import Array
 from numba.np.numpy_support import from_dtype
 
+from numba_dpex.core.types.dpctl_types import DpctlSyclQueue
 from numba_dpex.utils import address_space
 
 
@@ -31,22 +32,28 @@ def __init__(
         aligned=True,
         addrspace=address_space.GLOBAL,
     ):
-        if queue and not isinstance(queue, types.misc.Omitted) and device:
+        if (
+            queue is not None
+            and not (
+                isinstance(queue, types.misc.Omitted)
+                or isinstance(queue, types.misc.NoneType)
+            )
+            and device is not None
+        ):
             raise TypeError(
                 "numba_dpex.core.types.usm_ndarray_type.USMNdArray.__init__(): "
-                "`device` and `sycl_queue` are exclusive keywords, i.e. use one or other."
+                "`device` and `sycl_queue` are exclusive keywords, "
+                "i.e. use one or other."
             )
 
-        self.usm_type = usm_type
-        self.addrspace = addrspace
-
-        if queue and not isinstance(queue, types.misc.Omitted):
-            if not isinstance(queue, dpctl.SyclQueue):
+        if queue is not None and not (
+            isinstance(queue, types.misc.Omitted)
+            or isinstance(queue, types.misc.NoneType)
+        ):
+            if not isinstance(queue, DpctlSyclQueue):
                 raise TypeError(
-                    "numba_dpex.core.types.usm_ndarray_type.USMNdArray.__init__(): "
-                    "The queue keyword arg should be a dpctl.SyclQueue object or None."
-                    "Found type(queue) ="
-                    + str(type(queue) + " and queue =" + queue)
+                    "The queue keyword arg should be either DpctlSyclQueue or "
+                    "NoneType. Found type(queue) = " + str(type(queue))
                 )
             self.queue = queue
         else:
@@ -55,24 +62,23 @@ def __init__(
             else:
                 if not isinstance(device, str):
                     raise TypeError(
-                        "numba_dpex.core.types.usm_ndarray_type.USMNdArray.__init__(): "
-                        "The device keyword arg should be a str object specifying "
-                        "a SYCL filter selector."
+                        "The device keyword arg should be a str object "
+                        "specifying a SYCL filter selector."
                     )
                 sycl_device = dpctl.SyclDevice(device)
 
-            self.queue = dpctl._sycl_queue_manager.get_device_cached_queue(
+            sycl_queue = dpctl._sycl_queue_manager.get_device_cached_queue(
                 sycl_device
             )
+            self.queue = DpctlSyclQueue(sycl_queue=sycl_queue)
 
-        self.device = self.queue.sycl_device.filter_string
+        self.device = self.queue.sycl_device
+        self.usm_type = usm_type
+        self.addrspace = addrspace
 
         if not dtype:
             dummy_tensor = dpctl.tensor.empty(
-                1,
-                order=layout,
-                usm_type=usm_type,
-                sycl_queue=self.queue,
+                1, order=layout, usm_type=usm_type, device=self.device
             )
             # convert dpnp type to numba/numpy type
             _dtype = dummy_tensor.dtype
diff --git a/numba_dpex/core/typing/typeof.py b/numba_dpex/core/typing/typeof.py
index 99ff02117c..f4cd6da6e3 100644
--- a/numba_dpex/core/typing/typeof.py
+++ b/numba_dpex/core/typing/typeof.py
@@ -42,7 +42,10 @@ def _typeof_helper(val, array_class_type):
             "The usm_type for the usm_ndarray could not be inferred"
         )
 
-    assert val.sycl_queue is not None
+    if not val.sycl_queue:
+        raise AssertionError
+
+    ty_queue = DpctlSyclQueue(sycl_queue=val.sycl_queue)
 
     return array_class_type(
         dtype=dtype,
@@ -50,7 +53,7 @@ def _typeof_helper(val, array_class_type):
         layout=layout,
         readonly=readonly,
         usm_type=usm_type,
-        queue=val.sycl_queue,
+        queue=ty_queue,
         addrspace=address_space.GLOBAL,
     )
 
diff --git a/numba_dpex/dpnp_iface/_intrinsic.py b/numba_dpex/dpnp_iface/_intrinsic.py
index d998dd1585..f806f16ce8 100644
--- a/numba_dpex/dpnp_iface/_intrinsic.py
+++ b/numba_dpex/dpnp_iface/_intrinsic.py
@@ -4,6 +4,7 @@
 
 from collections import namedtuple
 
+from dpctl import get_device_cached_queue
 from llvmlite import ir as llvmir
 from llvmlite.ir import Constant
 from llvmlite.ir.types import DoubleType, FloatType
@@ -24,9 +25,13 @@
 from numba_dpex.core.types import DpnpNdArray
 from numba_dpex.core.types.dpctl_types import DpctlSyclQueue
 
+_QueueRefPayload = namedtuple(
+    "QueueRefPayload", ["queue_ref", "py_dpctl_sycl_queue_addr", "pyapi"]
+)
+
 
 # XXX: The function should be moved into DpexTargetContext
-def make_queue(context, builder, arrtype):
+def make_queue(context, builder, py_dpctl_sycl_queue):
     """Utility function used for allocating a new queue.
 
     This function will allocates a new queue (e.g. SYCL queue)
@@ -40,12 +45,7 @@ def make_queue(context, builder, arrtype):
             (e.g. `numba.core.cpu.CPUContext`).
         builder (llvmlite.ir.builder.IRBuilder): The IR builder
             from `llvmlite` for code generation.
-        arrtype (numba_dpex.core.types.dpnp_ndarray_type.DpnpNdArray):
-            Any of the array types derived from
-            `numba.core.types.nptypes.Array`,
-            e.g. `numba_dpex.core.types.dpnp_ndarray_type.DpnpNdArray`.
-            Refer to `numba_dpex.dpnp_iface._intrinsic.alloc_empty_arrayobj()`
-            function for details on how to construct this argument.
+        py_dpctl_sycl_queue (dpctl.SyclQueue): A Python dpctl.SyclQueue object.
 
     Returns:
         ret (namedtuple): A namedtuple containing
@@ -56,17 +56,17 @@ def make_queue(context, builder, arrtype):
 
     pyapi = context.get_python_api(builder)
     queue_struct_proxy = cgutils.create_struct_proxy(
-        DpctlSyclQueue(arrtype.queue)
+        DpctlSyclQueue(py_dpctl_sycl_queue)
     )(context, builder)
     queue_struct_ptr = queue_struct_proxy._getpointer()
     queue_struct_voidptr = builder.bitcast(queue_struct_ptr, cgutils.voidptr_t)
 
-    address = context.get_constant(types.intp, id(arrtype.queue))
-    queue_address_ptr = builder.inttoptr(address, cgutils.voidptr_t)
+    address = context.get_constant(types.intp, id(py_dpctl_sycl_queue))
+    py_dpctl_sycl_queue_addr = builder.inttoptr(address, cgutils.voidptr_t)
 
     dpexrtCtx = dpexrt.DpexRTContext(context)
     dpexrtCtx.queuestruct_from_python(
-        pyapi, queue_address_ptr, queue_struct_voidptr
+        pyapi, py_dpctl_sycl_queue_addr, queue_struct_voidptr
     )
 
     queue_struct = builder.load(queue_struct_ptr)
@@ -75,12 +75,79 @@ def make_queue(context, builder, arrtype):
     return_values = namedtuple(
         "return_values", "queue_ref queue_address_ptr pyapi"
     )
-    ret = return_values(queue_ref, queue_address_ptr, pyapi)
+    ret = return_values(queue_ref, py_dpctl_sycl_queue_addr, pyapi)
+
+    return ret
+
+
+def _get_queue_ref(context, builder, sig, args):
+    """Returns an LLVM IR Value pointer to a DpctlSyclQueueRef
+
+    The _get_queue_ref function is used by the intinsic functions that implement
+    the overloads for dpnp array constructors: ``empty``, ``empty_like``,
+    ``zeros``, ``zeros_like``, ``ones``, ``ones_like``, ``full``, ``full_like``.
+
+    The args contains the list of LLVM IR values passed in to the dpnp
+    overloads. The convention we follow is that the queue arg is always the
+    penultimate arg passed to the intrinsic. For that reason, we can extract the
+    queue argument as args[-2] and the type of the argument from the signature
+    as sig.args[-2].
+
+    Depending on whether the ``sycl_queue`` argument was explicitly specified,
+    or was omitted, the queue_arg will be either a DpctlSyclQueue type or a
+    numba NoneType/Omitted type. If a DpctlSyclQueue, then we directly extract
+    the queue_ref from the unboxed native struct representation of a
+    dpctl.SyclQueue. If a queue was not explicitly provided and the type is
+    NoneType/Omitted, we get a cached dpctl.SyclQueue from dpctl and unbox it
+    on the fly and return the queue_ref.
+
+    Args:
+        context (numba.core.base.BaseContext): Any of the context
+            derived from Numba's BaseContext
+            (e.g. `numba.core.cpu.CPUContext`).
+        builder (llvmlite.ir.builder.IRBuilder): The IR builder
+            from `llvmlite` for code generation.
+        sig: Signature of the overload function
+        args (list): LLVM IR values corresponding to the args passed to the LLVM
+            function created for a dpnp overload.
+
+    Return:
+        A namedtuple wrapping the queue_ref pointer, an optional address to
+        a dpctl.SyclQueue Python object, and an option instance of the python
+        api wrapper in the CPUContext.
 
+    """
+
+    queue_arg = args[-2]
+    queue_arg_ty = sig.args[-2]
+
+    queue_ref = None
+    py_dpctl_sycl_queue_addr = None
+    pyapi = None
+
+    if isinstance(queue_arg_ty, DpctlSyclQueue):
+        if not isinstance(queue_arg.type, llvmir.LiteralStructType):
+            raise AssertionError
+        queue_ref = builder.extract_value(queue_arg, 1)
+
+    elif isinstance(queue_arg_ty, types.misc.NoneType) or isinstance(
+        queue_arg_ty, types.misc.Omitted
+    ):
+        if not isinstance(queue_arg.type, llvmir.PointerType):
+            # TODO: check if the pointer is null
+            raise AssertionError
+
+        ty_sycl_queue = sig.return_type.queue
+        py_dpctl_sycl_queue = get_device_cached_queue(ty_sycl_queue.sycl_device)
+        (queue_ref, py_dpctl_sycl_queue_addr, pyapi) = make_queue(
+            context, builder, py_dpctl_sycl_queue
+        )
+
+    ret = _QueueRefPayload(queue_ref, py_dpctl_sycl_queue_addr, pyapi)
     return ret
 
 
-def _empty_nd_impl(context, builder, arrtype, shapes):
+def _empty_nd_impl(context, builder, arrtype, shapes, queue_ref):
     """Utility function used for allocating a new array.
 
     This function is used for allocating a new array during LLVM code
@@ -139,16 +206,13 @@ def _empty_nd_impl(context, builder, arrtype, shapes):
             ),
         )
 
-    (queue_ref, queue_ptr, pyapi) = make_queue(context, builder, arrtype)
-
-    # The queue_ref returned by make_queue if used to allocate a MemInfo
-    # object needs to be copied first. The reason for the copy is to
-    # properly manage the lifetime of the queue_ref object. The original
-    # object is owned by the parent dpctl.SyclQueue object and is deleted
-    # when the dpctl.SyclQueue is garbage collected. Whereas, the copied
-    # queue_ref is to be owned by the NRT_External_Allocator object of
-    # MemInfo, and its lifetime is tied to the MemInfo object.
-
+    # The passed in queue_ref if used to allocate a MemInfo object needs to be
+    # copied first. The reason for the copy is to properly manage the lifetime
+    # of the queue_ref object. The original object is owned by the parent
+    # dpctl.SyclQueue object and is deleted when the dpctl.SyclQueue is garbage
+    # collected. Whereas, the copied queue_ref is to be owned by the
+    # NRT_External_Allocator object of MemInfo, and its lifetime is tied to the
+    # MemInfo object.
     dpexrtCtx = dpexrt.DpexRTContext(context)
     queue_ref_copy = dpexrtCtx.copy_queue(builder, queue_ref)
 
@@ -182,7 +246,6 @@ def _empty_nd_impl(context, builder, arrtype, shapes):
     fnop.get_call_type(context.typing_context, sig.args, {})
     eqfn = context.get_function(fnop, sig)
     meminfo = eqfn(builder, args)
-    pyapi.decref(queue_ptr)
     data = context.nrt.meminfo_data(builder, meminfo)
 
     intp_t = context.get_value_type(types.intp)
@@ -198,10 +261,7 @@ def _empty_nd_impl(context, builder, arrtype, shapes):
         meminfo=meminfo,
     )
 
-    return_values = namedtuple("return_values", "ary queue_ref")
-    ret = return_values(ary, queue_ref)
-
-    return ret
+    return ary
 
 
 @overload_classmethod(DpnpNdArray, "_usm_allocate")
@@ -240,7 +300,7 @@ def codegen(context, builder, signature, args):
     return sig, codegen
 
 
-def alloc_empty_arrayobj(context, builder, sig, args, is_like=False):
+def alloc_empty_arrayobj(context, builder, sig, queue_ref, args, is_like=False):
     """Construct an empty numba.np.arrayobj.make_array.<locals>.ArrayStruct
 
     Args:
@@ -250,26 +310,24 @@ def alloc_empty_arrayobj(context, builder, sig, args, is_like=False):
             llvmlite.
         sig (numba.core.typing.templates.Signature): A numba's function
             signature object.
+        queue_ref (llvmlite.ir.PointerType): Pointer to a DpctlSyclQueueRef
+            object cast to i8*
         args (tuple): A tuple of args to be parsed as the arguments of
             an np.empty(), np.zeros() or np.ones() call.
         is_like (bool, optional): Decides on how to parse the args.
             Defaults to False.
 
-    Returns:
-        tuple(numba.np.arrayobj.make_array.<locals>.ArrayStruct,
-            numba_dpex.core.types.dpnp_ndarray_type.DpnpNdArray):
-                A tuple of allocated array and constructed array type info
-                in DpnpNdArray.
+    Returns: The LLVM IR value that stores the empty array
     """
 
-    arrtype = (
+    arrtype, shape = (
         _parse_empty_like_args(context, builder, sig, args)
         if is_like
         else _parse_empty_args(context, builder, sig, args)
     )
-    ary, queue = _empty_nd_impl(context, builder, *arrtype)
+    ary = _empty_nd_impl(context, builder, arrtype, shape, queue_ref)
 
-    return ary, arrtype, queue
+    return ary
 
 
 def fill_arrayobj(context, builder, ary, arrtype, queue_ref, fill_value):
@@ -296,9 +354,7 @@ def fill_arrayobj(context, builder, ary, arrtype, queue_ref, fill_value):
                 in DpnpNdArray.
     """
 
-    itemsize = context.get_constant(
-        types.intp, get_itemsize(context, arrtype[0])
-    )
+    itemsize = context.get_constant(types.intp, get_itemsize(context, arrtype))
 
     if isinstance(fill_value.type, DoubleType) or isinstance(
         fill_value.type, FloatType
@@ -307,7 +363,7 @@ def fill_arrayobj(context, builder, ary, arrtype, queue_ref, fill_value):
     else:
         value_is_float = context.get_constant(types.boolean, 0)
 
-    if isinstance(arrtype[0].dtype, types.scalars.Float):
+    if isinstance(arrtype.dtype, types.scalars.Float):
         dest_is_float = context.get_constant(types.boolean, 1)
     else:
         dest_is_float = context.get_constant(types.boolean, 0)
@@ -377,7 +433,17 @@ def impl_dpnp_empty(
     )
 
     def codegen(context, builder, sig, args):
-        ary, _, _ = alloc_empty_arrayobj(context, builder, sig, args)
+        qref_payload: _QueueRefPayload = _get_queue_ref(
+            context, builder, sig, args
+        )
+
+        ary = alloc_empty_arrayobj(
+            context, builder, sig, qref_payload.queue_ref, args
+        )
+
+        if qref_payload.py_dpctl_sycl_queue_addr:
+            qref_payload.pyapi.decref(qref_payload.py_dpctl_sycl_queue_addr)
+
         return ary._getvalue()
 
     return sig, codegen
@@ -432,13 +498,24 @@ def impl_dpnp_zeros(
     )
 
     def codegen(context, builder, sig, args):
-        ary, arrtype, queue_ref = alloc_empty_arrayobj(
+        qref_payload: _QueueRefPayload = _get_queue_ref(
             context, builder, sig, args
         )
+        ary = alloc_empty_arrayobj(
+            context, builder, sig, qref_payload.queue_ref, args
+        )
         fill_value = context.get_constant(types.intp, 0)
         ary, _ = fill_arrayobj(
-            context, builder, ary, arrtype, queue_ref, fill_value
+            context,
+            builder,
+            ary,
+            sig.return_type,
+            qref_payload.queue_ref,
+            fill_value,
         )
+        if qref_payload.py_dpctl_sycl_queue_addr:
+            qref_payload.pyapi.decref(qref_payload.py_dpctl_sycl_queue_addr)
+
         return ary._getvalue()
 
     return sig, codegen
@@ -493,13 +570,24 @@ def impl_dpnp_ones(
     )
 
     def codegen(context, builder, sig, args):
-        ary, arrtype, queue_ref = alloc_empty_arrayobj(
+        qref_payload: _QueueRefPayload = _get_queue_ref(
             context, builder, sig, args
         )
+        ary = alloc_empty_arrayobj(
+            context, builder, sig, qref_payload.queue_ref, args
+        )
         fill_value = context.get_constant(types.intp, 1)
         ary, _ = fill_arrayobj(
-            context, builder, ary, arrtype, queue_ref, fill_value
+            context,
+            builder,
+            ary,
+            sig.return_type,
+            qref_payload.queue_ref,
+            fill_value,
         )
+        if qref_payload.py_dpctl_sycl_queue_addr:
+            qref_payload.pyapi.decref(qref_payload.py_dpctl_sycl_queue_addr)
+
         return ary._getvalue()
 
     return sig, codegen
@@ -561,13 +649,24 @@ def impl_dpnp_full(
     )
 
     def codegen(context, builder, sig, args):
-        ary, arrtype, queue_ref = alloc_empty_arrayobj(
+        qref_payload: _QueueRefPayload = _get_queue_ref(
             context, builder, sig, args
         )
+        ary = alloc_empty_arrayobj(
+            context, builder, sig, qref_payload.queue_ref, args
+        )
         fill_value = context.get_argument_value(builder, sig.args[1], args[1])
         ary, _ = fill_arrayobj(
-            context, builder, ary, arrtype, queue_ref, fill_value
+            context,
+            builder,
+            ary,
+            sig.return_type,
+            qref_payload.queue_ref,
+            fill_value,
         )
+        if qref_payload.py_dpctl_sycl_queue_addr:
+            qref_payload.pyapi.decref(qref_payload.py_dpctl_sycl_queue_addr)
+
         return ary._getvalue()
 
     return signature, codegen
@@ -629,9 +728,17 @@ def impl_dpnp_empty_like(
     )
 
     def codegen(context, builder, sig, args):
-        ary, _, _ = alloc_empty_arrayobj(
-            context, builder, sig, args, is_like=True
+        qref_payload: _QueueRefPayload = _get_queue_ref(
+            context, builder, sig, args
+        )
+
+        ary = alloc_empty_arrayobj(
+            context, builder, sig, qref_payload.queue_ref, args, is_like=True
         )
+
+        if qref_payload.py_dpctl_sycl_queue_addr:
+            qref_payload.pyapi.decref(qref_payload.py_dpctl_sycl_queue_addr)
+
         return ary._getvalue()
 
     return sig, codegen
@@ -693,13 +800,24 @@ def impl_dpnp_zeros_like(
     )
 
     def codegen(context, builder, sig, args):
-        ary, arrtype, queue_ref = alloc_empty_arrayobj(
-            context, builder, sig, args, is_like=True
+        qref_payload: _QueueRefPayload = _get_queue_ref(
+            context, builder, sig, args
+        )
+        ary = alloc_empty_arrayobj(
+            context, builder, sig, qref_payload.queue_ref, args, is_like=True
         )
         fill_value = context.get_constant(types.intp, 0)
         ary, _ = fill_arrayobj(
-            context, builder, ary, arrtype, queue_ref, fill_value
+            context,
+            builder,
+            ary,
+            sig.return_type,
+            qref_payload.queue_ref,
+            fill_value,
         )
+        if qref_payload.py_dpctl_sycl_queue_addr:
+            qref_payload.pyapi.decref(qref_payload.py_dpctl_sycl_queue_addr)
+
         return ary._getvalue()
 
     return sig, codegen
@@ -761,13 +879,24 @@ def impl_dpnp_ones_like(
     )
 
     def codegen(context, builder, sig, args):
-        ary, arrtype, queue_ref = alloc_empty_arrayobj(
-            context, builder, sig, args, is_like=True
+        qref_payload: _QueueRefPayload = _get_queue_ref(
+            context, builder, sig, args
+        )
+        ary = alloc_empty_arrayobj(
+            context, builder, sig, qref_payload.queue_ref, args, is_like=True
         )
         fill_value = context.get_constant(types.intp, 1)
         ary, _ = fill_arrayobj(
-            context, builder, ary, arrtype, queue_ref, fill_value
+            context,
+            builder,
+            ary,
+            sig.return_type,
+            qref_payload.queue_ref,
+            fill_value,
         )
+        if qref_payload.py_dpctl_sycl_queue_addr:
+            qref_payload.pyapi.decref(qref_payload.py_dpctl_sycl_queue_addr)
+
         return ary._getvalue()
 
     return sig, codegen
@@ -833,13 +962,24 @@ def impl_dpnp_full_like(
     )
 
     def codegen(context, builder, sig, args):
-        ary, arrtype, queue_ref = alloc_empty_arrayobj(
-            context, builder, sig, args, is_like=True
+        qref_payload: _QueueRefPayload = _get_queue_ref(
+            context, builder, sig, args
+        )
+        ary = alloc_empty_arrayobj(
+            context, builder, sig, qref_payload.queue_ref, args, is_like=True
         )
         fill_value = context.get_argument_value(builder, sig.args[1], args[1])
         ary, _ = fill_arrayobj(
-            context, builder, ary, arrtype, queue_ref, fill_value
+            context,
+            builder,
+            ary,
+            sig.return_type,
+            qref_payload.queue_ref,
+            fill_value,
         )
+        if qref_payload.py_dpctl_sycl_queue_addr:
+            qref_payload.pyapi.decref(qref_payload.py_dpctl_sycl_queue_addr)
+
         return ary._getvalue()
 
     return signature, codegen
diff --git a/numba_dpex/dpnp_iface/arrayobj.py b/numba_dpex/dpnp_iface/arrayobj.py
index bc9507cd82..da73ac8ac7 100644
--- a/numba_dpex/dpnp_iface/arrayobj.py
+++ b/numba_dpex/dpnp_iface/arrayobj.py
@@ -153,78 +153,6 @@ def _parse_device_filter_string(device):
         )
 
 
-def _parse_sycl_queue(sycl_queue):
-    return (
-        (
-            None
-            if isinstance(sycl_queue, types.misc.NoneType)
-            else sycl_queue.sycl_queue
-        )
-        if not isinstance(sycl_queue, types.misc.Omitted)
-        else sycl_queue
-    )
-
-
-def build_dpnp_ndarray(
-    ndim,
-    layout="C",
-    dtype=None,
-    usm_type="device",
-    device=None,
-    sycl_queue=None,
-):
-    """Constructs `DpnpNdArray` from the parameters provided.
-
-    Args:
-        ndim (int): The dimension of the array.
-        layout ("C", or F"): memory layout for the array. Default: "C".
-        dtype (numba.core.types.functions.NumberClass, optional):
-            Data type of the array. Can be typestring, a `numpy.dtype`
-            object, `numpy` char string, or a numpy scalar type.
-            Default: None.
-        usm_type (numba.core.types.misc.StringLiteral, optional):
-            The type of SYCL USM allocation for the output array.
-            Allowed values are "device"|"shared"|"host".
-            Default: `"device"`.
-        device (optional): array API concept of device where the
-            output array is created. `device` can be `None`, a oneAPI
-            filter selector string, an instance of :class:`dpctl.SyclDevice`
-            corresponding to a non-partitioned SYCL device, an instance of
-            :class:`dpctl.SyclQueue`, or a `Device` object returnedby
-            `dpctl.tensor.usm_array.device`. Default: `None`.
-        sycl_queue (:class:`numba_dpex.core.types.dpctl_types.DpctlSyclQueue`,
-            optional): The SYCL queue to use for output array allocation and
-            copying. sycl_queue and device are exclusive keywords, i.e. use
-            one or another. If both are specified, a TypeError is raised. If
-            both are None, a cached queue targeting default-selected device
-            is used for allocation and copying. Default: `None`.
-
-    Raises:
-        errors.TypingError: If both `device` and `sycl_queue` are provided.
-
-    Returns:
-        DpnpNdArray: The Numba type to represent an dpnp.ndarray.
-            The type has the same structure as USMNdArray used to
-            represent dpctl.tensor.usm_ndarray.
-    """
-
-    # If a dtype value was passed in, then try to convert it to the
-    # corresponding Numba type. If None was passed, the default, then pass None
-    # to the DpnpNdArray constructor. The default dtype will be derived based
-    # on the behavior defined in dpctl.tensor.usm_ndarray.
-
-    ret_ty = DpnpNdArray(
-        ndim=ndim,
-        layout=layout,
-        dtype=dtype,
-        usm_type=usm_type,
-        device=device,
-        queue=sycl_queue,
-    )
-
-    return ret_ty
-
-
 # =========================================================================
 #                       Dpnp array constructor overloads
 # =========================================================================
@@ -284,17 +212,17 @@ def ol_dpnp_empty(
     _layout = _parse_layout(order)
     _usm_type = _parse_usm_type(usm_type) if usm_type else "device"
     _device = _parse_device_filter_string(device) if device else None
-    _sycl_queue = _parse_sycl_queue(sycl_queue) if sycl_queue else None
 
     if _ndim:
-        ret_ty = build_dpnp_ndarray(
-            _ndim,
+        ret_ty = DpnpNdArray(
+            ndim=_ndim,
             layout=_layout,
             dtype=_dtype,
             usm_type=_usm_type,
             device=_device,
-            sycl_queue=_sycl_queue,
+            queue=sycl_queue,
         )
+
         if ret_ty:
 
             def impl(
@@ -381,16 +309,15 @@ def ol_dpnp_zeros(
     _layout = _parse_layout(order)
     _usm_type = _parse_usm_type(usm_type) if usm_type else "device"
     _device = _parse_device_filter_string(device) if device else None
-    _sycl_queue = _parse_sycl_queue(sycl_queue) if sycl_queue else None
 
     if _ndim:
-        ret_ty = build_dpnp_ndarray(
-            _ndim,
+        ret_ty = DpnpNdArray(
+            ndim=_ndim,
             layout=_layout,
             dtype=_dtype,
             usm_type=_usm_type,
             device=_device,
-            sycl_queue=_sycl_queue,
+            queue=sycl_queue,
         )
         if ret_ty:
 
@@ -476,16 +403,15 @@ def ol_dpnp_ones(
     _layout = _parse_layout(order)
     _usm_type = _parse_usm_type(usm_type) if usm_type else "device"
     _device = _parse_device_filter_string(device) if device else None
-    _sycl_queue = _parse_sycl_queue(sycl_queue) if sycl_queue else None
 
     if _ndim:
-        ret_ty = build_dpnp_ndarray(
-            _ndim,
+        ret_ty = DpnpNdArray(
+            ndim=_ndim,
             layout=_layout,
             dtype=_dtype,
             usm_type=_usm_type,
             device=_device,
-            sycl_queue=_sycl_queue,
+            queue=sycl_queue,
         )
         if ret_ty:
 
@@ -582,16 +508,15 @@ def ol_dpnp_full(
     _layout = _parse_layout(order)
     _usm_type = _parse_usm_type(usm_type) if usm_type else "device"
     _device = _parse_device_filter_string(device) if device else None
-    _sycl_queue = _parse_sycl_queue(sycl_queue) if sycl_queue else None
 
     if _ndim:
-        ret_ty = build_dpnp_ndarray(
-            _ndim,
+        ret_ty = DpnpNdArray(
+            ndim=_ndim,
             layout=_layout,
             dtype=_dtype,
             usm_type=_usm_type,
             device=_device,
-            sycl_queue=_sycl_queue,
+            queue=sycl_queue,
         )
         if ret_ty:
 
@@ -694,18 +619,17 @@ def ol_dpnp_empty_like(
 
     _ndim = _parse_dim(x1)
     _dtype = x1.dtype if isinstance(x1, types.Array) else _parse_dtype(dtype)
-    _order = x1.layout if order is None else order
+    _layout = x1.layout if order is None else order
     _usm_type = _parse_usm_type(usm_type) if usm_type else "device"
     _device = _parse_device_filter_string(device) if device else None
-    _sycl_queue = _parse_sycl_queue(sycl_queue) if sycl_queue else None
 
-    ret_ty = build_dpnp_ndarray(
-        _ndim,
-        layout=_order,
+    ret_ty = DpnpNdArray(
+        ndim=_ndim,
+        layout=_layout,
         dtype=_dtype,
         usm_type=_usm_type,
         device=_device,
-        sycl_queue=_sycl_queue,
+        queue=sycl_queue,
     )
 
     if ret_ty:
@@ -723,7 +647,7 @@ def impl(
             return impl_dpnp_empty_like(
                 x1,
                 _dtype,
-                _order,
+                _layout,
                 subok,
                 shape,
                 _device,
@@ -807,18 +731,17 @@ def ol_dpnp_zeros_like(
 
     _ndim = _parse_dim(x1)
     _dtype = x1.dtype if isinstance(x1, types.Array) else _parse_dtype(dtype)
-    _order = x1.layout if order is None else order
+    _layout = x1.layout if order is None else order
     _usm_type = _parse_usm_type(usm_type) if usm_type else "device"
     _device = _parse_device_filter_string(device) if device else None
-    _sycl_queue = _parse_sycl_queue(sycl_queue) if sycl_queue else None
 
-    ret_ty = build_dpnp_ndarray(
-        _ndim,
-        layout=_order,
+    ret_ty = DpnpNdArray(
+        ndim=_ndim,
+        layout=_layout,
         dtype=_dtype,
         usm_type=_usm_type,
         device=_device,
-        sycl_queue=_sycl_queue,
+        queue=sycl_queue,
     )
     if ret_ty:
 
@@ -835,7 +758,7 @@ def impl(
             return impl_dpnp_zeros_like(
                 x1,
                 _dtype,
-                _order,
+                _layout,
                 subok,
                 shape,
                 _device,
@@ -919,19 +842,19 @@ def ol_dpnp_ones_like(
 
     _ndim = _parse_dim(x1)
     _dtype = x1.dtype if isinstance(x1, types.Array) else _parse_dtype(dtype)
-    _order = x1.layout if order is None else order
+    _layout = x1.layout if order is None else order
     _usm_type = _parse_usm_type(usm_type) if usm_type else "device"
     _device = _parse_device_filter_string(device) if device else None
-    _sycl_queue = _parse_sycl_queue(sycl_queue) if sycl_queue else None
 
-    ret_ty = build_dpnp_ndarray(
-        _ndim,
-        layout=_order,
+    ret_ty = DpnpNdArray(
+        ndim=_ndim,
+        layout=_layout,
         dtype=_dtype,
         usm_type=_usm_type,
         device=_device,
-        sycl_queue=_sycl_queue,
+        queue=sycl_queue,
     )
+
     if ret_ty:
 
         def impl(
@@ -947,7 +870,7 @@ def impl(
             return impl_dpnp_ones_like(
                 x1,
                 _dtype,
-                _order,
+                _layout,
                 subok,
                 shape,
                 _device,
@@ -1040,18 +963,17 @@ def ol_dpnp_full_like(
         if isinstance(x1, types.Array)
         else (_parse_dtype(dtype) if dtype is not None else fill_value)
     )
-    _order = x1.layout if order is None else order
+    _layout = x1.layout if order is None else order
     _usm_type = _parse_usm_type(usm_type) if usm_type else "device"
     _device = _parse_device_filter_string(device) if device else None
-    _sycl_queue = _parse_sycl_queue(sycl_queue) if sycl_queue else None
 
-    ret_ty = build_dpnp_ndarray(
-        _ndim,
-        layout=_order,
+    ret_ty = DpnpNdArray(
+        ndim=_ndim,
+        layout=_layout,
         dtype=_dtype,
         usm_type=_usm_type,
         device=_device,
-        sycl_queue=_sycl_queue,
+        queue=sycl_queue,
     )
 
     if ret_ty:
@@ -1071,7 +993,7 @@ def impl(
                 x1,
                 fill_value,
                 _dtype,
-                _order,
+                _layout,
                 subok,
                 shape,
                 _device,
diff --git a/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py b/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py
index 2236860e17..7c0094cb02 100644
--- a/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py
+++ b/numba_dpex/tests/core/types/USMNdArray/test_array_creation_errors.py
@@ -1,61 +1,43 @@
 import dpctl
+import pytest
 
-from numba_dpex.core.types import USMNdArray
-
-
-def test_init():
-    usma = USMNdArray(1, device=None, queue=None)
-    assert usma.dtype.name == "float64"
-    assert usma.ndim == 1
-    assert usma.layout == "C"
-    assert usma.addrspace == 1
-    assert usma.usm_type == "device"
-    assert (
-        str(usma.queue.sycl_device.device_type) == "device_type.cpu"
-        or str(usma.queue.sycl_device.device_type) == "device_type.gpu"
-    )
-
-    device = dpctl.SyclDevice().filter_string
-
-    usma = USMNdArray(1, device=device, queue=None)
-    assert usma.dtype.name == "float64"
-    assert usma.ndim == 1
-    assert usma.layout == "C"
-    assert usma.addrspace == 1
-    assert usma.usm_type == "device"
-    assert (
-        str(usma.queue.sycl_device.device_type) == "device_type.cpu"
-        or str(usma.queue.sycl_device.device_type) == "device_type.gpu"
-    )
-
-    # usma = USMNdArray(1, device="gpu", queue=None)
-    # assert usma.dtype.name == "int64"
-    # assert usma.ndim == 1
-    # assert usma.layout == "C"
-    # assert usma.addrspace == 1
-    # assert usma.usm_type == "device"
-    # assert str(usma.queue.sycl_device.device_type) == "device_type.gpu"
-
-    queue = dpctl.SyclQueue()
-    usma = USMNdArray(1, device=None, queue=queue)
-    assert usma.dtype.name == "float64"
-    assert usma.ndim == 1
-    assert usma.layout == "C"
-    assert usma.addrspace == 1
-    assert usma.usm_type == "device"
-    assert usma.queue.addressof_ref() > 0
-
-    try:
-        usma = USMNdArray(1, device=device, queue=queue)
-    except Exception as e:
-        assert "exclusive keywords" in str(e)
-
-    try:
-        usma = USMNdArray(1, queue=0)
-    except Exception as e:
-        assert "queue keyword arg" in str(e)
-
-    try:
-        usma = USMNdArray(1, device=0)
-    except Exception as e:
-        assert "SYCL filter selector" in str(e)
+from numba_dpex.core.types import USMNdArray, dpctl_types
+
+
+def test_usmndarray_negative_tests():
+    default_device = dpctl.SyclDevice().filter_string
+
+    usmarr1 = USMNdArray(1, device=None, queue=None)
+    assert usmarr1.dtype.name == "float64"
+    assert usmarr1.ndim == 1
+    assert usmarr1.layout == "C"
+    assert usmarr1.addrspace == 1
+    assert usmarr1.usm_type == "device"
+
+    assert usmarr1.queue.sycl_device == default_device
+
+    usmarr2 = USMNdArray(1, device=default_device, queue=None)
+    assert usmarr2.dtype.name == "float64"
+    assert usmarr2.ndim == 1
+    assert usmarr2.layout == "C"
+    assert usmarr2.addrspace == 1
+    assert usmarr2.usm_type == "device"
+    assert usmarr2.queue.sycl_device == default_device
+
+    queue = dpctl_types.DpctlSyclQueue(dpctl.SyclQueue())
+
+    usmarr3 = USMNdArray(1, device=None, queue=queue)
+    assert usmarr3.dtype.name == "float64"
+    assert usmarr3.ndim == 1
+    assert usmarr3.layout == "C"
+    assert usmarr3.addrspace == 1
+    assert usmarr3.usm_type == "device"
+
+    with pytest.raises(TypeError):
+        USMNdArray(1, device=default_device, queue=queue)
+
+    with pytest.raises(TypeError):
+        USMNdArray(1, queue=0)
+
+    with pytest.raises(TypeError):
+        USMNdArray(1, device=0)
diff --git a/numba_dpex/tests/core/types/USMNdArray/test_usm_ndarray_creation.py b/numba_dpex/tests/core/types/USMNdArray/test_usm_ndarray_creation.py
index 8767cdc3cd..a6673b5a64 100644
--- a/numba_dpex/tests/core/types/USMNdArray/test_usm_ndarray_creation.py
+++ b/numba_dpex/tests/core/types/USMNdArray/test_usm_ndarray_creation.py
@@ -1,7 +1,7 @@
 import dpctl
 import pytest
 
-from numba_dpex.core.types import USMNdArray
+from numba_dpex.core.types import DpctlSyclQueue, USMNdArray
 
 """Negative tests for expected exceptions raised during USMNdArray creation.
 
@@ -49,24 +49,26 @@ def test_type_creation_with_device():
 
     if usma.queue != cached_queue:
         pytest.xfail(
-            "Returned queue does not have the same queue as cached against the device."
+            "Returned queue does not have the same queue as cached "
+            "against the device."
         )
 
 
 def test_type_creation_with_queue():
     """Tests creating a USMNdArray with a queue arg and no device"""
-    queue = dpctl.SyclQueue()
-    usma = USMNdArray(1, queue=queue)
+    ty_queue = DpctlSyclQueue(dpctl.SyclQueue())
+    usma = USMNdArray(1, queue=ty_queue)
 
     assert usma.ndim == 1
     assert usma.layout == "C"
     assert usma.addrspace == 1
     assert usma.usm_type == "device"
 
-    assert usma.device == queue.sycl_device.filter_string
-    if usma.queue != queue:
+    assert usma.device == ty_queue.sycl_device
+    if usma.queue != ty_queue:
         pytest.xfail(
-            "Returned queue does not have the same queue as the one passed to the dpnp function."
+            "Returned queue does not have the same queue as the one passed "
+            "to the dpnp function."
         )
 
 
diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty.py
index b2c3c6fcda..3bedc9e142 100644
--- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty.py
+++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty.py
@@ -124,9 +124,6 @@ def func(shape, queue):
         c = dpnp.empty(shape, sycl_queue=queue, device=device)
         return c
 
-    try:
+    with pytest.raises(errors.TypingError):
         queue = dpctl.SyclQueue()
         func(10, queue)
-    except Exception as e:
-        assert isinstance(e, errors.TypingError)
-        assert "`device` and `sycl_queue` are exclusive keywords" in str(e)
diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py
index bcde1e762a..b92a4b38be 100644
--- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py
+++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py
@@ -176,11 +176,5 @@ def func(shape):
         x = dpnp.empty_like(shape)
         return x
 
-    try:
+    with pytest.raises(errors.TypingError):
         func(shape)
-    except Exception as e:
-        assert isinstance(e, errors.TypingError)
-        assert (
-            "No implementation of function Function(<function empty_like"
-            in str(e)
-        )
diff --git a/numba_dpex/tests/dpjit_tests/test_dpjit_reduction.py b/numba_dpex/tests/dpjit_tests/test_dpjit_reduction.py
index aafc953d16..06fb8f2671 100644
--- a/numba_dpex/tests/dpjit_tests/test_dpjit_reduction.py
+++ b/numba_dpex/tests/dpjit_tests/test_dpjit_reduction.py
@@ -65,7 +65,6 @@ def input_arrays(request):
     return a, b
 
 
-@pytest.mark.skip(reason="Gives segfault, need to fix.")
 def test_dpjit_array_arg_types_add1(input_arrays):
     """Tests passing float and int type dpnp arrays to a dpjit
     prange function.
diff --git a/numba_dpex/tests/kernel_tests/test_scalar_arg_types.py b/numba_dpex/tests/kernel_tests/test_scalar_arg_types.py
index 5d6ffda255..158ba6e320 100644
--- a/numba_dpex/tests/kernel_tests/test_scalar_arg_types.py
+++ b/numba_dpex/tests/kernel_tests/test_scalar_arg_types.py
@@ -46,7 +46,8 @@ def input_arrays(request):
 
 
 def test_numeric_kernel_arg_types1(input_arrays):
-    """Tests passing float, int and complex type dpnp arrays to a kernel function.
+    """Tests passing float, int and complex type dpnp arrays to a kernel
+    function.
 
     Args:
         input_arrays (dpnp.ndarray): Array arguments to be passed to a kernel.