Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed CLK_ prefix from mem fences #844

Merged
merged 1 commit into from
Dec 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/user_guides/kernel_programming_guide/synchronization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ barrier, at which point it returns control to all its callers.

``numba_dpex.barrier()`` supports two memory fence options:

- ``numba_dpex.CLK_GLOBAL_MEM_FENCE``: The barrier function will queue a memory
- ``numba_dpex.GLOBAL_MEM_FENCE``: The barrier function will queue a memory
fence to ensure correct ordering of memory operations to global memory. This
option can be useful when work-items, for example, write to buffer or image
objects and then need to read the updated data. Passing no arguments to
Expand All @@ -20,7 +20,7 @@ barrier, at which point it returns control to all its callers.
.. literalinclude:: ../../../numba_dpex/examples/barrier.py
:pyobject: no_arg_barrier_support

- ``numba_dpex.CLK_LOCAL_MEM_FENCE``: The barrier function will either flush
- ``numba_dpex.LOCAL_MEM_FENCE``: The barrier function will either flush
any variables stored in local memory or queue a memory fence to ensure
correct ordering of memory operations to local memory. For example,

Expand Down
4 changes: 2 additions & 2 deletions numba_dpex/device_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

# Re export
from .ocl.stubs import (
CLK_GLOBAL_MEM_FENCE,
CLK_LOCAL_MEM_FENCE,
GLOBAL_MEM_FENCE,
LOCAL_MEM_FENCE,
atomic,
barrier,
get_global_id,
Expand Down
2 changes: 1 addition & 1 deletion numba_dpex/examples/kernel_private_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def private_memory_kernel(A):

# preload
memory[0] = i
numba_dpex.barrier(numba_dpex.CLK_LOCAL_MEM_FENCE) # local mem fence
numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE) # local mem fence

# memory will not hold correct deterministic result if it is not
# private to each thread.
Expand Down
2 changes: 1 addition & 1 deletion numba_dpex/examples/sum_reduction_ocl.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def sum_reduction_kernel(A, partial_sums):
stride = group_size // 2
while stride > 0:
# Wait for each pairwise addition within the work-group to finish
dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE)
dpex.barrier(dpex.LOCAL_MEM_FENCE)

# Add elements 2 by 2 between local_id and local_id + stride
if local_id < stride:
Expand Down
2 changes: 1 addition & 1 deletion numba_dpex/examples/sum_reduction_recursive_ocl.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def sum_reduction_kernel(A, input_size, partial_sums):
stride = group_size // 2
while stride > 0:
# Wait for each pairwise addition within the work-group to finish
dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE)
dpex.barrier(dpex.LOCAL_MEM_FENCE)

# Add elements 2 by 2 between local_id and local_id + stride
if local_id < stride:
Expand Down
4 changes: 2 additions & 2 deletions numba_dpex/ocl/oclimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def barrier_no_arg_impl(context, builder, sig, args):
barrier = _declare_function(
context, builder, "barrier", sig, ["unsigned int"]
)
flags = context.get_constant(types.uint32, stubs.CLK_GLOBAL_MEM_FENCE)
flags = context.get_constant(types.uint32, stubs.GLOBAL_MEM_FENCE)
builder.call(barrier, [flags])
return _void_value

Expand All @@ -175,7 +175,7 @@ def sub_group_barrier_impl(context, builder, sig, args):
barrier = _declare_function(
context, builder, "barrier", sig, ["unsigned int"]
)
flags = context.get_constant(types.uint32, stubs.CLK_LOCAL_MEM_FENCE)
flags = context.get_constant(types.uint32, stubs.LOCAL_MEM_FENCE)
builder.call(barrier, [flags])
return _void_value

Expand Down
4 changes: 2 additions & 2 deletions numba_dpex/ocl/stubs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
_stub_error = NotImplementedError("This is a stub.")

# mem fence
CLK_LOCAL_MEM_FENCE = 0x1
CLK_GLOBAL_MEM_FENCE = 0x2
LOCAL_MEM_FENCE = 0x1
GLOBAL_MEM_FENCE = 0x2


def get_global_id(*args, **kargs):
Expand Down
4 changes: 2 additions & 2 deletions numba_dpex/tests/kernel_tests/test_atomic_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ def get_func_local(op_type, dtype):
def f(a):
lm = dpex.local.array(1, dtype)
lm[0] = a[0]
dpex.barrier(dpex.CLK_GLOBAL_MEM_FENCE)
dpex.barrier(dpex.GLOBAL_MEM_FENCE)
op(lm, 0, 1)
dpex.barrier(dpex.CLK_GLOBAL_MEM_FENCE)
dpex.barrier(dpex.GLOBAL_MEM_FENCE)
a[0] = lm[0]

return f
Expand Down
4 changes: 2 additions & 2 deletions numba_dpex/tests/kernel_tests/test_barrier.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_proper_lowering(filter_str):
def twice(A):
i = dpex.get_global_id(0)
d = A[i]
dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE) # local mem fence
dpex.barrier(dpex.LOCAL_MEM_FENCE) # local mem fence
A[i] = d * 2

N = 256
Expand Down Expand Up @@ -66,7 +66,7 @@ def reverse_array(A):
# preload
lm[i] = A[i]
# either a local or a global barrier works here, as there is only one work-group
dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE) # local mem fence
dpex.barrier(dpex.LOCAL_MEM_FENCE) # local mem fence
# write
A[i] += lm[blocksize - 1 - i]

Expand Down
2 changes: 1 addition & 1 deletion numba_dpex/tests/kernel_tests/test_private_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def private_memory_kernel(A):
i = numba_dpex.get_global_id(0)
prvt_mem = numba_dpex.private.array(shape=1, dtype=np.float32)
prvt_mem[0] = i
numba_dpex.barrier(numba_dpex.CLK_LOCAL_MEM_FENCE) # local mem fence
numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE) # local mem fence
A[i] = prvt_mem[0] * 2

N = 64
Expand Down