Merge pull request #844 from IntelPython/samaid_cleanup
Removed CLK_ prefix from mem fences
samaid authored Dec 9, 2022
2 parents 3cef533 + c8f2180 commit 46bef56
Showing 10 changed files with 16 additions and 16 deletions.
4 changes: 2 additions & 2 deletions docs/user_guides/kernel_programming_guide/synchronization.rst
@@ -10,7 +10,7 @@ barrier, at which point it returns control to all its callers.
 
 ``numba_dpex.barrier()`` supports two memory fence options:
 
-- ``numba_dpex.CLK_GLOBAL_MEM_FENCE``: The barrier function will queue a memory
+- ``numba_dpex.GLOBAL_MEM_FENCE``: The barrier function will queue a memory
   fence to ensure correct ordering of memory operations to global memory. Using
   the option can be useful when work-items, for example, write to buffer or
   image objects and then want to read the updated data. Passing no arguments to
@@ -20,7 +20,7 @@ barrier, at which point it returns control to all its callers.
 
 .. literalinclude:: ../../../numba_dpex/examples/barrier.py
    :pyobject: no_arg_barrier_support
 
-- ``numba_dpex.CLK_LOCAL_MEM_FENCE``: The barrier function will either flush
+- ``numba_dpex.LOCAL_MEM_FENCE``: The barrier function will either flush
   any variables stored in local memory or queue a memory fence to ensure
   correct ordering of memory operations to local memory. For example,
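This hunk ends just before the documentation's own literalinclude example. As a stand-in illustration only, a kernel using the renamed constant might look like the sketch below; the kernel name and array update are hypothetical, not taken from the repository:

import numba_dpex as dpex

@dpex.kernel
def double_then_fence(a):  # hypothetical kernel, not repository code
    i = dpex.get_global_id(0)
    a[i] = a[i] * 2
    # GLOBAL_MEM_FENCE (formerly CLK_GLOBAL_MEM_FENCE) orders the
    # preceding global-memory write with respect to other work-items.
    dpex.barrier(dpex.GLOBAL_MEM_FENCE)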
4 changes: 2 additions & 2 deletions numba_dpex/device_init.py
@@ -4,8 +4,8 @@
 
 # Re export
 from .ocl.stubs import (
-    CLK_GLOBAL_MEM_FENCE,
-    CLK_LOCAL_MEM_FENCE,
+    GLOBAL_MEM_FENCE,
+    LOCAL_MEM_FENCE,
     atomic,
     barrier,
     get_global_id,
2 changes: 1 addition & 1 deletion numba_dpex/examples/kernel_private_memory.py
@@ -23,7 +23,7 @@ def private_memory_kernel(A):
 
     # preload
     memory[0] = i
-    numba_dpex.barrier(numba_dpex.CLK_LOCAL_MEM_FENCE)  # local mem fence
+    numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE)  # local mem fence
 
     # memory will not hold correct deterministic result if it is not
     # private to each thread.
2 changes: 1 addition & 1 deletion numba_dpex/examples/sum_reduction_ocl.py
@@ -29,7 +29,7 @@ def sum_reduction_kernel(A, partial_sums):
     stride = group_size // 2
     while stride > 0:
         # Waiting for each 2x2 addition into given workgroup
-        dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE)
+        dpex.barrier(dpex.LOCAL_MEM_FENCE)
 
         # Add elements 2 by 2 between local_id and local_id + stride
         if local_id < stride:
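This hunk and the next change only the fence argument inside a work-group tree reduction. For readers unfamiliar with the pattern, a self-contained sketch follows; the group size, kernel name, and the final partial-sums write are illustrative assumptions, not the repository's exact code:

import numpy as np
import numba_dpex as dpex

@dpex.kernel
def sum_reduction_sketch(a, partial_sums):  # hypothetical sketch
    local_id = dpex.get_local_id(0)
    group_id = dpex.get_group_id(0)
    group_size = dpex.get_local_size(0)

    # Stage one element per work-item in local memory (assumes group size 64).
    lm = dpex.local.array(64, np.float32)
    lm[local_id] = a[dpex.get_global_id(0)]

    stride = group_size // 2
    while stride > 0:
        # Wait until every work-item's previous write to lm is visible.
        dpex.barrier(dpex.LOCAL_MEM_FENCE)
        if local_id < stride:
            lm[local_id] += lm[local_id + stride]
        stride //= 2

    if local_id == 0:
        partial_sums[group_id] = lm[0]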
2 changes: 1 addition & 1 deletion numba_dpex/examples/sum_reduction_recursive_ocl.py
@@ -34,7 +34,7 @@ def sum_reduction_kernel(A, input_size, partial_sums):
     stride = group_size // 2
     while stride > 0:
         # Waiting for each 2x2 addition into given workgroup
-        dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE)
+        dpex.barrier(dpex.LOCAL_MEM_FENCE)
 
         # Add elements 2 by 2 between local_id and local_id + stride
         if local_id < stride:
4 changes: 2 additions & 2 deletions numba_dpex/ocl/oclimpl.py
@@ -153,7 +153,7 @@ def barrier_no_arg_impl(context, builder, sig, args):
     barrier = _declare_function(
         context, builder, "barrier", sig, ["unsigned int"]
     )
-    flags = context.get_constant(types.uint32, stubs.CLK_GLOBAL_MEM_FENCE)
+    flags = context.get_constant(types.uint32, stubs.GLOBAL_MEM_FENCE)
     builder.call(barrier, [flags])
     return _void_value
 
@@ -175,7 +175,7 @@ def sub_group_barrier_impl(context, builder, sig, args):
     barrier = _declare_function(
         context, builder, "barrier", sig, ["unsigned int"]
     )
-    flags = context.get_constant(types.uint32, stubs.CLK_LOCAL_MEM_FENCE)
+    flags = context.get_constant(types.uint32, stubs.LOCAL_MEM_FENCE)
     builder.call(barrier, [flags])
     return _void_value
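A detail these two hunks make visible: the no-argument form of barrier is lowered with stubs.GLOBAL_MEM_FENCE, so in kernel code a bare call behaves like passing the global fence explicitly. A hypothetical illustration:

import numba_dpex as dpex

@dpex.kernel
def bare_barrier(a):  # hypothetical illustration
    i = dpex.get_global_id(0)
    a[i] += 1
    # Per barrier_no_arg_impl above, this lowers to the same call as
    # dpex.barrier(dpex.GLOBAL_MEM_FENCE).
    dpex.barrier()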
4 changes: 2 additions & 2 deletions numba_dpex/ocl/stubs.py
@@ -5,8 +5,8 @@
 _stub_error = NotImplementedError("This is a stub.")
 
 # mem fence
-CLK_LOCAL_MEM_FENCE = 0x1
-CLK_GLOBAL_MEM_FENCE = 0x2
+LOCAL_MEM_FENCE = 0x1
+GLOBAL_MEM_FENCE = 0x2
 
 
 def get_global_id(*args, **kargs):
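The numeric values are untouched by the rename and match OpenCL's memory fence flags (CLK_LOCAL_MEM_FENCE == 0x1, CLK_GLOBAL_MEM_FENCE == 0x2), so the flags passed through to the underlying barrier call keep their OpenCL meaning. A quick sanity check, hypothetical rather than one of the repository's tests:

import numba_dpex as dpex

# Only the Python-level names changed; the flag values are the OpenCL ones.
assert dpex.LOCAL_MEM_FENCE == 0x1
assert dpex.GLOBAL_MEM_FENCE == 0x2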
4 changes: 2 additions & 2 deletions numba_dpex/tests/kernel_tests/test_atomic_op.py
@@ -104,9 +104,9 @@ def get_func_local(op_type, dtype):
     def f(a):
         lm = dpex.local.array(1, dtype)
         lm[0] = a[0]
-        dpex.barrier(dpex.CLK_GLOBAL_MEM_FENCE)
+        dpex.barrier(dpex.GLOBAL_MEM_FENCE)
         op(lm, 0, 1)
-        dpex.barrier(dpex.CLK_GLOBAL_MEM_FENCE)
+        dpex.barrier(dpex.GLOBAL_MEM_FENCE)
         a[0] = lm[0]
 
     return f
4 changes: 2 additions & 2 deletions numba_dpex/tests/kernel_tests/test_barrier.py
@@ -19,7 +19,7 @@ def test_proper_lowering(filter_str):
     def twice(A):
         i = dpex.get_global_id(0)
         d = A[i]
-        dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE)  # local mem fence
+        dpex.barrier(dpex.LOCAL_MEM_FENCE)  # local mem fence
         A[i] = d * 2
 
     N = 256
@@ -66,7 +66,7 @@ def reverse_array(A):
     # preload
     lm[i] = A[i]
     # barrier local or global will both work as we only have one work group
-    dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE)  # local mem fence
+    dpex.barrier(dpex.LOCAL_MEM_FENCE)  # local mem fence
     # write
    A[i] += lm[blocksize - 1 - i]
2 changes: 1 addition & 1 deletion numba_dpex/tests/kernel_tests/test_private_memory.py
@@ -19,7 +19,7 @@ def private_memory_kernel(A):
         i = numba_dpex.get_global_id(0)
         prvt_mem = numba_dpex.private.array(shape=1, dtype=np.float32)
         prvt_mem[0] = i
-        numba_dpex.barrier(numba_dpex.CLK_LOCAL_MEM_FENCE)  # local mem fence
+        numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE)  # local mem fence
         A[i] = prvt_mem[0] * 2
 
     N = 64
