Skip to content

Commit

Permalink
Merge pull request #186 from vzhurba01/docstrings
Browse files Browse the repository at this point in the history
Docstrings for cuda.core
  • Loading branch information
vzhurba01 authored Nov 7, 2024
2 parents f953ab3 + 7e25688 commit 72acaaf
Show file tree
Hide file tree
Showing 16 changed files with 615 additions and 75 deletions.
177 changes: 160 additions & 17 deletions cuda_core/cuda/core/experimental/_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,31 @@


class Device:
"""Represent a GPU and act as an entry point for cuda.core features.
This is a singleton object that helps ensure interoperability
across multiple libraries imported in the process to both see
and use the same GPU device.
While acting as the entry point, many other CUDA resources can be
allocated such as streams and buffers. Any :obj:`Context` dependent
resource created through this device, will continue to refer to
this device's context.
Newly returned :obj:`Device` objects are thread-local singletons
for a specified device.
Note
----
Will not initialize the GPU.
Parameters
----------
device_id : int, optional
Device ordinal to return a :obj:`Device` object for.
Default value of `None` returns the currently used device.
"""
__slots__ = ("_id", "_mr", "_has_inited")

def __new__(cls, device_id=None):
Expand Down Expand Up @@ -54,15 +78,29 @@ def _check_context_initialized(self, *args, **kwargs):

@property
def device_id(self) -> int:
"""Return device ordinal."""
return self._id

@property
def pci_bus_id(self) -> str:
    """Return a PCI Bus Id string for this device."""
    # 13-byte buffer: 12 id characters plus the trailing NUL terminator,
    # which the [:12] slice strips before decoding.
    raw = handle_return(cudart.cudaDeviceGetPCIBusId(13, self._id))
    return raw[:12].decode()

@property
def uuid(self) -> str:
"""Return a UUID for the device.
Returns 16-octets identifying the device. If the device is in
MIG mode, returns its MIG UUID which uniquely identifies the
subscribed MIG compute instance.
Note
----
MIG UUID is only returned when the device is in MIG mode and the
driver is not older than CUDA 11.4.
"""
driver_ver = handle_return(cuda.cuDriverGetVersion())
if driver_ver >= 11040:
uuid = handle_return(cuda.cuDeviceGetUuid_v2(self._id))
Expand All @@ -74,19 +112,21 @@ def uuid(self) -> str:

@property
def name(self) -> str:
    """Return the device name."""
    # Use a 256-character buffer to be consistent with the CUDA Runtime
    # (cudaDeviceProp.name is 256 bytes).
    raw = handle_return(cuda.cuDeviceGetName(256, self._id))
    # The buffer is NUL-padded; keep only the bytes before the first NUL.
    return raw.split(b'\0')[0].decode()

@property
def properties(self) -> dict:
    """Return information about the compute-device."""
    # TODO: pythonize the key names
    props = handle_return(cudart.cudaGetDeviceProperties(self._id))
    return props

@property
def compute_capability(self) -> ComputeCapability:
"""Returns a named tuple with 2 fields: major and minor. """
"""Return a named tuple with 2 fields: major and minor."""
major = handle_return(cudart.cudaDeviceGetAttribute(
cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, self._id))
minor = handle_return(cudart.cudaDeviceGetAttribute(
Expand All @@ -96,12 +136,20 @@ def compute_capability(self) -> ComputeCapability:
@property
@precondition(_check_context_initialized)
def context(self) -> Context:
    """Return the current :obj:`Context` associated with this device.

    Note
    ----
    Device must be initialized.
    """
    current_ctx = handle_return(cuda.cuCtxGetCurrent())
    # The precondition guarantees a context was set, so the handle
    # must be non-null here.
    assert int(current_ctx) != 0
    return Context._from_ctx(current_ctx, self._id)

@property
def memory_resource(self) -> MemoryResource:
    """Return the :obj:`MemoryResource` associated with this device."""
    mr = self._mr
    return mr

@memory_resource.setter
Expand All @@ -112,27 +160,53 @@ def memory_resource(self, mr):

@property
def default_stream(self) -> Stream:
    """Return the default CUDA :obj:`Stream` associated with this device.

    The type of default stream returned depends on whether the
    environment variable CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM is
    set: if so, a per-thread default stream is returned; otherwise the
    legacy default stream is returned.
    """
    stream = default_stream()
    return stream

def __int__(self):
"""Return device_id."""
return self._id

def __repr__(self):
    """Return a debug representation including the ordinal and device name."""
    return "<Device {} ({})>".format(self._id, self.name)

def set_current(self, ctx: Context=None) -> Union[Context, None]:
"""
Entry point of this object. Users always start a code by
"""Set device to be used for GPU executions.
Initializes CUDA and sets the calling thread to a valid CUDA
context. By default the primary context is used, but optional `ctx`
parameter can be used to explicitly supply a :obj:`Context` object.
Providing a `ctx` causes the previously set context to be popped and returned.
Parameters
----------
ctx : :obj:`Context`, optional
Optional context to push onto this device's current thread stack.
Returns
-------
Union[:obj:`Context`, None], optional
Popped context.
Examples
--------
Acts as an entry point of this object. Users always start a code by
calling this method, e.g.
>>> from cuda.core.experimental import Device
>>> dev0 = Device(0)
>>> dev0.set_current()
>>> # ... do work on device 0 ...
The optional ctx argument is for advanced users to bind a
CUDA context with the device. In this case, the previously
set context is popped and returned to the user.
"""
if ctx is not None:
if not isinstance(ctx, Context):
Expand Down Expand Up @@ -163,25 +237,94 @@ def set_current(self, ctx: Context=None) -> Union[Context, None]:
self._has_inited = True

def create_context(self, options: ContextOptions = None) -> Context:
    """Create a new :obj:`Context` object.

    Note
    ----
    The newly created context will not be set as current.

    Parameters
    ----------
    options : :obj:`ContextOptions`, optional
        Customizable dataclass for context creation options, e.g.
        affinity or CIG settings.

    Returns
    -------
    :obj:`Context`
        Newly created context object.
    """
    raise NotImplementedError("TODO")

@precondition(_check_context_initialized)
def create_stream(self, obj=None, options: StreamOptions=None) -> Stream:
    """Create a Stream object.

    New stream objects can be created in two different ways:

    1) Create a new CUDA stream with customizable `options`.
    2) Wrap an existing foreign `obj` supporting the __cuda_stream__ protocol.

    Option (2) internally holds a reference to the foreign object so
    that its lifetime is managed.

    Note
    ----
    Device must be initialized.

    Parameters
    ----------
    obj : Any, optional
        Any object supporting the __cuda_stream__ protocol.
    options : :obj:`StreamOptions`, optional
        Customizable dataclass for stream creation options.

    Returns
    -------
    :obj:`Stream`
        Newly created stream object.
    """
    new_stream = Stream._init(obj=obj, options=options)
    return new_stream

@precondition(_check_context_initialized)
def allocate(self, size, stream=None) -> Buffer:
    """Allocate device memory from a specified stream.

    Allocates device memory of `size` bytes on the specified `stream`
    using the memory resource currently associated with this Device.

    Note
    ----
    Device must be initialized.

    Parameters
    ----------
    size : int
        Number of bytes to allocate.
    stream : :obj:`Stream`, optional
        The stream establishing the stream ordering semantic.
        Default value of `None` uses the default stream.

    Returns
    -------
    :obj:`Buffer`
        Newly created buffer object.
    """
    alloc_stream = default_stream() if stream is None else stream
    return self._mr.allocate(size, alloc_stream)

@precondition(_check_context_initialized)
def sync(self):
    """Synchronize the device.

    Note
    ----
    Device must be initialized.
    """
    # Blocks the caller until all previously issued work on this
    # device has completed.
    status = cudart.cudaDeviceSynchronize()
    handle_return(status)
54 changes: 46 additions & 8 deletions cuda_core/cuda/core/experimental/_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,46 @@

@dataclass
class EventOptions:
    """Customizable :obj:`Event` options.

    Attributes
    ----------
    enable_timing : bool, optional
        Event will record timing data. (Default to False)
    busy_waited_sync : bool, optional
        If True, event will use blocking synchronization. When a CPU
        thread calls synchronize, the call will block until the event
        has actually been completed.
        Otherwise, the CPU thread will busy-wait until the event has
        been completed. (Default to False)
    support_ipc : bool, optional
        Event will be suitable for interprocess use.
        Note that enable_timing must be False. (Default to False)
    """
    # Annotated as plain bool: each field defaults to False and is never
    # None, so the previous Optional[bool] annotations were misleading.
    enable_timing: bool = False
    busy_waited_sync: bool = False
    support_ipc: bool = False


class Event:
"""Represent a record at a specific point of execution within a CUDA stream.
Applications can asynchronously record events at any point in
the program. An event keeps a record of all previous work within
the last recorded stream.
Events can be used to monitor device's progress, query completion
of work up to event's record, and help establish dependencies
between GPU work submissions.
Directly creating an :obj:`Event` is not supported due to ambiguity,
and they should instead be created through a :obj:`Stream` object.
"""
__slots__ = ("_handle", "_timing_disabled", "_busy_waited")

def __init__(self):
# minimal requirements for the destructor
self._handle = None
raise NotImplementedError(
"directly creating an Event object can be ambiguous. Please call "
Expand Down Expand Up @@ -51,37 +80,45 @@ def _init(options: Optional[EventOptions]=None):
return self

def __del__(self):
    """Release the underlying event resource via :meth:`close`."""
    self.close()

def close(self):
    """Destroy the event.

    Calling close on an already-closed (or never-created) event is a
    no-op.
    """
    if not self._handle:
        return
    handle_return(cuda.cuEventDestroy(self._handle))
    # Clear the handle so repeated close() calls do nothing.
    self._handle = None

@property
def is_timing_disabled(self) -> bool:
# Check if this instance can be used for the timing purpose.
"""Return True if the event does not record timing data, otherwise False."""
return self._timing_disabled

@property
def is_sync_busy_waited(self) -> bool:
# Check if the event synchronization would keep the CPU busy-waiting.
"""Return True if the event synchronization would keep the CPU busy-waiting, otherwise False."""
return self._busy_waited

@property
def is_ipc_supported(self) -> bool:
    """Return True if this event can be used as an interprocess event, otherwise False."""
    # Not implemented yet upstream.
    raise NotImplementedError("TODO")

def sync(self):
    """Synchronize until the event completes.

    If the event was created with busy_waited_sync, then the calling
    CPU thread will block until the event has been completed by the
    device. Otherwise the CPU thread will busy-wait until the event
    has been completed.
    """
    status = cuda.cuEventSynchronize(self._handle)
    handle_return(status)

@property
def is_done(self) -> bool:
# Return True if all captured works have been completed,
# otherwise False.
"""Return True if all captured works have been completed, otherwise False."""
result, = cuda.cuEventQuery(self._handle)
if result == cuda.CUresult.CUDA_SUCCESS:
return True
Expand All @@ -92,4 +129,5 @@ def is_done(self) -> bool:

@property
def handle(self) -> int:
"""Return the underlying cudaEvent_t pointer address as Python int."""
return int(self._handle)
Loading

0 comments on commit 72acaaf

Please sign in to comment.