Skip to content

Commit

Permalink
Merge pull request #186 from vzhurba01/docstrings
Browse files Browse the repository at this point in the history
Docstrings for cuda.core
  • Loading branch information
vzhurba01 authored Nov 7, 2024
2 parents f953ab3 + 7e25688 commit 72acaaf
Show file tree
Hide file tree
Showing 16 changed files with 615 additions and 75 deletions.
177 changes: 160 additions & 17 deletions cuda_core/cuda/core/experimental/_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,31 @@


class Device:
"""Represent a GPU and act as an entry point for cuda.core features.
This is a singleton object that helps ensure interoperability
across multiple libraries imported in the process to both see
and use the same GPU device.
While acting as the entry point, many other CUDA resources can be
allocated such as streams and buffers. Any :obj:`Context` dependent
resource created through this device, will continue to refer to
this device's context.
Newly returned :obj:`Device` objects are thread-local singletons
for a specified device.
Note
----
Will not initialize the GPU.
Parameters
----------
device_id : int, optional
Device ordinal to return a :obj:`Device` object for.
Default value of `None` returns the currently used device.
"""
__slots__ = ("_id", "_mr", "_has_inited")

def __new__(cls, device_id=None):
Expand Down Expand Up @@ -54,15 +78,29 @@ def _check_context_initialized(self, *args, **kwargs):

@property
def device_id(self) -> int:
"""Return device ordinal."""
return self._id

@property
def pci_bus_id(self) -> str:
    """Return a PCI Bus Id string for this device."""
    # 13-byte buffer: 12 id characters plus the trailing NUL terminator,
    # which the [:12] slice strips before decoding.
    raw = handle_return(cudart.cudaDeviceGetPCIBusId(13, self._id))
    return raw[:12].decode()

@property
def uuid(self) -> str:
"""Return a UUID for the device.
Returns 16-octets identifying the device. If the device is in
MIG mode, returns its MIG UUID which uniquely identifies the
subscribed MIG compute instance.
Note
----
MIG UUID is only returned when the device is in MIG mode and the
driver is not older than CUDA 11.4.
"""
driver_ver = handle_return(cuda.cuDriverGetVersion())
if driver_ver >= 11040:
uuid = handle_return(cuda.cuDeviceGetUuid_v2(self._id))
Expand All @@ -74,19 +112,21 @@ def uuid(self) -> str:

@property
def name(self) -> str:
    """Return the device name."""
    # Use a 256-character buffer to be consistent with the CUDA Runtime
    # (cudaDeviceProp.name is 256 bytes).
    raw = handle_return(cuda.cuDeviceGetName(256, self._id))
    # The buffer is NUL-padded; keep only the bytes before the first NUL.
    return raw.split(b'\0')[0].decode()

@property
def properties(self) -> dict:
    """Return information about the compute-device."""
    # TODO: pythonize the key names
    props = handle_return(cudart.cudaGetDeviceProperties(self._id))
    return props

@property
def compute_capability(self) -> ComputeCapability:
"""Returns a named tuple with 2 fields: major and minor. """
"""Return a named tuple with 2 fields: major and minor."""
major = handle_return(cudart.cudaDeviceGetAttribute(
cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, self._id))
minor = handle_return(cudart.cudaDeviceGetAttribute(
Expand All @@ -96,12 +136,20 @@ def compute_capability(self) -> ComputeCapability:
@property
@precondition(_check_context_initialized)
def context(self) -> Context:
    """Return the current :obj:`Context` associated with this device.

    Note
    ----
    Device must be initialized.
    """
    current_ctx = handle_return(cuda.cuCtxGetCurrent())
    # The precondition guarantees a context was set, so the handle
    # must be non-null here.
    assert int(current_ctx) != 0
    return Context._from_ctx(current_ctx, self._id)

@property
def memory_resource(self) -> MemoryResource:
    """Return the :obj:`MemoryResource` associated with this device."""
    mr = self._mr
    return mr

@memory_resource.setter
Expand All @@ -112,27 +160,53 @@ def memory_resource(self, mr):

@property
def default_stream(self) -> Stream:
    """Return the default CUDA :obj:`Stream` associated with this device.

    The type of default stream returned depends on whether the
    environment variable CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM is
    set: if so, a per-thread default stream is returned; otherwise the
    legacy default stream is returned.
    """
    stream = default_stream()
    return stream

def __int__(self):
"""Return device_id."""
return self._id

def __repr__(self):
    """Return a debug representation including the ordinal and device name."""
    return "<Device {} ({})>".format(self._id, self.name)

def set_current(self, ctx: Context=None) -> Union[Context, None]:
"""
Entry point of this object. Users always start a code by
"""Set device to be used for GPU executions.
Initializes CUDA and sets the calling thread to a valid CUDA
context. By default the primary context is used, but optional `ctx`
parameter can be used to explicitly supply a :obj:`Context` object.
Providing a `ctx` causes the previously set context to be popped and returned.
Parameters
----------
ctx : :obj:`Context`, optional
Optional context to push onto this device's current thread stack.
Returns
-------
Union[:obj:`Context`, None], optional
Popped context.
Examples
--------
Acts as an entry point of this object. Users always start a code by
calling this method, e.g.
>>> from cuda.core.experimental import Device
>>> dev0 = Device(0)
>>> dev0.set_current()
>>> # ... do work on device 0 ...
The optional ctx argument is for advanced users to bind a
CUDA context with the device. In this case, the previously
set context is popped and returned to the user.
"""
if ctx is not None:
if not isinstance(ctx, Context):
Expand Down Expand Up @@ -163,25 +237,94 @@ def set_current(self, ctx: Context=None) -> Union[Context, None]:
self._has_inited = True

def create_context(self, options: ContextOptions = None) -> Context:
    """Create a new :obj:`Context` object.

    Note
    ----
    The newly created context will not be set as current.

    Parameters
    ----------
    options : :obj:`ContextOptions`, optional
        Customizable dataclass for context creation options, e.g.
        affinity or CIG settings.

    Returns
    -------
    :obj:`Context`
        Newly created context object.
    """
    raise NotImplementedError("TODO")

@precondition(_check_context_initialized)
def create_stream(self, obj=None, options: StreamOptions=None) -> Stream:
    """Create a Stream object.

    New stream objects can be created in two different ways:

    1) Create a new CUDA stream with customizable `options`.
    2) Wrap an existing foreign `obj` supporting the __cuda_stream__ protocol.

    Option (2) internally holds a reference to the foreign object so
    that its lifetime is managed.

    Note
    ----
    Device must be initialized.

    Parameters
    ----------
    obj : Any, optional
        Any object supporting the __cuda_stream__ protocol.
    options : :obj:`StreamOptions`, optional
        Customizable dataclass for stream creation options.

    Returns
    -------
    :obj:`Stream`
        Newly created stream object.
    """
    new_stream = Stream._init(obj=obj, options=options)
    return new_stream

@precondition(_check_context_initialized)
def allocate(self, size, stream=None) -> Buffer:
    """Allocate device memory from a specified stream.

    Allocates device memory of `size` bytes on the specified `stream`
    using the memory resource currently associated with this Device.

    Note
    ----
    Device must be initialized.

    Parameters
    ----------
    size : int
        Number of bytes to allocate.
    stream : :obj:`Stream`, optional
        The stream establishing the stream ordering semantic.
        Default value of `None` uses the default stream.

    Returns
    -------
    :obj:`Buffer`
        Newly created buffer object.
    """
    alloc_stream = default_stream() if stream is None else stream
    return self._mr.allocate(size, alloc_stream)

@precondition(_check_context_initialized)
def sync(self):
    """Synchronize the device.

    Note
    ----
    Device must be initialized.
    """
    # Blocks the caller until all previously issued work on this
    # device has completed.
    status = cudart.cudaDeviceSynchronize()
    handle_return(status)
54 changes: 46 additions & 8 deletions cuda_core/cuda/core/experimental/_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,46 @@

@dataclass
class EventOptions:
    """Customizable :obj:`Event` options.

    Attributes
    ----------
    enable_timing : bool, optional
        Event will record timing data. (Default to False)
    busy_waited_sync : bool, optional
        If True, event will use blocking synchronization. When a CPU
        thread calls synchronize, the call will block until the event
        has actually been completed.
        Otherwise, the CPU thread will busy-wait until the event has
        been completed. (Default to False)
    support_ipc : bool, optional
        Event will be suitable for interprocess use.
        Note that enable_timing must be False. (Default to False)
    """
    # Annotated as plain bool: each field defaults to False and is never
    # None, so the previous Optional[bool] annotations were misleading.
    enable_timing: bool = False
    busy_waited_sync: bool = False
    support_ipc: bool = False


class Event:
"""Represent a record at a specific point of execution within a CUDA stream.
Applications can asynchronously record events at any point in
the program. An event keeps a record of all previous work within
the last recorded stream.
Events can be used to monitor device's progress, query completion
of work up to event's record, and help establish dependencies
between GPU work submissions.
Directly creating an :obj:`Event` is not supported due to ambiguity,
and they should instead be created through a :obj:`Stream` object.
"""
__slots__ = ("_handle", "_timing_disabled", "_busy_waited")

def __init__(self):
# minimal requirements for the destructor
self._handle = None
raise NotImplementedError(
"directly creating an Event object can be ambiguous. Please call "
Expand Down Expand Up @@ -51,37 +80,45 @@ def _init(options: Optional[EventOptions]=None):
return self

def __del__(self):
    """Release the underlying event resource via :meth:`close`."""
    self.close()

def close(self):
    """Destroy the event.

    Calling close on an already-closed (or never-created) event is a
    no-op.
    """
    if not self._handle:
        return
    handle_return(cuda.cuEventDestroy(self._handle))
    # Clear the handle so repeated close() calls do nothing.
    self._handle = None

@property
def is_timing_disabled(self) -> bool:
# Check if this instance can be used for the timing purpose.
"""Return True if the event does not record timing data, otherwise False."""
return self._timing_disabled

@property
def is_sync_busy_waited(self) -> bool:
# Check if the event synchronization would keep the CPU busy-waiting.
"""Return True if the event synchronization would keep the CPU busy-waiting, otherwise False."""
return self._busy_waited

@property
def is_ipc_supported(self) -> bool:
    """Return True if this event can be used as an interprocess event, otherwise False."""
    # Not implemented yet upstream.
    raise NotImplementedError("TODO")

def sync(self):
    """Synchronize until the event completes.

    If the event was created with busy_waited_sync, then the calling
    CPU thread will block until the event has been completed by the
    device. Otherwise the CPU thread will busy-wait until the event
    has been completed.
    """
    status = cuda.cuEventSynchronize(self._handle)
    handle_return(status)

@property
def is_done(self) -> bool:
# Return True if all captured works have been completed,
# otherwise False.
"""Return True if all captured works have been completed, otherwise False."""
result, = cuda.cuEventQuery(self._handle)
if result == cuda.CUresult.CUDA_SUCCESS:
return True
Expand All @@ -92,4 +129,5 @@ def is_done(self) -> bool:

@property
def handle(self) -> int:
"""Return the underlying cudaEvent_t pointer address as Python int."""
return int(self._handle)
Loading

0 comments on commit 72acaaf

Please sign in to comment.