Skip to content

Commit

Permalink
Update to opencl3 0.9.1 crate
Browse files Browse the repository at this point in the history
 - Update rust version to 1.56.1

 - Certain opencl3 APIs are now required to be called as unsafe
  • Loading branch information
Shawn Rader committed Sep 28, 2022
1 parent 7900e77 commit f156ffb
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 39 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "rust-gpu-tools"
version = "0.6.1"
authors = ["Keyvan Kambakhsh <[email protected]>", "porcuquine <[email protected]>"]
description = "Rust OpenCL tools"
edition = "2018"
edition = "2021"
homepage = "https://github.com/filecoin-project/rust-gpu-tools"
license = "MIT/Apache-2.0"
repository = "https://github.com/filecoin-project/rust-gpu-tools"
Expand All @@ -22,7 +22,7 @@ lazy_static = "1.2"
log = "0.4.11"
hex = "0.4.3"

opencl3 = { version = "0.6", default-features = false, features = ["CL_VERSION_1_2"], optional = true }
opencl3 = { version = "0.9.1", default-features = false, features = ["CL_VERSION_1_2"], optional = true }
rustacuda = { package = "fil-rustacuda", version = "0.1.3", optional = true }
once_cell = "1.8.0"
temp-env = "0.2.0"
4 changes: 2 additions & 2 deletions examples/add.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ pub fn main() {
let device = *Device::all().first().unwrap();

// First we run it on CUDA.
let cuda_program = cuda(&device);
let cuda_program = cuda(device);
let cuda_result = cuda_program.run(closures, ()).unwrap();
assert_eq!(cuda_result, [6, 8, 10, 12]);
println!("CUDA result: {:?}", cuda_result);

// Then we run it on OpenCL.
let opencl_program = opencl(&device);
let opencl_program = opencl(device);
let opencl_result = opencl_program.run(closures, ()).unwrap();
assert_eq!(opencl_result, [6, 8, 10, 12]);
println!("OpenCL result: {:?}", opencl_result);
Expand Down
2 changes: 1 addition & 1 deletion rust-toolchain
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.51.0
1.56.1
2 changes: 1 addition & 1 deletion src/cuda/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl Device {
/// The majority of methods are the same as [`crate::opencl::Program`], so you can write code using this
/// API, which will then work with OpenCL as well as CUDA kernels.
// When compiled without the `opencl` feature, then the intra-doc link above will be broken.
#[allow(broken_intra_doc_links)]
#[allow(rustdoc::broken_intra_doc_links)]
#[derive(Debug)]
pub struct Program {
context: rustacuda::context::UnownedContext,
Expand Down
6 changes: 3 additions & 3 deletions src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl TryFrom<&str> for PciId {

fn try_from(pci_id: &str) -> GPUResult<Self> {
let mut bytes = [0; mem::size_of::<u16>()];
hex::decode_to_slice(pci_id.replace(":", ""), &mut bytes).map_err(|_| {
hex::decode_to_slice(pci_id.replace(':', ""), &mut bytes).map_err(|_| {
GPUError::InvalidId(format!(
"Cannot parse PCI ID, expected hex-encoded string formated as aa:bb, got {0}.",
pci_id
Expand Down Expand Up @@ -111,7 +111,7 @@ impl TryFrom<&str> for DeviceUuid {

fn try_from(uuid: &str) -> GPUResult<Self> {
let mut bytes = [0; UUID_SIZE];
hex::decode_to_slice(uuid.replace("-", ""), &mut bytes)
hex::decode_to_slice(uuid.replace('-', ""), &mut bytes)
.map_err(|_| {
GPUError::InvalidId(format!("Cannot parse UUID, expected hex-encoded string formated as aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee, got {0}.", uuid))
})?;
Expand All @@ -137,7 +137,7 @@ impl fmt::Display for DeviceUuid {

impl fmt::Debug for DeviceUuid {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.to_string())
write!(f, "{}", self)
}
}

Expand Down
77 changes: 47 additions & 30 deletions src/opencl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ impl Device {
///
/// The majority of methods are the same as [`crate::cuda::Program`], so you can write code using this
/// API, which will then work with OpenCL as well as CUDA kernels.
#[allow(broken_intra_doc_links)]
#[allow(rustdoc::broken_intra_doc_links)]
pub struct Program {
device_name: String,
queue: CommandQueue,
Expand Down Expand Up @@ -157,7 +157,7 @@ impl Program {
"Building kernel ({}) from source: done.",
cached.to_string_lossy()
);
let queue = CommandQueue::create(&context, context.default_device(), 0)?;
let queue = CommandQueue::create_default(&context, 0)?;
let kernels = opencl3::kernel::create_program_kernels(&program)?;
let kernels_by_name = kernels
.into_iter()
Expand Down Expand Up @@ -185,13 +185,14 @@ impl Program {
debug!("Creating OpenCL program from binary.");
let context = Context::from_device(&device.device)?;
let bins = vec![&bin[..]];
let mut program =
opencl3::program::Program::create_from_binary(&context, context.devices(), &bins)?;
let mut program = unsafe {
opencl3::program::Program::create_from_binary(&context, context.devices(), &bins)
}?;
if let Err(build_error) = program.build(context.devices(), "") {
let log = program.get_build_log(context.devices()[0])?;
return Err(GPUError::Opencl3(build_error, Some(log)));
}
let queue = CommandQueue::create(&context, context.default_device(), 0)?;
let queue = CommandQueue::create_default(&context, 0)?;
let kernels = opencl3::kernel::create_program_kernels(&program)?;
let kernels_by_name = kernels
.into_iter()
Expand Down Expand Up @@ -248,21 +249,25 @@ impl Program {
// The underlying buffer is `u8`, hence we need the number of bytes.
let bytes_len = length * std::mem::size_of::<T>();

let mut buffer = opencl3::memory::Buffer::create(
&self.context,
CL_MEM_READ_WRITE,
bytes_len,
ptr::null_mut(),
)?;
let mut buffer = unsafe {
opencl3::memory::Buffer::create(
&self.context,
CL_MEM_READ_WRITE,
bytes_len,
ptr::null_mut(),
)?
};
// Transmuting types is safe as long as the sizes match.
let bytes = unsafe {
std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len)
};
// Write some data right away. This makes a significant performance difference.
self.queue
.enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, &[0u8], &[])?;
self.queue
.enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, &bytes, &[])?;
unsafe {
self.queue
.enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, &[0u8], &[])?;
self.queue
.enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, bytes, &[])?;
};

Ok(Buffer::<T> {
buffer,
Expand All @@ -287,7 +292,7 @@ impl Program {
.kernels_by_name
.get(name)
.ok_or_else(|| GPUError::KernelNotFound(name.to_string()))?;
let mut builder = ExecuteKernel::new(&kernel);
let mut builder = ExecuteKernel::new(kernel);
builder.set_global_work_size(global_work_size * local_work_size);
builder.set_local_work_size(local_work_size);
Ok(Kernel {
Expand All @@ -314,9 +319,10 @@ impl Program {
data.len() * std::mem::size_of::<T>(),
)
};
self.queue
.enqueue_write_buffer(&mut buffer.buffer, CL_BLOCKING, 0, &bytes, &[])?;

unsafe {
self.queue
.enqueue_write_buffer(&mut buffer.buffer, CL_BLOCKING, 0, bytes, &[])?;
}
Ok(())
}

Expand All @@ -325,15 +331,16 @@ impl Program {
assert!(data.len() <= buffer.length, "Buffer is too small");

// It is safe as long as the sizes match.
let mut bytes = unsafe {
let bytes = unsafe {
std::slice::from_raw_parts_mut(
data.as_mut_ptr() as *mut T as *mut u8,
data.len() * std::mem::size_of::<T>(),
)
};
self.queue
.enqueue_read_buffer(&buffer.buffer, CL_BLOCKING, 0, &mut bytes, &[])?;

unsafe {
self.queue
.enqueue_read_buffer(&buffer.buffer, CL_BLOCKING, 0, bytes, &[])?;
};
Ok(())
}

Expand Down Expand Up @@ -362,27 +369,35 @@ pub trait KernelArgument {

impl<T> KernelArgument for Buffer<T> {
fn push(&self, kernel: &mut Kernel) {
kernel.builder.set_arg(&self.buffer);
unsafe {
kernel.builder.set_arg(&self.buffer);
}
}
}

impl KernelArgument for i32 {
fn push(&self, kernel: &mut Kernel) {
kernel.builder.set_arg(self);
unsafe {
kernel.builder.set_arg(self);
}
}
}

impl KernelArgument for u32 {
fn push(&self, kernel: &mut Kernel) {
kernel.builder.set_arg(self);
unsafe {
kernel.builder.set_arg(self);
}
}
}

impl<T> KernelArgument for LocalBuffer<T> {
fn push(&self, kernel: &mut Kernel) {
kernel
.builder
.set_arg_local_buffer(self.length * std::mem::size_of::<T>());
unsafe {
kernel
.builder
.set_arg_local_buffer(self.length * std::mem::size_of::<T>());
}
kernel.num_local_buffers += 1;
}
}
Expand Down Expand Up @@ -431,7 +446,9 @@ impl<'a> Kernel<'a> {
"There cannot be more than one `LocalBuffer`.".to_string(),
));
}
self.builder.enqueue_nd_range(&self.queue)?;
unsafe {
self.builder.enqueue_nd_range(self.queue)?;
}
Ok(())
}
}

0 comments on commit f156ffb

Please sign in to comment.