From f156ffb08ddc83724aafa2101c501206316e2751 Mon Sep 17 00:00:00 2001 From: Shawn Rader Date: Thu, 15 Sep 2022 12:14:22 -0700 Subject: [PATCH] Update to opencl3 0.9.1 crate - Update Rust version to 1.56.1 - Certain opencl3 APIs are now required to be called as unsafe --- Cargo.toml | 4 +-- examples/add.rs | 4 +-- rust-toolchain | 2 +- src/cuda/mod.rs | 2 +- src/device.rs | 6 ++-- src/opencl/mod.rs | 77 +++++++++++++++++++++++++++++------------------ 6 files changed, 56 insertions(+), 39 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 43abd8a..7c0516f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "rust-gpu-tools" version = "0.6.1" authors = ["Keyvan Kambakhsh ", "porcuquine "] description = "Rust OpenCL tools" -edition = "2018" +edition = "2021" homepage = "https://github.com/filecoin-project/rust-gpu-tools" license = "MIT/Apache-2.0" repository = "https://github.com/filecoin-project/rust-gpu-tools" @@ -22,7 +22,7 @@ lazy_static = "1.2" log = "0.4.11" hex = "0.4.3" -opencl3 = { version = "0.6", default-features = false, features = ["CL_VERSION_1_2"], optional = true } +opencl3 = { version = "0.9.1", default-features = false, features = ["CL_VERSION_1_2"], optional = true } rustacuda = { package = "fil-rustacuda", version = "0.1.3", optional = true } once_cell = "1.8.0" temp-env = "0.2.0" diff --git a/examples/add.rs b/examples/add.rs index a73b02e..5c31702 100644 --- a/examples/add.rs +++ b/examples/add.rs @@ -59,13 +59,13 @@ pub fn main() { let device = *Device::all().first().unwrap(); // First we run it on CUDA. - let cuda_program = cuda(&device); + let cuda_program = cuda(device); let cuda_result = cuda_program.run(closures, ()).unwrap(); assert_eq!(cuda_result, [6, 8, 10, 12]); println!("CUDA result: {:?}", cuda_result); // Then we run it on OpenCL. 
- let opencl_program = opencl(&device); + let opencl_program = opencl(device); let opencl_result = opencl_program.run(closures, ()).unwrap(); assert_eq!(opencl_result, [6, 8, 10, 12]); println!("OpenCL result: {:?}", opencl_result); diff --git a/rust-toolchain b/rust-toolchain index ba0a719..43c989b 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.51.0 +1.56.1 diff --git a/src/cuda/mod.rs b/src/cuda/mod.rs index ef39e4d..7a242e7 100644 --- a/src/cuda/mod.rs +++ b/src/cuda/mod.rs @@ -114,7 +114,7 @@ impl Device { /// The majority of methods are the same as [`crate::opencl::Program`], so you can write code using this /// API, which will then work with OpenCL as well as CUDA kernels. // When compiled without the `opencl` feature, then the intra-doc link above will be broken. -#[allow(broken_intra_doc_links)] +#[allow(rustdoc::broken_intra_doc_links)] #[derive(Debug)] pub struct Program { context: rustacuda::context::UnownedContext, diff --git a/src/device.rs b/src/device.rs index eca012f..3fb238a 100644 --- a/src/device.rs +++ b/src/device.rs @@ -69,7 +69,7 @@ impl TryFrom<&str> for PciId { fn try_from(pci_id: &str) -> GPUResult { let mut bytes = [0; mem::size_of::()]; - hex::decode_to_slice(pci_id.replace(":", ""), &mut bytes).map_err(|_| { + hex::decode_to_slice(pci_id.replace(':', ""), &mut bytes).map_err(|_| { GPUError::InvalidId(format!( "Cannot parse PCI ID, expected hex-encoded string formated as aa:bb, got {0}.", pci_id @@ -111,7 +111,7 @@ impl TryFrom<&str> for DeviceUuid { fn try_from(uuid: &str) -> GPUResult { let mut bytes = [0; UUID_SIZE]; - hex::decode_to_slice(uuid.replace("-", ""), &mut bytes) + hex::decode_to_slice(uuid.replace('-', ""), &mut bytes) .map_err(|_| { GPUError::InvalidId(format!("Cannot parse UUID, expected hex-encoded string formated as aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee, got {0}.", uuid)) })?; @@ -137,7 +137,7 @@ impl fmt::Display for DeviceUuid { impl fmt::Debug for DeviceUuid { fn fmt(&self, f: &mut fmt::Formatter) -> 
fmt::Result { - write!(f, "{}", self.to_string()) + write!(f, "{}", self) } } diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs index 068d844..664d255 100644 --- a/src/opencl/mod.rs +++ b/src/opencl/mod.rs @@ -121,7 +121,7 @@ impl Device { /// /// The majority of methods are the same as [`crate::cuda::Program`], so you can write code using this /// API, which will then work with OpenCL as well as CUDA kernels. -#[allow(broken_intra_doc_links)] +#[allow(rustdoc::broken_intra_doc_links)] pub struct Program { device_name: String, queue: CommandQueue, @@ -157,7 +157,7 @@ impl Program { "Building kernel ({}) from source: done.", cached.to_string_lossy() ); - let queue = CommandQueue::create(&context, context.default_device(), 0)?; + let queue = CommandQueue::create_default(&context, 0)?; let kernels = opencl3::kernel::create_program_kernels(&program)?; let kernels_by_name = kernels .into_iter() @@ -185,13 +185,14 @@ impl Program { debug!("Creating OpenCL program from binary."); let context = Context::from_device(&device.device)?; let bins = vec![&bin[..]]; - let mut program = - opencl3::program::Program::create_from_binary(&context, context.devices(), &bins)?; + let mut program = unsafe { + opencl3::program::Program::create_from_binary(&context, context.devices(), &bins) + }?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; return Err(GPUError::Opencl3(build_error, Some(log))); } - let queue = CommandQueue::create(&context, context.default_device(), 0)?; + let queue = CommandQueue::create_default(&context, 0)?; let kernels = opencl3::kernel::create_program_kernels(&program)?; let kernels_by_name = kernels .into_iter() @@ -248,21 +249,25 @@ impl Program { // The underlying buffer is `u8`, hence we need the number of bytes. 
let bytes_len = length * std::mem::size_of::(); - let mut buffer = opencl3::memory::Buffer::create( - &self.context, - CL_MEM_READ_WRITE, - bytes_len, - ptr::null_mut(), - )?; + let mut buffer = unsafe { + opencl3::memory::Buffer::create( + &self.context, + CL_MEM_READ_WRITE, + bytes_len, + ptr::null_mut(), + )? + }; // Transmuting types is safe as long a sizes match. let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len) }; // Write some data right-away. This makes a significant performance different. - self.queue - .enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, &[0u8], &[])?; - self.queue - .enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, &bytes, &[])?; + unsafe { + self.queue + .enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, &[0u8], &[])?; + self.queue + .enqueue_write_buffer(&mut buffer, CL_BLOCKING, 0, bytes, &[])?; + }; Ok(Buffer:: { buffer, @@ -287,7 +292,7 @@ impl Program { .kernels_by_name .get(name) .ok_or_else(|| GPUError::KernelNotFound(name.to_string()))?; - let mut builder = ExecuteKernel::new(&kernel); + let mut builder = ExecuteKernel::new(kernel); builder.set_global_work_size(global_work_size * local_work_size); builder.set_local_work_size(local_work_size); Ok(Kernel { @@ -314,9 +319,10 @@ impl Program { data.len() * std::mem::size_of::(), ) }; - self.queue - .enqueue_write_buffer(&mut buffer.buffer, CL_BLOCKING, 0, &bytes, &[])?; - + unsafe { + self.queue + .enqueue_write_buffer(&mut buffer.buffer, CL_BLOCKING, 0, bytes, &[])?; + } Ok(()) } @@ -325,15 +331,16 @@ impl Program { assert!(data.len() <= buffer.length, "Buffer is too small"); // It is safe as long as the sizes match. 
- let mut bytes = unsafe { + let bytes = unsafe { std::slice::from_raw_parts_mut( data.as_mut_ptr() as *mut T as *mut u8, data.len() * std::mem::size_of::(), ) }; - self.queue - .enqueue_read_buffer(&buffer.buffer, CL_BLOCKING, 0, &mut bytes, &[])?; - + unsafe { + self.queue + .enqueue_read_buffer(&buffer.buffer, CL_BLOCKING, 0, bytes, &[])?; + }; Ok(()) } @@ -362,27 +369,35 @@ pub trait KernelArgument { impl KernelArgument for Buffer { fn push(&self, kernel: &mut Kernel) { - kernel.builder.set_arg(&self.buffer); + unsafe { + kernel.builder.set_arg(&self.buffer); + } } } impl KernelArgument for i32 { fn push(&self, kernel: &mut Kernel) { - kernel.builder.set_arg(self); + unsafe { + kernel.builder.set_arg(self); + } } } impl KernelArgument for u32 { fn push(&self, kernel: &mut Kernel) { - kernel.builder.set_arg(self); + unsafe { + kernel.builder.set_arg(self); + } } } impl KernelArgument for LocalBuffer { fn push(&self, kernel: &mut Kernel) { - kernel - .builder - .set_arg_local_buffer(self.length * std::mem::size_of::()); + unsafe { + kernel + .builder + .set_arg_local_buffer(self.length * std::mem::size_of::()); + } kernel.num_local_buffers += 1; } } @@ -431,7 +446,9 @@ impl<'a> Kernel<'a> { "There cannot be more than one `LocalBuffer`.".to_string(), )); } - self.builder.enqueue_nd_range(&self.queue)?; + unsafe { + self.builder.enqueue_nd_range(self.queue)?; + } Ok(()) } }