// Copyright (c) 2021 Via Technology Ltd. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #![cfg(feature = "CL_VERSION_2_0")] extern crate opencl3; use cl3::device::{ CL_DEVICE_SVM_FINE_GRAIN_BUFFER, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, CL_DEVICE_TYPE_ALL, CL_DEVICE_TYPE_GPU, }; use opencl3::command_queue::CommandQueue; use opencl3::context::Context; use opencl3::device::Device; use opencl3::kernel::{create_program_kernels, ExecuteKernel, Kernel}; use opencl3::platform::get_platforms; use opencl3::program::{Program, CL_STD_2_0}; use opencl3::svm::SvmVec; use opencl3::types::cl_int; use opencl3::Result; use std::ptr; // The OpenCL kernels in PROGRAM_SOURCE below use built-in work-group functions: // work_group_reduce_add, work_group_scan_inclusive_add and work_group_broadcast // which were introduced in OpenCL 2.0. const PROGRAM_SOURCE: &str = r#" kernel void sum_int (global int* sums, global int const* values) { int value = work_group_reduce_add(values[get_global_id(0)]); if (0u == get_local_id(0)) sums[get_group_id(0)] = value; } kernel void inclusive_scan_int (global int* output, global int const* values) { int sum = 0; size_t lid = get_local_id(0); size_t lsize = get_local_size(0); size_t num_groups = get_num_groups(0); for (size_t i = 0u; i < num_groups; ++i) { size_t lidx = i * lsize + lid; int value = work_group_scan_inclusive_add(values[lidx]); output[lidx] = sum + value; sum += work_group_broadcast(value, lsize - 1); } }"#; const SUM_KERNEL_NAME: &str = "sum_int"; const INCLUSIVE_SCAN_KERNEL_NAME: &str = "inclusive_scan_int"; #[test] #[ignore] fn test_opencl_2_kernel_example() -> Result<()> { let platforms = get_platforms()?; assert!(0 < platforms.len()); ///////////////////////////////////////////////////////////////////// // Query OpenCL compute environment let opencl_2: &str = "OpenCL 2"; let opencl_3: &str = "OpenCL 3"; // Find an OpenCL fine grained SVM, platform and device let mut device_id = ptr::null_mut(); let mut is_fine_grained_svm: bool = false; for p in platforms { let platform_version = p.version()?; if platform_version.contains(&opencl_2) || platform_version.contains(&opencl_3) { let devices = p .get_devices(CL_DEVICE_TYPE_GPU) .expect("Platform::get_devices failed"); for dev_id in devices { let device = Device::new(dev_id); let svm_mem_capability = device.svm_mem_capability(); is_fine_grained_svm = 0 < svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_BUFFER; if is_fine_grained_svm { device_id = dev_id; break; } } } } if is_fine_grained_svm { // Create OpenCL context from the OpenCL svm device let device = Device::new(device_id); let vendor = device.vendor()?; let vendor_id = device.vendor_id()?; println!("OpenCL device vendor name: {}", vendor); println!("OpenCL device vendor id: {:X}", vendor_id); ///////////////////////////////////////////////////////////////////// // Initialise OpenCL compute environment // Create a Context on the OpenCL device let context = Context::from_device(&device).expect("Context::from_device failed"); // Build the OpenCL program source. let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0) .expect("Program::create_and_build_from_source failed"); // Create the kernels from the OpenCL program source. let kernels = create_program_kernels(&program)?; assert!(0 < kernels.len()); let kernel_0_name = kernels[0].function_name()?; println!("OpenCL kernel_0_name: {}", kernel_0_name); let sum_kernel = if SUM_KERNEL_NAME == kernel_0_name { &kernels[0] } else { &kernels[1] }; let inclusive_scan_kernel = if INCLUSIVE_SCAN_KERNEL_NAME == kernel_0_name { &kernels[0] } else { &kernels[1] }; // Create a command_queue on the Context's device let queue = CommandQueue::create_default_with_properties(&context, 0, 0) .expect("CommandQueue::create_with_properties failed"); // Get the svm capability of all the devices in the context. let svm_capability = context.get_svm_mem_capability(); assert!(0 < svm_capability); // Create SVM vectors for the input and output data // The input data const ARRAY_SIZE: usize = 8; let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2]; // Copy into an OpenCL SVM vector let mut test_values = SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); test_values.copy_from_slice(&value_array); // Make test_values immutable let test_values = test_values; // The output data, an OpenCL SVM vector let mut results = SvmVec::::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed"); // Run the sum kernel on the input data let sum_kernel_event = unsafe { ExecuteKernel::new(sum_kernel) .set_arg_svm(results.as_mut_ptr()) .set_arg_svm(test_values.as_ptr()) .set_global_work_size(ARRAY_SIZE) .enqueue_nd_range(&queue)? }; // Wait for the kernel to complete execution on the device sum_kernel_event.wait()?; // Can access OpenCL SVM directly, no need to map or read the results println!("sum results: {:?}", results); assert_eq!(33, results[0]); assert_eq!(0, results[ARRAY_SIZE - 1]); // Run the inclusive scan kernel on the input data let kernel_event = unsafe { ExecuteKernel::new(inclusive_scan_kernel) .set_arg_svm(results.as_mut_ptr()) .set_arg_svm(test_values.as_ptr()) .set_global_work_size(ARRAY_SIZE) .enqueue_nd_range(&queue)? }; kernel_event.wait()?; println!("inclusive_scan results: {:?}", results); assert_eq!(value_array[0], results[0]); assert_eq!(33, results[ARRAY_SIZE - 1]); } else { println!("OpenCL fine grained SVM capable device not found"); } Ok(()) } #[test] #[ignore] fn test_opencl_2_system_svm_example() -> Result<()> { let platforms = get_platforms()?; assert!(0 < platforms.len()); ///////////////////////////////////////////////////////////////////// // Query OpenCL compute environment let opencl_2: &str = "OpenCL 2"; let opencl_3: &str = "OpenCL 3"; // Find an OpenCL fine grained SVM, platform and device let mut device_id = ptr::null_mut(); let mut is_fine_grained_system_svm: bool = false; for p in platforms { let platform_version = p.version()?; if platform_version.contains(&opencl_2) || platform_version.contains(&opencl_3) { let devices = p .get_devices(CL_DEVICE_TYPE_ALL) .expect("Platform::get_devices failed"); for dev_id in devices { let device = Device::new(dev_id); let svm_mem_capability = device.svm_mem_capability(); is_fine_grained_system_svm = 0 < svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM; if is_fine_grained_system_svm { device_id = dev_id; break; } } } } if is_fine_grained_system_svm { // Create OpenCL context from the OpenCL svm device let device = Device::new(device_id); let vendor = device.vendor().expect("Device.vendor failed"); let vendor_id = device.vendor_id().expect("Device.vendor_id failed"); println!("OpenCL device vendor name: {}", vendor); println!("OpenCL device vendor id: {:X}", vendor_id); ///////////////////////////////////////////////////////////////////// // Initialise OpenCL compute environment // Create a Context on the OpenCL svm device let context = Context::from_device(&device).expect("Context::from_device failed"); // Build the OpenCL program source and create the kernel. let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") .expect("Program::create_and_build_from_source failed"); let kernel = Kernel::create(&program, SUM_KERNEL_NAME).expect("Kernel::create failed"); // Create a command_queue on the Context's device let queue = CommandQueue::create_default_with_properties(&context, 0, 0) .expect("CommandQueue::create_default_with_properties failed"); // The input data const ARRAY_SIZE: usize = 8; let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2]; // Copy into an OpenCL SVM vector let mut test_values = SvmVec::::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed"); test_values.copy_from_slice(&value_array); // Make test_values immutable let test_values = test_values; // The output data, an OpenCL SVM vector let mut results = SvmVec::::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed"); // Run the sum kernel on the input data let sum_kernel_event = unsafe { ExecuteKernel::new(&kernel) .set_arg_svm(results.as_mut_ptr()) .set_arg_svm(test_values.as_ptr()) .set_global_work_size(ARRAY_SIZE) .enqueue_nd_range(&queue)? }; // Wait for the kernel to complete execution on the device sum_kernel_event.wait()?; // Can access OpenCL SVM directly, no need to map or read the results println!("sum results: {:?}", results); assert_eq!(33, results[0]); assert_eq!(0, results[ARRAY_SIZE - 1]); } else { println!("OpenCL fine grained system SVM device not found") } Ok(()) }