use std::cmp::max; use std::cmp::min; use std::ffi::c_void; use std::ffi::CStr; use std::intrinsics::copy_nonoverlapping; use std::mem::size_of; use std::mem::MaybeUninit; use std::os::raw::c_char; use std::rc::Rc; use std::slice; use log::error; use log::info; use log::trace; use log::warn; use std::{ffi::CString, io::Read, slice::Windows}; use widestring::WideCStr; use cgmath::{prelude::*, Vector2, Vector4}; use cgmath::{Matrix4, Vector3}; use memoffset::offset_of; use rand::Rng; use winit::{ event::{Event, WindowEvent}, event_loop::{ControlFlow, EventLoop}, platform::windows::WindowExtWindows, window::WindowBuilder, }; use static_assertions::assert_eq_size; use rusty_d3d12::*; fn wait_for_debugger() { while unsafe { winapi::um::debugapi::IsDebuggerPresent() } == 0 { std::thread::sleep_ms(1000); } } #[no_mangle] pub static D3D12SDKVersion: u32 = 600; #[no_mangle] pub static D3D12SDKPath: &[u8; 9] = b".\\D3D12\\\0"; #[allow(clippy::not_unsafe_ptr_arg_deref)] #[no_mangle] pub extern "C" fn debug_callback( category: i32, severity: i32, id: i32, description: *const c_char, _context: *mut c_void, ) { let severity: MessageSeverity = unsafe { std::mem::transmute(severity) }; let category: MessageCategory = unsafe { std::mem::transmute(category) }; let description = unsafe { CStr::from_ptr(description) }; match severity { MessageSeverity::Message | MessageSeverity::Info => { info!( "[D3D12 Message][{}][{}][{:#x}] {}", severity, category, id as i32, description .to_str() .expect("Cannot make Rust string from D3D12 layer message") ); } MessageSeverity::Warning => { warn!( "[D3D12 Message][{}][{}][{:#x}] {}", severity, category, id as i32, description .to_str() .expect("Cannot make Rust string from D3D12 layer message") ); } _ => { error!( "[D3D12 Message][{}][{}][{:#x}] {}", severity, category, id as i32, description .to_str() .expect("Cannot make Rust string from D3D12 layer message") ); } } } const DEVICE_COUNT: usize = 2; const WINDOW_WIDTH: u32 = 640; const 
WINDOW_HEIGHT: u32 = 480; const FRAMES_IN_FLIGHT: usize = 3; const USE_DEBUG: bool = true; const USE_WARP_ADAPTER: bool = false; const MAX_TRIANGLE_COUNT: u32 = 15000; const TRIANGLE_HALF_WIDTH: f32 = 0.025; const TRIANGLE_DEPTH: f32 = 1.; const CLEAR_COLOR: [f32; 4] = [0.0, 0.2, 0.3, 1.0]; const MOVING_AVERAGE_FRAME_COUNT: usize = 20; const TIMESTAMP_PRINT_FREQUENCY: usize = 200; const ALLOW_DRAW_DYNAMIC_WORKLOAD: bool = false; const ALLOW_SHADER_DYNAMIC_WORKLOAD: bool = false; type Mat4 = Matrix4; type Vec3 = Vector3; type Vec4 = Vector4; type Vec2 = Vector2; #[derive(Debug, Copy, Clone)] pub struct Degrees(pub f32); // ToDo: move all this to utils since it's duplicated in multiple examples #[derive(Debug, Copy, Clone)] pub struct Camera { pub near: f32, pub far: f32, pub fov: Degrees, pub aspect: f32, pub position: Vec3, pub look_at: Vec3, } impl Default for Camera { fn default() -> Self { Self { near: 0.01, far: 100.0, fov: Degrees(45.), aspect: WINDOW_WIDTH as f32 / WINDOW_HEIGHT as f32, // position: Vec3::new(0., 0.5, -1.), position: Vec3::new(0., 0., -1.), look_at: Vec3::new(0., 0., 1.), } } } fn make_projection_matrix(camera: &Camera) -> Mat4 { use cgmath::prelude::*; Matrix4::from_cols( Vec4 { x: 1. / (camera.aspect * cgmath::Deg(camera.fov.0 / 2.).tan()), y: 0., z: 0., w: 0., }, Vec4 { x: 0., y: 1. 
/ cgmath::Deg(camera.fov.0 / 2.).tan(), z: 0., w: 0., }, Vec4 { x: 0., y: 0., z: camera.far / (camera.far - camera.near), w: 1., }, Vec4 { x: 0., y: 0., z: -camera.near * camera.far / (camera.far - camera.near), w: 0., }, ) } fn make_view_matrix(camera_pos: Vec3, look_at: Vec3) -> Mat4 { let cam_k = (look_at - camera_pos).normalize(); let wrld_up = Vec3::new(0., 1., 0.); let cam_i = wrld_up.cross(cam_k).normalize(); let cam_j = cam_k.cross(cam_i); let orientation = Matrix4::from_cols( cam_i.extend(0.), cam_j.extend(0.), cam_k.extend(0.), Vec4::new(0., 0., 0., 1.), ); // trace!("orientation matrix: {:?}", &orientation); let translation = Matrix4::from_cols( Vec4::new(1., 0., 0., 0.), Vec4::new(0., 1., 0., 0.), Vec4::new(0., 0., 1., 0.), Vec4::new(camera_pos[0], camera_pos[1], camera_pos[2], 1.), ); let result = translation * orientation; result.invert().expect("No matrix inverse") } #[repr(C)] pub struct Vertex { position: Vec3, } impl Vertex { pub fn make_desc() -> Vec> { vec![InputElementDesc::default() .with_semantic_name("POSITION") .unwrap() .with_format(Format::R32G32B32Float) .with_input_slot(0) .with_aligned_byte_offset(ByteCount::from(offset_of!(Self, position)))] } } #[repr(C)] pub struct BlurVertex { position: Vec4, uv: Vec2, } impl BlurVertex { pub fn make_desc() -> Vec> { vec![ InputElementDesc::default() .with_semantic_name("POSITION") .unwrap() .with_format(Format::R32G32B32Float) .with_input_slot(0) .with_aligned_byte_offset(ByteCount::from(offset_of!(Self, position))), InputElementDesc::default() .with_semantic_name("TEXCOORD") .unwrap() .with_format(Format::R32G32Float) .with_input_slot(0) .with_aligned_byte_offset(ByteCount::from(offset_of!(Self, uv))), ] } } #[repr(C)] struct SceneConstantBuffer { velocity: Vec4, offset: Vec4, color: Vec4, projection: Matrix4, // Constant buffers are 256-byte aligned. Add padding in the struct to allow multiple buffers // to be array-indexed. 
padding: [f32; 36], } static_assertions::const_assert!(size_of::() == 256); #[repr(C)] struct BlurConstantBufferData { texture_dimensions: Vec2, offset: f32, padding: [f32; 61], } static_assertions::const_assert!(size_of::() == 256); #[repr(C)] struct WorkloadConstantBufferData { loop_count: u32, padding: [f32; 63], } static_assertions::const_assert!( size_of::() == 256 ); struct HeterogeneousMultiadapterSample { pipeline: Pipeline, } impl HeterogeneousMultiadapterSample { fn new(hwnd: *mut std::ffi::c_void) -> Self { let mut pipeline = Pipeline::new(hwnd); pipeline.update(); pipeline.render(); HeterogeneousMultiadapterSample { pipeline } } fn draw(&mut self) { self.pipeline.update(); self.pipeline.render(); } } impl Drop for HeterogeneousMultiadapterSample { fn drop(&mut self) { if USE_DEBUG { self.pipeline .debug_devices .as_ref() .expect("No debug devices created")[0] .report_live_device_objects() .expect("Device cannot report live objects"); self.pipeline .debug_devices .as_ref() .expect("No debug devices created")[1] .report_live_device_objects() .expect("Device cannot report live objects"); } } } struct Pipeline { devices: [Device; DEVICE_COUNT], debug_devices: Option<[DebugDevice; DEVICE_COUNT]>, info_queues: Option<[Rc; DEVICE_COUNT]>, direct_command_queues: [CommandQueue; DEVICE_COUNT], direct_command_queue_timestamp_frequencies: [u64; DEVICE_COUNT], copy_command_queue: CommandQueue, swapchain: Swapchain, frame_index: usize, frames_since_last_update: u32, // static var in OnUpdate() viewport: Viewport, scissor_rect: Rect, triangle_count: u32, rtv_heaps: [DescriptorHeap; DEVICE_COUNT], rtv_descriptor_handle_sizes: [ByteCount; DEVICE_COUNT], dsv_heap: DescriptorHeap, cbv_srv_heap: DescriptorHeap, cbv_srv_descriptor_handle_sizes: [ByteCount; DEVICE_COUNT], timestamp_result_buffers: [Resource; DEVICE_COUNT], current_times_index: usize, query_heaps: [QueryHeap; DEVICE_COUNT], render_targets: [[Resource; FRAMES_IN_FLIGHT]; DEVICE_COUNT], 
direct_command_allocators: [[CommandAllocator; FRAMES_IN_FLIGHT]; DEVICE_COUNT],
    copy_allocators: [CommandAllocator; FRAMES_IN_FLIGHT],

    cross_adapter_textures_supported: bool,
    heap_primary: Heap,
    heap_secondary: Heap,
    cross_adapter_resources: [[Resource; FRAMES_IN_FLIGHT]; DEVICE_COUNT],
    secondary_adapter_textures: [Resource; FRAMES_IN_FLIGHT],
    intermediate_blur_render_target: Resource,

    root_signature: RootSignature,
    blur_root_signature: RootSignature,

    pipeline_state: PipelineState,
    blur_pipeline_states: [PipelineState; 2],

    direct_command_lists: [CommandList; DEVICE_COUNT],
    copy_command_list: CommandList,

    vertex_buffer: Resource,
    vertex_buffer_upload: Resource,
    vertex_buffer_view: VertexBufferView,

    quad_vertex_buffer: Resource,
    quad_vertex_buffer_upload: Resource,
    quad_vertex_buffer_view: VertexBufferView,

    depth_stencil: Resource,

    triangle_constant_buffer: Resource,
    triangle_cb_data: Vec<SceneConstantBuffer>,
    triangle_cb_mapped_data: *mut u8,

    ps_loop_count: u32,
    workload_constant_buffer: Resource,
    workload_cb_data: WorkloadConstantBufferData,
    workload_cb_mapped_data: *mut u8,

    blur_workload_constant_buffer: Resource,
    blur_workload_cb_data: WorkloadConstantBufferData,
    blur_workload_cb_mapped_data: *mut u8,
    blur_ps_loop_count: u32,

    blur_constant_buffer: Resource,

    frame_fence: Fence,
    render_fence: Fence,
    cross_adapter_fences: [Fence; DEVICE_COUNT],
    fence_events: [Win32Event; 2],

    current_present_fence_value: u64,
    current_render_fence_value: u64,
    current_cross_adapter_fence_value: u64,
    frame_fence_values: [u64; FRAMES_IN_FLIGHT],

    draw_times: [u64; MOVING_AVERAGE_FRAME_COUNT],
    blur_times: [u64; MOVING_AVERAGE_FRAME_COUNT],
    draw_time_moving_average: u64,
    blur_time_moving_average: u64,
}

impl Pipeline {
    /// Creates every device object the sample needs and submits the initial
    /// upload command lists on both direct queues.
    ///
    /// `hwnd` is the native handle of the window the swapchain presents to.
    ///
    /// # Panics
    /// Panics (via `expect`) if any device object cannot be created.
    // aka LoadPipeline() in the original sample
    fn new(hwnd: *mut c_void) -> Self {
        let mut factory_flags = CreateFactoryFlags::None;
        if USE_DEBUG {
            let debug_controller = Debug::new().expect("Cannot create debug controller");
            debug_controller.enable_debug_layer();
            debug_controller.enable_gpu_based_validation();
            debug_controller.enable_object_auto_name();
            factory_flags = CreateFactoryFlags::Debug;
        }

        let factory = Factory::new(factory_flags).expect("Cannot create factory");

        let (devices, is_software_adapter) = create_devices(&factory);

        let debug_devices;
        if USE_DEBUG {
            // An array of `MaybeUninit` needs no initialisation itself; every
            // slot is written in the loop below before the transmute.
            let mut temp_debug_devices: [MaybeUninit<DebugDevice>; DEVICE_COUNT] =
                unsafe { MaybeUninit::uninit().assume_init() };

            for device_idx in 0..DEVICE_COUNT {
                temp_debug_devices[device_idx] = MaybeUninit::new(
                    DebugDevice::new(&devices[device_idx])
                        .expect("Cannot create debug device"),
                );
            }

            debug_devices = Some(unsafe { std::mem::transmute(temp_debug_devices) });
        } else {
            debug_devices = None;
        }

        let info_queues;
        if USE_DEBUG {
            let mut temp_info_queues: [MaybeUninit<Rc<InfoQueue>>; DEVICE_COUNT] =
                unsafe { MaybeUninit::uninit().assume_init() };

            for device_idx in 0..DEVICE_COUNT {
                let info_queue = Rc::from(
                    InfoQueue::new(
                        &devices[device_idx],
                        // Some(&[
                        //     MessageSeverity::Corruption,
                        //     MessageSeverity::Error,
                        //     MessageSeverity::Warning,
                        // ]),
                        None,
                    )
                    .expect("Cannot create debug info queue"),
                );

                #[cfg(feature = "debug_callback")]
                info_queue
                    .register_callback(debug_callback, MessageCallbackFlags::FlagNone)
                    .expect("Cannot set debug callback on info queue");

                temp_info_queues[device_idx] = MaybeUninit::new(info_queue);
            }

            info_queues = Some(unsafe { std::mem::transmute(temp_info_queues) });
        } else {
            info_queues = None;
        }

        let mut direct_command_queues: [MaybeUninit<CommandQueue>; DEVICE_COUNT] =
            unsafe { MaybeUninit::uninit().assume_init() };
        let mut direct_command_queue_timestamp_frequencies: [MaybeUninit<u64>; DEVICE_COUNT] =
            unsafe { MaybeUninit::uninit().assume_init() };

        for device_idx in 0..DEVICE_COUNT {
            direct_command_queues[device_idx] = MaybeUninit::new(
                devices[device_idx]
                    .create_command_queue(
                        &CommandQueueDesc::default()
                            .with_queue_type(CommandListType::Direct),
                    )
                    .expect("Cannot create direct command queue"),
            );
        }
        let direct_command_queues: [CommandQueue; DEVICE_COUNT] =
            unsafe { std::mem::transmute(direct_command_queues) };

        for device_idx in 0..DEVICE_COUNT {
            direct_command_queue_timestamp_frequencies[device_idx] = MaybeUninit::new(
                direct_command_queues[device_idx]
                    .get_timestamp_frequency()
                    .expect("Cannot get queue timestamp frequency"),
            );
        }
        let direct_command_queue_timestamp_frequencies: [u64; DEVICE_COUNT] =
            unsafe { std::mem::transmute(direct_command_queue_timestamp_frequencies) };

        // The copy queue lives on the primary device.
        let copy_command_queue = devices[0]
            .create_command_queue(
                &CommandQueueDesc::default().with_queue_type(CommandListType::Copy),
            )
            .expect("Cannot create copy command queue");

        // The swapchain is bound to the secondary device's direct queue.
        let swapchain = create_swapchain(factory, &direct_command_queues[1], hwnd);
        let frame_index = swapchain.get_current_back_buffer_index() as usize;
        trace!("Swapchain returned frame index {}", frame_index);

        let viewport = Viewport::default()
            .with_top_left_x(0.)
            .with_top_left_y(0.)
            .with_width(WINDOW_WIDTH as f32)
            .with_height(WINDOW_HEIGHT as f32);

        let scissor_rect = Rect::default()
            .with_left(0)
            .with_top(0)
            .with_right(WINDOW_WIDTH as i32)
            .with_bottom(WINDOW_HEIGHT as i32);

        let (rtv_heaps, dsv_heap, cbv_srv_heap) =
            create_descriptor_heaps(&devices, &swapchain);

        let rtv_descriptor_handle_sizes = [
            devices[0].get_descriptor_handle_increment_size(DescriptorHeapType::Rtv),
            devices[1].get_descriptor_handle_increment_size(DescriptorHeapType::Rtv),
        ];

        let cbv_srv_descriptor_handle_sizes = [
            devices[0].get_descriptor_handle_increment_size(DescriptorHeapType::CbvSrvUav),
            devices[1].get_descriptor_handle_increment_size(DescriptorHeapType::CbvSrvUav),
        ];

        let (timestamp_result_buffers, query_heaps) = create_query_heaps(&devices);

        let (render_targets, direct_command_allocators, copy_allocators) =
            create_frame_resources(
                &devices,
                &rtv_heaps,
                &swapchain,
                rtv_descriptor_handle_sizes,
            );

        let (cross_adapter_textures_supported, texture_size, cross_adapter_desc) =
            create_shared_resource_descs(&devices);

        // One heap shared between both adapters holds a texture per frame.
        let heap_primary = devices[0]
            .create_heap(
                &HeapDesc::default()
                    .with_properties(
                        HeapProperties::default().with_heap_type(HeapType::Default),
                    )
                    .with_size_in_bytes(texture_size * FRAMES_IN_FLIGHT)
                    .with_flags(HeapFlags::Shared | HeapFlags::SharedCrossAdapter),
            )
            .expect("Cannot create heap");

        let heap_as_dc: DeviceChild = heap_primary.clone().into();
        let heap_handle = devices[0]
            .create_shared_handle(&heap_as_dc, "SharedHeapHandle")
            .expect("Cannot create shared heap handle");

        let heap_secondary = devices[1]
            .open_shared_heap_handle(heap_handle)
            .expect("Cannot open shared heap handle");

        heap_handle.close();

        trace!("Successfully created and opened heaps");

        let mut cross_adapter_resources: [[MaybeUninit<Resource>; FRAMES_IN_FLIGHT];
            DEVICE_COUNT] = unsafe { MaybeUninit::uninit().assume_init() };

        // NOTE(review): these slots are only written when cross-adapter
        // textures are NOT supported, yet the array is transmuted to
        // `[Resource; _]` unconditionally below. In the supported case the
        // stored `Resource`s appear to be uninitialised — confirm and
        // consider an `Option`.
        let mut secondary_adapter_textures: [MaybeUninit<Resource>; FRAMES_IN_FLIGHT] =
            unsafe { MaybeUninit::uninit().assume_init() };

        for frame_idx in 0..FRAMES_IN_FLIGHT {
            let resource_primary = devices[0]
                .create_placed_resource(
                    &heap_primary,
                    frame_idx * texture_size,
                    &cross_adapter_desc,
                    ResourceStates::CopyDest,
                    None,
                )
                .expect("Cannot create placed resource on primary adapter");
            resource_primary
                .set_name(&format!("resource {} on primary device", frame_idx))
                .expect("Cannot set resource name");
            cross_adapter_resources[0][frame_idx] = MaybeUninit::new(resource_primary);

            let resource_secondary = devices[1]
                .create_placed_resource(
                    &heap_secondary,
                    frame_idx * texture_size,
                    &cross_adapter_desc,
                    match cross_adapter_textures_supported {
                        true => ResourceStates::PixelShaderResource,
                        false => ResourceStates::CopySource,
                    },
                    None,
                )
                .expect("Cannot create placed resource on secondary adapter");
            resource_secondary
                .set_name(&format!("resource {} on secondary device", frame_idx))
                .expect("Cannot set resource name");
            cross_adapter_resources[1][frame_idx] = MaybeUninit::new(resource_secondary);

            if !cross_adapter_textures_supported {
                // Without cross-adapter texture support the shared resource
                // is copied into a texture local to the secondary adapter.
                let secondary_adapter_texture = devices[1]
                    .create_committed_resource(
                        &HeapProperties::default().with_heap_type(HeapType::Default),
                        HeapFlags::None,
                        &ResourceDesc::default()
                            .with_dimension(ResourceDimension::Texture2D)
                            .with_format(Format::R8G8B8A8Unorm)
                            .with_width(WINDOW_WIDTH.into())
                            .with_height(WINDOW_HEIGHT.into())
                            .with_flags(ResourceFlags::AllowRenderTarget),
                        ResourceStates::Common,
                        None,
                    )
                    .expect("Cannot create render target on secondary adapter");
                secondary_adapter_texture
                    .set_name(&format!(
                        "secondary_adapter_texture {} on secondary device",
                        frame_idx
                    ))
                    .expect("Cannot set resource name");
                secondary_adapter_textures[frame_idx] =
                    MaybeUninit::new(secondary_adapter_texture);
            }
        }

        let cross_adapter_resources: [[Resource; FRAMES_IN_FLIGHT]; DEVICE_COUNT] =
            unsafe { std::mem::transmute(cross_adapter_resources) };

        let secondary_adapter_textures: [Resource; FRAMES_IN_FLIGHT] =
            unsafe { std::mem::transmute(secondary_adapter_textures) };

        let intermediate_blur_render_target;
        {
            let intermediate_render_target_desc = render_targets[0][0].get_desc();

            intermediate_blur_render_target = devices[1]
                .create_committed_resource(
                    &HeapProperties::default().with_heap_type(HeapType::Default),
                    HeapFlags::None,
                    &intermediate_render_target_desc,
                    ResourceStates::Common,
                    None,
                )
                .expect("Cannot create intermediate render target on secondary adapter");
            intermediate_blur_render_target
                .set_name("intermediate_blur_render_target")
                .expect("Cannot set resource name");

            // Its RTV goes in the slot right after the per-frame RTVs.
            let rtv_handle = rtv_heaps[1]
                .get_cpu_descriptor_handle_for_heap_start()
                .advance(FRAMES_IN_FLIGHT as u32, rtv_descriptor_handle_sizes[1]);
            devices[1]
                .create_render_target_view(&intermediate_blur_render_target, rtv_handle);
        }

        {
            // One SRV per frame, followed by the SRV of the intermediate target.
            let mut srv_handle = cbv_srv_heap.get_cpu_descriptor_handle_for_heap_start();
            for frame_idx in 0..FRAMES_IN_FLIGHT {
                let resource = match cross_adapter_textures_supported {
                    true => &cross_adapter_resources[1][frame_idx],
                    false => &secondary_adapter_textures[frame_idx],
                };
                devices[1].create_shader_resource_view(resource, None, srv_handle);
                srv_handle = srv_handle.advance(1, cbv_srv_descriptor_handle_sizes[1]);
            }

            devices[1].create_shader_resource_view(
                &intermediate_blur_render_target,
                None,
                srv_handle,
            );
        }

        // This block corresponds to LoadAssets() in the original sample
        let (root_signature, blur_root_signature) = create_root_signatures(&devices);
        trace!("Created root signatures");

        let (pso, blur_pso_u, blur_pso_v) =
            create_psos(&devices, &root_signature, &blur_root_signature);

        let (direct_command_lists, copy_command_list) = create_command_lists(
            &devices,
            frame_index,
            &direct_command_allocators,
            &copy_allocators,
            &pso,
            &blur_pso_u,
        );
        trace!("Created command lists");

        let (vertex_buffer, vertex_buffer_upload, vertex_buffer_view) =
            create_primary_vertex_buffer(&devices, &direct_command_lists);

        let (quad_vertex_buffer, quad_vertex_buffer_upload, quad_vertex_buffer_view) =
            create_blur_vertex_buffer(&devices, &direct_command_lists);
        trace!("Created secondary adapter quad vertex buffer");

        let depth_stencil = create_depth_stencil(&devices, &dsv_heap);
        trace!("Created depth stencil on primary adapter");

        let (triangle_constant_buffer, triangle_cb_data, triangle_cb_mapped_data) =
            create_triangle_constant_buffer(&devices);
        trace!("Created triangle constant buffer");

        let ps_loop_count = 0;
        let (workload_constant_buffer, workload_cb_data, workload_cb_mapped_data) =
            create_workload_constant_buffer(&devices, ps_loop_count);
        trace!("Created workload constant buffer");

        let blur_ps_loop_count = 0;
        let (
            blur_workload_constant_buffer,
            blur_workload_cb_data,
            blur_workload_cb_mapped_data,
        ) = create_blur_workload_constant_buffer(&devices, blur_ps_loop_count);
        trace!("Created blur workload constant buffer");

        let blur_constant_buffer = create_blur_constant_buffer(&devices);
        trace!("Created blur constant buffer");

        {
            // Close and submit the lists the helper functions recorded into.
            for device_idx in 0..DEVICE_COUNT {
                direct_command_lists[device_idx]
                    .close()
                    .expect("Cannot close command list");
                direct_command_queues[device_idx].execute_command_lists(
                    slice::from_ref(&direct_command_lists[device_idx]),
                );
            }
        }
        trace!("Executed command lists");

        let (
            frame_fence,
            render_fence,
            cross_adapter_fences,
            fence_events,
            cross_adapter_fence_value,
        ) = create_fences(&devices, &direct_command_queues);
        trace!("Created fences");

        Self {
            devices,
            debug_devices,
            info_queues,
            direct_command_queues,
            direct_command_queue_timestamp_frequencies,
            copy_command_queue,
            swapchain,
            frame_index,
            frames_since_last_update: 0,
            viewport,
            scissor_rect,
            // Software adapters get a much smaller initial workload.
            triangle_count: if is_software_adapter {
                MAX_TRIANGLE_COUNT / 50
            } else {
                MAX_TRIANGLE_COUNT / 2
            },
            rtv_heaps,
            dsv_heap,
            cbv_srv_heap,
            timestamp_result_buffers,
            current_times_index: 0,
            query_heaps,
            render_targets,
            direct_command_allocators,
            copy_allocators,
            cross_adapter_textures_supported,
            heap_primary,
            heap_secondary,
            cross_adapter_resources,
            secondary_adapter_textures,
            intermediate_blur_render_target,
            root_signature,
            blur_root_signature,
            pipeline_state: pso,
            blur_pipeline_states: [blur_pso_u, blur_pso_v],
            direct_command_lists,
            copy_command_list,
            vertex_buffer,
            vertex_buffer_upload,
            vertex_buffer_view,
            quad_vertex_buffer,
            quad_vertex_buffer_upload,
            quad_vertex_buffer_view,
            depth_stencil,
            triangle_constant_buffer,
            triangle_cb_data,
            triangle_cb_mapped_data,
            ps_loop_count: 0,
            workload_constant_buffer,
            workload_cb_data,
            workload_cb_mapped_data,
            blur_workload_constant_buffer,
            blur_workload_cb_data,
            blur_workload_cb_mapped_data,
            blur_ps_loop_count: 0,
            blur_constant_buffer,
            frame_fence,
            render_fence,
            cross_adapter_fences,
            fence_events,
            current_present_fence_value: 1,
            current_render_fence_value: 1,
            current_cross_adapter_fence_value: cross_adapter_fence_value,
            frame_fence_values: [0; FRAMES_IN_FLIGHT],
            draw_times: [0; MOVING_AVERAGE_FRAME_COUNT],
            blur_times: [0; MOVING_AVERAGE_FRAME_COUNT],
            draw_time_moving_average: 0,
            blur_time_moving_average: 0,
            rtv_descriptor_handle_sizes,
            cbv_srv_descriptor_handle_sizes,
        }
    }

    fn populate_command_lists(&mut self) {
        // Command list to render target the triangles on the primary adapter.
self.populate_primary_adapter_direct_command_list();
        trace!("Populated direct command list on primary adapter");

        // Command list to copy the render target to the shared heap on the primary adapter.
        self.populate_copy_command_list();
        trace!("Populated copy command list");

        // Command list to blur the render target and present.
        self.populate_secondary_adapter_command_list();
        trace!("Populated direct command list on secondary adapter");
    }

    /// Records the secondary adapter's direct command list for the current
    /// frame: an optional copy out of the shared resource, then two blur
    /// passes bracketed by timestamp queries.
    ///
    /// # Panics
    /// Panics if the allocator or command list cannot be reset or closed.
    fn populate_secondary_adapter_command_list(&mut self) {
        let adapter_idx = 1usize; // secondary
        let frame_index = self.frame_index;

        self.direct_command_allocators[adapter_idx][frame_index]
            .reset()
            .expect("Cannot reset direct command allocator on secondary adapter");

        let list = &self.direct_command_lists[adapter_idx];
        list.reset(
            &self.direct_command_allocators[adapter_idx][frame_index],
            Some(&self.blur_pipeline_states[0]),
        )
        .expect("Cannot reset direct command list on secondary adapter");

        if !self.cross_adapter_textures_supported {
            // The shared resource cannot be sampled directly, so copy it
            // into this adapter's own texture first.
            list.resource_barrier(slice::from_ref(&ResourceBarrier::new_transition(
                &ResourceTransitionBarrier::default()
                    .with_resource(&self.secondary_adapter_textures[frame_index])
                    .with_state_before(ResourceStates::PixelShaderResource)
                    .with_state_after(ResourceStates::CopyDest),
            )));

            let secondary_adapter_texture_desc =
                self.secondary_adapter_textures[frame_index].get_desc();

            let (texture_layout, _, _, _) = self.devices[adapter_idx]
                .get_copyable_footprints(
                    &secondary_adapter_texture_desc,
                    0,
                    1,
                    ByteCount(0),
                );

            let dest = TextureCopyLocation::new_subresource_index(
                &self.secondary_adapter_textures[frame_index],
                0,
            );
            let src = TextureCopyLocation::new_placed_footprint(
                &self.cross_adapter_resources[adapter_idx][frame_index],
                texture_layout[0],
            );

            let resource_box = Box::default()
                .with_left(0)
                .with_top(0)
                .with_right(WINDOW_WIDTH)
                .with_bottom(WINDOW_HEIGHT);

            list.copy_texture_region(dest, 0, 0, 0, src, Some(&resource_box));

            list.resource_barrier(slice::from_ref(&ResourceBarrier::new_transition(
                &ResourceTransitionBarrier::default()
                    .with_resource(&self.secondary_adapter_textures[frame_index])
                    .with_state_before(ResourceStates::CopyDest)
                    .with_state_after(ResourceStates::PixelShaderResource),
            )));
        }

        // Each frame owns two consecutive timestamp slots: start and end.
        let timestamp_heap_index = 2 * frame_index;
        list.end_query(
            &self.query_heaps[adapter_idx],
            QueryType::Timestamp,
            timestamp_heap_index as u32,
        );

        list.set_graphics_root_signature(&self.blur_root_signature);
        list.set_descriptor_heaps(slice::from_ref(&self.cbv_srv_heap));
        list.set_viewports(slice::from_ref(&self.viewport));
        list.set_scissor_rects(slice::from_ref(&self.scissor_rect));

        list.resource_barrier(slice::from_ref(&ResourceBarrier::new_transition(
            &ResourceTransitionBarrier::default()
                .with_resource(&self.intermediate_blur_render_target)
                .with_state_before(ResourceStates::PixelShaderResource)
                .with_state_after(ResourceStates::RenderTarget),
        )));

        list.set_primitive_topology(PrimitiveTopology::TriangleStrip);
        list.set_vertex_buffers(0, slice::from_ref(&self.quad_vertex_buffer_view));

        list.set_graphics_root_constant_buffer_view(
            0,
            self.blur_constant_buffer.get_gpu_virtual_address(),
        );
        // The workload buffer holds one 256-byte slot per frame.
        list.set_graphics_root_constant_buffer_view(
            2,
            GpuVirtualAddress(
                self.blur_workload_constant_buffer
                    .get_gpu_virtual_address()
                    .0
                    + (frame_index * size_of::<WorkloadConstantBufferData>()) as u64,
            ),
        );

        // Draw the fullscreen quad - Blur pass #1.
        {
            // Sample this frame's source texture, render into the
            // intermediate target (RTV slot after the per-frame RTVs).
            let srv_handle = self
                .cbv_srv_heap
                .get_gpu_descriptor_handle_for_heap_start()
                .advance(
                    frame_index as u32,
                    self.cbv_srv_descriptor_handle_sizes[adapter_idx],
                );
            list.set_graphics_root_descriptor_table(1, srv_handle);

            let rtv_handle = self.rtv_heaps[adapter_idx]
                .get_cpu_descriptor_handle_for_heap_start()
                .advance(
                    FRAMES_IN_FLIGHT as u32,
                    self.rtv_descriptor_handle_sizes[adapter_idx],
                );
            list.set_render_targets(slice::from_ref(&rtv_handle), false, None);

            list.draw_instanced(4, 1, 0, 0);
        }

        // Draw the fullscreen quad - Blur pass #2.
        {
            list.set_pipeline_state(&self.blur_pipeline_states[1]);

            let barriers = [
                ResourceBarrier::new_transition(
                    &ResourceTransitionBarrier::default()
                        .with_resource(&self.render_targets[adapter_idx][frame_index])
                        .with_state_before(ResourceStates::Common)
                        .with_state_after(ResourceStates::RenderTarget),
                ),
                ResourceBarrier::new_transition(
                    &ResourceTransitionBarrier::default()
                        .with_resource(&self.intermediate_blur_render_target)
                        .with_state_before(ResourceStates::RenderTarget)
                        .with_state_after(ResourceStates::PixelShaderResource),
                ),
            ];
            list.resource_barrier(&barriers);

            // Sample the intermediate target, render into this frame's
            // back buffer.
            let srv_handle = self
                .cbv_srv_heap
                .get_gpu_descriptor_handle_for_heap_start()
                .advance(
                    FRAMES_IN_FLIGHT as u32,
                    self.cbv_srv_descriptor_handle_sizes[adapter_idx],
                );
            list.set_graphics_root_descriptor_table(1, srv_handle);

            let rtv_handle = self.rtv_heaps[adapter_idx]
                .get_cpu_descriptor_handle_for_heap_start()
                .advance(
                    frame_index as u32,
                    self.rtv_descriptor_handle_sizes[adapter_idx],
                );
            list.set_render_targets(slice::from_ref(&rtv_handle), false, None);

            list.draw_instanced(4, 1, 0, 0);
        }

        list.resource_barrier(slice::from_ref(&ResourceBarrier::new_transition(
            &ResourceTransitionBarrier::default()
                .with_resource(&self.render_targets[adapter_idx][frame_index])
                .with_state_before(ResourceStates::RenderTarget)
                .with_state_after(ResourceStates::Common),
        )));

        list.end_query(
            &self.query_heaps[adapter_idx],
            QueryType::Timestamp,
            timestamp_heap_index as u32 + 1,
        );
        // Resolve both timestamps into the readback buffer at the slot's
        // byte offset.
        list.resolve_query_data(
            &self.query_heaps[adapter_idx],
            QueryType::Timestamp,
            timestamp_heap_index as u32,
            2,
            &self.timestamp_result_buffers[adapter_idx],
            ByteCount::from(timestamp_heap_index * size_of::<u64>()),
        );

        list.close()
            .expect("Cannot close command list on secondary adapter");
    }

    /// Records the copy command list that transfers the primary adapter's
    /// render target for the current frame into the cross-adapter resource.
    ///
    /// # Panics
    /// Panics if the allocator or command list cannot be reset or closed.
    fn populate_copy_command_list(&mut self) {
        let adapter_idx = 0usize;
        let frame_index = self.frame_index;

        self.copy_allocators[frame_index]
            .reset()
            .expect("Cannot reset copy command allocator on primary device");

        self.copy_command_list
            .reset(&self.copy_allocators[frame_index], None)
            .expect("Cannot reset copy command list on primary adapter");

        if self.cross_adapter_textures_supported {
            // Same layout on both sides: a whole-resource copy is enough.
            self.copy_command_list.copy_resource(
                &self.render_targets[adapter_idx][frame_index],
                &self.cross_adapter_resources[adapter_idx][frame_index],
            );
        } else {
            // Otherwise copy the texture into the shared resource through a
            // placed footprint.
            let render_target_desc =
                self.render_targets[adapter_idx][frame_index].get_desc();

            let (render_target_layout, _, _, _) = self.devices[adapter_idx]
                .get_copyable_footprints(&render_target_desc, 0, 1, ByteCount(0));

            let dest = TextureCopyLocation::new_placed_footprint(
                &self.cross_adapter_resources[adapter_idx][frame_index],
                render_target_layout[0],
            );
            let src = TextureCopyLocation::new_subresource_index(
                &self.render_targets[adapter_idx][frame_index],
                0,
            );

            let resource_box = Box::default()
                .with_left(0)
                .with_top(0)
                .with_right(WINDOW_WIDTH)
                .with_bottom(WINDOW_HEIGHT);

            self.copy_command_list.copy_texture_region(
                dest,
                0,
                0,
                0,
                src,
                Some(&resource_box),
            );
        }
self.copy_command_list
            .close()
            .expect("Cannot close copy command list");
    }

    /// Records the primary adapter's direct command list for the current
    /// frame: clears the targets and draws every triangle, bracketed by
    /// timestamp queries.
    ///
    /// # Panics
    /// Panics if the allocator or command list cannot be reset or closed.
    fn populate_primary_adapter_direct_command_list(&mut self) {
        let adapter_idx = 0usize; // primary
        let frame_index = self.frame_index;

        self.direct_command_allocators[adapter_idx][frame_index]
            .reset()
            .expect("Cannot reset direct command allocator on primary device");

        let list = &self.direct_command_lists[adapter_idx];
        list.reset(
            &self.direct_command_allocators[adapter_idx][frame_index],
            Some(&self.pipeline_state),
        )
        .expect("Cannot reset direct command list on primary adapter");

        // Each frame owns two consecutive timestamp slots: start and end.
        let timestamp_heap_index = 2 * frame_index;
        list.end_query(
            &self.query_heaps[adapter_idx],
            QueryType::Timestamp,
            timestamp_heap_index as u32,
        );

        list.set_graphics_root_signature(&self.root_signature);
        list.set_viewports(slice::from_ref(&self.viewport));
        list.set_scissor_rects(slice::from_ref(&self.scissor_rect));

        list.resource_barrier(slice::from_ref(&ResourceBarrier::new_transition(
            &ResourceTransitionBarrier::default()
                .with_resource(&self.render_targets[adapter_idx][frame_index])
                .with_state_before(ResourceStates::Common)
                .with_state_after(ResourceStates::RenderTarget),
        )));

        let rtv_handle = self.rtv_heaps[adapter_idx]
            .get_cpu_descriptor_handle_for_heap_start()
            .advance(
                frame_index as u32,
                self.rtv_descriptor_handle_sizes[adapter_idx],
            );
        let dsv_handle = self.dsv_heap.get_cpu_descriptor_handle_for_heap_start();

        list.set_render_targets(slice::from_ref(&rtv_handle), false, Some(dsv_handle));
        list.clear_render_target_view(rtv_handle, CLEAR_COLOR, &[]);
        list.clear_depth_stencil_view(dsv_handle, ClearFlags::Depth, 1., 0, &[]);

        list.set_primitive_topology(PrimitiveTopology::TriangleStrip);
        list.set_vertex_buffers(0, slice::from_ref(&self.vertex_buffer_view));

        // The workload buffer holds one 256-byte slot per frame.
        list.set_graphics_root_constant_buffer_view(
            1,
            GpuVirtualAddress(
                self.workload_constant_buffer.get_gpu_virtual_address().0
                    + (frame_index * size_of::<WorkloadConstantBufferData>()) as u64,
            ),
        );

        // The triangle buffer holds MAX_TRIANGLE_COUNT slots per frame; the
        // start of this frame's region is loop-invariant.
        let cb_virtual_address = self.triangle_constant_buffer.get_gpu_virtual_address();
        let frame_base = cb_virtual_address.0
            + (frame_index * MAX_TRIANGLE_COUNT as usize * size_of::<SceneConstantBuffer>())
                as u64;

        for tri_idx in 0..self.triangle_count {
            let cb_location = GpuVirtualAddress(
                frame_base + (tri_idx as usize * size_of::<SceneConstantBuffer>()) as u64,
            );
            list.set_graphics_root_constant_buffer_view(0, cb_location);
            list.draw_instanced(3, 1, 0, 0);
        }

        list.resource_barrier(slice::from_ref(&ResourceBarrier::new_transition(
            &ResourceTransitionBarrier::default()
                .with_resource(&self.render_targets[adapter_idx][frame_index])
                .with_state_before(ResourceStates::RenderTarget)
                .with_state_after(ResourceStates::Common),
        )));

        list.end_query(
            &self.query_heaps[adapter_idx],
            QueryType::Timestamp,
            timestamp_heap_index as u32 + 1,
        );
        list.resolve_query_data(
            &self.query_heaps[adapter_idx],
            QueryType::Timestamp,
            timestamp_heap_index as u32,
            2,
            &self.timestamp_result_buffers[adapter_idx],
            ByteCount::from(timestamp_heap_index * size_of::<u64>()),
        );

        list.close().expect("Cannot close command list");
    }

    /// Records and submits all command lists for one frame, presents it,
    /// signals the frame fence and advances to the next back buffer.
    ///
    /// # Panics
    /// Panics if presenting or signalling fails.
    fn render(&mut self) {
        self.populate_command_lists();
        self.execute_command_lists();

        self.swapchain
            .present(1, PresentFlags::None)
            .expect("Cannot present");

        // Remember which fence value marks completion of this frame.
        self.direct_command_queues[1]
            .signal(&self.frame_fence, self.current_present_fence_value)
            .expect("Cannot signal on queue");
        self.frame_fence_values[self.frame_index] = self.current_present_fence_value;
        self.current_present_fence_value += 1;

        self.move_to_next_frame();
    }

    /// Switches `frame_index` to the swapchain's current back buffer and
    /// blocks until the work previously submitted for that buffer is done.
    ///
    /// # Panics
    /// Panics if the fence event cannot be armed.
    fn move_to_next_frame(&mut self) {
        self.frame_index = self.swapchain.get_current_back_buffer_index() as usize;

        let pending_fence_value = self.frame_fence_values[self.frame_index];
        let completed_fence_value = self.frame_fence.get_completed_value();
        if completed_fence_value < pending_fence_value {
            self.frame_fence
                .set_event_on_completion(pending_fence_value, &self.fence_events[1])
                .expect("Cannot set fence event");
            self.fence_events[1].wait(None);
        }
    }

    /// Submits the three recorded command lists, chaining them with fences:
    /// primary render -> copy to shared heap -> secondary blur.
    ///
    /// # Panics
    /// Panics if a queue signal or wait fails.
    fn execute_command_lists(&mut self) {
        // Render on the primary adapter and signal completion.
        {
            self.direct_command_queues[0]
                .execute_command_lists(slice::from_ref(&self.direct_command_lists[0]));
            self.direct_command_queues[0]
                .signal(&self.render_fence, self.current_render_fence_value)
                .expect("Cannot signal direct command queue 0");
        }

        // The copy queue waits for the render, copies, then signals the
        // primary side of the cross-adapter fence.
        {
            self.copy_command_queue
                .wait(&self.render_fence, self.current_render_fence_value)
                .expect("Cannot wait on fence");
            self.current_render_fence_value += 1;

            self.copy_command_queue
                .execute_command_lists(slice::from_ref(&self.copy_command_list));
            self.copy_command_queue
                .signal(
                    &self.cross_adapter_fences[0],
                    self.current_cross_adapter_fence_value,
                )
                .expect("Cannot signal copy command queue");
        }

        // The secondary adapter waits on its side of the cross-adapter fence
        // before blurring.
        {
            self.direct_command_queues[1]
                .wait(
                    &self.cross_adapter_fences[1],
                    self.current_cross_adapter_fence_value,
                )
                .expect("Cannot wait on fence");
            self.current_cross_adapter_fence_value += 1;

            self.direct_command_queues[1]
                .execute_command_lists(slice::from_ref(&self.direct_command_lists[1]));
        }
    }

    /// Per-frame CPU update: reads timestamps, adjusts workloads and
    /// refreshes the constant buffers.
    fn update(&mut self) {
        self.get_timestamps_data();
        trace!("Got timestamp data");

        self.adjust_workload_sizes();
        trace!("Adjusted workloads");

        self.update_workload_constant_buffers();
        trace!("Updated workload constant buffers");

        self.update_scene_constant_buffer();
        trace!("Updated scene constant buffer");
    }

    /// Advances every active triangle along x, respawning those that leave
    /// the right bound at the left bound with a new random velocity, then
    /// copies the data into the current frame's region of the mapped buffer.
    ///
    /// # Panics
    /// Panics if `triangle_count` exceeds the length of `triangle_cb_data`.
    fn update_scene_constant_buffer(&mut self) {
        let offset_bounds = 2.5f32;
        let mut rng = rand::thread_rng();
        let active_count = self.triangle_count as usize;

        // Slicing up front keeps the out-of-range panic of the indexed loop
        // and guarantees the copy below never reads past the vector.
        for triangle in &mut self.triangle_cb_data[..active_count] {
            triangle.offset.x += triangle.velocity.x;
            if triangle.offset.x > offset_bounds {
                triangle.velocity.x = rng.gen_range(0.01..0.02);
                triangle.offset.x = -offset_bounds;
            }
        }

        // SAFETY: relies on the mapped buffer holding MAX_TRIANGLE_COUNT
        // `SceneConstantBuffer` slots per frame in flight.
        // NOTE(review): the allocation is outside this view — confirm.
        let dest = unsafe {
            self.triangle_cb_mapped_data.offset(
                self.frame_index as isize
                    * MAX_TRIANGLE_COUNT as isize
                    * size_of::<SceneConstantBuffer>() as isize,
            ) as *mut SceneConstantBuffer
        };
        unsafe {
            copy_nonoverlapping(self.triangle_cb_data.as_ptr(), dest, active_count)
        };
    }

    /// Writes the current loop counts into this frame's slot of the two
    /// mapped workload constant buffers.
    fn update_workload_constant_buffers(&mut self) {
        // The mapped pointers are `*mut u8`. Cast to the element type BEFORE
        // `add`, so the offset is `frame_index` 256-byte slots (matching the
        // GPU address used when binding) rather than `frame_index` bytes.
        //
        // SAFETY: relies on each mapped buffer holding one
        // `WorkloadConstantBufferData` slot per frame in flight.
        // NOTE(review): the allocation is outside this view — confirm.
        let workload_dst = unsafe {
            (self.workload_cb_mapped_data as *mut WorkloadConstantBufferData)
                .add(self.frame_index)
        };
        self.workload_cb_data.loop_count = self.ps_loop_count;
        let workload_src = &self.workload_cb_data as *const WorkloadConstantBufferData;
        unsafe {
            copy_nonoverlapping(workload_src, workload_dst, 1);
        }

        let blur_workload_dst = unsafe {
            (self.blur_workload_cb_mapped_data as *mut WorkloadConstantBufferData)
                .add(self.frame_index)
        };
        self.blur_workload_cb_data.loop_count = self.blur_ps_loop_count;
        let blur_workload_src =
            &self.blur_workload_cb_data as *const WorkloadConstantBufferData;
        unsafe {
            copy_nonoverlapping(blur_workload_src, blur_workload_dst, 1);
        }
    }

    fn adjust_workload_sizes(&mut self) {
        self.frames_since_last_update += 1;
        if self.frames_since_last_update > MOVING_AVERAGE_FRAME_COUNT as u32 {
            self.draw_time_moving_average = 0;
            self.blur_time_moving_average = 0;
            for frame_idx in 0..MOVING_AVERAGE_FRAME_COUNT {
                self.draw_time_moving_average += self.draw_times[frame_idx];
                self.blur_time_moving_average += self.blur_times[frame_idx];
            }
            self.draw_time_moving_average /= MOVING_AVERAGE_FRAME_COUNT as u64;
            self.blur_time_moving_average /= MOVING_AVERAGE_FRAME_COUNT as u64;
            self.frames_since_last_update = 0;
            if ALLOW_SHADER_DYNAMIC_WORKLOAD {
                let desired_blur_ps_time_us = 20000u64;
                if self.blur_time_moving_average < desired_blur_ps_time_us
                    || self.blur_ps_loop_count != 0
                {
                    let time_delta = (desired_blur_ps_time_us as f32
                        - self.blur_time_moving_average
as f32) / self.blur_time_moving_average as f32; if !(-0.05..=0.01).contains(&time_delta) { let step_size = 1f32.max(self.blur_ps_loop_count as f32); let loop_count_delta = (step_size * time_delta) as i32; if loop_count_delta > 0 { self.blur_ps_loop_count += loop_count_delta as u32; } else { self.blur_ps_loop_count -= loop_count_delta as u32; } } } } let desired_draw_ps_time_us = self.blur_time_moving_average + (self.blur_time_moving_average as f32 * 0.1) as u64; let time_delta = (desired_draw_ps_time_us as f32 - self.draw_time_moving_average as f32) / self.draw_time_moving_average as f32; if !(-0.1..=0.01).contains(&time_delta) { if ALLOW_DRAW_DYNAMIC_WORKLOAD { let step_size = 1f32.max(self.triangle_count as f32); self.triangle_count = min( self.triangle_count + (step_size * time_delta) as u32, MAX_TRIANGLE_COUNT, ); } else if ALLOW_SHADER_DYNAMIC_WORKLOAD { let step_size = 1f32.max(self.ps_loop_count as f32); let loop_count_delta = (step_size * time_delta) as i32; if loop_count_delta > 0 { self.ps_loop_count += loop_count_delta as u32; } else { self.ps_loop_count -= loop_count_delta as u32; } } } } if self.frames_since_last_update % TIMESTAMP_PRINT_FREQUENCY as u32 == 0 { trace!("{} triangles; render: {} us, ps loop count: {}; blur: {} us, ps loop count: {} ", self.triangle_count, self.draw_time_moving_average, self.ps_loop_count, self.blur_time_moving_average, self.blur_ps_loop_count ); } } fn get_timestamps_data(&mut self) { let oldest_frame_index = self.frame_index; assert!( self.frame_fence_values[oldest_frame_index] <= self.frame_fence.get_completed_value() ); let empty_range = Range::default(); let moving_average = [&mut self.draw_times, &mut self.blur_times]; for device_idx in 0..DEVICE_COUNT { let range_begin = ByteCount::from( 2 * oldest_frame_index as u32 * size_of::() as u32, ); let read_range = Range::default() .with_begin(range_begin) .with_end(range_begin + ByteCount(2 * size_of::() as u64)); let mapped_data = 
self.timestamp_result_buffers[device_idx] .map(0, Some(&read_range)) .expect("Cannot map timestamp result buffer") as *mut u64; let (begin, end) = unsafe { ( *mapped_data.offset(read_range.begin().0 as isize), *mapped_data.offset(read_range.begin().0 as isize + 1), ) }; let timestamp_delta = end - begin; self.timestamp_result_buffers[device_idx] .unmap(0, Some(&empty_range)); let gpu_time_ms = (timestamp_delta * 1000000) / self.direct_command_queue_timestamp_frequencies[device_idx]; moving_average[device_idx][self.current_times_index] = gpu_time_ms; } self.current_times_index = (self.current_times_index + 1) % MOVING_AVERAGE_FRAME_COUNT; } } fn create_fences( devices: &[Device; DEVICE_COUNT], direct_command_queues: &[CommandQueue; DEVICE_COUNT], ) -> (Fence, Fence, [Fence; DEVICE_COUNT], [Win32Event; 2], u64) { let frame_fence = devices[1] .create_fence(0, FenceFlags::None) .expect("Cannot create fence"); let render_fence = devices[0] .create_fence(0, FenceFlags::None) .expect("Cannot create fence"); let cross_adapter_fence_primary = devices[0] .create_fence(0, FenceFlags::Shared | FenceFlags::CrossAdapter) .expect("Cannot create fence"); let fence_handle = devices[0] .create_shared_handle( &cross_adapter_fence_primary.clone().into(), "CrossAdapterFence", ) .expect("Cannot create shared handle for cross adapter fence"); let cross_adapter_fence_secondary = devices[1] .open_shared_fence_handle(fence_handle) .expect("Cannot open shared fence handle"); fence_handle.close(); let cross_adapter_fences = [cross_adapter_fence_primary, cross_adapter_fence_secondary]; let mut cross_adapter_fence_value = 1; let mut fence_events: [MaybeUninit; 2] = unsafe { MaybeUninit::uninit().assume_init() }; for device_idx in 0..DEVICE_COUNT { fence_events[device_idx] = MaybeUninit::new(Win32Event::default()); direct_command_queues[device_idx] .signal( &cross_adapter_fences[device_idx], cross_adapter_fence_value, ) .expect("Cannot signal command queue"); cross_adapter_fences[device_idx] 
.set_event_on_completion(cross_adapter_fence_value, unsafe { fence_events[device_idx].assume_init_ref() }) .expect("Cannot set event on fence"); unsafe { fence_events[device_idx].assume_init_ref() }.wait(None); cross_adapter_fence_value += 1; } ( frame_fence, render_fence, cross_adapter_fences, unsafe { std::mem::transmute(fence_events) }, cross_adapter_fence_value, ) } fn create_blur_constant_buffer(devices: &[Device; DEVICE_COUNT]) -> Resource { let blur_constant_buffer = devices[1] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Upload), HeapFlags::None, &ResourceDesc::default() .with_dimension(ResourceDimension::Buffer) .with_layout(TextureLayout::RowMajor) .with_width(size_of::() as u64), ResourceStates::GenericRead, None, ) .expect("Cannot create blur constant buffer"); blur_constant_buffer .set_name("Blur constant buffer") .expect("Cannot set name on resource"); let buffer_data = BlurConstantBufferData { texture_dimensions: Vec2::new( WINDOW_WIDTH as f32, WINDOW_HEIGHT as f32, ), offset: 0.5, padding: [0.; 61], }; let mapped_data = blur_constant_buffer .map(0, None) .expect("Cannot map blur_constant_buffer"); unsafe { copy_nonoverlapping( &buffer_data, mapped_data as *mut BlurConstantBufferData, 1, ); } blur_constant_buffer.unmap(0, None); blur_constant_buffer } fn create_blur_workload_constant_buffer( devices: &[Device; DEVICE_COUNT], blur_ps_loop_count: u32, ) -> (Resource, WorkloadConstantBufferData, *mut u8) { let blur_workload_constant_buffer_size = ByteCount::from( size_of::() as u32 * FRAMES_IN_FLIGHT as u32, ); let blur_workload_constant_buffer = devices[1] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Upload), HeapFlags::None, &ResourceDesc::default() .with_dimension(ResourceDimension::Buffer) .with_layout(TextureLayout::RowMajor) .with_width(blur_workload_constant_buffer_size.0.into()), ResourceStates::GenericRead, None, ) .expect("Cannot create blur workload constant buffer"); 
blur_workload_constant_buffer .set_name("Blur workload constant buffer") .expect("Cannot set name on resource"); let buffer_data = WorkloadConstantBufferData { loop_count: blur_ps_loop_count, padding: [0.; 63], }; let mapped_data = blur_workload_constant_buffer .map(0, None) .expect("Cannot map blur_workload_constant_buffer"); unsafe { copy_nonoverlapping( &buffer_data, mapped_data as *mut WorkloadConstantBufferData, 1, ); } (blur_workload_constant_buffer, buffer_data, mapped_data) } fn create_workload_constant_buffer( devices: &[Device; DEVICE_COUNT], ps_loop_count: u32, ) -> (Resource, WorkloadConstantBufferData, *mut u8) { let workload_constant_buffer_size = ByteCount::from( size_of::() as u32 * FRAMES_IN_FLIGHT as u32, ); let workload_constant_buffer = devices[0] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Upload), HeapFlags::None, &ResourceDesc::default() .with_dimension(ResourceDimension::Buffer) .with_layout(TextureLayout::RowMajor) .with_width(workload_constant_buffer_size.0.into()), ResourceStates::GenericRead, None, ) .expect("Cannot create workload constant buffer"); workload_constant_buffer .set_name("Workload constant buffer") .expect("Cannot set name on resource"); let buffer_data = WorkloadConstantBufferData { loop_count: ps_loop_count, padding: [0.; 63], }; let mapped_data = workload_constant_buffer .map(0, None) .expect("Cannot map workload_constant_buffer buffer"); unsafe { copy_nonoverlapping( &buffer_data, mapped_data as *mut WorkloadConstantBufferData, 1, ); } (workload_constant_buffer, buffer_data, mapped_data) } fn create_triangle_constant_buffer( devices: &[Device; DEVICE_COUNT], ) -> (Resource, Vec, *mut u8) { let constant_buffer_size = ByteCount::from( size_of::() as u32 * MAX_TRIANGLE_COUNT * FRAMES_IN_FLIGHT as u32, ); let constant_buffer = devices[0] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Upload), HeapFlags::None, &ResourceDesc::default() 
.with_dimension(ResourceDimension::Buffer) .with_layout(TextureLayout::RowMajor) .with_width(constant_buffer_size.0.into()), ResourceStates::GenericRead, None, ) .expect("Cannot create constant buffer"); constant_buffer .set_name("Constant buffer") .expect("Cannot set name on resource"); let camera = Camera::default(); let world = Mat4::identity(); let view = make_view_matrix(camera.position, camera.look_at); let proj = make_projection_matrix(&camera); let mut rng = rand::thread_rng(); let mut constant_buffer_data = vec![]; for tri_idx in 0..MAX_TRIANGLE_COUNT as usize { constant_buffer_data.push(SceneConstantBuffer { velocity: Vec4::new(rng.gen_range(0.01..0.02), 0., 0., 0.), offset: Vec4::new( rng.gen_range(-5. ..-1.5), rng.gen_range(-1. ..1.), rng.gen_range(0. ..2.), 0., ), color: Vec4::new( rng.gen_range(0.5..1.), rng.gen_range(0.5..1.), rng.gen_range(0.5..1.), 1., ), projection: proj * view * world, padding: [0.; 36], }); } let mapped_data = constant_buffer .map(0, None) .expect("Cannot map constant buffer"); unsafe { copy_nonoverlapping( constant_buffer_data.as_ptr(), mapped_data as *mut SceneConstantBuffer, MAX_TRIANGLE_COUNT as usize, ); } (constant_buffer, constant_buffer_data, mapped_data) } fn create_depth_stencil( devices: &[Device; DEVICE_COUNT], dsv_heap: &DescriptorHeap, ) -> Resource { let depth_stencil_desc = DepthStencilViewDesc::default() .with_format(Format::D32Float) .with_view_dimension(DsvDimension::Texture2D); let clear_value = ClearValue::default() .with_format(Format::D32Float) .with_depth_stencil( &DepthStencilValue::default().with_depth(1.).with_stencil(0), ); let depth_stencil = devices[0] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Default), HeapFlags::None, &ResourceDesc::default() .with_dimension(ResourceDimension::Texture2D) .with_format(Format::D32Float) .with_width(WINDOW_WIDTH.into()) .with_height(WINDOW_HEIGHT.into()) .with_flags(ResourceFlags::AllowDepthStencil), ResourceStates::DepthWrite, 
Some(&clear_value), ) .expect("Cannot create depth stencil on primary adapter"); depth_stencil .set_name("Depth stencil") .expect("Cannot set name on resource"); devices[0].create_depth_stencil_view( &depth_stencil, &depth_stencil_desc, dsv_heap.get_cpu_descriptor_handle_for_heap_start(), ); depth_stencil } fn create_blur_vertex_buffer( devices: &[Device; DEVICE_COUNT], direct_command_lists: &[CommandList; DEVICE_COUNT], ) -> (Resource, Resource, VertexBufferView) { let quad_vertices = [ BlurVertex { position: Vec4::new(-1.0, -1.0, 0.0, 1.0), uv: Vec2::new(0.0, 0.0), }, BlurVertex { position: Vec4::new(-1.0, 1.0, 0.0, 1.0), uv: Vec2::new(0.0, 1.0), }, BlurVertex { position: Vec4::new(1.0, -1.0, 0.0, 1.0), uv: Vec2::new(1.0, 0.0), }, BlurVertex { position: Vec4::new(1.0, 1.0, 0.0, 1.0), uv: Vec2::new(1.0, 1.0), }, ]; let quad_vertex_buffer_size = ByteCount::from(quad_vertices.len() * size_of::()); let quad_vertex_buffer = devices[1] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Default), HeapFlags::None, &ResourceDesc::default() .with_dimension(ResourceDimension::Buffer) .with_layout(TextureLayout::RowMajor) .with_width(quad_vertex_buffer_size.0.into()), ResourceStates::CopyDest, None, ) .expect("Cannot create quad_vertex_buffer"); quad_vertex_buffer .set_name("Quad vertex buffer") .expect("Cannot set name on resource"); let quad_vertex_buffer_upload = devices[1] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Upload), HeapFlags::None, &ResourceDesc::default() .with_dimension(ResourceDimension::Buffer) .with_layout(TextureLayout::RowMajor) .with_width(quad_vertex_buffer_size.0.into()), ResourceStates::GenericRead, None, ) .expect("Cannot create quad_vertex_buffer"); quad_vertex_buffer_upload .set_name("Quad vertex buffer upload") .expect("Cannot set name on resource"); let quad_vertex_data = SubresourceData::default() .with_data(&quad_vertices) .with_row_pitch(quad_vertex_buffer_size) 
.with_slice_pitch(quad_vertex_buffer_size); direct_command_lists[1] .update_subresources_heap_alloc( &quad_vertex_buffer, &quad_vertex_buffer_upload, ByteCount(0), 0, 1, slice::from_ref(&quad_vertex_data), ) .expect("Cannot upload quad_vertex buffer"); trace!("Uploaded quad vertex buffer"); direct_command_lists[1].resource_barrier(slice::from_ref( &ResourceBarrier::new_transition( &ResourceTransitionBarrier::default() .with_resource(&quad_vertex_buffer) .with_state_before(ResourceStates::CopyDest) .with_state_after(ResourceStates::VertexAndConstantBuffer), ), )); let quad_vertex_buffer_view = VertexBufferView::default() .with_buffer_location(quad_vertex_buffer.get_gpu_virtual_address()) .with_size_in_bytes(quad_vertex_buffer_size) .with_stride_in_bytes( ByteCount::from(std::mem::size_of::()), ); ( quad_vertex_buffer, quad_vertex_buffer_upload, quad_vertex_buffer_view, ) } fn create_primary_vertex_buffer( devices: &[Device; DEVICE_COUNT], direct_command_lists: &[CommandList; DEVICE_COUNT], ) -> (Resource, Resource, VertexBufferView) { let triangle_vertices = [ Vertex { position: Vec3::new(0., TRIANGLE_HALF_WIDTH, TRIANGLE_DEPTH), }, Vertex { position: Vec3::new( TRIANGLE_HALF_WIDTH, -TRIANGLE_HALF_WIDTH, TRIANGLE_DEPTH, ), }, Vertex { position: Vec3::new( -TRIANGLE_HALF_WIDTH, -TRIANGLE_HALF_WIDTH, TRIANGLE_DEPTH, ), }, ]; let vertex_buffer_size = ByteCount::from(triangle_vertices.len() * size_of::()); let vertex_buffer = devices[0] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Default), HeapFlags::None, &ResourceDesc::default() .with_dimension(ResourceDimension::Buffer) .with_layout(TextureLayout::RowMajor) .with_width(vertex_buffer_size.0.into()), ResourceStates::CopyDest, None, ) .expect("Cannot create vertex_buffer"); vertex_buffer .set_name("Vertex buffer") .expect("Cannot set name on resource"); let vertex_buffer_upload = devices[0] .create_committed_resource( &HeapProperties::default().with_heap_type(HeapType::Upload), 
HeapFlags::None, &ResourceDesc::default() .with_dimension(ResourceDimension::Buffer) .with_layout(TextureLayout::RowMajor) .with_width(vertex_buffer_size.0.into()), ResourceStates::GenericRead, None, ) .expect("Cannot create vertex_buffer"); vertex_buffer_upload .set_name("Vertex buffer upload") .expect("Cannot set name on resource"); let vertex_data = SubresourceData::default() .with_data(&triangle_vertices) .with_row_pitch(vertex_buffer_size) .with_slice_pitch(vertex_buffer_size); direct_command_lists[0] .update_subresources_heap_alloc( &vertex_buffer, &vertex_buffer_upload, ByteCount(0), 0, 1, slice::from_ref(&vertex_data), ) .expect("Cannot upload vertex buffer"); trace!("Uploaded vertex buffer"); direct_command_lists[0].resource_barrier(slice::from_ref( &ResourceBarrier::new_transition( &ResourceTransitionBarrier::default() .with_resource(&vertex_buffer) .with_state_before(ResourceStates::CopyDest) .with_state_after(ResourceStates::VertexAndConstantBuffer), ), )); let vertex_buffer_view = VertexBufferView::default() .with_buffer_location(vertex_buffer.get_gpu_virtual_address()) .with_stride_in_bytes(ByteCount::from(std::mem::size_of::())) .with_size_in_bytes(vertex_buffer_size); trace!("Created primary adapter vertex buffer"); (vertex_buffer, vertex_buffer_upload, vertex_buffer_view) } fn create_command_lists( devices: &[Device; DEVICE_COUNT], frame_index: usize, direct_command_allocators: &[[CommandAllocator; FRAMES_IN_FLIGHT]; DEVICE_COUNT], copy_allocators: &[CommandAllocator; FRAMES_IN_FLIGHT], pso: &PipelineState, blur_pso_u: &PipelineState, ) -> ([CommandList; DEVICE_COUNT], CommandList) { let direct_command_lists = [ devices[0] .create_command_list( CommandListType::Direct, &direct_command_allocators[0][frame_index], Some(pso), // None, ) .expect("Cannot create direct command list"), devices[1] .create_command_list( CommandListType::Direct, &direct_command_allocators[1][frame_index], Some(blur_pso_u), // None, ) .expect("Cannot create direct command 
list"), ]; let copy_command_list = devices[0] .create_command_list( CommandListType::Copy, ©_allocators[frame_index], Some(pso), // None, ) .expect("Cannot create copy command list"); copy_command_list .close() .expect("Cannot close command list"); (direct_command_lists, copy_command_list) } fn create_psos( devices: &[Device; DEVICE_COUNT], root_signature: &RootSignature, blur_root_signature: &RootSignature, ) -> (PipelineState, PipelineState, PipelineState) { let vertex_shader = compile_shader( "VertexShader", &std::fs::read_to_string("assets/hm_shaders.hlsl") .expect("Cannot open vertex shader file"), "VShader", "vs_6_0", &[], &[], ) .expect("Cannot compile vertex shader"); let pixel_shader = compile_shader( "PixelShader", &std::fs::read_to_string("assets/hm_shaders.hlsl") .expect("Cannot open pixel shader file"), "PShader", "ps_6_0", &[], &[], ) .expect("Cannot compile pixel shader"); let input_layout = Vertex::make_desc(); let vs_bytecode = ShaderBytecode::new(&vertex_shader); let ps_bytecode = ShaderBytecode::new(&pixel_shader); let vsps_layout = InputLayoutDesc::default().with_input_elements(&input_layout); let pso_desc = GraphicsPipelineStateDesc::default() .with_input_layout(&vsps_layout) .with_root_signature(root_signature) .with_vs_bytecode(&vs_bytecode) .with_ps_bytecode(&ps_bytecode) .with_rasterizer_state( RasterizerDesc::default().with_depth_clip_enable(false), ) .with_blend_state(BlendDesc::default()) .with_depth_stencil_state( DepthStencilDesc::default().with_depth_enable(false), ) .with_primitive_topology_type(PrimitiveTopologyType::Triangle) .with_rtv_formats(&[Format::R8G8B8A8Unorm]) .with_dsv_format(Format::D32Float); let pso = devices[0] .create_graphics_pipeline_state(&pso_desc) .expect("Cannot create PSO"); pso.set_name("Main PSO").expect("Cannot set name on pso"); trace!("Created PSO"); let blur_vertex_shader = compile_shader( "BlurVertexShader", &std::fs::read_to_string("assets/hm_blur_shaders.hlsl") .expect("Cannot open vertex shader 
file"), "VSSimpleBlur", "vs_6_0", &[], &[], ) .expect("Cannot compile blur vertex shader"); let blur_pixel_shader_u = compile_shader( "BlurPixelShaderU", &std::fs::read_to_string("assets/hm_blur_shaders.hlsl") .expect("Cannot open pixel shader file"), "PSSimpleBlurU", "ps_6_0", &[], &[], ) .expect("Cannot compile blur pixel shader U"); let blur_pixel_shader_v = compile_shader( "BlurPixelShaderV", &std::fs::read_to_string("assets/hm_blur_shaders.hlsl") .expect("Cannot open pixel shader file"), "PSSimpleBlurV", "ps_6_0", &[], &[], ) .expect("Cannot compile blur pixel shader V"); let blur_input_layout = BlurVertex::make_desc(); let blur_layout = InputLayoutDesc::default().with_input_elements(&blur_input_layout); let blur_vs_bytecode = ShaderBytecode::new(&blur_vertex_shader); let blur_ps_bytecode_u = ShaderBytecode::new(&blur_pixel_shader_u); let blur_ps_bytecode_v = ShaderBytecode::new(&blur_pixel_shader_v); let blur_pso_desc_u = GraphicsPipelineStateDesc::default() .with_input_layout(&blur_layout) .with_root_signature(blur_root_signature) .with_vs_bytecode(&blur_vs_bytecode) .with_ps_bytecode(&blur_ps_bytecode_u) .with_rasterizer_state(RasterizerDesc::default()) .with_blend_state(BlendDesc::default()) .with_depth_stencil_state( DepthStencilDesc::default().with_depth_enable(false), ) .with_primitive_topology_type(PrimitiveTopologyType::Triangle) .with_rtv_formats(&[Format::R8G8B8A8Unorm]); let blur_pso_u = devices[1] .create_graphics_pipeline_state(&blur_pso_desc_u) .expect("Cannot create PSO"); blur_pso_u .set_name("Blur PSO U") .expect("Cannot set name on pso"); trace!("Created blur PSO U"); let blur_pso_desc_v = GraphicsPipelineStateDesc::default() .with_input_layout(&blur_layout) .with_root_signature(&blur_root_signature) .with_vs_bytecode(&blur_vs_bytecode) .with_ps_bytecode(&blur_ps_bytecode_v) .with_rasterizer_state(RasterizerDesc::default()) .with_blend_state(BlendDesc::default()) .with_depth_stencil_state( 
DepthStencilDesc::default().with_depth_enable(false), ) .with_primitive_topology_type(PrimitiveTopologyType::Triangle) .with_rtv_formats(&[Format::R8G8B8A8Unorm]); let blur_pso_v = devices[1] .create_graphics_pipeline_state(&blur_pso_desc_v) .expect("Cannot create PSO"); blur_pso_v .set_name("Blur PSO V") .expect("Cannot set name on pso"); trace!("Created blur PSO V"); (pso, blur_pso_u, blur_pso_v) } fn create_root_signatures( devices: &[Device; DEVICE_COUNT], ) -> (RootSignature, RootSignature) { let (root_signature, blur_root_signature) = { let root_parameters = [ RootParameter::default() .with_shader_visibility(ShaderVisibility::Vertex) .new_descriptor( &RootDescriptor::default().with_shader_register(0), RootParameterType::Cbv, ), RootParameter::default() .with_shader_visibility(ShaderVisibility::Pixel) .new_descriptor( &RootDescriptor::default().with_shader_register(1), RootParameterType::Cbv, ), ]; let root_signature_desc = VersionedRootSignatureDesc::default() .with_desc_1_1( &RootSignatureDesc::default() .with_parameters(&root_parameters) .with_flags( RootSignatureFlags::AllowInputAssemblerInputLayout, ), ); let (serialized_signature, serialization_result) = RootSignature::serialize_versioned(&root_signature_desc); assert!( serialization_result.is_ok(), "Result: {}", &serialization_result.err().unwrap() ); let root_signature = devices[0] .create_root_signature( 0, &ShaderBytecode::new(serialized_signature.get_buffer()), ) .expect("Cannot create root signature on device 0"); let range = DescriptorRange::default() .with_num_descriptors(1) .with_range_type(DescriptorRangeType::Srv) .with_offset_in_descriptors_from_table_start( DescriptorRangeOffset::append(), ); let descriptor_table = RootDescriptorTable::default() .with_descriptor_ranges(slice::from_ref(&range)); let blur_root_parameters = [ RootParameter::default() .new_descriptor( &RootDescriptor::default() .with_shader_register(0) .with_flags(RootDescriptorFlags::DataStatic), RootParameterType::Cbv, ) 
.with_shader_visibility(ShaderVisibility::Pixel), RootParameter::default() .new_descriptor_table(&descriptor_table) .with_shader_visibility(ShaderVisibility::Pixel), RootParameter::default() .new_descriptor( &RootDescriptor::default() .with_shader_register(1) .with_flags(RootDescriptorFlags::DataStatic), RootParameterType::Cbv, ) .with_shader_visibility(ShaderVisibility::Pixel), ]; let static_point_sampler = StaticSamplerDesc::default() .with_shader_register(0) .with_filter(Filter::MinMagMipPoint) .with_shader_visibility(ShaderVisibility::Pixel); let static_linear_sampler = StaticSamplerDesc::default() .with_shader_register(1) .with_filter(Filter::MinMagMipLinear) .with_shader_visibility(ShaderVisibility::Pixel); let static_samplers = [static_point_sampler, static_linear_sampler]; let root_signature_desc = VersionedRootSignatureDesc::default() .with_desc_1_1( &RootSignatureDesc::default() .with_parameters(&blur_root_parameters) .with_static_samplers(&static_samplers) .with_flags( RootSignatureFlags::AllowInputAssemblerInputLayout, ), ); let (serialized_signature, serialization_result) = RootSignature::serialize_versioned(&root_signature_desc); assert!( serialization_result.is_ok(), "Result: {}", &serialization_result.err().unwrap() ); let blur_root_signature = devices[1] .create_root_signature( 0, &ShaderBytecode::new(serialized_signature.get_buffer()), ) .expect("Cannot create root signature on device 1"); (root_signature, blur_root_signature) }; (root_signature, blur_root_signature) } fn create_shared_resource_descs( devices: &[Device; DEVICE_COUNT], ) -> (bool, ByteCount, ResourceDesc) { let texture_size; let cross_adapter_desc; let mut feature_data = FeatureDataOptions::default(); devices[1] .check_feature_support(Feature::D3D12Options, &mut feature_data) .expect("Cannot check feature support"); let cross_adapter_textures_supported = feature_data.cross_adapter_row_major_texture_supported(); if cross_adapter_textures_supported { info!("Cross adapter textures are 
supported"); cross_adapter_desc = ResourceDesc::default() .with_dimension(ResourceDimension::Texture2D) .with_format(Format::R8G8B8A8Unorm) .with_width(WINDOW_WIDTH.into()) .with_height(WINDOW_HEIGHT.into()) .with_layout(TextureLayout::RowMajor) .with_flags(ResourceFlags::AllowCrossAdapter); let texture_info = devices[0].get_resource_allocation_info( 0, slice::from_ref(&cross_adapter_desc), ); trace!("cross adapter texture info: {:?}", &texture_info); texture_size = texture_info.size_in_bytes(); } else { info!("Cross adapter textures are not supported"); let (layout, _, _, _) = devices[0].get_copyable_footprints( &ResourceDesc::default() .with_dimension(ResourceDimension::Texture2D) .with_format(Format::R8G8B8A8Unorm) .with_width(WINDOW_WIDTH.into()) .with_height(WINDOW_HEIGHT.into()) .with_flags(ResourceFlags::AllowRenderTarget), 0, 1, ByteCount(0), ); texture_size = align_to_multiple( (layout[0].footprint().row_pitch() * layout[0].footprint().height()) .0, DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT.0, ) .into(); cross_adapter_desc = ResourceDesc::default() .with_dimension(ResourceDimension::Buffer) .with_width(texture_size.0.into()) .with_layout(TextureLayout::RowMajor) .with_flags(ResourceFlags::AllowCrossAdapter); } ( cross_adapter_textures_supported, texture_size, cross_adapter_desc, ) } fn create_frame_resources( devices: &[Device; DEVICE_COUNT], rtv_heaps: &[DescriptorHeap; DEVICE_COUNT], swapchain: &Swapchain, rtv_descriptor_handle_sizes: [ByteCount; DEVICE_COUNT], ) -> ( [[Resource; FRAMES_IN_FLIGHT]; DEVICE_COUNT], [[CommandAllocator; FRAMES_IN_FLIGHT]; DEVICE_COUNT], [CommandAllocator; FRAMES_IN_FLIGHT], ) { let clear_value = ClearValue::default() .with_format(Format::R8G8B8A8Unorm) .with_color(CLEAR_COLOR); let render_target_desc = ResourceDesc::default() .with_dimension(ResourceDimension::Texture2D) .with_format(Format::R8G8B8A8Unorm) .with_width(WINDOW_WIDTH.into()) .with_height(WINDOW_HEIGHT.into()) .with_flags(ResourceFlags::AllowRenderTarget); let mut 
render_targets: [[MaybeUninit; FRAMES_IN_FLIGHT]; DEVICE_COUNT] = unsafe { MaybeUninit::uninit().assume_init() }; for device_idx in 0..DEVICE_COUNT { for frame_idx in 0..FRAMES_IN_FLIGHT { if device_idx == 1 { render_targets[device_idx][frame_idx] = MaybeUninit::new( swapchain .get_buffer(frame_idx as u32) .expect("Cannot get buffer from swapchain"), ); } else { render_targets[device_idx][frame_idx] = MaybeUninit::new( devices[device_idx] .create_committed_resource( &HeapProperties::default() .with_heap_type(HeapType::Default), HeapFlags::None, &render_target_desc, ResourceStates::Common, Some(&clear_value), ) .expect("Cannot create render target"), ); } } } let mut direct_command_allocators: [[MaybeUninit; FRAMES_IN_FLIGHT]; DEVICE_COUNT] = unsafe { MaybeUninit::uninit().assume_init() }; let mut copy_allocators: [MaybeUninit; FRAMES_IN_FLIGHT] = unsafe { MaybeUninit::uninit().assume_init() }; let mut render_targets: [[Resource; FRAMES_IN_FLIGHT]; DEVICE_COUNT] = unsafe { std::mem::transmute(render_targets) }; // We have two separate loops since Rust cares for us, too much from time to time, // so we need to have the entire arrays in valid state to transmute them to // the initialized type for device_idx in 0..DEVICE_COUNT { let mut rtv_handle = rtv_heaps[device_idx].get_cpu_descriptor_handle_for_heap_start(); for frame_idx in 0..FRAMES_IN_FLIGHT { devices[device_idx].create_render_target_view( &render_targets[device_idx][frame_idx], rtv_handle, ); rtv_handle = rtv_handle.advance(1, rtv_descriptor_handle_sizes[device_idx]); direct_command_allocators[device_idx][frame_idx] = MaybeUninit::new( devices[device_idx] .create_command_allocator(CommandListType::Direct) .expect("Cannot create command allocator"), ); if device_idx == 0 { copy_allocators[frame_idx] = MaybeUninit::new( devices[device_idx] .create_command_allocator(CommandListType::Copy) .expect("Cannot create command allocator"), ); } } } trace!("created command allocators"); let direct_command_allocators: 
[[CommandAllocator; FRAMES_IN_FLIGHT]; DEVICE_COUNT] = unsafe { std::mem::transmute(direct_command_allocators) }; let copy_allocators: [CommandAllocator; FRAMES_IN_FLIGHT] = unsafe { std::mem::transmute(copy_allocators) }; (render_targets, direct_command_allocators, copy_allocators) }

/// Creates, for every device, a readback buffer for timestamp results and a
/// timestamp query heap.
///
/// Each query heap holds `FRAMES_IN_FLIGHT * 2` queries and each buffer is
/// sized to hold that many result values.
///
/// Returns `(timestamp_result_buffers, query_heaps)`, both indexed by device.
///
/// # Panics
///
/// Panics if a results buffer or a query heap cannot be created.
fn create_query_heaps(
    devices: &[Device; DEVICE_COUNT],
) -> ([Resource; DEVICE_COUNT], [QueryHeap; DEVICE_COUNT]) {
    let query_result_count = (FRAMES_IN_FLIGHT * 2) as u64;
    // NOTE(review): the element type here was restored as `u64`, assuming each
    // timestamp result is a 64-bit value -- confirm against the code that
    // reads the buffer back.
    let query_results_buffer_size = ByteCount::from(
        query_result_count * std::mem::size_of::<u64>() as u64,
    );

    let query_heap_desc = QueryHeapDesc::default()
        .with_heap_type(QueryHeapType::Timestamp)
        .with_count(query_result_count as u32);

    // SAFETY: an array of `MaybeUninit` does not require initialization.
    let mut timestamp_result_buffers: [MaybeUninit<Resource>; DEVICE_COUNT] =
        unsafe { MaybeUninit::uninit().assume_init() };
    // SAFETY: an array of `MaybeUninit` does not require initialization.
    let mut query_heaps: [MaybeUninit<QueryHeap>; DEVICE_COUNT] =
        unsafe { MaybeUninit::uninit().assume_init() };

    for (device_idx, device) in devices.iter().enumerate() {
        timestamp_result_buffers[device_idx] = MaybeUninit::new(
            device
                .create_committed_resource(
                    &HeapProperties::default()
                        .with_heap_type(HeapType::Readback),
                    HeapFlags::None,
                    &ResourceDesc::default()
                        .with_dimension(ResourceDimension::Buffer)
                        .with_width(query_results_buffer_size.0.into())
                        .with_layout(TextureLayout::RowMajor),
                    ResourceStates::CopyDest,
                    None,
                )
                .expect("Cannot create timestamp results buffer"),
        );

        query_heaps[device_idx] = MaybeUninit::new(
            device
                .create_query_heap(&query_heap_desc)
                .expect("Cannot create query heap"),
        );
    }

    // SAFETY: the loop above wrote every element of both arrays, and
    // `MaybeUninit<T>` has the same layout as `T`.
    let timestamp_result_buffers: [Resource; DEVICE_COUNT] =
        unsafe { std::mem::transmute(timestamp_result_buffers) };
    // SAFETY: see above.
    let query_heaps: [QueryHeap; DEVICE_COUNT] =
        unsafe { std::mem::transmute(query_heaps) };

    (timestamp_result_buffers, query_heaps)
}

/// Creates the descriptor heaps used by the sample.
///
/// * One RTV heap per device: `FRAMES_IN_FLIGHT` descriptors on device 0 and
///   `FRAMES_IN_FLIGHT + 1` on device 1.
/// * A single-descriptor DSV heap on device 0.
/// * A shader-visible CBV/SRV/UAV heap with `FRAMES_IN_FLIGHT + 1`
///   descriptors on device 1.
///
/// `swapchain` is accepted for interface compatibility; this function does
/// not read it.
///
/// Returns `(rtv_heaps, dsv_heap, cbv_srv_heap)`.
///
/// # Panics
///
/// Panics if any heap cannot be created or named.
fn create_descriptor_heaps(
    devices: &[Device; DEVICE_COUNT],
    swapchain: &Swapchain,
) -> (
    [DescriptorHeap; DEVICE_COUNT],
    DescriptorHeap,
    DescriptorHeap,
) {
    // SAFETY: an array of `MaybeUninit` does not require initialization.
    let mut rtv_heaps: [MaybeUninit<DescriptorHeap>; DEVICE_COUNT] =
        unsafe { MaybeUninit::uninit().assume_init() };

    for (device_idx, device) in devices.iter().enumerate() {
        let num_descriptors = match device_idx {
            0 => FRAMES_IN_FLIGHT,
            1 => FRAMES_IN_FLIGHT + 1, // add space for the intermediate render target
            _ => 0,
        };

        let current_heap = device
            .create_descriptor_heap(
                &DescriptorHeapDesc::default()
                    .with_heap_type(DescriptorHeapType::Rtv)
                    .with_num_descriptors(num_descriptors as u32),
            )
            .expect("Cannot create RTV heap");
        current_heap
            .set_name(&format!("RTV heap #{}", device_idx))
            .expect("Cannot set RTV heap name");

        rtv_heaps[device_idx] = MaybeUninit::new(current_heap);
    }

    // SAFETY: the loop above wrote every element, and `MaybeUninit<T>` has
    // the same layout as `T`.
    let rtv_heaps: [DescriptorHeap; DEVICE_COUNT] =
        unsafe { std::mem::transmute(rtv_heaps) };

    let dsv_heap = devices[0]
        .create_descriptor_heap(
            &DescriptorHeapDesc::default()
                .with_heap_type(DescriptorHeapType::Dsv)
                .with_num_descriptors(1),
        )
        .expect("Cannot create DSV heap");
    dsv_heap
        .set_name("DSV heap")
        .expect("Cannot set DSV heap name");

    let cbv_srv_heap = devices[1]
        .create_descriptor_heap(
            &DescriptorHeapDesc::default()
                .with_heap_type(DescriptorHeapType::CbvSrvUav)
                .with_num_descriptors(FRAMES_IN_FLIGHT as u32 + 1)
                .with_flags(DescriptorHeapFlags::ShaderVisible),
        )
        .expect("Cannot create CBV_SRV heap");
    cbv_srv_heap
        .set_name("CBV_SRV heap")
        .expect("Cannot set CBV_SRV heap name");

    (rtv_heaps, dsv_heap, cbv_srv_heap)
}

/// Creates a `WINDOW_WIDTH` x `WINDOW_HEIGHT` swapchain with
/// `FRAMES_IN_FLIGHT` buffers for the window `hwnd`, presenting through
/// `command_queue`, and associates the window with the factory using
/// `MakeWindowAssociationFlags::NoAltEnter`.
///
/// # Panics
///
/// Panics if the swapchain cannot be created or the window association fails.
fn create_swapchain(
    factory: Factory,
    command_queue: &CommandQueue,
    hwnd: *mut std::ffi::c_void,
) -> Swapchain {
    let swapchain_desc = SwapChainDesc::default()
        .with_width(WINDOW_WIDTH)
        .with_height(WINDOW_HEIGHT)
        .with_buffer_count(FRAMES_IN_FLIGHT as u32);

    // `command_queue` is already a reference, so it is passed through as-is.
    let swapchain = factory
        .create_swapchain(command_queue, hwnd as *mut HWND__, &swapchain_desc)
        .expect("Cannot create swapchain");

    factory
        .make_window_association(hwnd, MakeWindowAssociationFlags::NoAltEnter)
        .expect("Cannot make window association");

    swapchain
}

/// Creates a single device: on the WARP adapter when `USE_WARP_ADAPTER` is
/// set, otherwise on the first adapter the factory enumerates.
///
/// # Panics
///
/// Panics if adapter enumeration fails, if no adapter is found, or if the
/// device cannot be created.
fn create_device(factory: &Factory) -> Device {
    if USE_WARP_ADAPTER {
        let warp_adapter = factory
            .enum_warp_adapter()
            .expect("Cannot enum warp adapter");
        Device::new(&warp_adapter)
            .expect("Cannot create device on WARP adapter")
    } else {
        // Take the first adapter and fail with a readable message, rather
        // than an index-out-of-bounds panic, when the list is empty.
        let hw_adapter = factory
            .enum_adapters()
            .expect("Cannot enumerate adapters")
            .into_iter()
            .next()
            .expect("No hardware adapters found");
        Device::new(&hw_adapter).expect("Cannot create device")
    }
}

/// Enumerates adapters by high-performance GPU preference, logs each one, and
/// returns the first two in swapped order: `[second, first]`.
///
/// # Panics
///
/// Panics if enumeration fails, if an adapter description cannot be read, or
/// if fewer than two adapters are available.
fn get_hardware_adapters(factory: &Factory) -> [Adapter; DEVICE_COUNT] {
    // Adapter 0 is the adapter that Presents frames to the display. It is assigned as
    // the "secondary" adapter because it is the adapter that performs the second set
    // of operations (the blur effect) in this sample.
    // Adapter 1 is an additional GPU that the app can take advantage of, but it does
    // not own the presentation step. It is assigned as the "primary" adapter because
    // it is the adapter that performs the first set of operations (rendering triangles)
    // in this sample.
    let adapters = factory
        .enum_adapters_by_gpu_preference(GpuPreference::HighPerformance)
        .expect("Cannot enumerate adapters");

    for adapter in &adapters {
        let desc = adapter.get_desc().expect("Cannot get adapter desc");
        info!("found adapter: {}", desc);
    }

    let mut adapters = adapters.into_iter();
    match (adapters.next(), adapters.next()) {
        (Some(presenting), Some(additional)) => [additional, presenting],
        _ => panic!(
            "At least {} hardware adapters are required",
            DEVICE_COUNT
        ),
    }
}

/// Creates one device per adapter.
///
/// With `USE_WARP_ADAPTER` set, both devices are created on the WARP adapter;
/// otherwise the adapters come from [`get_hardware_adapters`].
///
/// Returns the devices together with a flag that is `true` when the adapter
/// backing device 0 reports itself as a software adapter.
///
/// # Panics
///
/// Panics if an adapter cannot be obtained or described, or if a device
/// cannot be created.
fn create_devices(factory: &Factory) -> ([Device; DEVICE_COUNT], bool) {
    let adapters = if USE_WARP_ADAPTER {
        [
            factory
                .enum_warp_adapter()
                .expect("Cannot enum warp adapter"),
            factory
                .enum_warp_adapter()
                .expect("Cannot enum warp adapter"),
        ]
    } else {
        get_hardware_adapters(factory)
    };

    let adapter_descs = [
        adapters[0].get_desc().expect("Cannot get adapter desc"),
        adapters[1].get_desc().expect("Cannot get adapter desc"),
    ];

    info!(
        "Enumerated adapters: \n\t{}\n\t{}",
        adapter_descs[0], adapter_descs[1]
    );

    // One closure for both devices, so the failure message always names the
    // adapter that actually failed.
    let create_on = |adapter_idx: usize| {
        Device::new(&adapters[adapter_idx]).unwrap_or_else(|_| {
            panic!(
                "Cannot create device on adapter {}",
                adapter_descs[adapter_idx]
            )
        })
    };

    (
        [create_on(0), create_on(1)],
        adapter_descs[0].is_software(),
    )
}

/// Entry point: initializes logging at `Info` level, creates the window and
/// the sample, then runs the event loop.
///
/// The loop polls continuously, requests a redraw once pending events are
/// cleared, draws the sample on each redraw request, and exits when the
/// window is asked to close.
fn main() {
    //wait_for_debugger();
    simple_logger::init_with_level(log::Level::Info).unwrap();

    let event_loop = EventLoop::new();
    let window = WindowBuilder::new()
        .build(&event_loop)
        .expect("Cannot create window");
    window.set_inner_size(winit::dpi::LogicalSize::new(
        WINDOW_WIDTH,
        WINDOW_HEIGHT,
    ));

    let mut sample = HeterogeneousMultiadapterSample::new(window.hwnd());

    event_loop.run(move |event, _, control_flow| {
        *control_flow = ControlFlow::Poll;
        match event {
            Event::WindowEvent {
                event: WindowEvent::CloseRequested,
                ..
            } => *control_flow = ControlFlow::Exit,
            Event::MainEventsCleared => {
                window.request_redraw();
            }
            Event::RedrawRequested(_) => {
                sample.draw();
            }
            _ => (),
        }
    });
}