/* Copyright (c) 2017-2018 Hans-Kristian Arntzen
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

#pragma once

#include "buffer.hpp"
#include "command_buffer.hpp"
#include "command_pool.hpp"
#include "fence.hpp"
#include "fence_manager.hpp"
#include "image.hpp"
#include "memory_allocator.hpp"
#include "render_pass.hpp"
#include "sampler.hpp"
#include "semaphore.hpp"
#include "semaphore_manager.hpp"
#include "event_manager.hpp"
#include "shader.hpp"
#include "vulkan.hpp"
#include "query_pool.hpp"
#include "buffer_pool.hpp"
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>

#ifdef GRANITE_VULKAN_FILESYSTEM
#include "shader_manager.hpp"
#include "texture_manager.hpp"
#endif

#ifdef GRANITE_VULKAN_MT
#include <atomic>
#include <mutex>
#include <condition_variable>
#include "thread_group.hpp"
#endif

#ifdef GRANITE_VULKAN_FOSSILIZE
#include "fossilize.hpp"
#endif

#include "quirks.hpp"

namespace Vulkan
{
enum class SwapchainRenderPass
{
    ColorOnly,
    Depth,
    DepthStencil
};

struct InitialImageBuffer
{
    BufferHandle buffer;
    std::vector<VkBufferImageCopy> blits;
};

struct HandlePool
{
    VulkanObjectPool<Buffer> buffers;
    VulkanObjectPool<Image> images;
    VulkanObjectPool<LinearHostImage> linear_images;
    VulkanObjectPool<ImageView> image_views;
    VulkanObjectPool<BufferView> buffer_views;
    VulkanObjectPool<Sampler> samplers;
    VulkanObjectPool<FenceHolder> fences;
    VulkanObjectPool<SemaphoreHolder> semaphores;
    VulkanObjectPool<EventHolder> events;
    VulkanObjectPool<QueryPoolResult> query;
    VulkanObjectPool<CommandBuffer> command_buffers;
};

class Device
#ifdef GRANITE_VULKAN_FOSSILIZE
    : public Fossilize::StateCreatorInterface
#endif
{
public:
    // Device-based objects which need to poke at internal data structures when their lifetimes end.
    // Don't want to expose a lot of internal guts to make this work.
    friend class QueryPool;
    friend struct QueryPoolResultDeleter;
    friend class EventHolder;
    friend struct EventHolderDeleter;
    friend class SemaphoreHolder;
    friend struct SemaphoreHolderDeleter;
    friend class FenceHolder;
    friend struct FenceHolderDeleter;
    friend class Sampler;
    friend struct SamplerDeleter;
    friend class Buffer;
    friend struct BufferDeleter;
    friend class BufferView;
    friend struct BufferViewDeleter;
    friend class ImageView;
    friend struct ImageViewDeleter;
    friend class Image;
    friend struct ImageDeleter;
    friend struct LinearHostImageDeleter;
    friend class CommandBuffer;
    friend struct CommandBufferDeleter;
    friend class Program;
    friend class WSI;
    friend class Cookie;
    friend class Framebuffer;
    friend class PipelineLayout;
    friend class FramebufferAllocator;
    friend class RenderPass;
    friend class Texture;
    friend class DescriptorSetAllocator;
    friend class Shader;

    Device();
    ~Device();

    // No move-copy.
    void operator=(Device &&) = delete;
    Device(Device &&) = delete;

    // Only called by main thread, during setup phase.
    void set_context(const Context &context);
    void init_swapchain(const std::vector<VkImage> &swapchain_images, unsigned width, unsigned height, VkFormat format);
    void init_external_swapchain(const std::vector<ImageHandle> &swapchain_images);
    void init_frame_contexts(unsigned count);

    ImageView &get_swapchain_view();
    ImageView &get_swapchain_view(unsigned index);
    unsigned get_num_swapchain_images() const;
    unsigned get_num_frame_contexts() const;
    unsigned get_swapchain_index() const;
    unsigned get_current_frame_context() const;

    size_t get_pipeline_cache_size();
    bool get_pipeline_cache_data(uint8_t *data, size_t size);
    bool init_pipeline_cache(const uint8_t *data, size_t size);

    // Frame-pushing interface.
    void next_frame_context();
    void wait_idle();
    void end_frame_context();
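    // Rough sketch of a frame loop (illustrative only; the exact semantics of
    // frame contexts live in device.cpp). next_frame_context() presumably waits
    // on the fences of the frame context being reused and recycles its command
    // pools and deferred destructions:
    //
    //   while (running)
    //   {
    //       device.next_frame_context();
    //       auto cmd = device.request_command_buffer();
    //       // ... record work ...
    //       device.submit(cmd);
    //   }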
    // Set names for objects for debuggers and profilers.
    void set_name(const Buffer &buffer, const char *name);
    void set_name(const Image &image, const char *name);
    void set_name(const CommandBuffer &cmd, const char *name);

    // Submission interface, may be called from any thread at any time.
    void flush_frame();
    CommandBufferHandle request_command_buffer(CommandBuffer::Type type = CommandBuffer::Type::Generic);
    CommandBufferHandle request_command_buffer_for_thread(unsigned thread_index, CommandBuffer::Type type = CommandBuffer::Type::Generic);
    void submit(CommandBufferHandle &cmd, Fence *fence = nullptr,
                unsigned semaphore_count = 0, Semaphore *semaphore = nullptr);
    void submit_empty(CommandBuffer::Type type, Fence *fence = nullptr,
                      unsigned semaphore_count = 0, Semaphore *semaphore = nullptr);
    void add_wait_semaphore(CommandBuffer::Type type, Semaphore semaphore, VkPipelineStageFlags stages, bool flush);
    CommandBuffer::Type get_physical_queue_type(CommandBuffer::Type queue_type) const;
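    // Illustrative sketch of a submission the CPU waits on (assumes FenceHolder
    // exposes a wait(); see fence.hpp):
    //
    //   auto cmd = device.request_command_buffer();
    //   // ... record copies or dispatches ...
    //   Fence fence;
    //   device.submit(cmd, &fence);
    //   fence->wait();
    //
    // Worker threads use request_command_buffer_for_thread() with a stable
    // thread index so each thread records from its own command pool.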
    // Request shaders and programs. These objects are owned by the Device.
    Shader *request_shader(const uint32_t *code, size_t size);
    Shader *request_shader_by_hash(Util::Hash hash);
    Program *request_program(const uint32_t *vertex_data, size_t vertex_size,
                             const uint32_t *fragment_data, size_t fragment_size);
    Program *request_program(const uint32_t *compute_data, size_t compute_size);
    Program *request_program(Shader *vertex, Shader *fragment);
    Program *request_program(Shader *compute);

    // Map and unmap buffer objects.
    void *map_host_buffer(const Buffer &buffer, MemoryAccessFlags access);
    void unmap_host_buffer(const Buffer &buffer, MemoryAccessFlags access);
    void *map_linear_host_image(const LinearHostImage &image, MemoryAccessFlags access);
    void unmap_linear_host_image_and_sync(const LinearHostImage &image, MemoryAccessFlags access);

    // Create buffers and images.
    BufferHandle create_buffer(const BufferCreateInfo &info, const void *initial = nullptr);
    ImageHandle create_image(const ImageCreateInfo &info, const ImageInitialData *initial = nullptr);
    ImageHandle create_image_from_staging_buffer(const ImageCreateInfo &info, const InitialImageBuffer *buffer);
    LinearHostImageHandle create_linear_host_image(const LinearHostImageCreateInfo &info);

    // Create staging buffers for images.
    InitialImageBuffer create_image_staging_buffer(const ImageCreateInfo &info, const ImageInitialData *initial);
    InitialImageBuffer create_image_staging_buffer(const TextureFormatLayout &layout);

#ifndef _WIN32
    ImageHandle create_imported_image(int fd, VkDeviceSize size, uint32_t memory_type,
                                      VkExternalMemoryHandleTypeFlagBitsKHR handle_type,
                                      const ImageCreateInfo &create_info);
#endif

    // Create image view, buffer views and samplers.
    ImageViewHandle create_image_view(const ImageViewCreateInfo &view_info);
    BufferViewHandle create_buffer_view(const BufferViewCreateInfo &view_info);
    SamplerHandle create_sampler(const SamplerCreateInfo &info);

    // Render pass helpers.
    bool image_format_is_supported(VkFormat format, VkFormatFeatureFlags required, VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL) const;
    void get_format_properties(VkFormat format, VkFormatProperties *properties);
    bool get_image_format_properties(VkFormat format, VkImageType type, VkImageTiling tiling,
                                     VkImageUsageFlags usage, VkImageCreateFlags flags,
                                     VkImageFormatProperties *properties);
    VkFormat get_default_depth_stencil_format() const;
    VkFormat get_default_depth_format() const;
    ImageView &get_transient_attachment(unsigned width, unsigned height, VkFormat format,
                                        unsigned index = 0, unsigned samples = 1, unsigned layers = 1);
    RenderPassInfo get_swapchain_render_pass(SwapchainRenderPass style);
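    // Illustrative sketch of rendering to the swapchain (begin_render_pass() and
    // end_render_pass() are assumed from command_buffer.hpp):
    //
    //   auto rp = device.get_swapchain_render_pass(SwapchainRenderPass::ColorOnly);
    //   cmd->begin_render_pass(rp);
    //   // ... draw ...
    //   cmd->end_render_pass();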
    // Request semaphores.
    Semaphore request_semaphore();
    Semaphore request_external_semaphore(VkSemaphore semaphore, bool signalled);
#ifndef _WIN32
    Semaphore request_imported_semaphore(int fd, VkExternalSemaphoreHandleTypeFlagBitsKHR handle_type);
#endif

    VkDevice get_device()
    {
        return device;
    }

    const VkPhysicalDeviceMemoryProperties &get_memory_properties() const
    {
        return mem_props;
    }

    const VkPhysicalDeviceProperties &get_gpu_properties() const
    {
        return gpu_props;
    }

    const Sampler &get_stock_sampler(StockSampler sampler) const;

#ifdef GRANITE_VULKAN_FILESYSTEM
    ShaderManager &get_shader_manager();
    TextureManager &get_texture_manager();
    void init_shader_manager_cache();
    void flush_shader_manager_cache();
#endif

    // For some platforms, the device and queue might be shared, possibly across threads, so we need
    // some mechanism to lock the global device and queue.
    void set_queue_lock(std::function<void ()> lock_callback, std::function<void ()> unlock_callback);
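    // Illustrative sketch: on platforms where an external component owns the
    // queue, the callbacks might simply guard that component's lock
    // (external_queue_mutex is a hypothetical example):
    //
    //   device.set_queue_lock([]() { external_queue_mutex.lock(); },
    //                         []() { external_queue_mutex.unlock(); });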
    const ImplementationWorkarounds &get_workarounds() const
    {
        return workarounds;
    }

    const DeviceFeatures &get_device_features() const
    {
        return ext;
    }

    bool swapchain_touched() const;

private:
    VkInstance instance = VK_NULL_HANDLE;
    VkPhysicalDevice gpu = VK_NULL_HANDLE;
    VkDevice device = VK_NULL_HANDLE;
    VkQueue graphics_queue = VK_NULL_HANDLE;
    VkQueue compute_queue = VK_NULL_HANDLE;
    VkQueue transfer_queue = VK_NULL_HANDLE;

#ifdef GRANITE_VULKAN_MT
    std::atomic<uint64_t> cookie;
#else
    uint64_t cookie = 0;
#endif

    uint64_t allocate_cookie();
    void bake_program(Program &program);

    void request_vertex_block(BufferBlock &block, VkDeviceSize size);
    void request_index_block(BufferBlock &block, VkDeviceSize size);
    void request_uniform_block(BufferBlock &block, VkDeviceSize size);
    void request_staging_block(BufferBlock &block, VkDeviceSize size);

    QueryPoolHandle write_timestamp(VkCommandBuffer cmd, VkPipelineStageFlagBits stage);

    void set_acquire_semaphore(unsigned index, Semaphore acquire);
    Semaphore consume_release_semaphore();

    PipelineLayout *request_pipeline_layout(const CombinedResourceLayout &layout);
    DescriptorSetAllocator *request_descriptor_set_allocator(const DescriptorSetLayout &layout, const uint32_t *stages_for_sets);
    const Framebuffer &request_framebuffer(const RenderPassInfo &info);
    const RenderPass &request_render_pass(const RenderPassInfo &info, bool compatible);

    VkPhysicalDeviceMemoryProperties mem_props;
    VkPhysicalDeviceProperties gpu_props;

    DeviceFeatures ext;
    void init_stock_samplers();

    // Make sure this is deleted last.
    HandlePool handle_pool;

    struct Managers
    {
        DeviceAllocator memory;
        FenceManager fence;
        SemaphoreManager semaphore;
        EventManager event;
        BufferPool vbo, ibo, ubo, staging;
    };
    Managers managers;

    struct
    {
#ifdef GRANITE_VULKAN_MT
        std::mutex lock;
        std::condition_variable cond;
#endif
        unsigned counter = 0;
    } lock;

    void add_frame_counter();
    void decrement_frame_counter();

    struct PerFrame
    {
        PerFrame(Device *device);
        ~PerFrame();
        void operator=(const PerFrame &) = delete;
        PerFrame(const PerFrame &) = delete;

        void begin();

        VkDevice device;
        Managers &managers;
        std::vector<CommandPool> graphics_cmd_pool;
        std::vector<CommandPool> compute_cmd_pool;
        std::vector<CommandPool> transfer_cmd_pool;
        QueryPool query_pool;

        std::vector<BufferBlock> vbo_blocks;
        std::vector<BufferBlock> ibo_blocks;
        std::vector<BufferBlock> ubo_blocks;
        std::vector<BufferBlock> staging_blocks;

        std::vector<VkFence> wait_fences;
        std::vector<VkFence> recycle_fences;

        std::vector<DeviceAllocation> allocations;
        std::vector<VkFramebuffer> destroyed_framebuffers;
        std::vector<VkSampler> destroyed_samplers;
        std::vector<VkPipeline> destroyed_pipelines;
        std::vector<VkImageView> destroyed_image_views;
        std::vector<VkBufferView> destroyed_buffer_views;
        std::vector<VkImage> destroyed_images;
        std::vector<VkBuffer> destroyed_buffers;
        std::vector<CommandBufferHandle> graphics_submissions;
        std::vector<CommandBufferHandle> compute_submissions;
        std::vector<CommandBufferHandle> transfer_submissions;
        std::vector<VkSemaphore> recycled_semaphores;
        std::vector<VkEvent> recycled_events;
        std::vector<VkSemaphore> destroyed_semaphores;
        std::vector<ImageHandle> keep_alive_images;
    };
    // The per frame structure must be destroyed after
    // the hashmap data structures below, so it must be declared before.
    std::vector<std::unique_ptr<PerFrame>> per_frame;

    struct
    {
        Semaphore acquire;
        Semaphore release;
        bool touched = false;
        bool consumed = false;
        std::vector<ImageHandle> swapchain;
        unsigned index = 0;
    } wsi;

    struct QueueData
    {
        std::vector<Semaphore> wait_semaphores;
        std::vector<VkPipelineStageFlags> wait_stages;
        bool need_fence = false;
    } graphics, compute, transfer;

    // Pending buffers which need to be copied from CPU to GPU before submitting graphics or compute work.
    struct
    {
        std::vector<BufferBlock> vbo;
        std::vector<BufferBlock> ibo;
        std::vector<BufferBlock> ubo;
    } dma;

    void submit_queue(CommandBuffer::Type type, VkFence *fence,
                      unsigned semaphore_count = 0, Semaphore *semaphore = nullptr);

    PerFrame &frame()
    {
        VK_ASSERT(frame_context_index < per_frame.size());
        VK_ASSERT(per_frame[frame_context_index]);
        return *per_frame[frame_context_index];
    }

    const PerFrame &frame() const
    {
        VK_ASSERT(frame_context_index < per_frame.size());
        VK_ASSERT(per_frame[frame_context_index]);
        return *per_frame[frame_context_index];
    }

    unsigned frame_context_index = 0;
    uint32_t graphics_queue_family_index = 0;
    uint32_t compute_queue_family_index = 0;
    uint32_t transfer_queue_family_index = 0;

    uint32_t find_memory_type(BufferDomain domain, uint32_t mask);
    uint32_t find_memory_type(ImageDomain domain, uint32_t mask);
    bool memory_type_is_device_optimal(uint32_t type) const;
    bool memory_type_is_host_visible(uint32_t type) const;

    SamplerHandle samplers[static_cast<unsigned>(StockSampler::Count)];
    VulkanCache<PipelineLayout> pipeline_layouts;
    VulkanCache<DescriptorSetAllocator> descriptor_set_allocators;
    VulkanCache<RenderPass> render_passes;
    VulkanCache<Shader> shaders;
    VulkanCache<Program> programs;

    FramebufferAllocator framebuffer_allocator;
    TransientAttachmentAllocator transient_allocator;
    VkPipelineCache pipeline_cache = VK_NULL_HANDLE;

    SamplerHandle create_sampler(const SamplerCreateInfo &info, StockSampler sampler);
    void init_pipeline_cache();
    void flush_pipeline_cache();

    CommandPool &get_command_pool(CommandBuffer::Type type, unsigned thread);
    QueueData &get_queue_data(CommandBuffer::Type type);
    std::vector<CommandBufferHandle> &get_queue_submissions(CommandBuffer::Type type);
    void clear_wait_semaphores();
    void submit_staging(CommandBufferHandle &cmd, VkBufferUsageFlags usage, bool flush);
    PipelineEvent request_pipeline_event();

    std::function<void ()> queue_lock_callback;
    std::function<void ()> queue_unlock_callback;
    void flush_frame(CommandBuffer::Type type);
    void sync_buffer_blocks();
    void submit_empty_inner(CommandBuffer::Type type, VkFence *fence,
                            unsigned semaphore_count, Semaphore *semaphore);

    void destroy_buffer(VkBuffer buffer);
    void destroy_image(VkImage image);
    void destroy_image_view(VkImageView view);
    void destroy_buffer_view(VkBufferView view);
    void destroy_pipeline(VkPipeline pipeline);
    void destroy_sampler(VkSampler sampler);
    void destroy_framebuffer(VkFramebuffer framebuffer);
    void destroy_semaphore(VkSemaphore semaphore);
    void recycle_semaphore(VkSemaphore semaphore);
    void destroy_event(VkEvent event);
    void free_memory(const DeviceAllocation &alloc);
    void reset_fence(VkFence fence);
    void keep_handle_alive(ImageHandle handle);
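    // The destroy_* and recycle_* calls above presumably defer their work by
    // queuing the handle into the current PerFrame's destroyed_*/recycled_*
    // lists; actual destruction happens once that frame context's fences have
    // been waited on, so handles still referenced by in-flight GPU work remain
    // valid. (Illustrative note; see device.cpp for the authoritative behavior.)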
    void destroy_buffer_nolock(VkBuffer buffer);
    void destroy_image_nolock(VkImage image);
    void destroy_image_view_nolock(VkImageView view);
    void destroy_buffer_view_nolock(VkBufferView view);
    void destroy_pipeline_nolock(VkPipeline pipeline);
    void destroy_sampler_nolock(VkSampler sampler);
    void destroy_framebuffer_nolock(VkFramebuffer framebuffer);
    void destroy_semaphore_nolock(VkSemaphore semaphore);
    void recycle_semaphore_nolock(VkSemaphore semaphore);
    void destroy_event_nolock(VkEvent event);
    void free_memory_nolock(const DeviceAllocation &alloc);

    void flush_frame_nolock();
    CommandBufferHandle request_command_buffer_nolock(unsigned thread_index, CommandBuffer::Type type = CommandBuffer::Type::Generic);
    void submit_nolock(CommandBufferHandle cmd, Fence *fence,
                       unsigned semaphore_count, Semaphore *semaphore);
    void submit_empty_nolock(CommandBuffer::Type type, Fence *fence,
                             unsigned semaphore_count, Semaphore *semaphore);
    void add_wait_semaphore_nolock(CommandBuffer::Type type, Semaphore semaphore,
                                   VkPipelineStageFlags stages, bool flush);

    void request_vertex_block_nolock(BufferBlock &block, VkDeviceSize size);
    void request_index_block_nolock(BufferBlock &block, VkDeviceSize size);
    void request_uniform_block_nolock(BufferBlock &block, VkDeviceSize size);
    void request_staging_block_nolock(BufferBlock &block, VkDeviceSize size);

    CommandBufferHandle request_secondary_command_buffer_for_thread(unsigned thread_index,
                                                                    const Framebuffer *framebuffer,
                                                                    unsigned subpass,
                                                                    CommandBuffer::Type type = CommandBuffer::Type::Generic);
    void add_frame_counter_nolock();
    void decrement_frame_counter_nolock();
    void submit_secondary(CommandBuffer &primary, CommandBuffer &secondary);
    void wait_idle_nolock();
    void end_frame_nolock();

    Fence request_fence();

#ifdef GRANITE_VULKAN_FILESYSTEM
    ShaderManager shader_manager;
    TextureManager texture_manager;
#endif

    std::string get_pipeline_cache_string() const;

#ifdef GRANITE_VULKAN_FOSSILIZE
    Fossilize::StateRecorder state_recorder;
    std::mutex state_recorder_lock;
    bool enqueue_create_sampler(Fossilize::Hash hash, unsigned index, const VkSamplerCreateInfo *create_info, VkSampler *sampler) override;
    bool enqueue_create_descriptor_set_layout(Fossilize::Hash hash, unsigned index, const VkDescriptorSetLayoutCreateInfo *create_info, VkDescriptorSetLayout *layout) override;
    bool enqueue_create_pipeline_layout(Fossilize::Hash hash, unsigned index, const VkPipelineLayoutCreateInfo *create_info, VkPipelineLayout *layout) override;
    bool enqueue_create_shader_module(Fossilize::Hash hash, unsigned index, const VkShaderModuleCreateInfo *create_info, VkShaderModule *module) override;
    bool enqueue_create_render_pass(Fossilize::Hash hash, unsigned index, const VkRenderPassCreateInfo *create_info, VkRenderPass *render_pass) override;
    bool enqueue_create_compute_pipeline(Fossilize::Hash hash, unsigned index, const VkComputePipelineCreateInfo *create_info, VkPipeline *pipeline) override;
    bool enqueue_create_graphics_pipeline(Fossilize::Hash hash, unsigned index, const VkGraphicsPipelineCreateInfo *create_info, VkPipeline *pipeline) override;
    void wait_enqueue() override;
    VkPipeline fossilize_create_graphics_pipeline(Fossilize::Hash hash, VkGraphicsPipelineCreateInfo &info);
    VkPipeline fossilize_create_compute_pipeline(Fossilize::Hash hash, VkComputePipelineCreateInfo &info);

    unsigned register_graphics_pipeline(Fossilize::Hash hash, const VkGraphicsPipelineCreateInfo &info);
    unsigned register_compute_pipeline(Fossilize::Hash hash, const VkComputePipelineCreateInfo &info);
    unsigned register_render_pass(Fossilize::Hash hash, const VkRenderPassCreateInfo &info);
    unsigned register_descriptor_set_layout(Fossilize::Hash hash, const VkDescriptorSetLayoutCreateInfo &info);
    unsigned register_pipeline_layout(Fossilize::Hash hash, const VkPipelineLayoutCreateInfo &info);
    unsigned register_shader_module(Fossilize::Hash hash, const VkShaderModuleCreateInfo &info);

    void set_render_pass_handle(unsigned index, VkRenderPass render_pass);
    void set_descriptor_set_layout_handle(unsigned index, VkDescriptorSetLayout set_layout);
    void set_pipeline_layout_handle(unsigned index, VkPipelineLayout layout);
    void set_shader_module_handle(unsigned index, VkShaderModule module);

    struct
    {
        std::unordered_map<VkShaderModule, Shader *> shader_map;
        std::unordered_map<VkRenderPass, RenderPass *> render_pass_map;
#ifdef GRANITE_VULKAN_MT
        Granite::TaskGroup pipeline_group;
#endif
    } replayer_state;

    void init_pipeline_state();
    void flush_pipeline_state();
#endif

    ImplementationWorkarounds workarounds;
    void init_workarounds();
};
}