From 7d763f060eb0fe151a629aa36cce3d7ce076e12a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 18 Jun 2020 17:47:19 -0300 Subject: vk_update_descriptor: Upload descriptor sets data directly Instead of copying to a temporary payload before sending the update task to the worker thread, insert elements to the payload directly. --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 +-- .../renderer_vulkan/vk_update_descriptor.cpp | 36 ++++++++-------------- .../renderer_vulkan/vk_update_descriptor.h | 32 +++++++++---------- 3 files changed, 30 insertions(+), 42 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 184b2238a..91da9ff80 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1154,7 +1154,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu const auto sampler = sampler_cache.GetSampler(texture.tsc); update_descriptor_queue.AddSampledImage(sampler, image_view); - const auto image_layout = update_descriptor_queue.GetLastImageLayout(); + VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; sampled_views.push_back(ImageView{std::move(view), image_layout}); } @@ -1180,7 +1180,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); update_descriptor_queue.AddImage(image_view); - const auto image_layout = update_descriptor_queue.GetLastImageLayout(); + VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); *image_layout = VK_IMAGE_LAYOUT_GENERAL; image_views.push_back(ImageView{std::move(view), image_layout}); } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 681ecde98..351c048d2 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() { } void VKUpdateDescriptorQueue::Acquire() { - entries.clear(); -} + // Minimum number of entries required. + // This is the maximum number of entries a single draw call migth use. + static constexpr std::size_t MIN_ENTRIES = 0x400; -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, - VkDescriptorSet set) { - if (payload.size() + entries.size() >= payload.max_size()) { + if (payload.size() + MIN_ENTRIES >= payload.max_size()) { LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); scheduler.WaitWorker(); payload.clear(); } + upload_start = &*payload.end(); +} - // TODO(Rodrigo): Rework to write the payload directly - const auto payload_start = payload.data() + payload.size(); - for (const auto& entry : entries) { - if (const auto image = std::get_if(&entry)) { - payload.push_back(*image); - } else if (const auto buffer = std::get_if(&entry)) { - payload.push_back(*buffer); - } else if (const auto texel = std::get_if(&entry)) { - payload.push_back(*texel); - } else { - UNREACHABLE(); - } - } - - scheduler.Record( - [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, update_template, payload_start); - }); +void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, + VkDescriptorSet set) { + const void* const data = upload_start; + const vk::Device* const logical = &device.GetLogical(); + scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { + logical->UpdateDescriptorSet(set, update_template, data); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index cc7e3dff4..945320c72 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -15,17 +15,13 @@ namespace Vulkan { class VKDevice; class VKScheduler; -class DescriptorUpdateEntry { -public: - explicit DescriptorUpdateEntry() {} - - DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} +struct DescriptorUpdateEntry { + DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} - DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} + DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {} - DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} + DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {} -private: union { VkDescriptorImageInfo image; VkDescriptorBufferInfo buffer; @@ -45,32 +41,34 @@ public: void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); void AddSampledImage(VkSampler sampler, VkImageView image_view) { - entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); + payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); } void AddImage(VkImageView image_view) { - entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); + payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); } void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { - entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); + payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); } void AddTexelBuffer(VkBufferView texel_buffer) { - entries.emplace_back(texel_buffer); + payload.emplace_back(texel_buffer); } - VkImageLayout* GetLastImageLayout() { - return &std::get(entries.back()).imageLayout; + VkImageLayout* LastImageLayout() { + return &payload.back().image.imageLayout; } -private: - using Variant = std::variant; + const VkImageLayout* LastImageLayout() const { + return &payload.back().image.imageLayout; + } +private: const VKDevice& device; VKScheduler& scheduler; - boost::container::static_vector entries; + const DescriptorUpdateEntry* upload_start = nullptr; boost::container::static_vector payload; }; -- cgit v1.2.3 From cf137ea40b8770310773cf9d51ae5e47bdbddf9d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 18 Jun 2020 18:16:21 -0300 Subject: vk_rasterizer: Don't preserve contents on full screen clears There's no need to load contents from the CPU when a clear resets all the contents of the underlying memory. This is already implemented on OpenGL and the texture cache. --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 60 +++++++++++++++++++++--- src/video_core/renderer_vulkan/vk_rasterizer.h | 5 +- 2 files changed, 58 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 184b2238a..a5fd68358 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -143,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry } } +/// @brief Determine if an attachment to be updated has to preserve contents +/// @param is_clear True when a clear is being executed +/// @param regs 3D registers +/// @return True when the contents have to be preserved +bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { + if (!is_clear) { + return true; + } + // First we have to make sure all clear masks are enabled. + if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || + !regs.clear_buffers.A) { + return true; + } + // If scissors are disabled, the whole screen is cleared + if (!regs.clear_flags.scissor) { + return false; + } + // Then we have to confirm scissor testing clears the whole image + const std::size_t index = regs.clear_buffers.RT; + const auto& scissor = regs.scissor_test[0]; + return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || + scissor.max_y < regs.rt[index].height; +} + +/// @brief Determine if an attachment to be updated has to preserve contents +/// @param is_clear True when a clear is being executed +/// @param regs 3D registers +/// @return True when the contents have to be preserved +bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { + // If we are not clearing, the contents have to be preserved + if (!is_clear) { + return true; + } + // For depth stencil clears we only have to confirm scissor test covers the whole image + if (!regs.clear_flags.scissor) { + return false; + } + // Make sure the clear cover the whole image + const auto& scissor = regs.scissor_test[0]; + return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || + scissor.max_y < regs.zeta_height; +} + } // Anonymous namespace class BufferBindings final { @@ -344,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Unmap(); - const Texceptions texceptions = UpdateAttachments(); + const Texceptions texceptions = UpdateAttachments(false); SetupImageTransitions(texceptions, color_attachments, zeta_attachment); key.renderpass_params = GetRenderPassParams(texceptions); @@ -400,7 +443,7 @@ void RasterizerVulkan::Clear() { return; } - [[maybe_unused]] const auto texceptions = UpdateAttachments(); + [[maybe_unused]] const auto texceptions = UpdateAttachments(true); DEBUG_ASSERT(texceptions.none()); SetupImageTransitions(0, color_attachments, zeta_attachment); @@ -677,9 +720,12 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { +RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { MICROPROFILE_SCOPE(Vulkan_RenderTargets); - auto& dirty = system.GPU().Maxwell3D().dirty.flags; + auto& maxwell3d = system.GPU().Maxwell3D(); + auto& dirty = maxwell3d.dirty.flags; + auto& regs = maxwell3d.regs; + const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; dirty[VideoCommon::Dirty::RenderTargets] = false; @@ -688,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { Texceptions texceptions; for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { if (update_rendertargets) { - color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); + const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); + color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); } if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { texceptions[rt] = true; @@ -696,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { } if (update_rendertargets) { - zeta_attachment = texture_cache.GetDepthBufferSurface(true); + const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); + zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); } if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { texceptions[ZETA_TEXCEPTION_INDEX] = true; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c8c187606..83e00e7e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -159,7 +159,10 @@ private: void FlushWork(); - Texceptions UpdateAttachments(); + /// @brief Updates the currently bound attachments + /// @param is_clear True when the framebuffer is updated as a clear + /// @return Bitfield of attachments being used as sampled textures + Texceptions UpdateAttachments(bool is_clear); std::tuple ConfigureFramebuffers(VkRenderPass renderpass); -- cgit v1.2.3 From a6e5b84d1fbfa976819645d8b7234d847756fc88 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 23:01:56 -0400 Subject: vulkan/wrapper: Remove noexcept from GetSurfaceCapabilitiesKHR() Check() can throw an exception if the Vulkan result isn't successful. We remove the check so that std::terminate isn't outright called and allows for better debugging (should it ever actually fail). --- src/video_core/renderer_vulkan/wrapper.cpp | 3 +-- src/video_core/renderer_vulkan/wrapper.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 2ce9b0626..42eff85d3 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -725,8 +725,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s return supported == VK_TRUE; } -VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const - noexcept { +VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const { VkSurfaceCapabilitiesKHR capabilities; Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); return capabilities; diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 98937a77a..da42ca88e 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -779,7 +779,7 @@ public: bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; - VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; + VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const; std::vector GetSurfaceFormatsKHR(VkSurfaceKHR) const; -- cgit v1.2.3 From 2f09c7ddd314f03da0fbafacfcae6b0a47a209ae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Jun 2020 04:10:45 -0300 Subject: renderer_vulkan: Update validation layer name and test before enabling Update validation layer string to VK_LAYER_KHRONOS_validation. While we are at it, properly check for available validation layers before enabling them. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 28 ++++++++++++++++++---- src/video_core/renderer_vulkan/wrapper.cpp | 16 ++++++++++++- src/video_core/renderer_vulkan/wrapper.h | 4 ++++ 3 files changed, 43 insertions(+), 5 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index cd9673d1f..2d9b18ed9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -155,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc } } - static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; - vk::Span layers = layers_data; - if (!enable_layers) { - layers = {}; + std::vector layers; + layers.reserve(1); + if (enable_layers) { + layers.push_back("VK_LAYER_KHRONOS_validation"); + } + + const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); + if (!layer_properties) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); + layers.clear(); + } + + for (auto layer_it = layers.begin(); layer_it != layers.end();) { + const char* const layer = *layer_it; + const auto it = std::find_if( + layer_properties->begin(), layer_properties->end(), + [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); + if (it == layer_properties->end()) { + LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); + layer_it = layers.erase(layer_it); + } else { + ++layer_it; + } } + vk::Instance instance = vk::Instance::Create(layers, extensions, dld); if (!instance) { LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 42eff85d3..0d485a662 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { bool Load(InstanceDispatch& dld) noexcept { #define X(name) Proc(dld.name, dld, #name) - return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); + return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) && + X(vkEnumerateInstanceLayerProperties); #undef X } @@ -770,4 +771,17 @@ std::optional> EnumerateInstanceExtensionProp return properties; } +std::optional> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld) { + u32 num; + if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector properties(num); + if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) { + return std::nullopt; + } + return properties; +} + } // namespace Vulkan::vk diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index da42ca88e..d56fdb3f9 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -141,6 +141,7 @@ struct InstanceDispatch { PFN_vkCreateInstance vkCreateInstance; PFN_vkDestroyInstance vkDestroyInstance; PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; + PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties; PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; PFN_vkCreateDevice vkCreateDevice; @@ -996,4 +997,7 @@ private: std::optional> EnumerateInstanceExtensionProperties( const InstanceDispatch& dld); +std::optional> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld); + } // namespace Vulkan::vk -- cgit v1.2.3 From 32485917ba7cb7b2f0cad766c0897365294650a7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 11 May 2020 16:35:04 -0300 Subject: gl_buffer_cache: Mark buffers as resident Make stream buffer and cached buffers as resident and query their address. This allows us to use GPU addresses for several proprietary Nvidia extensions. --- src/video_core/buffer_cache/buffer_cache.h | 21 ++++++----- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 24 ++++++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 20 +++++++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 44 +++++++++++----------- .../renderer_opengl/gl_stream_buffer.cpp | 11 +++++- src/video_core/renderer_opengl/gl_stream_buffer.h | 11 +++++- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 +- src/video_core/renderer_vulkan/vk_buffer_cache.h | 6 ++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 31 ++++++++------- src/video_core/renderer_vulkan/vk_stream_buffer.h | 6 ++- 10 files changed, 111 insertions(+), 67 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bae1d527c..6ea59253a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -41,7 +41,11 @@ class BufferCache { static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; public: - using BufferInfo = std::pair; + struct BufferInfo { + BufferType handle; + u64 offset; + u64 address; + }; BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool is_written = false, bool use_fast_cbuf = false) { @@ -50,7 +54,7 @@ public: auto& memory_manager = system.GPU().MemoryManager(); const std::optional cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); if (!cpu_addr_opt) { - return {GetEmptyBuffer(size), 0}; + return GetEmptyBuffer(size); } const VAddr cpu_addr = *cpu_addr_opt; @@ -88,7 +92,7 @@ public: Buffer* const block = GetBlock(cpu_addr, size); MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); if (!map) { - return {GetEmptyBuffer(size), 0}; + return GetEmptyBuffer(size); } if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); @@ -101,7 +105,7 @@ public: } } - return {block->Handle(), static_cast(block->Offset(cpu_addr))}; + return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. @@ -254,13 +258,12 @@ public: committed_flushes.pop_front(); } - virtual BufferType GetEmptyBuffer(std::size_t size) = 0; + virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - std::unique_ptr stream_buffer_) - : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, - stream_buffer_handle{stream_buffer->Handle()} {} + std::unique_ptr stream_buffer) + : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {} ~BufferCache() = default; @@ -449,7 +452,7 @@ private: buffer_ptr += size; buffer_offset += size; - return {stream_buffer_handle, uploaded_offset}; + return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; } void AlignBuffer(std::size_t alignment) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ad0577a4f..e09b47f57 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,21 +22,28 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast(size), nullptr, GL_DYNAMIC_DRAW); + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } } Buffer::~Buffer() = default; OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device, std::size_t stream_size) - : GenericBufferCache{rasterizer, system, std::make_unique(stream_size, true)} { + const Device& device_, std::size_t stream_size) + : GenericBufferCache{rasterizer, system, + std::make_unique(device_, stream_size, true)}, + device{device_} { if (!device.HasFastBufferSubData()) { return; } - static constexpr auto size = static_cast(Maxwell::MaxConstBufferSize); + static constexpr GLsizeiptr size = static_cast(Maxwell::MaxConstBufferSize); glCreateBuffers(static_cast(std::size(cbufs)), std::data(cbufs)); for (const GLuint cbuf : cbufs) { glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); @@ -48,11 +55,11 @@ OGLBufferCache::~OGLBufferCache() { } std::shared_ptr OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared(cpu_addr, size); + return std::make_shared(device, cpu_addr, size); } -GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { - return 0; +OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { + return {0, 0, 0}; } void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, @@ -79,8 +86,9 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); const GLuint cbuf = cbufs[cbuf_cursor++]; + glNamedBufferSubData(cbuf, 0, static_cast(size), raw_pointer); - return {cbuf, 0}; + return {cbuf, 0, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a49aaf9c4..6462cfae5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -25,15 +25,20 @@ class RasterizerOpenGL; class Buffer : public VideoCommon::BufferBlock { public: - explicit Buffer(VAddr cpu_addr, const std::size_t size); + explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); - GLuint Handle() const { + GLuint Handle() const noexcept { return gl_buffer.handle; } + u64 Address() const noexcept { + return gpu_address; + } + private: OGLBuffer gl_buffer; + u64 gpu_address = 0; }; using GenericBufferCache = VideoCommon::BufferCache; @@ -43,7 +48,7 @@ public: const Device& device, std::size_t stream_size); ~OGLBufferCache(); - GLuint GetEmptyBuffer(std::size_t) override; + BufferInfo GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; @@ -64,10 +69,13 @@ protected: BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: + static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * + Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + + const Device& device; + std::size_t cbuf_cursor = 0; - std::array - cbufs; + std::array cbufs{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6c11320..7cb378a71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -253,8 +253,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { glBindVertexBuffer(static_cast(index), 0, 0, vertex_array.stride); continue; } - const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - glBindVertexBuffer(static_cast(index), vertex_buffer, vertex_buffer_offset, + const auto info = buffer_cache.UploadMemory(start, size); + glBindVertexBuffer(static_cast(index), info.handle, info.offset, vertex_array.stride); } } @@ -285,9 +285,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); - const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); - return offset; + const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); + return info.offset; } void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { @@ -643,9 +643,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (!device.UseAssemblyShaders()) { MaxwellUniformData ubo; ubo.SetFromRegs(gpu); - const auto [buffer, offset] = + const auto info = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, static_cast(sizeof(ubo))); } @@ -956,8 +956,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, if (device.UseAssemblyShaders()) { glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); } return; } @@ -970,24 +969,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); const GPUVAddr gpu_addr = buffer.address; - auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); + auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); if (device.UseAssemblyShaders()) { UNIMPLEMENTED_IF(use_unified); - if (offset != 0) { + if (info.offset != 0) { const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; - glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); - cbuf = staging_cbuf; - offset = 0; + glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); + info.handle = staging_cbuf; + info.offset = 0; } - glBindBufferRangeNV(stage, binding, cbuf, offset, size); + glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); return; } if (use_unified) { - glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); + glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, + unified_offset, size); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); } } @@ -1023,9 +1023,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; - const auto [ssbo, buffer_offset] = - buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, static_cast(size)); } @@ -1712,8 +1711,9 @@ void RasterizerOpenGL::EndTransformFeedback() { const GLuint handle = transform_feedback_buffers[index].handle; const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; - const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast(size)); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, + static_cast(size)); } } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index aeafcfbfe..164df4feb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -2,12 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -15,7 +16,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) + : buffer_size(size) { gl_buffer.Create(); GLsizeiptr allocate_size = size; @@ -32,6 +34,11 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buff glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); mapped_ptr = static_cast( glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } } OGLStreamBuffer::~OGLStreamBuffer() { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 826c2e361..e67a82980 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -11,9 +11,11 @@ namespace OpenGL { +class Device; + class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage); + explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); ~OGLStreamBuffer(); /* @@ -32,13 +34,18 @@ public: return gl_buffer.handle; } - GLsizeiptr Size() const { + u64 Address() const { + return gpu_address; + } + + GLsizeiptr Size() const noexcept { return buffer_size; } private: OGLBuffer gl_buffer; + GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; GLintptr mapped_offset = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1fde38328..df258d7a4 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -71,14 +71,14 @@ std::shared_ptr VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t s return std::make_shared(device, memory_manager, cpu_addr, size); } -VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { +VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { size = std::max(size, std::size_t(4)); const auto& empty = staging_pool.GetUnusedBuffer(size, false); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { cmdbuf.FillBuffer(buffer, 0, size, 0); }); - return *empty.handle; + return {*empty.handle, 0, 0}; } void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 9ebbef835..682383ff2 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -33,6 +33,10 @@ public: return *buffer.handle; } + u64 Address() const { + return 0; + } + private: VKBuffer buffer; }; @@ -44,7 +48,7 @@ public: VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - VkBuffer GetEmptyBuffer(std::size_t size) override; + BufferInfo GetEmptyBuffer(std::size_t size) override; protected: std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 29001953c..e3714ee6d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -870,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() { UNIMPLEMENTED_IF(binding.buffer_offset != 0); const GPUVAddr gpu_addr = binding.Address(); - const auto size = static_cast(binding.buffer_size); - const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + const VkDeviceSize size = static_cast(binding.buffer_size); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); @@ -925,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); continue; } - const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); - buffer_bindings.AddVertexBinding(buffer, offset); + const auto info = buffer_cache.UploadMemory(start, size); + buffer_bindings.AddVertexBinding(info.handle, info.offset); } } @@ -948,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar break; } const GPUVAddr gpu_addr = regs.index_array.IndexStart(); - auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + VkBuffer buffer = info.handle; + u64 offset = info.offset; std::tie(buffer, offset) = quad_indexed_pass.Assemble( regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); @@ -962,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar break; } const GPUVAddr gpu_addr = regs.index_array.IndexStart(); - auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + VkBuffer buffer = info.handle; + u64 offset = info.offset; auto format = regs.index_array.format; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; @@ -1109,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); ASSERT(size <= MaxConstbufferSize); - const auto [buffer_handle, offset] = + const auto info = buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); - - update_descriptor_queue.AddBuffer(buffer_handle, offset, size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { @@ -1126,14 +1129,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the // default buffer. static constexpr std::size_t dummy_size = 4; - const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); - update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); + const auto info = buffer_cache.GetEmptyBuffer(dummy_size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); return; } - const auto [buffer, offset] = buffer_cache.UploadMemory( + const auto info = buffer_cache.UploadMemory( actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); - update_descriptor_queue.AddBuffer(buffer, offset, size); + update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index c765c60a0..689f0d276 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,10 +35,14 @@ public: /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); - VkBuffer Handle() const { + VkBuffer Handle() const noexcept { return *buffer; } + u64 Address() const noexcept { + return 0; + } + private: struct Watch final { VKFenceWatch fence; -- cgit v1.2.3 From 32a2dcd4153f4e2aea7b5f88c85d8a352f647f12 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Jun 2020 20:47:48 -0300 Subject: buffer_cache: Use buffer methods instead of cache virtual methods --- src/video_core/buffer_cache/buffer_cache.h | 23 ++---- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 38 +++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 16 ++-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 89 +++++++++++----------- src/video_core/renderer_vulkan/vk_buffer_cache.h | 23 +++--- 5 files changed, 90 insertions(+), 99 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6ea59253a..cf8bdd021 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -269,15 +269,6 @@ protected: virtual std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) = 0; - - virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) = 0; - - virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) = 0; - virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { return {}; } @@ -339,11 +330,11 @@ private: const VAddr cpu_addr_end = cpu_addr + size; if (memory_manager.IsGranularRange(gpu_addr, size)) { u8* host_ptr = memory_manager.GetPointer(gpu_addr); - UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); + block->Upload(block->Offset(cpu_addr), size, host_ptr); } else { staging_buffer.resize(size); memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); + block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); } return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); } @@ -402,7 +393,7 @@ private: } staging_buffer.resize(size); system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); - UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); + block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); } } @@ -439,7 +430,7 @@ private: const std::size_t size = map->end - map->start; staging_buffer.resize(size); - DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); + block->Download(block->Offset(map->start), size, staging_buffer.data()); system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -467,7 +458,7 @@ private: const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; const VAddr cpu_addr = buffer->CpuAddr(); std::shared_ptr new_buffer = CreateBlock(cpu_addr, new_size); - CopyBlock(*buffer, *new_buffer, 0, 0, old_size); + new_buffer->CopyFrom(*buffer, 0, 0, old_size); QueueDestruction(std::move(buffer)); const VAddr cpu_addr_end = cpu_addr + new_size - 1; @@ -489,8 +480,8 @@ private: const std::size_t new_size = size_1 + size_2; std::shared_ptr new_buffer = CreateBlock(new_addr, new_size); - CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); - CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); + new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1); + new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2); QueueDestruction(std::move(first)); QueueDestruction(std::move(second)); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e09b47f57..d9f7b4cc6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -34,6 +34,24 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) Buffer::~Buffer() = default; +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { + glNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), + data); +} + +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { + MICROPROFILE_SCOPE(OpenGL_Buffer_Download); + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glGetNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), + data); +} + +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const { + glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast(src_offset), + static_cast(dst_offset), static_cast(size)); +} + OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, const Device& device_, std::size_t stream_size) : GenericBufferCache{rasterizer, system, @@ -62,26 +80,6 @@ OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { return {0, 0, 0}; } -void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { - glNamedBufferSubData(buffer.Handle(), static_cast(offset), - static_cast(size), data); -} - -void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { - MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(buffer.Handle(), static_cast(offset), - static_cast(size), data); -} - -void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast(src_offset), - static_cast(dst_offset), static_cast(size)); -} - OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6462cfae5..59d95adbc 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -28,6 +28,13 @@ public: explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); + void Upload(std::size_t offset, std::size_t size, const u8* data) const; + + void Download(std::size_t offset, std::size_t size, u8* data) const; + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const; + GLuint Handle() const noexcept { return gl_buffer.handle; } @@ -57,15 +64,6 @@ public: protected: std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; - BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index df258d7a4..f10f96cd8 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -37,9 +37,9 @@ std::unique_ptr CreateStreamBuffer(const VKDevice& device, VKSch } // Anonymous namespace -Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, - std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, + VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { VkBufferCreateInfo ci; ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; ci.pNext = nullptr; @@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp Buffer::~Buffer() = default; -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : VideoCommon::BufferCache{rasterizer, system, - CreateStreamBuffer(device, - scheduler)}, - device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ - staging_pool} {} - -VKBufferCache::~VKBufferCache() = default; - -std::shared_ptr VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared(device, memory_manager, cpu_addr, size); -} - -VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { - size = std::max(size, std::size_t(4)); - const auto& empty = staging_pool.GetUnusedBuffer(size, false); - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { - cmdbuf.FillBuffer(buffer, 0, size, 0); - }); - return {*empty.handle, 0, 0}; -} - -void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { const auto& staging = staging_pool.GetUnusedBuffer(size, true); std::memcpy(staging.commit->Map(size), data, size); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, - size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); + + const VkBuffer handle = Handle(); + scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; + barrier.buffer = handle; barrier.offset = offset; barrier.size = size; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, @@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st }); } -void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { const auto& staging = staging_pool.GetUnusedBuffer(size, true); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, - size](vk::CommandBuffer cmdbuf) { + + const VkBuffer handle = Handle(); + scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.pNext = nullptr; @@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; + barrier.buffer = handle; barrier.offset = offset; barrier.size = size; @@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); - cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); + cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size}); }); scheduler.Finish(); std::memcpy(data, staging.commit->Map(size), size); } -void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const { scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, + + const VkBuffer dst_buffer = Handle(); + scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); @@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t }); } +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool) + : VideoCommon::BufferCache{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, + device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ + staging_pool} {} + +VKBufferCache::~VKBufferCache() = default; + +std::shared_ptr VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared(device, memory_manager, scheduler, staging_pool, cpu_addr, + size); +} + +VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { + size = std::max(size, std::size_t(4)); + const auto& empty = staging_pool.GetUnusedBuffer(size, false); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { + cmdbuf.FillBuffer(buffer, 0, size, 0); + }); + return {*empty.handle, 0, 0}; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 682383ff2..3630aca77 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -25,10 +25,17 @@ class VKScheduler; class Buffer final : public VideoCommon::BufferBlock { public: - explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, - std::size_t size); + explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, + VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); ~Buffer(); + void Upload(std::size_t offset, std::size_t size, const u8* data) const; + + void Download(std::size_t offset, std::size_t size, u8* data) const; + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const; + VkBuffer Handle() const { return *buffer.handle; } @@ -38,6 +45,9 @@ public: } private: + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; + VKBuffer buffer; }; @@ -53,15 +63,6 @@ public: protected: std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; - private: const VKDevice& device; VKMemoryManager& memory_manager; -- cgit v1.2.3 From 6481d91e4a5b5fbae899c3a7924af0b132c16bc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Jun 2020 16:58:40 -0300 Subject: gl_buffer_cache: Copy to buffers created as STREAM_READ before downloading After marking buffers as resident, Nvidia's driver seems to take a slow path. To workaround this issue, copy to a STREAM_READ buffer and then call GetNamedBufferSubData on it. This is a temporary solution until we have asynchronous flushing. --- src/video_core/buffer_cache/buffer_cache.h | 6 ++---- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 17 ++++++++++++----- src/video_core/renderer_opengl/gl_buffer_cache.h | 7 ++++--- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 6 +++--- src/video_core/renderer_vulkan/vk_buffer_cache.h | 6 +++--- 5 files changed, 24 insertions(+), 18 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index cf8bdd021..c6479af9f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -322,8 +322,7 @@ protected: } private: - MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, - std::size_t size) { + MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { auto& memory_manager = system.GPU().MemoryManager(); @@ -377,8 +376,7 @@ private: return map; } - void UpdateBlock(const Buffer* block, VAddr start, VAddr end, - const VectorMapInterval& overlaps) { + void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; interval_set.add(base_interval); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index d9f7b4cc6..e461e4c70 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -34,20 +34,27 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) Buffer::~Buffer() = default; -void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { glNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), data); } -void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { MICROPROFILE_SCOPE(OpenGL_Buffer_Download); + const GLsizeiptr gl_size = static_cast(size); + const GLintptr gl_offset = static_cast(offset); + if (read_buffer.handle == 0) { + read_buffer.Create(); + glNamedBufferData(read_buffer.handle, static_cast(Size()), nullptr, + GL_STREAM_READ); + } glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), - data); + glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); + glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); } void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) const { + std::size_t size) { glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast(src_offset), static_cast(dst_offset), static_cast(size)); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 59d95adbc..88fdc0536 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -28,12 +28,12 @@ public: explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); - void Upload(std::size_t offset, std::size_t size, const u8* data) const; + void Upload(std::size_t offset, std::size_t size, const u8* data); - void Download(std::size_t offset, std::size_t size, u8* data) const; + void Download(std::size_t offset, std::size_t size, u8* data); void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) const; + std::size_t size); GLuint Handle() const noexcept { return gl_buffer.handle; @@ -45,6 +45,7 @@ public: private: OGLBuffer gl_buffer; + OGLBuffer read_buffer; u64 gpu_address = 0; }; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index f10f96cd8..2be38d419 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -56,7 +56,7 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKSchedu Buffer::~Buffer() = default; -void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { const auto& staging = staging_pool.GetUnusedBuffer(size, true); std::memcpy(staging.commit->Map(size), data, size); @@ -81,7 +81,7 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const }); } -void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { const auto& staging = staging_pool.GetUnusedBuffer(size, true); scheduler.RequestOutsideRenderPassOperationContext(); @@ -110,7 +110,7 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { } void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) const { + std::size_t size) { scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer dst_buffer = Handle(); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3630aca77..991ee451c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -29,12 +29,12 @@ public: VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); ~Buffer(); - void Upload(std::size_t offset, std::size_t size, const u8* data) const; + void Upload(std::size_t offset, std::size_t size, const u8* data); - void Download(std::size_t offset, std::size_t size, u8* data) const; + void Download(std::size_t offset, std::size_t size, u8* data); void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) const; + std::size_t size); VkBuffer Handle() const { return *buffer.handle; -- cgit v1.2.3 From 528b19a84287167d7699465e495b196d216b99db Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 5 Apr 2020 09:48:53 -0400 Subject: General: Tune the priority of main emulation threads so they have higher priority than less important helper threads. --- src/common/thread.cpp | 46 +++++++++++++++++++++++++ src/common/thread.h | 9 +++++ src/core/core_timing.cpp | 1 + src/core/cpu_manager.cpp | 1 + src/video_core/gpu_thread.cpp | 1 + src/video_core/renderer_vulkan/vk_scheduler.cpp | 2 ++ 6 files changed, 60 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/common/thread.cpp b/src/common/thread.cpp index c9684aed9..33c8437f5 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -25,6 +25,52 @@ namespace Common { +#ifdef _WIN32 + +void SetCurrentThreadPriority(ThreadPriority new_priority) { + auto handle = GetCurrentThread(); + int windows_priority = 0; + switch (new_priority) { + case ThreadPriority::Low: + windows_priority = THREAD_PRIORITY_BELOW_NORMAL; + break; + case ThreadPriority::Normal: + windows_priority = THREAD_PRIORITY_NORMAL; + break; + case ThreadPriority::High: + windows_priority = THREAD_PRIORITY_ABOVE_NORMAL; + break; + case ThreadPriority::VeryHigh: + windows_priority = THREAD_PRIORITY_HIGHEST; + break; + default: + windows_priority = THREAD_PRIORITY_NORMAL; + break; + } + SetThreadPriority(handle, windows_priority); +} + +#else + +void SetCurrentThreadPriority(ThreadPriority new_priority) { + pthread_t this_thread = pthread_self(); + + s32 max_prio = sched_get_priority_max(SCHED_OTHER); + s32 min_prio = sched_get_priority_min(SCHED_OTHER); + u32 level = static_cast(new_priority) + 1; + + struct sched_param params; + if (max_prio > min_prio) { + params.sched_priority = min_prio + ((max_prio - min_prio) * level) / 4; + } else { + params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4; + } + + pthread_setschedparam(this_thread, SCHED_OTHER, ¶ms); +} + +#endif + #ifdef _MSC_VER // Sets the debugger-visible name of the current thread. diff --git a/src/common/thread.h b/src/common/thread.h index 127cc7e23..52b359413 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -86,6 +86,15 @@ private: std::size_t generation = 0; // Incremented once each time the barrier is used }; +enum class ThreadPriority : u32 { + Low = 0, + Normal = 1, + High = 2, + VeryHigh = 3, +}; + +void SetCurrentThreadPriority(ThreadPriority new_priority); + void SetCurrentThreadName(const char* name); } // namespace Common diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index b02119494..032b29e33 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -48,6 +48,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) { std::string name = "yuzu:HostTiming"; MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); instance.on_thread_init(); instance.ThreadLoop(); } diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 63c578852..32afcf3ae 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -337,6 +337,7 @@ void CpuManager::RunThread(std::size_t core) { } MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); auto& data = core_data[core]; data.enter_barrier = std::make_unique(); data.exit_barrier = std::make_unique(); diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 323185bfc..738c6f0c1 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -22,6 +22,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); system.RegisterHostThread(); // Wait for first GPU command before acquiring the window context diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 82ec9180e..56524e6f3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -9,6 +9,7 @@ #include #include "common/microprofile.h" +#include "common/thread.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" @@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { } void VKScheduler::WorkerThread() { + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); -- cgit v1.2.3 From 4a35df337b7aaa3d4056a5b10da471bff11b4b2f Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sun, 28 Jun 2020 02:49:17 -0400 Subject: maxwell_to_vk: Reorder vertex formats and add A2B10G10R10 for all types except float --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 144 +++++++++++------------ 1 file changed, 69 insertions(+), 75 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 1f2b6734b..d7f1ae89f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -294,6 +294,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { switch (type) { + case Maxwell::VertexAttribute::Type::UnsignedNorm: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return VK_FORMAT_R8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8: + return VK_FORMAT_R8G8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return VK_FORMAT_R8G8B8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return VK_FORMAT_R8G8B8A8_UNORM; + case Maxwell::VertexAttribute::Size::Size_16: + return VK_FORMAT_R16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16: + return VK_FORMAT_R16G16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return VK_FORMAT_R16G16B16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return VK_FORMAT_R16G16B16A16_UNORM; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + } + break; case Maxwell::VertexAttribute::Type::SignedNorm: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: @@ -314,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_SNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SNORM_PACK32; - default: - break; } break; - case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_UNORM; + return VK_FORMAT_R8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_UNORM; + return VK_FORMAT_R8G8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_UNORM; + return VK_FORMAT_R8G8B8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_UNORM; + return VK_FORMAT_R8G8B8A8_USCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_UNORM; + return VK_FORMAT_R16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_UNORM; + return VK_FORMAT_R16G16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_UNORM; + return VK_FORMAT_R16G16B16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_UNORM; + return VK_FORMAT_R16G16B16A16_USCALED; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: - return VK_FORMAT_A2B10G10R10_UNORM_PACK32; - default: - break; + return VK_FORMAT_A2B10G10R10_USCALED_PACK32; } break; - case Maxwell::VertexAttribute::Type::SignedInt: + case Maxwell::VertexAttribute::Type::SignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_SINT; + return VK_FORMAT_R8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_SINT; + return VK_FORMAT_R8G8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_SINT; + return VK_FORMAT_R8G8B8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_SINT; + return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SINT; + return VK_FORMAT_R16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SINT; + return VK_FORMAT_R16G16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SINT; + return VK_FORMAT_R16G16B16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SINT; - case Maxwell::VertexAttribute::Size::Size_32: - return VK_FORMAT_R32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32: - return VK_FORMAT_R32G32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32_32: - return VK_FORMAT_R32G32B32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return VK_FORMAT_R32G32B32A32_SINT; - default: - break; + return VK_FORMAT_R16G16B16A16_SSCALED; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_SSCALED_PACK32; } break; case Maxwell::VertexAttribute::Type::UnsignedInt: @@ -398,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_UINT; - default: - break; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_UINT_PACK32; } break; - case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_USCALED; + return VK_FORMAT_R8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_USCALED; + return VK_FORMAT_R8G8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_USCALED; + return VK_FORMAT_R8G8B8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_USCALED; + return VK_FORMAT_R8G8B8A8_SINT; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_USCALED; + return VK_FORMAT_R16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_USCALED; + return VK_FORMAT_R16G16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_USCALED; + return VK_FORMAT_R16G16B16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_USCALED; - default: - break; + return VK_FORMAT_R16G16B16A16_SINT; + case Maxwell::VertexAttribute::Size::Size_32: + return VK_FORMAT_R32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32: + return VK_FORMAT_R32G32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return VK_FORMAT_R32G32B32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return VK_FORMAT_R32G32B32A32_SINT; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_SINT_PACK32; } break; - case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: switch (size) { - case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SSCALED; + return VK_FORMAT_R16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SSCALED; + return VK_FORMAT_R16G16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SSCALED; + return VK_FORMAT_R16G16B16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SSCALED; - default: - break; - } - break; - case Maxwell::VertexAttribute::Type::Float: - switch (size) { + return VK_FORMAT_R16G16B16A16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32: return VK_FORMAT_R32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32: @@ -456,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SFLOAT; - default: - break; } break; } -- cgit v1.2.3