diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/compatible_formats.cpp | 162 | ||||
| -rw-r--r-- | src/video_core/compatible_formats.h | 32 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 6 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu_synch.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/gpu_synch.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/macro/macro.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 44 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 144 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/renderer_vulkan.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_scheduler.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/wrapper.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/wrapper.h | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 25 |
20 files changed, 407 insertions, 151 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2dc752aa9..21c46a567 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,8 @@ add_library(video_core STATIC buffer_cache/buffer_cache.h buffer_cache/map_interval.cpp buffer_cache/map_interval.h + compatible_formats.cpp + compatible_formats.h dirty_flags.cpp dirty_flags.h dma_pusher.cpp diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp new file mode 100644 index 000000000..6c426b035 --- /dev/null +++ b/src/video_core/compatible_formats.cpp @@ -0,0 +1,162 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bitset> +#include <cstddef> + +#include "video_core/compatible_formats.h" +#include "video_core/surface.h" + +namespace VideoCore::Surface { + +namespace { + +// Compatibility table taken from Table 3.X.2 in: +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt + +constexpr std::array VIEW_CLASS_128_BITS = { + PixelFormat::RGBA32F, + PixelFormat::RGBA32UI, +}; +// Missing formats: +// PixelFormat::RGBA32I + +constexpr std::array VIEW_CLASS_96_BITS = { + PixelFormat::RGB32F, +}; +// Missing formats: +// PixelFormat::RGB32UI, +// PixelFormat::RGB32I, + +constexpr std::array VIEW_CLASS_64_BITS = { + PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, + PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S, +}; +// Missing formats: +// PixelFormat::RGBA16I +// PixelFormat::RG32I + +// TODO: How should we handle 48 bits? + +constexpr std::array VIEW_CLASS_32_BITS = { + PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F, + PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI, + PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U, + PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S, + PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8, + PixelFormat::BGRA8_SRGB, +}; +// Missing formats: +// PixelFormat::RGBA8UI +// PixelFormat::RGBA8I +// PixelFormat::RGB10_A2_UI + +// TODO: How should we handle 24 bits? + +constexpr std::array VIEW_CLASS_16_BITS = { + PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I, + PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S, +}; +// Missing formats: +// PixelFormat::RG8I + +constexpr std::array VIEW_CLASS_8_BITS = { + PixelFormat::R8UI, + PixelFormat::R8U, +}; +// Missing formats: +// PixelFormat::R8I +// PixelFormat::R8S + +constexpr std::array VIEW_CLASS_RGTC1_RED = { + PixelFormat::DXN1, +}; +// Missing formats: +// COMPRESSED_SIGNED_RED_RGTC1 + +constexpr std::array VIEW_CLASS_RGTC2_RG = { + PixelFormat::DXN2UNORM, + PixelFormat::DXN2SNORM, +}; + +constexpr std::array VIEW_CLASS_BPTC_UNORM = { + PixelFormat::BC7U, + PixelFormat::BC7U_SRGB, +}; + +constexpr std::array VIEW_CLASS_BPTC_FLOAT = { + PixelFormat::BC6H_SF16, + PixelFormat::BC6H_UF16, +}; + +// Compatibility table taken from Table 4.X.1 in: +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt + +constexpr std::array COPY_CLASS_128_BITS = { + PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23, + PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB, + PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB, + PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16, +}; +// Missing formats: +// PixelFormat::RGBA32I +// COMPRESSED_RG_RGTC2 + +constexpr std::array COPY_CLASS_64_BITS = { + PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, + PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1, + +}; +// Missing formats: +// PixelFormat::RGBA16I +// PixelFormat::RG32I, +// COMPRESSED_RGB_S3TC_DXT1_EXT +// COMPRESSED_SRGB_S3TC_DXT1_EXT +// COMPRESSED_RGBA_S3TC_DXT1_EXT +// COMPRESSED_SIGNED_RED_RGTC1 + +void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { + compatiblity[format_a][format_b] = true; + compatiblity[format_b][format_a] = true; +} + +void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { + Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); +} + +template <typename Range> +void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { + for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { + for (auto it_b = it_a; it_b != range.end(); ++it_b) { + Enable(compatibility, *it_a, *it_b); + } + } +} + +} // Anonymous namespace + +FormatCompatibility::FormatCompatibility() { + for (size_t i = 0; i < MaxPixelFormat; ++i) { + // Identity is allowed + Enable(view, i, i); + } + + EnableRange(view, VIEW_CLASS_128_BITS); + EnableRange(view, VIEW_CLASS_96_BITS); + EnableRange(view, VIEW_CLASS_64_BITS); + EnableRange(view, VIEW_CLASS_32_BITS); + EnableRange(view, VIEW_CLASS_16_BITS); + EnableRange(view, VIEW_CLASS_8_BITS); + EnableRange(view, VIEW_CLASS_RGTC1_RED); + EnableRange(view, VIEW_CLASS_RGTC2_RG); + EnableRange(view, VIEW_CLASS_BPTC_UNORM); + EnableRange(view, VIEW_CLASS_BPTC_FLOAT); + + copy = view; + EnableRange(copy, COPY_CLASS_128_BITS); + EnableRange(copy, COPY_CLASS_64_BITS); +} + +} // namespace VideoCore::Surface diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h new file mode 100644 index 000000000..d1082566d --- /dev/null +++ b/src/video_core/compatible_formats.h @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bitset> +#include <cstddef> + +#include "video_core/surface.h" + +namespace VideoCore::Surface { + +class FormatCompatibility { +public: + using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>; + + explicit FormatCompatibility(); + + bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept { + return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; + } + + bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept { + return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; + } + +private: + Table view; + Table copy; +}; + +} // namespace VideoCore::Surface diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8eb017f65..482e49711 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> + #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" @@ -154,8 +156,7 @@ u64 GPU::GetTicks() const { constexpr u64 gpu_ticks_num = 384; constexpr u64 gpu_ticks_den = 625; - const u64 cpu_ticks = system.CoreTiming().GetTicks(); - u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); if (Settings::values.use_fast_gpu_time) { nanoseconds /= 256; } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index a1b4c305c..2c42483bd 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -284,6 +284,12 @@ public: /// core timing events. virtual void Start() = 0; + /// Obtain the CPU Context + virtual void ObtainContext() = 0; + + /// Release the CPU Context + virtual void ReleaseContext() = 0; + /// Push GPU command entries to be processed virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 53305ab43..7b855f63e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - cpu_context->MakeCurrent(); gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); } +void GPUAsynch::ObtainContext() { + cpu_context->MakeCurrent(); +} + +void GPUAsynch::ReleaseContext() { + cpu_context->DoneCurrent(); +} + void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { gpu_thread.SubmitList(std::move(entries)); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 517658612..15e9f1d38 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,8 @@ public: ~GPUAsynch() override; void Start() override; + void ObtainContext() override; + void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 6f38a672a..aaeb9811d 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase GPUSynch::~GPUSynch() = default; -void GPUSynch::Start() { +void GPUSynch::Start() {} + +void GPUSynch::ObtainContext() { context->MakeCurrent(); } +void GPUSynch::ReleaseContext() { + context->DoneCurrent(); +} + void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->Push(std::move(entries)); dma_pusher->DispatchCalls(); diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 4a6e9a01d..762c20aa5 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,8 @@ public: ~GPUSynch() override; void Start() override; + void ObtainContext() override; + void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index c3bb4fe06..738c6f0c1 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/settings.h" @@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread { static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, SynchState& state) { - MicroProfileOnThreadCreate("GpuThread"); + std::string name = "yuzu:GPU"; + MicroProfileOnThreadCreate(name.c_str()); + Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + system.RegisterHostThread(); // Wait for first GPU command before acquiring the window context while (state.queue.Empty()) diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index ef7dad349..a50e7b4e0 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <optional> #include <boost/container_hash/hash.hpp> #include "common/assert.h" #include "common/logging/log.h" @@ -35,22 +36,40 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, } } else { // Macro not compiled, check if it's uploaded and if so, compile it - auto macro_code = uploaded_macro_code.find(method); + std::optional<u32> mid_method = std::nullopt; + const auto macro_code = uploaded_macro_code.find(method); if (macro_code == uploaded_macro_code.end()) { - UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); - return; + for (const auto& [method_base, code] : uploaded_macro_code) { + if (method >= method_base && (method - method_base) < code.size()) { + mid_method = method_base; + break; + } + } + if (!mid_method.has_value()) { + UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); + return; + } } auto& cache_info = macro_cache[method]; - cache_info.hash = boost::hash_value(macro_code->second); - cache_info.lle_program = Compile(macro_code->second); + + if (!mid_method.has_value()) { + cache_info.lle_program = Compile(macro_code->second); + cache_info.hash = boost::hash_value(macro_code->second); + } else { + const auto& macro_cached = uploaded_macro_code[mid_method.value()]; + const auto rebased_method = method - mid_method.value(); + auto& code = uploaded_macro_code[method]; + code.resize(macro_cached.size() - rebased_method); + std::memcpy(code.data(), macro_cached.data() + rebased_method, + code.size() * sizeof(u32)); + cache_info.hash = boost::hash_value(code); + cache_info.lle_program = Compile(code); + } auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); if (hle_program.has_value()) { cache_info.has_hle_program = true; cache_info.hle_program = std::move(hle_program.value()); - } - - if (cache_info.has_hle_program) { cache_info.hle_program->Execute(parameters, method); } else { cache_info.lle_program->Execute(parameters, method); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b6b6659c1..208fc6167 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -188,20 +188,6 @@ bool IsASTCSupported() { return true; } -/// @brief Returns true when a GL_RENDERER is a Turing GPU -/// @param renderer GL_RENDERER string -bool IsTuring(std::string_view renderer) { - static constexpr std::array<std::string_view, 12> TURING_GPUS = { - "GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070", - "RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000", - "Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4", - }; - return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(), - [renderer](std::string_view candidate) { - return renderer.find(candidate) != std::string_view::npos; - }); -} - } // Anonymous namespace Device::Device() @@ -213,7 +199,6 @@ Device::Device() const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_turing = is_nvidia && IsTuring(renderer); bool disable_fast_buffer_sub_data = false; if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { @@ -238,15 +223,12 @@ Device::Device() has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - // Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on - // DeleteBuffers. Disable unified memory on these devices. - has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing; - use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 362457ffe..e960a0ef1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -213,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() { if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { glVertexAttribIFormat(gl_index, attrib.ComponentCount(), - MaxwellToGL::VertexType(attrib), attrib.offset); + MaxwellToGL::VertexFormat(attrib), attrib.offset); } else { - glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + glVertexAttribFormat(gl_index, attrib.ComponentCount(), + MaxwellToGL::VertexFormat(attrib), attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); } glVertexAttribBinding(gl_index, attrib.buffer); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 35e329240..fe9bd4b5a 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,10 +24,11 @@ namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -inline GLenum VertexType(Maxwell::VertexAttribute attrib) { +inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { - case Maxwell::VertexAttribute::Type::UnsignedInt: case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::UnsignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -48,8 +49,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_INT_2_10_10_10_REV; } break; - case Maxwell::VertexAttribute::Type::SignedInt: case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -84,36 +86,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_FLOAT; } break; - case Maxwell::VertexAttribute::Type::UnsignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_UNSIGNED_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_UNSIGNED_SHORT; - } - break; - case Maxwell::VertexAttribute::Type::SignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_SHORT; - } - break; } - UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(), attrib.SizeString()); return {}; } @@ -217,6 +191,12 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { } else { return GL_MIRROR_CLAMP_TO_EDGE; } + case Tegra::Texture::WrapMode::MirrorOnceClampOGL: + if (GL_EXT_texture_mirror_clamp) { + return GL_MIRROR_CLAMP_EXT; + } else { + return GL_MIRROR_CLAMP_TO_EDGE; + } } UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); return GL_REPEAT; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 1f2b6734b..d7f1ae89f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -294,6 +294,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { switch (type) { + case Maxwell::VertexAttribute::Type::UnsignedNorm: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return VK_FORMAT_R8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8: + return VK_FORMAT_R8G8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return VK_FORMAT_R8G8B8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return VK_FORMAT_R8G8B8A8_UNORM; + case Maxwell::VertexAttribute::Size::Size_16: + return VK_FORMAT_R16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16: + return VK_FORMAT_R16G16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return VK_FORMAT_R16G16B16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return VK_FORMAT_R16G16B16A16_UNORM; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + } + break; case Maxwell::VertexAttribute::Type::SignedNorm: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: @@ -314,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_SNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SNORM_PACK32; - default: - break; } break; - case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_UNORM; + return VK_FORMAT_R8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_UNORM; + return VK_FORMAT_R8G8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_UNORM; + return VK_FORMAT_R8G8B8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_UNORM; + return VK_FORMAT_R8G8B8A8_USCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_UNORM; + return VK_FORMAT_R16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_UNORM; + return VK_FORMAT_R16G16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_UNORM; + return VK_FORMAT_R16G16B16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_UNORM; + return VK_FORMAT_R16G16B16A16_USCALED; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: - return VK_FORMAT_A2B10G10R10_UNORM_PACK32; - default: - break; + return VK_FORMAT_A2B10G10R10_USCALED_PACK32; } break; - case Maxwell::VertexAttribute::Type::SignedInt: + case Maxwell::VertexAttribute::Type::SignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_SINT; + return VK_FORMAT_R8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_SINT; + return VK_FORMAT_R8G8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_SINT; + return VK_FORMAT_R8G8B8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_SINT; + return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SINT; + return VK_FORMAT_R16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SINT; + return VK_FORMAT_R16G16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SINT; + return VK_FORMAT_R16G16B16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SINT; - case Maxwell::VertexAttribute::Size::Size_32: - return VK_FORMAT_R32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32: - return VK_FORMAT_R32G32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32_32: - return VK_FORMAT_R32G32B32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return VK_FORMAT_R32G32B32A32_SINT; - default: - break; + return VK_FORMAT_R16G16B16A16_SSCALED; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_SSCALED_PACK32; } break; case Maxwell::VertexAttribute::Type::UnsignedInt: @@ -398,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_UINT; - default: - break; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_UINT_PACK32; } break; - case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_USCALED; + return VK_FORMAT_R8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_USCALED; + return VK_FORMAT_R8G8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_USCALED; + return VK_FORMAT_R8G8B8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_USCALED; + return VK_FORMAT_R8G8B8A8_SINT; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_USCALED; + return VK_FORMAT_R16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_USCALED; + return VK_FORMAT_R16G16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_USCALED; + return VK_FORMAT_R16G16B16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_USCALED; - default: - break; + return VK_FORMAT_R16G16B16A16_SINT; + case Maxwell::VertexAttribute::Size::Size_32: + return VK_FORMAT_R32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32: + return VK_FORMAT_R32G32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return VK_FORMAT_R32G32B32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return VK_FORMAT_R32G32B32A32_SINT; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_SINT_PACK32; } break; - case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: switch (size) { - case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SSCALED; + return VK_FORMAT_R16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SSCALED; + return VK_FORMAT_R16G16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SSCALED; + return VK_FORMAT_R16G16B16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SSCALED; - default: - break; - } - break; - case Maxwell::VertexAttribute::Type::Float: - switch (size) { + return VK_FORMAT_R16G16B16A16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32: return VK_FORMAT_R32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32: @@ -456,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SFLOAT; - default: - break; } break; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index cd9673d1f..2d9b18ed9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -155,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc } } - static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; - vk::Span<const char*> layers = layers_data; - if (!enable_layers) { - layers = {}; + std::vector<const char*> layers; + layers.reserve(1); + if (enable_layers) { + layers.push_back("VK_LAYER_KHRONOS_validation"); + } + + const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); + if (!layer_properties) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); + layers.clear(); + } + + for (auto layer_it = layers.begin(); layer_it != layers.end();) { + const char* const layer = *layer_it; + const auto it = std::find_if( + layer_properties->begin(), layer_properties->end(), + [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); + if (it == layer_properties->end()) { + LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); + layer_it = layers.erase(layer_it); + } else { + ++layer_it; + } } + vk::Instance instance = vk::Instance::Create(layers, extensions, dld); if (!instance) { LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 82ec9180e..56524e6f3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -9,6 +9,7 @@ #include <utility> #include "common/microprofile.h" +#include "common/thread.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" @@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { } void VKScheduler::WorkerThread() { + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 42eff85d3..0d485a662 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { bool Load(InstanceDispatch& dld) noexcept { #define X(name) Proc(dld.name, dld, #name) - return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); + return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) && + X(vkEnumerateInstanceLayerProperties); #undef X } @@ -770,4 +771,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp return properties; } +std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld) { + u32 num; + if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector<VkLayerProperties> properties(num); + if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) { + return std::nullopt; + } + return properties; +} + } // namespace Vulkan::vk diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index da42ca88e..d56fdb3f9 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -141,6 +141,7 @@ struct InstanceDispatch { PFN_vkCreateInstance vkCreateInstance; PFN_vkDestroyInstance vkDestroyInstance; PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; + PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties; PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; PFN_vkCreateDevice vkCreateDevice; @@ -996,4 +997,7 @@ private: std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( const InstanceDispatch& dld); +std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld); + } // namespace Vulkan::vk diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85075e868..6207d8dfe 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -24,6 +24,7 @@ #include "core/core.h" #include "core/memory.h" #include "core/settings.h" +#include "video_core/compatible_formats.h" #include "video_core/dirty_flags.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -47,8 +48,8 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::FormatCompatibility; using VideoCore::Surface::PixelFormat; - using VideoCore::Surface::SurfaceTarget; using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; @@ -595,7 +596,7 @@ private: } else { new_surface = GetUncachedSurface(gpu_addr, params); } - const auto& final_params = new_surface->GetSurfaceParams(); + const SurfaceParams& final_params = new_surface->GetSurfaceParams(); if (cr_params.type != final_params.type) { if (Settings::IsGPULevelExtreme()) { BufferCopy(current_surface, new_surface); @@ -603,7 +604,7 @@ private: } else { std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); for (auto& brick : bricks) { - ImageCopy(current_surface, new_surface, brick); + TryCopyImage(current_surface, new_surface, brick); } } Unregister(current_surface); @@ -694,7 +695,7 @@ private: } const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, src_params.depth); - ImageCopy(surface, new_surface, copy_params); + TryCopyImage(surface, new_surface, copy_params); } } if (passed_tests == 0) { @@ -791,7 +792,7 @@ private: const u32 width = params.width; const u32 height = params.height; const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); - ImageCopy(surface, new_surface, copy_params); + TryCopyImage(surface, new_surface, copy_params); } for (const auto& surface : overlaps) { Unregister(surface); @@ -1192,6 +1193,19 @@ private: return {}; } + /// Try to do an image copy logging when formats are incompatible. + void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { + const SurfaceParams& src_params = src->GetSurfaceParams(); + const SurfaceParams& dst_params = dst->GetSurfaceParams(); + if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { + LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", + static_cast<int>(dst_params.pixel_format), + static_cast<int>(src_params.pixel_format)); + return; + } + ImageCopy(src, dst, copy); + } + constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { return siblings_table[static_cast<std::size_t>(format)]; } @@ -1241,6 +1255,7 @@ private: VideoCore::RasterizerInterface& rasterizer; FormatLookupTable format_lookup_table; + FormatCompatibility format_compatibility; u64 ticks{}; |
