From 18637766efd1ff9a0c22967553983cfda69c96ca Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 17 Nov 2022 16:36:53 +0100 Subject: MacroHLE: Reduce massive calculations on sizing estimation. --- src/common/CMakeLists.txt | 1 + src/common/range_map.h | 139 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 src/common/range_map.h (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index eb05e46a8..45332cf95 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -97,6 +97,7 @@ add_library(common STATIC point.h precompiled_headers.h quaternion.h + range_map.h reader_writer_queue.h ring_buffer.h ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp diff --git a/src/common/range_map.h b/src/common/range_map.h new file mode 100644 index 000000000..993e21643 --- /dev/null +++ b/src/common/range_map.h @@ -0,0 +1,139 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include + +#include "common/common_types.h" + +namespace Common { + +template +class RangeMap { +private: + using KeyT = std::conditional_t, typename KeyTBase, + std::make_signed_t>; + +public: + explicit RangeMap(ValueT null_value_) : null_value{null_value_} { + container.emplace(std::numeric_limits::min(), null_value); + }; + ~RangeMap() = default; + + void Map(KeyTBase address, KeyTBase address_end, ValueT value) { + KeyT new_address = static_cast(address); + KeyT new_address_end = static_cast(address_end); + if (new_address < 0) { + new_address = 0; + } + if (new_address_end < 0) { + new_address_end = 0; + } + InternalMap(new_address, new_address_end, value); + } + + void Unmap(KeyTBase address, KeyTBase address_end) { + Map(address, address_end, null_value); + } + + [[nodiscard]] size_t GetContinousSizeFrom(KeyTBase address) const { + const KeyT new_address = static_cast(address); + if (new_address < 0) { + return 0; + } + return ContinousSizeInternal(new_address); + } + + [[nodiscard]] ValueT GetValueAt(KeyT address) const { + const KeyT new_address = static_cast(address); + if (new_address < 0) { + return null_value; + } + return GetValueInternal(new_address); + } + +private: + using MapType = std::map; + using IteratorType = MapType::iterator; + using ConstIteratorType = MapType::const_iterator; + + size_t ContinousSizeInternal(KeyT address) const { + const auto it = GetFirstElemnentBeforeOrOn(address); + if (it == container.end() || it->second == null_value) { + return 0; + } + const auto it_end = std::next(it); + if (it_end == container.end()) { + return std::numeric_limits::max() - address; + } + return it_end->first - address; + } + + ValueT GetValueInternal(KeyT address) const { + const auto it = GetFirstElemnentBeforeOrOn(address); + if (it == container.end()) { + return null_value; + } + return it->second; + } + + ConstIteratorType GetFirstElemnentBeforeOrOn(KeyT address) const { + auto it = container.lower_bound(address); + if (it == container.begin()) { + return it; + } + if (it != container.end() && (it->first == address)) { + return it; + } + --it; + return it; + } + + ValueT GetFirstValueWithin(KeyT address) { + auto it = container.lower_bound(address); + if (it == container.begin()) { + return it->second; + } + if (it == container.end()) [[unlikely]] { // this would be a bug + return null_value; + } + --it; + return it->second; + } + + ValueT GetLastValueWithin(KeyT address) { + auto it = container.upper_bound(address); + if (it == container.end()) { + return null_value; + } + if (it == container.begin()) [[unlikely]] { // this would be a bug + return it->second; + } + --it; + return it->second; + } + + void InternalMap(KeyT address, KeyT address_end, ValueT value) { + const bool must_add_start = GetFirstValueWithin(address) != value; + const ValueT last_value = GetLastValueWithin(address_end); + const bool must_add_end = last_value != value; + auto it = container.lower_bound(address); + const auto it_end = container.upper_bound(address_end); + while (it != it_end) { + it = container.erase(it); + } + if (must_add_start) { + container.emplace(address, value); + } + if (must_add_end) { + container.emplace(address_end, last_value); + } + } + + ValueT null_value; + MapType container; +}; + +} // namespace Common -- cgit v1.2.3 From d09aa0182f18d1ac338ab47009b42fdeb67497a8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 24 Dec 2022 19:19:41 -0500 Subject: MacroHLE: Final cleanup and fixes. --- src/common/range_map.h | 8 +- src/shader_recompiler/environment.h | 4 +- src/video_core/buffer_cache/buffer_cache.h | 3 +- src/video_core/engines/draw_manager.cpp | 3 +- src/video_core/engines/maxwell_3d.h | 2 +- src/video_core/macro/macro_hle.cpp | 98 +++++++--------------- src/video_core/renderer_opengl/gl_rasterizer.h | 3 +- .../renderer_vulkan/fixed_pipeline_state.h | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 11 ++- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 +- .../renderer_vulkan/vk_staging_buffer_pool.h | 5 +- src/video_core/vulkan_common/vulkan_device.cpp | 24 ++++-- src/video_core/vulkan_common/vulkan_device.h | 52 ++++++------ src/video_core/vulkan_common/vulkan_wrapper.h | 3 +- 14 files changed, 94 insertions(+), 128 deletions(-) (limited to 'src/common') diff --git a/src/common/range_map.h b/src/common/range_map.h index 993e21643..051e713a7 100644 --- a/src/common/range_map.h +++ b/src/common/range_map.h @@ -13,8 +13,8 @@ namespace Common { template class RangeMap { private: - using KeyT = std::conditional_t, typename KeyTBase, - std::make_signed_t>; + using KeyT = + std::conditional_t, KeyTBase, std::make_signed_t>; public: explicit RangeMap(ValueT null_value_) : null_value{null_value_} { @@ -56,8 +56,8 @@ public: private: using MapType = std::map; - using IteratorType = MapType::iterator; - using ConstIteratorType = MapType::const_iterator; + using IteratorType = typename MapType::iterator; + using ConstIteratorType = typename MapType::const_iterator; size_t ContinousSizeInternal(KeyT address) const { const auto it = GetFirstElemnentBeforeOrOn(address); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index b9b4455f6..8fc359126 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -36,8 +36,8 @@ public: [[nodiscard]] virtual bool HasHLEMacroState() const = 0; - [[nodiscard]] virtual std::optional GetReplaceConstBuffer( - u32 bank, u32 offset) = 0; + [[nodiscard]] virtual std::optional GetReplaceConstBuffer(u32 bank, + u32 offset) = 0; virtual void Dump(u64 hash) = 0; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bdc0681b7..06fd40851 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -200,7 +200,8 @@ public: /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); - void SetDrawIndirect(const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { + void SetDrawIndirect( + const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { current_draw_indirect = current_draw_indirect_; } diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 183d5403c..feea89c0e 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -97,7 +97,8 @@ void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { ProcessDrawIndirect(true); } -void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { +void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, + u32 index_count) { const auto& regs{maxwell3d->regs}; draw_state.topology = topology; draw_state.index_buffer = regs.index_buffer; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 478ba4dc7..dbefcd715 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3086,7 +3086,7 @@ public: std::unique_ptr draw_manager; friend class DrawManager; - + std::vector inline_index_draw_indexes; GPUVAddr getMacroAddress(size_t index) const { diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 294a338d2..3481fcd41 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -47,21 +47,7 @@ public: explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} protected: - void advanceCheck() { - current_value = (current_value + 1) % fibonacci_post; - check_limit = current_value == 0; - if (check_limit) { - const u32 new_fibonacci = fibonacci_pre + fibonacci_post; - fibonacci_pre = fibonacci_post; - fibonacci_post = new_fibonacci; - } - } - Engines::Maxwell3D& maxwell3d; - u32 fibonacci_pre{89}; - u32 fibonacci_post{144}; - u32 current_value{fibonacci_post - 1}; - bool check_limit{}; }; class HLE_771BB18C62444DA0 final : public HLEMacroImpl { @@ -124,12 +110,13 @@ private: maxwell3d.RefreshParameters(); const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + auto topology = static_cast(parameters[0]); const u32 vertex_first = parameters[3]; const u32 vertex_count = parameters[1]; - - if (maxwell3d.AnyParametersDirty() && - maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { + if (!IsTopologySafe(topology) && + static_cast(maxwell3d.GetMaxCurrentVertices()) < + static_cast(vertex_first) + static_cast(vertex_count)) { ASSERT_MSG(false, "Faulty draw!"); return; } @@ -141,9 +128,8 @@ private: maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); } - maxwell3d.draw_manager->DrawArray( - static_cast(parameters[0]), - vertex_first, vertex_count, base_instance, instance_count); + maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, + instance_count); if (extended) { maxwell3d.regs.global_base_instance_index = 0; @@ -166,13 +152,7 @@ public: return; } - advanceCheck(); - if (check_limit) { - maxwell3d.RefreshParameters(); - minimum_limit = std::max(parameters[3], minimum_limit); - } const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - const u32 base_size = std::max(minimum_limit, estimate); const u32 element_base = parameters[4]; const u32 base_instance = parameters[5]; maxwell3d.regs.vertex_id_base = element_base; @@ -191,7 +171,7 @@ public: params.max_draw_counts = 1; params.stride = 0; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); maxwell3d.regs.vertex_id_base = 0x0; @@ -223,8 +203,6 @@ private: maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); } - - u32 minimum_limit{1 << 18}; }; class HLE_MultiLayerClear final : public HLEMacroImpl { @@ -257,10 +235,6 @@ public: return; } - advanceCheck(); - if (check_limit) { - maxwell3d.RefreshParameters(); - } const u32 start_indirect = parameters[0]; const u32 end_indirect = parameters[1]; if (start_indirect >= end_indirect) { @@ -274,20 +248,7 @@ public: const u32 indirect_words = 5 + padding; const u32 stride = indirect_words * sizeof(u32); const std::size_t draw_count = end_indirect - start_indirect; - u32 lowest_first = std::numeric_limits::max(); - u32 highest_limit = std::numeric_limits::min(); - for (std::size_t index = 0; index < draw_count; index++) { - const std::size_t base = index * indirect_words + 5; - const u32 count = parameters[base]; - const u32 first_index = parameters[base + 2]; - lowest_first = std::min(lowest_first, first_index); - highest_limit = std::max(highest_limit, first_index + count); - } - if (check_limit) { - minimum_limit = std::max(highest_limit, minimum_limit); - } const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - const u32 base_size = std::max(minimum_limit, estimate); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; @@ -301,7 +262,7 @@ public: maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); } @@ -323,7 +284,6 @@ private: return; } const auto topology = static_cast(parameters[2]); - maxwell3d.regs.draw.topology.Assign(topology); const u32 padding = parameters[3]; const std::size_t max_draws = parameters[4]; @@ -345,8 +305,6 @@ private: base_vertex, base_instance, parameters[base + 1]); } } - - u32 minimum_limit{1 << 12}; }; class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { @@ -431,53 +389,53 @@ public: HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { builders.emplace(0x771BB18C62444DA0ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0x0D61FC9FAAC9FCADULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0x8A4D173EB99A8603ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d, true); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__, true); })); builders.emplace(0x0217920100488FF7ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0x3F5E74B9C9A50164ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xEAD26C3E2109B06BULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xC713C83D8F63CCF3ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xD7333D26E0A93EDEULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xEB29B2A09AA06D38ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0xDB1341DBEB4C8AF7ULL, std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr { - return std::make_unique(maxwell3d); + [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d119c2b66..be4f76c18 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -93,8 +93,7 @@ public: void SignalReference() override; void ReleaseFences() override; void FlushAndInvalidateRegion( - VAddr addr, u64 size, - VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index f47406347..98ea20b42 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -164,7 +164,8 @@ struct FixedPipelineState { }; void Refresh(const Maxwell& regs); - void Refresh2(const Maxwell& regs, Maxwell::PrimitiveTopology topology, bool base_feautures_supported); + void Refresh2(const Maxwell& regs, Maxwell::PrimitiveTopology topology, + bool base_feautures_supported); void Refresh3(const Maxwell& regs); Maxwell::ComparisonOp DepthTestFunc() const noexcept { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c69ebe396..d11383bf1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -227,7 +227,8 @@ struct DefaultSpec { ConfigureFuncPtr ConfigureFunc(const std::array& modules, const std::array& infos) { - return FindSpec(modules, infos); + return FindSpec(modules, infos); } } // Anonymous namespace @@ -505,11 +506,9 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } - if (is_rescaling) { - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, - RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), - rescaling_data.data()); - } + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); if (update_rescaling) { const f32 config_down_factor{Settings::values.resolution_info.down_factor}; const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c06182807..c661e5b19 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -89,8 +89,7 @@ public: void SignalReference() override; void ReleaseFences() override; void FlushAndInvalidateRegion( - VAddr addr, u64 size, - VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 2906d92a4..4fd15f11a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -1,6 +1,5 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 780f5dede..ea62edb13 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -587,11 +587,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR dynamic_state_3 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT, .pNext = nullptr, - .extendedDynamicState3DepthClampEnable = ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3LogicOpEnable = ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEnable = ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEquation = ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorWriteMask = ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, + .extendedDynamicState3DepthClampEnable = + ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, + .extendedDynamicState3LogicOpEnable = + ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, + .extendedDynamicState3ColorBlendEnable = + ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, + .extendedDynamicState3ColorBlendEquation = + ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, + .extendedDynamicState3ColorWriteMask = + ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, }; SetNext(next, dynamic_state_3); } else { @@ -1342,14 +1347,17 @@ std::vector Device::LoadExtensions(bool requires_surface) { features.pNext = &extended_dynamic_state_3; physical.GetFeatures2(features); - ext_extended_dynamic_state_3_blend = extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable && + ext_extended_dynamic_state_3_blend = + extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable && extended_dynamic_state_3.extendedDynamicState3ColorBlendEquation && extended_dynamic_state_3.extendedDynamicState3ColorWriteMask; - ext_extended_dynamic_state_3_enables = extended_dynamic_state_3.extendedDynamicState3DepthClampEnable && + ext_extended_dynamic_state_3_enables = + extended_dynamic_state_3.extendedDynamicState3DepthClampEnable && extended_dynamic_state_3.extendedDynamicState3LogicOpEnable; - ext_extended_dynamic_state_3 = ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables; + ext_extended_dynamic_state_3 = + ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables; if (ext_extended_dynamic_state_3) { extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b58ec736f..920a8f4e3 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -486,33 +486,33 @@ private: bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. + bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. + bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. + bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. bool ext_extended_dynamic_state_2_extra{}; ///< Support for VK_EXT_extended_dynamic_state2. - bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. - bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. - bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. - bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. - bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - bool supports_d24_depth{}; ///< Supports D24 depth buffers. - bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. - bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. - u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline - u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline + bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. + bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. + bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. + bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. + bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. + bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. + bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool supports_d24_depth{}; ///< Supports D24 depth buffers. + bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline + u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index d7ae14478..accfad8c1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -1269,7 +1269,8 @@ public: dld->vkCmdSetColorBlendEnableEXT(handle, first, enables.size(), enables.data()); } - void SetColorBlendEquationEXT(u32 first, Span equations) const noexcept { + void SetColorBlendEquationEXT(u32 first, + Span equations) const noexcept { dld->vkCmdSetColorBlendEquationEXT(handle, first, equations.size(), equations.data()); } -- cgit v1.2.3 From a0c697124ced080f58866825e2e323e8682bbd7f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 3 Jan 2023 10:01:25 -0500 Subject: Video_core: Address feedback --- src/common/range_map.h | 6 +- .../backend/glasm/emit_glasm_context_get_set.cpp | 18 ++ .../backend/glsl/emit_glsl_context_get_set.cpp | 6 + .../backend/spirv/emit_spirv_context_get_set.cpp | 4 + .../backend/spirv/spirv_emit_context.cpp | 3 + .../backend/spirv/spirv_emit_context.h | 1 + src/shader_recompiler/frontend/ir/attribute.cpp | 2 + src/shader_recompiler/frontend/ir/attribute.h | 1 + .../ir_opt/constant_propagation_pass.cpp | 3 + src/shader_recompiler/shader_info.h | 1 + src/video_core/engines/draw_manager.cpp | 13 +- src/video_core/engines/draw_manager.h | 2 +- src/video_core/engines/maxwell_3d.cpp | 13 +- src/video_core/engines/maxwell_3d.h | 30 +- src/video_core/macro/macro.cpp | 2 +- src/video_core/macro/macro_hle.cpp | 356 ++++++++++++++------- src/video_core/renderer_opengl/gl_rasterizer.cpp | 18 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 27 +- .../renderer_vulkan/vk_state_tracker.cpp | 2 +- src/video_core/shader_environment.cpp | 8 +- 20 files changed, 346 insertions(+), 170 deletions(-) (limited to 'src/common') diff --git a/src/common/range_map.h b/src/common/range_map.h index 051e713a7..79c7ef547 100644 --- a/src/common/range_map.h +++ b/src/common/range_map.h @@ -60,7 +60,7 @@ private: using ConstIteratorType = typename MapType::const_iterator; size_t ContinousSizeInternal(KeyT address) const { - const auto it = GetFirstElemnentBeforeOrOn(address); + const auto it = GetFirstElementBeforeOrOn(address); if (it == container.end() || it->second == null_value) { return 0; } @@ -72,14 +72,14 @@ private: } ValueT GetValueInternal(KeyT address) const { - const auto it = GetFirstElemnentBeforeOrOn(address); + const auto it = GetFirstElementBeforeOrOn(address); if (it == container.end()) { return null_value; } return it->second; } - ConstIteratorType GetFirstElemnentBeforeOrOn(KeyT address) const { + ConstIteratorType GetFirstElementBeforeOrOn(KeyT address) const { auto it = container.lower_bound(address); if (it == container.begin()) { return it; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index f0bd84ab2..c7d7d5fef 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -137,6 +137,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal case IR::Attribute::VertexId: ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name); break; + case IR::Attribute::BaseInstance: + ctx.Add("MOV.F {}.x,{}.baseInstance;", inst, ctx.attrib_name); + break; + case IR::Attribute::BaseVertex: + ctx.Add("MOV.F {}.x,{}.baseVertex;", inst, ctx.attrib_name); + break; + case IR::Attribute::DrawID: + ctx.Add("MOV.F {}.x,{}.draw.id;", inst, ctx.attrib_name); + break; case IR::Attribute::FrontFace: ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name); break; @@ -156,6 +165,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, S case IR::Attribute::VertexId: ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); break; + case IR::Attribute::BaseInstance: + ctx.Add("MOV.S {}.x,{}.baseInstance;", inst, ctx.attrib_name); + break; + case IR::Attribute::BaseVertex: + ctx.Add("MOV.S {}.x,{}.baseVertex;", inst, ctx.attrib_name); + break; + case IR::Attribute::DrawID: + ctx.Add("MOV.S {}.x,{}.draw.id;", inst, ctx.attrib_name); + break; default: throw NotImplementedException("Get U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 25106da67..2e369ed72 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -240,6 +240,9 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::BaseVertex: ctx.AddF32("{}=itof(gl_BaseVertex);", inst); break; + case IR::Attribute::DrawID: + ctx.AddF32("{}=itof(gl_DrawID);", inst); + break; default: throw NotImplementedException("Get attribute {}", attr); } @@ -262,6 +265,9 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s case IR::Attribute::BaseVertex: ctx.AddU32("{}=uint(gl_BaseVertex);", inst); break; + case IR::Attribute::DrawID: + ctx.AddU32("{}=uint(gl_DrawID);", inst); + break; default: throw NotImplementedException("Get U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e4802bf9e..db9c94ce8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -343,6 +343,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); case IR::Attribute::BaseVertex: return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); + case IR::Attribute::DrawID: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.draw_index)); case IR::Attribute::FrontFace: return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits::max())), @@ -388,6 +390,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { return ctx.OpLoad(ctx.U32[1], ctx.base_instance); case IR::Attribute::BaseVertex: return ctx.OpLoad(ctx.U32[1], ctx.base_vertex); + case IR::Attribute::DrawID: + return ctx.OpLoad(ctx.U32[1], ctx.draw_index); default: throw NotImplementedException("Read U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 563a5fc49..ecb2db494 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1402,6 +1402,9 @@ void EmitContext::DefineInputs(const IR::Program& program) { } else if (loads[IR::Attribute::BaseVertex]) { base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } + if (loads[IR::Attribute::DrawID]) { + draw_index = DefineInput(*this, U32[1], true, spv::BuiltIn::DrawIndex); + } if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index dde45b4bc..4414a5169 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -218,6 +218,7 @@ public: Id base_instance{}; Id vertex_id{}; Id vertex_index{}; + Id draw_index{}; Id base_vertex{}; Id front_face{}; Id point_coord{}; diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 73e189a89..1bf9db935 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -450,6 +450,8 @@ std::string NameOf(Attribute attribute) { return "BaseInstance"; case Attribute::BaseVertex: return "BaseVertex"; + case Attribute::DrawID: + return "DrawID"; } return fmt::format("", static_cast(attribute)); } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 364d8a912..5f039b6f6 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -223,6 +223,7 @@ enum class Attribute : u64 { // Implementation attributes BaseInstance = 256, BaseVertex = 257, + DrawID = 258, }; constexpr size_t NUM_GENERICS = 32; diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 5275b2c8b..4d81e9336 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -518,6 +518,7 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { case IR::Attribute::VertexId: case IR::Attribute::BaseVertex: case IR::Attribute::BaseInstance: + case IR::Attribute::DrawID: break; default: return; @@ -665,6 +666,8 @@ void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { return IR::Attribute::BaseInstance; case ReplaceConstant::BaseVertex: return IR::Attribute::BaseVertex; + case ReplaceConstant::DrawID: + return IR::Attribute::DrawID; default: throw NotImplementedException("Not implemented replacement variable {}", *replacement); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index ea0f48344..44236b6b1 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -19,6 +19,7 @@ namespace Shader { enum class ReplaceConstant : u32 { BaseInstance, BaseVertex, + DrawID, }; enum class TextureType : u32 { diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index feea89c0e..2437121ce 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -94,7 +94,7 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { draw_state.topology = topology; - ProcessDrawIndirect(true); + ProcessDrawIndirect(); } void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, @@ -105,7 +105,7 @@ void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_firs draw_state.index_buffer.first = index_first; draw_state.index_buffer.count = index_count; - ProcessDrawIndirect(true); + ProcessDrawIndirect(); } void DrawManager::SetInlineIndexBuffer(u32 index) { @@ -216,9 +216,12 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { } } -void DrawManager::ProcessDrawIndirect(bool draw_indexed) { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology, - draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count); +void DrawManager::ProcessDrawIndirect() { + LOG_TRACE( + HW_GPU, + "called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}", + draw_state.topology, indirect_state.is_indexed, indirect_state.include_count, + indirect_state.buffer_size, indirect_state.max_draw_counts); UpdateTopology(); diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 49a4fca48..58d1b2d59 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -85,7 +85,7 @@ private: void ProcessDraw(bool draw_indexed, u32 instance_count); - void ProcessDrawIndirect(bool draw_indexed); + void ProcessDrawIndirect(); Maxwell3D* maxwell3d{}; State draw_state{}; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 943a69935..fbfd1ddd2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -220,9 +220,6 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool } void Maxwell3D::RefreshParametersImpl() { - if (!Settings::IsGPULevelHigh()) { - return; - } size_t current_index = 0; for (auto& segment : macro_segments) { if (segment.first == 0) { @@ -448,9 +445,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15: ProcessCBMultiData(base_start, amount); break; - case MAXWELL3D_REG_INDEX(inline_data): + case MAXWELL3D_REG_INDEX(inline_data): { + ASSERT(methods_pending == amount); upload_state.ProcessData(base_start, amount); return; + } default: for (u32 i = 0; i < amount; i++) { CallMethod(method, base_start[i], methods_pending - i <= 1); @@ -537,7 +536,7 @@ void Maxwell3D::ProcessQueryGet() { void Maxwell3D::ProcessQueryCondition() { const GPUVAddr condition_address{regs.render_enable.Address()}; switch (regs.render_enable_override) { - case Regs::RenderEnable::Override::AlwaysRender: { + case Regs::RenderEnable::Override::AlwaysRender: execute_on = true; break; case Regs::RenderEnable::Override::NeverRender: @@ -586,7 +585,6 @@ void Maxwell3D::ProcessQueryCondition() { break; } } - } } void Maxwell3D::ProcessCounterReset() { @@ -685,7 +683,8 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } -void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { +void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset, + HLEReplacementAttributeType name) { const u64 key = (static_cast(bank) << 32) | offset; replace_table.emplace(key, name); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a2dff0350..0b2fd2928 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1218,12 +1218,12 @@ public: struct Window { union { - u32 raw_1; + u32 raw_x; BitField<0, 16, u32> x_min; BitField<16, 16, u32> x_max; }; union { - u32 raw_2; + u32 raw_y; BitField<0, 16, u32> y_min; BitField<16, 16, u32> y_max; }; @@ -3031,14 +3031,15 @@ public: EngineHint engine_state{EngineHint::None}; - enum class HLEReplaceName : u32 { + enum class HLEReplacementAttributeType : u32 { BaseVertex = 0x0, BaseInstance = 0x1, + DrawID = 0x2, }; - void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); + void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name); - std::unordered_map replace_table; + std::unordered_map replace_table; static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); static_assert(std::is_trivially_copyable_v, "Maxwell3D Regs must be trivially copyable"); @@ -3087,9 +3088,7 @@ public: std::unique_ptr draw_manager; friend class DrawManager; - std::vector inline_index_draw_indexes; - - GPUVAddr getMacroAddress(size_t index) const { + GPUVAddr GetMacroAddress(size_t index) const { return macro_addresses[index]; } @@ -3100,7 +3099,7 @@ public: RefreshParametersImpl(); } - bool AnyParametersDirty() { + bool AnyParametersDirty() const { return current_macro_dirty; } @@ -3114,6 +3113,10 @@ public: /// Handles a write to the CB_BIND register. void ProcessCBBind(size_t stage_index); + /// Handles a write to the CB_DATA[i] register. + void ProcessCBData(u32 value); + void ProcessCBMultiData(const u32* start_base, u32 amount); + private: void InitializeRegisterDefaults(); @@ -3165,10 +3168,6 @@ private: /// Handles writes to syncing register. void ProcessSyncPoint(); - /// Handles a write to the CB_DATA[i] register. - void ProcessCBData(u32 value); - void ProcessCBMultiData(const u32* start_base, u32 amount); - /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); @@ -3196,11 +3195,6 @@ private: bool execute_on{true}; - std::array draw_command{}; - std::vector deferred_draw_method; - enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; - DrawMode draw_mode{DrawMode::General}; - bool draw_indexed{}; std::vector> macro_segments; std::vector macro_addresses; bool current_macro_dirty{}; diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 49c47dafe..a96e8648c 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -23,7 +23,7 @@ #include "video_core/macro/macro_jit_x64.h" #endif -MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro hle", MP_RGB(128, 192, 192)); +MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro HLE", MP_RGB(128, 192, 192)); namespace Tegra { diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index c08b4abb3..a5476e795 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include #include @@ -15,28 +15,28 @@ namespace Tegra { -using Maxwell = Engines::Maxwell3D; +using Maxwell3D = Engines::Maxwell3D; namespace { -bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) { +bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) { switch (topology) { - case Maxwell::Regs::PrimitiveTopology::Points: - case Maxwell::Regs::PrimitiveTopology::Lines: - case Maxwell::Regs::PrimitiveTopology::LineLoop: - case Maxwell::Regs::PrimitiveTopology::LineStrip: - case Maxwell::Regs::PrimitiveTopology::Triangles: - case Maxwell::Regs::PrimitiveTopology::TriangleStrip: - case Maxwell::Regs::PrimitiveTopology::TriangleFan: - case Maxwell::Regs::PrimitiveTopology::LinesAdjacency: - case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency: - case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency: - case Maxwell::Regs::PrimitiveTopology::Patches: + case Maxwell3D::Regs::PrimitiveTopology::Points: + case Maxwell3D::Regs::PrimitiveTopology::Lines: + case Maxwell3D::Regs::PrimitiveTopology::LineLoop: + case Maxwell3D::Regs::PrimitiveTopology::LineStrip: + case Maxwell3D::Regs::PrimitiveTopology::Triangles: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::Patches: return true; - case Maxwell::Regs::PrimitiveTopology::Quads: - case Maxwell::Regs::PrimitiveTopology::QuadStrip: - case Maxwell::Regs::PrimitiveTopology::Polygon: + case Maxwell3D::Regs::PrimitiveTopology::Quads: + case Maxwell3D::Regs::PrimitiveTopology::QuadStrip: + case Maxwell3D::Regs::PrimitiveTopology::Polygon: default: return false; } @@ -44,34 +44,55 @@ bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) { class HLEMacroImpl : public CachedMacro { public: - explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} + explicit HLEMacroImpl(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} protected: - Engines::Maxwell3D& maxwell3d; + Maxwell3D& maxwell3d; }; -class HLE_771BB18C62444DA0 final : public HLEMacroImpl { +class HLE_DrawArrays final : public HLEMacroImpl { public: - explicit HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { maxwell3d.RefreshParameters(); - const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + + auto topology = static_cast(parameters[0]); + maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2], + maxwell3d.regs.global_base_instance_index, 1); + } +}; + +class HLE_DrawIndexed final : public HLEMacroImpl { +public: + explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + maxwell3d.regs.index_buffer.start_addr_high = parameters[1]; + maxwell3d.regs.index_buffer.start_addr_low = parameters[2]; + maxwell3d.regs.index_buffer.format = + static_cast(parameters[3]); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndex( - static_cast(parameters[0] & - 0x3ffffff), - parameters[4], parameters[1], parameters[3], parameters[5], instance_count); + + auto topology = static_cast(parameters[0]); + maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4], + maxwell3d.regs.global_base_vertex_index, + maxwell3d.regs.global_base_instance_index, 1); } }; +/* + * @note: these macros have two versions, a normal and extended version, with the extended version + * also assigning the base vertex/instance. + */ +template class HLE_DrawArraysIndirect final : public HLEMacroImpl { public: - explicit HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d_, bool extended_ = false) - : HLEMacroImpl(maxwell3d_), extended(extended_) {} + explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = static_cast(parameters[0]); + auto topology = static_cast(parameters[0]); if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { Fallback(parameters); return; @@ -81,20 +102,21 @@ public: params.is_indexed = false; params.include_count = false; params.count_start_address = 0; - params.indirect_start_address = maxwell3d.getMacroAddress(1); + params.indirect_start_address = maxwell3d.GetMacroAddress(1); params.buffer_size = 4 * sizeof(u32); params.max_draw_counts = 1; params.stride = 0; - if (extended) { - maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); + if constexpr (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); } maxwell3d.draw_manager->DrawArrayIndirect(topology); - if (extended) { - maxwell3d.engine_state = Maxwell::EngineHint::None; + if constexpr (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.replace_table.clear(); } } @@ -103,14 +125,14 @@ private: void Fallback(const std::vector& parameters) { SCOPE_EXIT({ if (extended) { - maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.replace_table.clear(); } }); maxwell3d.RefreshParameters(); const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - auto topology = static_cast(parameters[0]); + auto topology = static_cast(parameters[0]); const u32 vertex_first = parameters[3]; const u32 vertex_count = parameters[1]; @@ -122,31 +144,35 @@ private: } const u32 base_instance = parameters[4]; - if (extended) { + if constexpr (extended) { maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); } maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, instance_count); - if (extended) { + if constexpr (extended) { maxwell3d.regs.global_base_instance_index = 0; - maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.replace_table.clear(); } } - - bool extended; }; +/* + * @note: these macros have two versions, a normal and extended version, with the extended version + * also assigning the base vertex/instance. + */ +template class HLE_DrawIndexedIndirect final : public HLEMacroImpl { public: - explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = static_cast(parameters[0]); + auto topology = static_cast(parameters[0]); if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { Fallback(parameters); return; @@ -159,24 +185,30 @@ public: maxwell3d.regs.global_base_vertex_index = element_base; maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); - maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + if constexpr (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; params.include_count = false; params.count_start_address = 0; - params.indirect_start_address = maxwell3d.getMacroAddress(1); + params.indirect_start_address = maxwell3d.GetMacroAddress(1); params.buffer_size = 5 * sizeof(u32); params.max_draw_counts = 1; params.stride = 0; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); - maxwell3d.engine_state = Maxwell::EngineHint::None; - maxwell3d.replace_table.clear(); maxwell3d.regs.vertex_id_base = 0x0; maxwell3d.regs.global_base_vertex_index = 0x0; maxwell3d.regs.global_base_instance_index = 0x0; + if constexpr (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } } private: @@ -189,31 +221,37 @@ private: maxwell3d.regs.global_base_vertex_index = element_base; maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); - maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + if constexpr (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } maxwell3d.draw_manager->DrawIndex( - static_cast(parameters[0]), - parameters[3], parameters[1], element_base, base_instance, instance_count); + static_cast(parameters[0]), parameters[3], + parameters[1], element_base, base_instance, instance_count); maxwell3d.regs.vertex_id_base = 0x0; maxwell3d.regs.global_base_vertex_index = 0x0; maxwell3d.regs.global_base_instance_index = 0x0; - maxwell3d.engine_state = Maxwell::EngineHint::None; - maxwell3d.replace_table.clear(); + if constexpr (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } } }; class HLE_MultiLayerClear final : public HLEMacroImpl { public: - explicit HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { maxwell3d.RefreshParameters(); ASSERT(parameters.size() == 1); - const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; + const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; const u32 rt_index = clear_params.RT; const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; ASSERT(clear_params.layer == 0); @@ -225,11 +263,10 @@ public: class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { public: - explicit HLE_MultiDrawIndexedIndirectCount(Engines::Maxwell3D& maxwell3d_) - : HLEMacroImpl(maxwell3d_) {} + explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - const auto topology = static_cast(parameters[2]); + const auto topology = static_cast(parameters[2]); if (!IsTopologySafe(topology)) { Fallback(parameters); return; @@ -253,27 +290,30 @@ public: auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; params.include_count = true; - params.count_start_address = maxwell3d.getMacroAddress(4); - params.indirect_start_address = maxwell3d.getMacroAddress(5); + params.count_start_address = maxwell3d.GetMacroAddress(4); + params.indirect_start_address = maxwell3d.GetMacroAddress(5); params.buffer_size = stride * draw_count; params.max_draw_counts = draw_count; params.stride = stride; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); - maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType(0, 0x648, + Maxwell3D::HLEReplacementAttributeType::DrawID); maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); - maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.replace_table.clear(); } private: void Fallback(const std::vector& parameters) { SCOPE_EXIT({ - // Clean everything. // Clean everything. maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.replace_table.clear(); }); maxwell3d.RefreshParameters(); @@ -283,7 +323,7 @@ private: // Nothing to do. return; } - const auto topology = static_cast(parameters[2]); + const auto topology = static_cast(parameters[2]); const u32 padding = parameters[3]; const std::size_t max_draws = parameters[4]; @@ -297,9 +337,13 @@ private: const u32 base_vertex = parameters[base + 3]; const u32 base_instance = parameters[base + 4]; maxwell3d.regs.vertex_id_base = base_vertex; - maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); - maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + maxwell3d.CallMethod(0x8e3, 0x648, true); + maxwell3d.CallMethod(0x8e4, static_cast(index), true); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]); @@ -309,7 +353,7 @@ private: class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { public: - explicit HLE_C713C83D8F63CCF3(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { maxwell3d.RefreshParameters(); @@ -325,7 +369,7 @@ public: class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { public: - explicit HLE_D7333D26E0A93EDE(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { maxwell3d.RefreshParameters(); @@ -341,7 +385,7 @@ public: class HLE_BindShader final : public HLEMacroImpl { public: - explicit HLE_BindShader(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { maxwell3d.RefreshParameters(); @@ -371,7 +415,7 @@ public: class HLE_SetRasterBoundingBox final : public HLEMacroImpl { public: - explicit HLE_SetRasterBoundingBox(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { maxwell3d.RefreshParameters(); @@ -384,60 +428,156 @@ public: } }; +template +class HLE_ClearConstBuffer final : public HLEMacroImpl { +public: + explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + static constexpr std::array zeroes{}; + auto& regs = maxwell3d.regs; + regs.const_buffer.size = static_cast(base_size); + regs.const_buffer.address_high = parameters[0]; + regs.const_buffer.address_low = parameters[1]; + regs.const_buffer.offset = 0; + maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); + } +}; + +class HLE_ClearMemory final : public HLEMacroImpl { +public: + explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + + const u32 needed_memory = parameters[2] / sizeof(u32); + if (needed_memory > zero_memory.size()) { + zero_memory.resize(needed_memory, 0); + } + auto& regs = maxwell3d.regs; + regs.upload.line_length_in = parameters[2]; + regs.upload.line_count = 1; + regs.upload.dest.address_high = parameters[0]; + regs.upload.dest.address_low = parameters[1]; + maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); + maxwell3d.CallMultiMethod(static_cast(MAXWELL3D_REG_INDEX(inline_data)), + zero_memory.data(), needed_memory, needed_memory); + } + +private: + std::vector zero_memory; +}; + +class HLE_TransformFeedbackSetup final : public HLEMacroImpl { +public: + explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + + auto& regs = maxwell3d.regs; + regs.transform_feedback_enabled = 1; + regs.transform_feedback.buffers[0].start_offset = 0; + regs.transform_feedback.buffers[1].start_offset = 0; + regs.transform_feedback.buffers[2].start_offset = 0; + regs.transform_feedback.buffers[3].start_offset = 0; + + regs.upload.line_length_in = 4; + regs.upload.line_count = 1; + regs.upload.dest.address_high = parameters[0]; + regs.upload.dest.address_low = parameters[1]; + maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); + maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(inline_data)), + regs.transform_feedback.controls[0].stride, true); + } +}; + } // Anonymous namespace -HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { - builders.emplace(0x771BB18C62444DA0ULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); +HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { + builders.emplace(0xDD6A7FA92A7D2674ULL, + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); })); builders.emplace(0x0D61FC9FAAC9FCADULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique>(maxwell3d__); })); builders.emplace(0x8A4D173EB99A8603ULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__, true); + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique>(maxwell3d__); + })); + builders.emplace(0x2DB33AADB741839CULL, + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); + })); + builders.emplace(0x771BB18C62444DA0ULL, + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique>(maxwell3d__); })); builders.emplace(0x0217920100488FF7ULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique>(maxwell3d__); })); builders.emplace(0x3F5E74B9C9A50164ULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { return std::make_unique( maxwell3d__); })); builders.emplace(0xEAD26C3E2109B06BULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { return std::make_unique(maxwell3d__); })); builders.emplace(0xC713C83D8F63CCF3ULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { return std::make_unique(maxwell3d__); })); builders.emplace(0xD7333D26E0A93EDEULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { return std::make_unique(maxwell3d__); })); builders.emplace(0xEB29B2A09AA06D38ULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { return std::make_unique(maxwell3d__); })); builders.emplace(0xDB1341DBEB4C8AF7ULL, - std::function(Engines::Maxwell3D&)>( - [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr { + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { return std::make_unique(maxwell3d__); })); + builders.emplace(0x6C97861D891EDf7EULL, + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique>(maxwell3d__); + })); + builders.emplace(0xD246FDDF3A6173D7ULL, + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique>(maxwell3d__); + })); + builders.emplace(0xEE4D0004BEC8ECF4ULL, + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); + })); + builders.emplace(0xFC0CF27F5FFAA661ULL, + std::function(Maxwell3D&)>( + [](Maxwell3D& maxwell3d__) -> std::unique_ptr { + return std::make_unique(maxwell3d__); + })); } HLEMacro::~HLEMacro() = default; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ed7558073..7d48af8e1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -357,21 +357,21 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType if (addr == 0 || size == 0) { return; } - if (bool(which & VideoCommon::CacheType::TextureCache)) { + if (True(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - if ((bool(which & VideoCommon::CacheType::BufferCache))) { + if ((True(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.DownloadMemory(addr, size); } - if ((bool(which & VideoCommon::CacheType::QueryCache))) { + if ((True(which & VideoCommon::CacheType::QueryCache))) { query_cache.FlushRegion(addr, size); } } bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { - if ((bool(which & VideoCommon::CacheType::BufferCache))) { + if ((True(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; if (buffer_cache.IsRegionGpuModified(addr, size)) { return true; @@ -380,7 +380,7 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT if (!Settings::IsGPULevelHigh()) { return false; } - if (bool(which & VideoCommon::CacheType::TextureCache)) { + if (True(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; return texture_cache.IsRegionGpuModified(addr, size); } @@ -392,18 +392,18 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache if (addr == 0 || size == 0) { return; } - if (bool(which & VideoCommon::CacheType::TextureCache)) { + if (True(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - if (bool(which & VideoCommon::CacheType::BufferCache)) { + if (True(which & VideoCommon::CacheType::BufferCache)) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - if (bool(which & VideoCommon::CacheType::ShaderCache)) { + if (True(which & VideoCommon::CacheType::ShaderCache)) { shader_cache.InvalidateRegion(addr, size); } - if (bool(which & VideoCommon::CacheType::QueryCache)) { + if (True(which & VideoCommon::CacheType::QueryCache)) { query_cache.InvalidateRegion(addr, size); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fc746fe2c..242bf9602 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -431,21 +431,21 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType if (addr == 0 || size == 0) { return; } - if (bool(which & VideoCommon::CacheType::TextureCache)) { + if (True(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - if ((bool(which & VideoCommon::CacheType::BufferCache))) { + if ((True(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.DownloadMemory(addr, size); } - if ((bool(which & VideoCommon::CacheType::QueryCache))) { + if ((True(which & VideoCommon::CacheType::QueryCache))) { query_cache.FlushRegion(addr, size); } } bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { - if ((bool(which & VideoCommon::CacheType::BufferCache))) { + if ((True(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; if (buffer_cache.IsRegionGpuModified(addr, size)) { return true; @@ -454,7 +454,7 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT if (!Settings::IsGPULevelHigh()) { return false; } - if (bool(which & VideoCommon::CacheType::TextureCache)) { + if (True(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; return texture_cache.IsRegionGpuModified(addr, size); } @@ -465,18 +465,18 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache if (addr == 0 || size == 0) { return; } - if (bool(which & VideoCommon::CacheType::TextureCache)) { + if (True(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - if ((bool(which & VideoCommon::CacheType::BufferCache))) { + if ((True(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - if ((bool(which & VideoCommon::CacheType::QueryCache))) { + if ((True(which & VideoCommon::CacheType::QueryCache))) { query_cache.InvalidateRegion(addr, size); } - if ((bool(which & VideoCommon::CacheType::ShaderCache))) { + if ((True(which & VideoCommon::CacheType::ShaderCache))) { pipeline_cache.InvalidateRegion(addr, size); } } @@ -1050,7 +1050,7 @@ void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& re constexpr size_t POINT = 0; constexpr size_t LINE = 1; constexpr size_t POLYGON = 2; - constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { + static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POINT, // Points LINE, // Lines LINE, // LineLoop @@ -1159,13 +1159,12 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { } void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!regs.logic_op.enable) { - return; - } if (!state_tracker.TouchLogicOp()) { return; } - auto op = static_cast(static_cast(regs.logic_op.op) - 0x1500); + const auto op_value = static_cast(regs.logic_op.op); + auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast(op_value - 0x1500) + : VK_LOGIC_OP_NO_OP; scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); }); } diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index e5cf97472..d56558a83 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -251,4 +251,4 @@ void StateTracker::InvalidateState() { StateTracker::StateTracker() : flags{&default_flags}, default_flags{}, invalidation_flags{MakeInvalidationFlags()} {} -} // namespace Vulkan \ No newline at end of file +} // namespace Vulkan diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index c34728245..574760f80 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -351,12 +351,14 @@ std::optional GraphicsEnvironment::GetReplaceConstBuffe if (it == maxwell3d->replace_table.end()) { return std::nullopt; } - const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) { + const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplacementAttributeType name) { switch (name) { - case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex: + case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseVertex: return Shader::ReplaceConstant::BaseVertex; - case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance: + case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseInstance: return Shader::ReplaceConstant::BaseInstance; + case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::DrawID: + return Shader::ReplaceConstant::DrawID; default: UNREACHABLE(); } -- cgit v1.2.3 From 3ecc03ec1b5f8c48aee4f2e1f1428908647a1cfc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 4 Jan 2023 14:56:52 -0500 Subject: yuzu-ui: Add setting for disabling macro HLE --- src/common/settings.h | 1 + src/video_core/macro/macro.cpp | 9 +++++---- src/yuzu/configuration/config.cpp | 2 ++ src/yuzu/configuration/configure_debug.cpp | 3 +++ src/yuzu/configuration/configure_debug.ui | 15 ++++++++++++++- src/yuzu_cmd/config.cpp | 1 + 6 files changed, 26 insertions(+), 5 deletions(-) (limited to 'src/common') diff --git a/src/common/settings.h b/src/common/settings.h index 6b199af93..5017951c5 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -531,6 +531,7 @@ struct Values { Setting reporting_services{false, "reporting_services"}; Setting quest_flag{false, "quest_flag"}; Setting disable_macro_jit{false, "disable_macro_jit"}; + Setting disable_macro_hle{false, "disable_macro_hle"}; Setting extended_logging{false, "extended_logging"}; Setting use_debug_asserts{false, "use_debug_asserts"}; Setting use_auto_stub{false, "use_auto_stub"}; diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index a96e8648c..82ad0477d 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -107,14 +107,15 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { } } - if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { + auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); + if (!hle_program || Settings::values.disable_macro_hle) { + maxwell3d.RefreshParameters(); + cache_info.lle_program->Execute(parameters, method); + } else { cache_info.has_hle_program = true; cache_info.hle_program = std::move(hle_program); MICROPROFILE_SCOPE(MacroHLE); cache_info.hle_program->Execute(parameters, method); - } else { - maxwell3d.RefreshParameters(); - cache_info.lle_program->Execute(parameters, method); } } } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 3e51426c8..e9425b5bd 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -562,6 +562,7 @@ void Config::ReadDebuggingValues() { ReadBasicSetting(Settings::values.reporting_services); ReadBasicSetting(Settings::values.quest_flag); ReadBasicSetting(Settings::values.disable_macro_jit); + ReadBasicSetting(Settings::values.disable_macro_hle); ReadBasicSetting(Settings::values.extended_logging); ReadBasicSetting(Settings::values.use_debug_asserts); ReadBasicSetting(Settings::values.use_auto_stub); @@ -1198,6 +1199,7 @@ void Config::SaveDebuggingValues() { WriteBasicSetting(Settings::values.quest_flag); WriteBasicSetting(Settings::values.use_debug_asserts); WriteBasicSetting(Settings::values.disable_macro_jit); + WriteBasicSetting(Settings::values.disable_macro_hle); WriteBasicSetting(Settings::values.enable_all_controllers); WriteBasicSetting(Settings::values.create_crash_dumps); WriteBasicSetting(Settings::values.perform_vulkan_check); diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index dacc75a20..cbeb8f168 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -73,6 +73,8 @@ void ConfigureDebug::SetConfiguration() { ui->dump_macros->setChecked(Settings::values.dump_macros.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); + ui->disable_macro_hle->setEnabled(runtime_lock); + ui->disable_macro_hle->setChecked(Settings::values.disable_macro_hle.GetValue()); ui->disable_loop_safety_checks->setEnabled(runtime_lock); ui->disable_loop_safety_checks->setChecked( Settings::values.disable_shader_loop_safety_checks.GetValue()); @@ -117,6 +119,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.disable_shader_loop_safety_checks = ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); + Settings::values.disable_macro_hle = ui->disable_macro_hle->isChecked(); Settings::values.extended_logging = ui->extended_logging->isChecked(); Settings::values.perform_vulkan_check = ui->perform_vulkan_check->isChecked(); UISettings::values.disable_web_applet = ui->disable_web_applet->isChecked(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 102c8c66c..15acefe33 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -176,7 +176,7 @@ - + true @@ -202,6 +202,19 @@ + + + + true + + + When checked, it disables the macro HLE functions. Enabling this makes games run slower + + + Disable Macro HLE + + + diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index de9b220da..1e45e57bc 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -348,6 +348,7 @@ void Config::ReadValues() { ReadSetting("Debugging", Settings::values.use_debug_asserts); ReadSetting("Debugging", Settings::values.use_auto_stub); ReadSetting("Debugging", Settings::values.disable_macro_jit); + ReadSetting("Debugging", Settings::values.disable_macro_hle); ReadSetting("Debugging", Settings::values.use_gdbstub); ReadSetting("Debugging", Settings::values.gdbstub_port); -- cgit v1.2.3