aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp4
-rw-r--r--src/video_core/debug_utils/debug_utils.h4
-rw-r--r--src/video_core/dma_pusher.cpp1
-rw-r--r--src/video_core/dma_pusher.h1
-rw-r--r--src/video_core/engines/fermi_2d.cpp3
-rw-r--r--src/video_core/engines/fermi_2d.h8
-rw-r--r--src/video_core/engines/kepler_compute.h10
-rw-r--r--src/video_core/engines/kepler_memory.cpp6
-rw-r--r--src/video_core/engines/kepler_memory.h7
-rw-r--r--src/video_core/engines/maxwell_3d.cpp27
-rw-r--r--src/video_core/engines/maxwell_3d.h24
-rw-r--r--src/video_core/engines/maxwell_dma.cpp14
-rw-r--r--src/video_core/engines/maxwell_dma.h9
-rw-r--r--src/video_core/engines/shader_bytecode.h5
-rw-r--r--src/video_core/gpu.cpp12
-rw-r--r--src/video_core/gpu.h12
-rw-r--r--src/video_core/gpu_asynch.cpp2
-rw-r--r--src/video_core/gpu_thread.cpp43
-rw-r--r--src/video_core/gpu_thread.h76
-rw-r--r--src/video_core/macro_interpreter.cpp20
-rw-r--r--src/video_core/memory_manager.cpp560
-rw-r--r--src/video_core/memory_manager.h180
-rw-r--r--src/video_core/rasterizer_cache.h20
-rw-r--r--src/video_core/rasterizer_interface.h1
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp67
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h13
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp61
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h30
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp172
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h12
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp610
-rw-r--r--src/video_core/renderer_opengl/gl_state.h52
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp3
-rw-r--r--src/video_core/renderer_opengl/utils.cpp28
-rw-r--r--src/video_core/renderer_opengl/utils.h20
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp210
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h92
-rw-r--r--src/video_core/shader/decode/texture.cpp114
-rw-r--r--src/video_core/shader/decode/xmad.cpp39
-rw-r--r--src/video_core/shader/shader_ir.h12
-rw-r--r--src/video_core/shader/track.cpp17
-rw-r--r--src/video_core/textures/convert.cpp1
-rw-r--r--src/video_core/textures/convert.h5
-rw-r--r--src/video_core/textures/texture.h2
62 files changed, 1827 insertions, 932 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 63577a9c5..5c8ca429e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -130,7 +130,9 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_stream_buffer.cpp
- renderer_vulkan/vk_stream_buffer.h)
+ renderer_vulkan/vk_stream_buffer.h
+ renderer_vulkan/vk_swapchain.cpp
+ renderer_vulkan/vk_swapchain.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
@@ -139,4 +141,4 @@ endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad lz4_static)
+target_link_libraries(video_core PRIVATE glad)
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 5ffb492ea..f0ef67535 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -10,7 +10,7 @@ namespace Tegra {
void DebugContext::DoOnEvent(Event event, void* data) {
{
- std::unique_lock<std::mutex> lock(breakpoint_mutex);
+ std::unique_lock lock{breakpoint_mutex};
// TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
// show on debug widgets
@@ -32,7 +32,7 @@ void DebugContext::DoOnEvent(Event event, void* data) {
void DebugContext::Resume() {
{
- std::lock_guard<std::mutex> lock(breakpoint_mutex);
+ std::lock_guard lock{breakpoint_mutex};
// Tell all observers that we are about to resume
for (auto& breakpoint_observer : breakpoint_observers) {
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index c235faf46..ac3a2eb01 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -40,7 +40,7 @@ public:
/// Constructs the object such that it observes events of the given DebugContext.
explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
: context_weak(debug_context) {
- std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex);
+ std::unique_lock lock{debug_context->breakpoint_mutex};
debug_context->breakpoint_observers.push_back(this);
}
@@ -48,7 +48,7 @@ public:
auto context = context_weak.lock();
if (context) {
{
- std::unique_lock<std::mutex> lock(context->breakpoint_mutex);
+ std::unique_lock lock{context->breakpoint_mutex};
context->breakpoint_observers.remove(this);
}
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 8b1bea1ae..046d047cb 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -8,6 +8,7 @@
#include "video_core/dma_pusher.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
namespace Tegra {
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 27a36348c..6ab06518f 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -9,7 +9,6 @@
#include "common/bit_field.h"
#include "common/common_types.h"
-#include "video_core/memory_manager.h"
namespace Tegra {
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 03b7ee5d8..55966eef1 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -6,12 +6,13 @@
#include "common/logging/log.h"
#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra::Engines {
Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
- : memory_manager(memory_manager), rasterizer{rasterizer} {}
+ : rasterizer{rasterizer}, memory_manager{memory_manager} {}
void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 80523e320..2e51b7f13 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -10,7 +10,10 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}
namespace VideoCore {
class RasterizerInterface;
@@ -115,10 +118,9 @@ public:
};
} regs{};
- MemoryManager& memory_manager;
-
private:
VideoCore::RasterizerInterface& rasterizer;
+ MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 6575afd0f..fb6cdf432 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -9,7 +9,10 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}
namespace Tegra::Engines {
@@ -40,10 +43,11 @@ public:
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"KeplerCompute Regs has wrong size");
- MemoryManager& memory_manager;
-
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
+
+private:
+ MemoryManager& memory_manager;
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0931b9626..cd51a31d7 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -5,9 +5,9 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -15,7 +15,7 @@ namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
- : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
KeplerMemory::~KeplerMemory() = default;
@@ -46,7 +46,7 @@ void KeplerMemory::ProcessData(u32 data) {
// contain a dirty surface that will have to be written back to memory.
const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
- memory_manager.Write32(address, data);
+ memory_manager.Write<u32>(address, data);
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 9181e9d80..78b6c3e45 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,12 +10,15 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
namespace Core {
class System;
}
+namespace Tegra {
+class MemoryManager;
+}
+
namespace VideoCore {
class RasterizerInterface;
}
@@ -82,8 +85,8 @@ public:
private:
Core::System& system;
- MemoryManager& memory_manager;
VideoCore::RasterizerInterface& rasterizer;
+ MemoryManager& memory_manager;
void ProcessData(u32 data);
};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c5d5be4ef..74403eed4 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,11 +7,10 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/memory.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
@@ -21,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
- : memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
- macro_interpreter(*this) {
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
+ *this} {
InitializeRegisterDefaults();
}
@@ -250,6 +249,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessQueryGet();
break;
}
+ case MAXWELL3D_REG_INDEX(sync_info): {
+ ProcessSyncPoint();
+ break;
+ }
default:
break;
}
@@ -307,7 +310,7 @@ void Maxwell3D::ProcessQueryGet() {
// Write the current query sequence to the sequence address.
// TODO(Subv): Find out what happens if you use a long query type but mark it as a short
// query.
- memory_manager.Write32(sequence_address, sequence);
+ memory_manager.Write<u32>(sequence_address, sequence);
} else {
// Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
// GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -327,6 +330,14 @@ void Maxwell3D::ProcessQueryGet() {
}
}
+void Maxwell3D::ProcessSyncPoint() {
+ const u32 sync_point = regs.sync_info.sync_point.Value();
+ const u32 increment = regs.sync_info.increment.Value();
+ const u32 cache_flush = regs.sync_info.unknown.Value();
+ LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
+ cache_flush);
+}
+
void Maxwell3D::DrawArrays() {
LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
regs.vertex_buffer.count);
@@ -395,7 +406,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
u8* ptr{memory_manager.GetPointer(address)};
rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
- memory_manager.Write32(address, value);
+ memory_manager.Write<u32>(address, value);
dirty_flags.OnMemoryWrite();
@@ -447,7 +458,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
- const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
Texture::FullTextureInfo tex_info{};
// TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -482,7 +493,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
- const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
Texture::FullTextureInfo tex_info{};
tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7fbf1026e..321af3297 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -16,13 +16,16 @@
#include "common/math_util.h"
#include "video_core/gpu.h"
#include "video_core/macro_interpreter.h"
-#include "video_core/memory_manager.h"
#include "video_core/textures/texture.h"
namespace Core {
class System;
}
+namespace Tegra {
+class MemoryManager;
+}
+
namespace VideoCore {
class RasterizerInterface;
}
@@ -576,7 +579,17 @@ public:
u32 bind;
} macros;
- INSERT_PADDING_WORDS(0x188);
+ INSERT_PADDING_WORDS(0x69);
+
+ struct {
+ union {
+ BitField<0, 16, u32> sync_point;
+ BitField<16, 1, u32> unknown;
+ BitField<20, 1, u32> increment;
+ };
+ } sync_info;
+
+ INSERT_PADDING_WORDS(0x11E);
u32 tfb_enabled;
@@ -1093,7 +1106,6 @@ public:
};
State state{};
- MemoryManager& memory_manager;
struct DirtyFlags {
std::bitset<8> color_buffer{0xFF};
@@ -1141,6 +1153,8 @@ private:
VideoCore::RasterizerInterface& rasterizer;
+ MemoryManager& memory_manager;
+
/// Start offsets of each macro in macro_memory
std::unordered_map<u32, u32> macro_offsets;
@@ -1180,6 +1194,9 @@ private:
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
+ /// Handles writes to syncing register.
+ void ProcessSyncPoint();
+
/// Handles a write to the CB_DATA[i] register.
void ProcessCBData(u32 value);
@@ -1195,6 +1212,7 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a0ded4c25..2426d0067 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,9 +5,9 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
@@ -16,7 +16,7 @@ namespace Tegra::Engines {
MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
- : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -88,6 +88,16 @@ void MaxwellDMA::HandleCopy() {
auto source_ptr{memory_manager.GetPointer(source)};
auto dst_ptr{memory_manager.GetPointer(dest)};
+ if (!source_ptr) {
+ LOG_ERROR(HW_GPU, "source_ptr is invalid");
+ return;
+ }
+
+ if (!dst_ptr) {
+ LOG_ERROR(HW_GPU, "dst_ptr is invalid");
+ return;
+ }
+
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 34c369320..c6b649842 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -10,12 +10,15 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
namespace Core {
class System;
}
+namespace Tegra {
+class MemoryManager;
+}
+
namespace VideoCore {
class RasterizerInterface;
}
@@ -139,13 +142,13 @@ public:
};
} regs{};
- MemoryManager& memory_manager;
-
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ MemoryManager& memory_manager;
+
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7f613370b..2e1e96c81 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1238,13 +1238,16 @@ union Instruction {
union {
BitField<20, 16, u64> imm20_16;
+ BitField<35, 1, u64> high_b_rr; // used on RR
BitField<36, 1, u64> product_shift_left;
BitField<37, 1, u64> merge_37;
BitField<48, 1, u64> sign_a;
BitField<49, 1, u64> sign_b;
+ BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
BitField<50, 3, XmadMode> mode;
BitField<52, 1, u64> high_b;
BitField<53, 1, u64> high_a;
+ BitField<55, 1, u64> product_shift_left_second; // used on CR
BitField<56, 1, u64> merge_56;
} xmad;
@@ -1662,7 +1665,7 @@ private:
INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
- INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+ INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 66c690494..4461083ff 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,6 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
namespace Tegra {
@@ -30,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
auto& rasterizer{renderer.Rasterizer()};
- memory_manager = std::make_unique<Tegra::MemoryManager>();
+ memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
@@ -285,9 +286,10 @@ void GPU::ProcessSemaphoreTriggerMethod() {
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
- memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
+ memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
+ sizeof(block));
} else {
- const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())};
+ const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -314,11 +316,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
}
void GPU::ProcessSemaphoreRelease() {
- memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
+ memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
}
void GPU::ProcessSemaphoreAcquire() {
- const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress());
+ const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a14b95c30..de30ea354 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/dma_pusher.h"
-#include "video_core/memory_manager.h"
using CacheAddr = std::uintptr_t;
inline CacheAddr ToCacheAddr(const void* host_ptr) {
@@ -124,6 +123,8 @@ enum class EngineID {
MAXWELL_DMA_COPY_A = 0xB0B5,
};
+class MemoryManager;
+
class GPU {
public:
explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
@@ -176,11 +177,11 @@ public:
u32 address_high;
u32 address_low;
- GPUVAddr SmaphoreAddress() const {
+ GPUVAddr SemaphoreAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
- } smaphore_address;
+ } semaphore_address;
u32 semaphore_sequence;
u32 semaphore_trigger;
@@ -244,9 +245,8 @@ protected:
private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
- /// Mapping of command subchannels to their bound engine ids.
+ /// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {};
-
/// 3D engine
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
/// 2D engine
@@ -263,7 +263,7 @@ private:
static_assert(offsetof(GPU::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(smaphore_address, 0x4);
+ASSERT_REG_POSITION(semaphore_address, 0x4);
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
ASSERT_REG_POSITION(reference_count, 0x14);
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 8b355cf7b..db507cf04 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -9,7 +9,7 @@
namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
- : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
+ : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {}
GPUAsynch::~GPUAsynch() = default;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 086b2f625..cc56cf467 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,9 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
@@ -36,7 +39,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
- state.DecrementFramesCounter();
renderer.SwapBuffers(std::move(data->framebuffer));
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
@@ -47,13 +49,18 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
} else {
UNREACHABLE();
}
+ state.signaled_fence = next.fence;
+ state.TrySynchronize();
}
}
}
-ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
- : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
- std::ref(dma_pusher), std::ref(state)} {}
+ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+ Tegra::DmaPusher& dma_pusher)
+ : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} {
+ synchronization_event = system.CoreTiming().RegisterEvent(
+ "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
+}
ThreadManager::~ThreadManager() {
// Notify GPU thread that a shutdown is pending
@@ -62,14 +69,14 @@ ThreadManager::~ThreadManager() {
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
- PushCommand(SubmitListCommand(std::move(entries)));
+ const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
+ const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
+ system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
}
void ThreadManager::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- state.IncrementFramesCounter();
PushCommand(SwapBuffersCommand(std::move(framebuffer)));
- state.WaitForFrames();
}
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
@@ -79,7 +86,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
if (state.queue.Empty()) {
// It's quicker to invalidate a single region on the CPU if the queue is already empty
- renderer.Rasterizer().InvalidateRegion(addr, size);
+ system.Renderer().Rasterizer().InvalidateRegion(addr, size);
} else {
PushCommand(InvalidateRegionCommand(addr, size));
}
@@ -90,9 +97,25 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
-void ThreadManager::PushCommand(CommandData&& command_data) {
- state.queue.Push(CommandDataContainer(std::move(command_data)));
+u64 ThreadManager::PushCommand(CommandData&& command_data) {
+ const u64 fence{++state.last_fence};
+ state.queue.Push(CommandDataContainer(std::move(command_data), fence));
state.SignalCommands();
+ return fence;
+}
+
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+void SynchState::WaitForSynchronization(u64 fence) {
+ if (signaled_fence >= fence) {
+ return;
+ }
+
+ // Wait for the GPU to be idle (all commands to be executed)
+ {
+ MICROPROFILE_SCOPE(GPU_wait);
+ std::unique_lock<std::mutex> lock{synchronization_mutex};
+ synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
+ }
}
} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 8cd7db1c6..62bcea5bb 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -4,10 +4,8 @@
#pragma once
-#include <array>
#include <atomic>
#include <condition_variable>
-#include <memory>
#include <mutex>
#include <optional>
#include <thread>
@@ -21,9 +19,12 @@ struct FramebufferConfig;
class DmaPusher;
} // namespace Tegra
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
+namespace Core {
+class System;
+namespace Timing {
+struct EventType;
+} // namespace Timing
+} // namespace Core
namespace VideoCommon::GPUThread {
@@ -77,81 +78,68 @@ using CommandData =
struct CommandDataContainer {
CommandDataContainer() = default;
- CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
+ CommandDataContainer(CommandData&& data, u64 next_fence)
+ : data{std::move(data)}, fence{next_fence} {}
CommandDataContainer& operator=(const CommandDataContainer& t) {
data = std::move(t.data);
+ fence = t.fence;
return *this;
}
CommandData data;
+ u64 fence{};
};
/// Struct used to synchronize the GPU thread
struct SynchState final {
std::atomic_bool is_running{true};
std::atomic_int queued_frame_count{};
- std::mutex frames_mutex;
+ std::mutex synchronization_mutex;
std::mutex commands_mutex;
std::condition_variable commands_condition;
- std::condition_variable frames_condition;
+ std::condition_variable synchronization_condition;
- void IncrementFramesCounter() {
- std::lock_guard<std::mutex> lock{frames_mutex};
- ++queued_frame_count;
+ /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
+ /// synchronized. This is entirely empirical.
+ bool IsSynchronized() const {
+ constexpr std::size_t max_queue_gap{5};
+ return queue.Size() <= max_queue_gap;
}
- void DecrementFramesCounter() {
- {
- std::lock_guard<std::mutex> lock{frames_mutex};
- --queued_frame_count;
-
- if (queued_frame_count) {
- return;
- }
+ void TrySynchronize() {
+ if (IsSynchronized()) {
+ std::lock_guard<std::mutex> lock{synchronization_mutex};
+ synchronization_condition.notify_one();
}
- frames_condition.notify_one();
}
- void WaitForFrames() {
- {
- std::lock_guard<std::mutex> lock{frames_mutex};
- if (!queued_frame_count) {
- return;
- }
- }
-
- // Wait for the GPU to be idle (all commands to be executed)
- {
- std::unique_lock<std::mutex> lock{frames_mutex};
- frames_condition.wait(lock, [this] { return !queued_frame_count; });
- }
- }
+ void WaitForSynchronization(u64 fence);
void SignalCommands() {
- {
- std::unique_lock<std::mutex> lock{commands_mutex};
- if (queue.Empty()) {
- return;
- }
+ if (queue.Empty()) {
+ return;
}
commands_condition.notify_one();
}
void WaitForCommands() {
- std::unique_lock<std::mutex> lock{commands_mutex};
+ std::unique_lock lock{commands_mutex};
commands_condition.wait(lock, [this] { return !queue.Empty(); });
}
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue;
+ u64 last_fence{};
+ std::atomic<u64> signaled_fence{};
};
/// Class used to manage the GPU thread
class ThreadManager final {
public:
- explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+ explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+ Tegra::DmaPusher& dma_pusher);
~ThreadManager();
/// Push GPU command entries to be processed
@@ -172,12 +160,12 @@ public:
private:
/// Pushes a command to be executed by the GPU thread
- void PushCommand(CommandData&& command_data);
+ u64 PushCommand(CommandData&& command_data);
private:
SynchState state;
- VideoCore::RendererBase& renderer;
- Tegra::DmaPusher& dma_pusher;
+ Core::System& system;
+ Core::Timing::EventType* synchronization_event{};
std::thread thread;
std::thread::id thread_id;
};
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 64f75db43..524d9ea5a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -223,27 +223,21 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
}
u32 MacroInterpreter::FetchParameter() {
- ASSERT(next_parameter_index < parameters.size());
- return parameters[next_parameter_index++];
+ return parameters.at(next_parameter_index++);
}
u32 MacroInterpreter::GetRegister(u32 register_id) const {
- // Register 0 is supposed to always return 0.
- if (register_id == 0)
- return 0;
-
- ASSERT(register_id < registers.size());
- return registers[register_id];
+ return registers.at(register_id);
}
void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
- // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
- // register.
- if (register_id == 0)
+ // Register 0 is hardwired as the zero register.
+ // Ensure no writes to it actually occur.
+ if (register_id == 0) {
return;
+ }
- ASSERT(register_id < registers.size());
- registers[register_id] = value;
+ registers.at(register_id) = value;
}
void MacroInterpreter::SetMethodAddress(u32 address) {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 8e8f36f28..0f4e820aa 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -7,216 +7,526 @@
#include "common/logging/log.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
namespace Tegra {
-MemoryManager::MemoryManager() {
- // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
- // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
- // Undertale using 0 for a render target.
- PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
-}
-
-GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
- const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
+MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
+ std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
+ std::fill(page_table.attributes.begin(), page_table.attributes.end(),
+ Common::PageType::Unmapped);
+ page_table.Resize(address_space_width);
- ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+ // Initialize the map with a single free region covering the entire managed space.
+ VirtualMemoryArea initial_vma;
+ initial_vma.size = address_space_end;
+ vma_map.emplace(initial_vma.base, initial_vma);
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(*gpu_addr + offset)};
+ UpdatePageTableForVMA(initial_vma);
+}
- ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
+ const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
- slot = static_cast<u64>(PageStatus::Allocated);
- }
+ AllocateMemory(gpu_addr, 0, aligned_size);
- return *gpu_addr;
+ return gpu_addr;
}
GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(gpu_addr + offset)};
-
- ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
- slot = static_cast<u64>(PageStatus::Allocated);
- }
+ AllocateMemory(gpu_addr, 0, aligned_size);
return gpu_addr;
}
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
- const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
+ const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
- ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+ MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(*gpu_addr + offset)};
+ return gpu_addr;
+}
- ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
+ ASSERT((gpu_addr & page_mask) == 0);
- slot = cpu_addr + offset;
- }
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
- const MappedRegion region{cpu_addr, *gpu_addr, size};
- mapped_regions.push_back(region);
+ MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
- return *gpu_addr;
+ return gpu_addr;
}
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
- ASSERT((gpu_addr & PAGE_MASK) == 0);
+GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
+ ASSERT((gpu_addr & page_mask) == 0);
- if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
- // Page has been already mapped. In this case, we must find a new area of memory to use that
- // is different than the specified one. Super Mario Odyssey hits this scenario when changing
- // areas, but we do not want to overwrite the old pages.
- // TODO(bunnei): We need to write a hardware test to confirm this behavior.
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
+ const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
- LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
+ rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
+ UnmapRange(gpu_addr, aligned_size);
- const std::optional<GPUVAddr> new_gpu_addr{
- FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
+ return gpu_addr;
+}
+
+GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
+ // Find the first Free VMA.
+ const VMAHandle vma_handle{
+ std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
+ if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
+ return false;
+ }
- ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
+ const VAddr vma_end{vma.second.base + vma.second.size};
+ return vma_end > region_start && vma_end >= region_start + size;
+ })};
- gpu_addr = *new_gpu_addr;
+ if (vma_handle == vma_map.end()) {
+ return {};
}
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(gpu_addr + offset)};
+ return std::max(region_start, vma_handle->second.base);
+}
- ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
+bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
+ return (addr >> page_bits) < page_table.pointers.size();
+}
- slot = cpu_addr + offset;
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
+ if (!IsAddressValid(addr)) {
+ return {};
}
- const MappedRegion region{cpu_addr, gpu_addr, size};
- mapped_regions.push_back(region);
+ const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
+ if (cpu_addr) {
+ return cpu_addr + (addr & page_mask);
+ }
- return gpu_addr;
+ return {};
}
-GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
- ASSERT((gpu_addr & PAGE_MASK) == 0);
+template <typename T>
+T MemoryManager::Read(GPUVAddr addr) const {
+ if (!IsAddressValid(addr)) {
+ return {};
+ }
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(gpu_addr + offset)};
+ const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer) {
+ // NOTE: Avoid adding any extra logic to this fast-path block
+ T value;
+ std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+ return value;
+ }
- ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
- slot != static_cast<u64>(PageStatus::Unmapped));
+ switch (page_table.attributes[addr >> page_bits]) {
+ case Common::PageType::Unmapped:
+ LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
+ return 0;
+ case Common::PageType::Memory:
+ ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+ break;
+ default:
+ UNREACHABLE();
+ }
+ return {};
+}
- slot = static_cast<u64>(PageStatus::Unmapped);
+template <typename T>
+void MemoryManager::Write(GPUVAddr addr, T data) {
+ if (!IsAddressValid(addr)) {
+ return;
}
- // Delete the region mappings that are contained within the unmapped region
- mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(),
- [&](const MappedRegion& region) {
- return region.gpu_addr <= gpu_addr &&
- region.gpu_addr + region.size < gpu_addr + size;
- }),
- mapped_regions.end());
- return gpu_addr;
+ u8* page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer) {
+ // NOTE: Avoid adding any extra logic to this fast-path block
+ std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+ return;
+ }
+
+ switch (page_table.attributes[addr >> page_bits]) {
+ case Common::PageType::Unmapped:
+ LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
+ static_cast<u32>(data), addr);
+ return;
+ case Common::PageType::Memory:
+ ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+ break;
+ default:
+ UNREACHABLE();
+ }
}
-GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
- for (const auto& region : mapped_regions) {
- const GPUVAddr region_end{region.gpu_addr + region.size};
- if (region_start >= region.gpu_addr && region_start < region_end) {
- return region_end;
- }
+template u8 MemoryManager::Read<u8>(GPUVAddr addr) const;
+template u16 MemoryManager::Read<u16>(GPUVAddr addr) const;
+template u32 MemoryManager::Read<u32>(GPUVAddr addr) const;
+template u64 MemoryManager::Read<u64>(GPUVAddr addr) const;
+template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
+template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
+template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
+template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
+
+u8* MemoryManager::GetPointer(GPUVAddr addr) {
+ if (!IsAddressValid(addr)) {
+ return {};
}
+
+ u8* const page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer != nullptr) {
+ return page_pointer + (addr & page_mask);
+ }
+
+ LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
return {};
}
-std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
- PageStatus status) {
- GPUVAddr gpu_addr{region_start};
- u64 free_space{};
- align = (align + PAGE_MASK) & ~PAGE_MASK;
+const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
+ if (!IsAddressValid(addr)) {
+ return {};
+ }
- while (gpu_addr + free_space < MAX_ADDRESS) {
- if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
- free_space += PAGE_SIZE;
- if (free_space >= size) {
- return gpu_addr;
- }
- } else {
- gpu_addr += free_space + PAGE_SIZE;
- free_space = 0;
- gpu_addr = Common::AlignUp(gpu_addr, align);
- }
+ const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer != nullptr) {
+ return page_pointer + (addr & page_mask);
}
+ LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
return {};
}
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
- const VAddr base_addr{PageSlot(gpu_addr)};
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
+ std::size_t remaining_size{size};
+ std::size_t page_index{src_addr >> page_bits};
+ std::size_t page_offset{src_addr & page_mask};
+
+ while (remaining_size > 0) {
+ const std::size_t copy_amount{
+ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+ switch (page_table.attributes[page_index]) {
+ case Common::PageType::Memory: {
+ const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+ rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+ std::memcpy(dest_buffer, src_ptr, copy_amount);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
- if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
- base_addr == static_cast<u64>(PageStatus::Unmapped) ||
- base_addr == static_cast<u64>(PageStatus::Reserved)) {
- return {};
+ page_index++;
+ page_offset = 0;
+ dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+ remaining_size -= copy_amount;
}
+}
- return base_addr + (gpu_addr & PAGE_MASK);
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+ std::size_t remaining_size{size};
+ std::size_t page_index{dest_addr >> page_bits};
+ std::size_t page_offset{dest_addr & page_mask};
+
+ while (remaining_size > 0) {
+ const std::size_t copy_amount{
+ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+ switch (page_table.attributes[page_index]) {
+ case Common::PageType::Memory: {
+ u8* dest_ptr{page_table.pointers[page_index] + page_offset};
+ rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
+ std::memcpy(dest_ptr, src_buffer, copy_amount);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
+
+ page_index++;
+ page_offset = 0;
+ src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+ remaining_size -= copy_amount;
+ }
}
-u8 MemoryManager::Read8(GPUVAddr addr) {
- return Memory::Read8(*GpuToCpuAddress(addr));
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+ std::size_t remaining_size{size};
+ std::size_t page_index{src_addr >> page_bits};
+ std::size_t page_offset{src_addr & page_mask};
+
+ while (remaining_size > 0) {
+ const std::size_t copy_amount{
+ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+ switch (page_table.attributes[page_index]) {
+ case Common::PageType::Memory: {
+ const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+ rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+ WriteBlock(dest_addr, src_ptr, copy_amount);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
+
+ page_index++;
+ page_offset = 0;
+ dest_addr += static_cast<VAddr>(copy_amount);
+ src_addr += static_cast<VAddr>(copy_amount);
+ remaining_size -= copy_amount;
+ }
}
-u16 MemoryManager::Read16(GPUVAddr addr) {
- return Memory::Read16(*GpuToCpuAddress(addr));
+void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+ VAddr backing_addr) {
+ LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
+ (base + size) * page_size);
+
+ const VAddr end{base + size};
+ ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+ base + page_table.pointers.size());
+
+ std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
+
+ if (memory == nullptr) {
+ std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+ std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
+ backing_addr);
+ } else {
+ while (base != end) {
+ page_table.pointers[base] = memory;
+ page_table.backing_addr[base] = backing_addr;
+
+ base += 1;
+ memory += page_size;
+ backing_addr += page_size;
+ }
+ }
}
-u32 MemoryManager::Read32(GPUVAddr addr) {
- return Memory::Read32(*GpuToCpuAddress(addr));
+void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
+ ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+ ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+ MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
}
-u64 MemoryManager::Read64(GPUVAddr addr) {
- return Memory::Read64(*GpuToCpuAddress(addr));
+void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
+ ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+ ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+ MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
}
-void MemoryManager::Write8(GPUVAddr addr, u8 data) {
- Memory::Write8(*GpuToCpuAddress(addr), data);
+bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
+ ASSERT(base + size == next.base);
+ if (type != next.type) {
+ return {};
+ }
+ if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
+ return {};
+ }
+ if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
+ return {};
+ }
+ return true;
}
-void MemoryManager::Write16(GPUVAddr addr, u16 data) {
- Memory::Write16(*GpuToCpuAddress(addr), data);
+MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
+ if (target >= address_space_end) {
+ return vma_map.end();
+ } else {
+ return std::prev(vma_map.upper_bound(target));
+ }
}
-void MemoryManager::Write32(GPUVAddr addr, u32 data) {
- Memory::Write32(*GpuToCpuAddress(addr), data);
+MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
+ VirtualMemoryArea& vma{vma_handle->second};
+
+ vma.type = VirtualMemoryArea::Type::Allocated;
+ vma.backing_addr = 0;
+ vma.backing_memory = {};
+ UpdatePageTableForVMA(vma);
+
+ return MergeAdjacent(vma_handle);
}
-void MemoryManager::Write64(GPUVAddr addr, u64 data) {
- Memory::Write64(*GpuToCpuAddress(addr), data);
+MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
+ u64 size) {
+
+ // This is the appropriately sized VMA that will turn into our allocation.
+ VMAIter vma_handle{CarveVMA(target, size)};
+ VirtualMemoryArea& vma{vma_handle->second};
+
+ ASSERT(vma.size == size);
+
+ vma.offset = offset;
+
+ return Allocate(vma_handle);
}
-u8* MemoryManager::GetPointer(GPUVAddr addr) {
- return Memory::GetPointer(*GpuToCpuAddress(addr));
+MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
+ VAddr backing_addr) {
+ // This is the appropriately sized VMA that will turn into our allocation.
+ VMAIter vma_handle{CarveVMA(target, size)};
+ VirtualMemoryArea& vma{vma_handle->second};
+
+ ASSERT(vma.size == size);
+
+ vma.type = VirtualMemoryArea::Type::Mapped;
+ vma.backing_memory = memory;
+ vma.backing_addr = backing_addr;
+ UpdatePageTableForVMA(vma);
+
+ return MergeAdjacent(vma_handle);
}
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
- std::memcpy(dest_buffer, GetPointer(src_addr), size);
+void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
+ VMAIter vma{CarveVMARange(target, size)};
+ const VAddr target_end{target + size};
+ const VMAIter end{vma_map.end()};
+
+ // The comparison against the end of the range must be done using addresses since VMAs can be
+ // merged during this process, causing invalidation of the iterators.
+ while (vma != end && vma->second.base < target_end) {
+ // Unmapped ranges return to allocated state and can be reused
+ // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
+ vma = std::next(Allocate(vma));
+ }
+
+ ASSERT(FindVMA(target)->second.size >= size);
}
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
- std::memcpy(GetPointer(dest_addr), src_buffer, size);
+
+MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
+ // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
+ // non-const access to its container.
+ return vma_map.erase(iter, iter); // Erases an empty range of elements
}
-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
- std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
+MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
+ ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+ ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
+
+ VMAIter vma_handle{StripIterConstness(FindVMA(base))};
+ if (vma_handle == vma_map.end()) {
+ // Target address is outside the managed range
+ return {};
+ }
+
+ const VirtualMemoryArea& vma{vma_handle->second};
+ if (vma.type == VirtualMemoryArea::Type::Mapped) {
+ // Region is already allocated
+ return vma_handle;
+ }
+
+ const VAddr start_in_vma{base - vma.base};
+ const VAddr end_in_vma{start_in_vma + size};
+
+ ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
+ vma.size, end_in_vma);
+
+ if (end_in_vma < vma.size) {
+ // Split VMA at the end of the allocated region
+ SplitVMA(vma_handle, end_in_vma);
+ }
+ if (start_in_vma != 0) {
+ // Split VMA at the start of the allocated region
+ vma_handle = SplitVMA(vma_handle, start_in_vma);
+ }
+
+ return vma_handle;
+}
+
+MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
+ ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+ ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
+
+ const VAddr target_end{target + size};
+ ASSERT(target_end >= target);
+ ASSERT(size > 0);
+
+ VMAIter begin_vma{StripIterConstness(FindVMA(target))};
+ const VMAIter i_end{vma_map.lower_bound(target_end)};
+ if (std::any_of(begin_vma, i_end, [](const auto& entry) {
+ return entry.second.type == VirtualMemoryArea::Type::Unmapped;
+ })) {
+ return {};
+ }
+
+ if (target != begin_vma->second.base) {
+ begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
+ }
+
+ VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
+ if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
+ end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
+ }
+
+ return begin_vma;
+}
+
+MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
+ VirtualMemoryArea& old_vma{vma_handle->second};
+ VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
+
+ // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
+ // a bug. This restriction might be removed later.
+ ASSERT(offset_in_vma < old_vma.size);
+ ASSERT(offset_in_vma > 0);
+
+ old_vma.size = offset_in_vma;
+ new_vma.base += offset_in_vma;
+ new_vma.size -= offset_in_vma;
+
+ switch (new_vma.type) {
+ case VirtualMemoryArea::Type::Unmapped:
+ break;
+ case VirtualMemoryArea::Type::Allocated:
+ new_vma.offset += offset_in_vma;
+ break;
+ case VirtualMemoryArea::Type::Mapped:
+ new_vma.backing_memory += offset_in_vma;
+ break;
+ }
+
+ ASSERT(old_vma.CanBeMergedWith(new_vma));
+
+ return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
+}
+
+MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
+ const VMAIter next_vma{std::next(iter)};
+ if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
+ iter->second.size += next_vma->second.size;
+ vma_map.erase(next_vma);
+ }
+
+ if (iter != vma_map.begin()) {
+ VMAIter prev_vma{std::prev(iter)};
+ if (prev_vma->second.CanBeMergedWith(iter->second)) {
+ prev_vma->second.size += iter->second.size;
+ vma_map.erase(iter);
+ iter = prev_vma;
+ }
+ }
+
+ return iter;
}
-VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
- auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
- if (!block) {
- block = std::make_unique<PageBlock>();
- block->fill(static_cast<VAddr>(PageStatus::Unmapped));
+void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
+ switch (vma.type) {
+ case VirtualMemoryArea::Type::Unmapped:
+ UnmapRegion(vma.base, vma.size);
+ break;
+ case VirtualMemoryArea::Type::Allocated:
+ MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
+ break;
+ case VirtualMemoryArea::Type::Mapped:
+ MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
+ break;
}
- return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
}
} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 425e2f31c..647cbf93a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -1,82 +1,154 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
-#include <array>
-#include <memory>
+#include <map>
#include <optional>
-#include <vector>
#include "common/common_types.h"
+#include "common/page_table.h"
+
+namespace VideoCore {
+class RasterizerInterface;
+}
namespace Tegra {
-/// Virtual addresses in the GPU's memory map are 64 bit.
-using GPUVAddr = u64;
+/**
+ * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
+ * with homogeneous attributes across its extents. In this particular implementation each VMA is
+ * also backed by a single host memory allocation.
+ */
+struct VirtualMemoryArea {
+ enum class Type : u8 {
+ Unmapped,
+ Allocated,
+ Mapped,
+ };
+
+ /// Virtual base address of the region.
+ GPUVAddr base{};
+ /// Size of the region.
+ u64 size{};
+ /// Memory area mapping type.
+ Type type{Type::Unmapped};
+ /// CPU memory mapped address corresponding to this memory area.
+ VAddr backing_addr{};
+ /// Offset into the backing_memory the mapping starts from.
+ std::size_t offset{};
+ /// Pointer backing this VMA.
+ u8* backing_memory{};
+
+ /// Tests if this area can be merged to the right with `next`.
+ bool CanBeMergedWith(const VirtualMemoryArea& next) const;
+};
class MemoryManager final {
public:
- MemoryManager();
+ MemoryManager(VideoCore::RasterizerInterface& rasterizer);
GPUVAddr AllocateSpace(u64 size, u64 align);
- GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
+ GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
- GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
- GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
- GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
- std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
-
- static constexpr u64 PAGE_BITS = 16;
- static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
- static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
+ GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
+ GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
+ std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
- u8 Read8(GPUVAddr addr);
- u16 Read16(GPUVAddr addr);
- u32 Read32(GPUVAddr addr);
- u64 Read64(GPUVAddr addr);
+ template <typename T>
+ T Read(GPUVAddr addr) const;
- void Write8(GPUVAddr addr, u8 data);
- void Write16(GPUVAddr addr, u16 data);
- void Write32(GPUVAddr addr, u32 data);
- void Write64(GPUVAddr addr, u64 data);
+ template <typename T>
+ void Write(GPUVAddr addr, T data);
- u8* GetPointer(GPUVAddr vaddr);
+ u8* GetPointer(GPUVAddr addr);
+ const u8* GetPointer(GPUVAddr addr) const;
- void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
+ void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
- void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
+ void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
private:
- enum class PageStatus : u64 {
- Unmapped = 0xFFFFFFFFFFFFFFFFULL,
- Allocated = 0xFFFFFFFFFFFFFFFEULL,
- Reserved = 0xFFFFFFFFFFFFFFFDULL,
- };
-
- std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
- PageStatus status);
- VAddr& PageSlot(GPUVAddr gpu_addr);
-
- static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
- static constexpr u64 PAGE_TABLE_BITS{10};
- static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
- static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
- static constexpr u64 PAGE_BLOCK_BITS{14};
- static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS};
- static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1};
-
- using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
- std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
-
- struct MappedRegion {
- VAddr cpu_addr;
- GPUVAddr gpu_addr;
- u64 size;
- };
+ using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
+ using VMAHandle = VMAMap::const_iterator;
+ using VMAIter = VMAMap::iterator;
+
+ bool IsAddressValid(GPUVAddr addr) const;
+ void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+ VAddr backing_addr = 0);
+ void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
+ void UnmapRegion(GPUVAddr base, u64 size);
+
+ /// Finds the VMA in which the given address is included in, or `vma_map.end()`.
+ VMAHandle FindVMA(GPUVAddr target) const;
+
+ VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
+
+ /**
+ * Maps an unmanaged host memory pointer at a given address.
+ *
+ * @param target The guest address to start the mapping at.
+ * @param memory The memory to be mapped.
+ * @param size Size of the mapping.
+ * @param state MemoryState tag to attach to the VMA.
+ */
+ VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
+
+ /// Unmaps a range of addresses, splitting VMAs as necessary.
+ void UnmapRange(GPUVAddr target, u64 size);
+
+ /// Converts a VMAHandle to a mutable VMAIter.
+ VMAIter StripIterConstness(const VMAHandle& iter);
+
+ /// Marks as the specfied VMA as allocated.
+ VMAIter Allocate(VMAIter vma);
+
+ /**
+ * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
+ * the appropriate error checking.
+ */
+ VMAIter CarveVMA(GPUVAddr base, u64 size);
+
+ /**
+ * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
+ * end of the range.
+ */
+ VMAIter CarveVMARange(GPUVAddr base, u64 size);
+
+ /**
+ * Splits a VMA in two, at the specified offset.
+ * @returns the right side of the split, with the original iterator becoming the left side.
+ */
+ VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
+
+ /**
+ * Checks for and merges the specified VMA with adjacent ones if possible.
+ * @returns the merged VMA or the original if no merging was possible.
+ */
+ VMAIter MergeAdjacent(VMAIter vma);
+
+ /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
+ void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+
+ /// Finds a free (unmapped region) of the specified size starting at the specified address.
+ GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
- std::vector<MappedRegion> mapped_regions;
+private:
+ static constexpr u64 page_bits{16};
+ static constexpr u64 page_size{1 << page_bits};
+ static constexpr u64 page_mask{page_size - 1};
+
+ /// Address space in bits, this is fairly arbitrary but sufficiently large.
+ static constexpr u32 address_space_width{39};
+ /// Start address for mapping, this is fairly arbitrary but must be non-zero.
+ static constexpr GPUVAddr address_space_base{0x100000};
+ /// End of address space, based on address space in bits.
+ static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
+
+ Common::PageTable page_table{page_bits};
+ VMAMap vma_map;
+ VideoCore::RasterizerInterface& rasterizer;
};
} // namespace Tegra
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index ecd9986a0..291772186 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -71,8 +71,8 @@ private:
bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
- CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
+ CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
};
template <class T>
@@ -84,7 +84,7 @@ public:
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(CacheAddr addr, std::size_t size) {
- std::lock_guard<std::recursive_mutex> lock{mutex};
+ std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
for (auto& object : objects) {
@@ -94,7 +94,7 @@ public:
/// Mark the specified region as being invalidated
void InvalidateRegion(CacheAddr addr, u64 size) {
- std::lock_guard<std::recursive_mutex> lock{mutex};
+ std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
for (auto& object : objects) {
@@ -108,7 +108,7 @@ public:
/// Invalidates everything in the cache
void InvalidateAll() {
- std::lock_guard<std::recursive_mutex> lock{mutex};
+ std::lock_guard lock{mutex};
while (interval_cache.begin() != interval_cache.end()) {
Unregister(*interval_cache.begin()->second.begin());
@@ -132,8 +132,8 @@ protected:
}
/// Register an object into the cache
- void Register(const T& object) {
- std::lock_guard<std::recursive_mutex> lock{mutex};
+ virtual void Register(const T& object) {
+ std::lock_guard lock{mutex};
object->SetIsRegistered(true);
interval_cache.add({GetInterval(object), ObjectSet{object}});
@@ -142,8 +142,8 @@ protected:
}
/// Unregisters an object from the cache
- void Unregister(const T& object) {
- std::lock_guard<std::recursive_mutex> lock{mutex};
+ virtual void Unregister(const T& object) {
+ std::lock_guard lock{mutex};
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
@@ -153,14 +153,14 @@ protected:
/// Returns a ticks counter used for tracking when cached objects were last modified
u64 GetModifiedTicks() {
- std::lock_guard<std::recursive_mutex> lock{mutex};
+ std::lock_guard lock{mutex};
return ++modified_ticks;
}
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
- std::lock_guard<std::recursive_mutex> lock{mutex};
+ std::lock_guard lock{mutex};
if (!object->IsDirty()) {
return;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 76e292e87..d7b86df38 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
namespace VideoCore {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 5048ed6ce..25652e794 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,7 +7,7 @@
#include "common/alignment.h"
#include "core/core.h"
-#include "core/memory.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -15,14 +15,14 @@ namespace OpenGL {
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
std::size_t alignment, u8* host_ptr)
- : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
- host_ptr} {}
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+ alignment{alignment} {}
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
- std::size_t alignment, bool cache) {
+GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
+ bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
// Cache management is a big overhead, so only cache entries with a given size.
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 1de1f84ae..fc33aa433 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -58,7 +58,7 @@ public:
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
- GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+ GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index c8dbcacbd..8d9ee81f1 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,9 +4,9 @@
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -15,7 +15,7 @@
namespace OpenGL {
CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
- : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
buffer.Create();
// Bind and unbind the buffer so it gets allocated by the driver
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
@@ -46,7 +46,7 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr,
return search->second;
}
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size,
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
u8* host_ptr) {
GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
if (!region) {
@@ -76,8 +76,8 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
global_region.GetCbufOffset()};
- const auto actual_addr{memory_manager.Read64(addr)};
- const auto size{memory_manager.Read32(addr + 8)};
+ const auto actual_addr{memory_manager.Read<u64>(addr)};
+ const auto size{memory_manager.Read<u32>(addr + 8)};
// Look up global region in the cache based on address
const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index a840491f7..5a21ab66f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -66,7 +66,7 @@ public:
private:
GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
- GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr);
+ GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
void ReserveGlobalRegion(GlobalRegion region);
std::unordered_map<CacheAddr, GlobalRegion> reserve;
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 75d816795..c3e94d917 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -7,7 +7,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
-#include "core/memory.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
@@ -40,8 +40,7 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
return index_offset;
}
-GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
- u32 count) {
+GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
const std::size_t map_size{CalculateQuadSize(count)};
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index a8cb88eb5..4e87ce4d6 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -4,11 +4,9 @@
#pragma once
-#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/memory_manager.h"
namespace OpenGL {
@@ -24,7 +22,7 @@ public:
GLintptr MakeQuadArray(u32 first, u32 count);
- GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+ GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
private:
OGLBufferCache& buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 198c54872..d250d5cbb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -17,7 +17,6 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
-#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
@@ -26,7 +25,6 @@
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
-#include "video_core/video_core.h"
namespace OpenGL {
@@ -100,11 +98,9 @@ struct FramebufferCacheKey {
}
};
-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
- ScreenInfo& info)
- : res_cache{*this}, shader_cache{*this, system}, global_cache{*this},
- emu_window{window}, system{system}, screen_info{info},
- buffer_cache(*this, STREAM_BUFFER_SIZE) {
+RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
+ : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
+ screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -225,8 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
if (!vertex_array.IsEnabled())
continue;
- const Tegra::GPUVAddr start = vertex_array.StartAddress();
- const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+ const GPUVAddr start = vertex_array.StartAddress();
+ const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
const u64 size = end - start + 1;
@@ -303,6 +299,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+ // Prepare packed bindings
+ bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
+ bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
+
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -320,13 +320,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
- ubo.SetFromRegs(gpu.state.shader_stages[stage]);
+ ubo.SetFromRegs(gpu, stage);
const GLintptr offset = buffer_cache.UploadHostMemory(
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
// Bind the emulation info buffer
- glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
- static_cast<GLsizeiptr>(sizeof(ubo)));
+ bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
+ static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
@@ -370,6 +370,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
base_bindings = next_bindings;
}
+ bind_ubo_pushbuffer.Bind();
+ bind_ssbo_pushbuffer.Bind();
+
SyncClipEnabled(clip_distances);
gpu.dirty_flags.shaders = false;
@@ -421,8 +424,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
if (!regs.vertex_array[index].IsEnabled())
continue;
- const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
- const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+ const GPUVAddr start = regs.vertex_array[index].StartAddress();
+ const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
size += end - start + 1;
@@ -904,23 +907,14 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
const auto& entries = shader->GetShaderEntries().const_buffers;
- constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
- std::array<GLuint, max_binds> bind_buffers;
- std::array<GLintptr, max_binds> bind_offsets;
- std::array<GLsizeiptr, max_binds> bind_sizes;
-
- ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
-
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& used_buffer = entries[bindpoint];
const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
if (!buffer.enabled) {
- // With disabled buffers set values as zero to unbind them
- bind_buffers[bindpoint] = 0;
- bind_offsets[bindpoint] = 0;
- bind_sizes[bindpoint] = 0;
+ // Set values to zero to unbind buffers
+ bind_ubo_pushbuffer.Push(0, 0, 0);
continue;
}
@@ -948,30 +942,19 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
- // Prepare values for multibind
- bind_buffers[bindpoint] = buffer_cache.GetHandle();
- bind_offsets[bindpoint] = const_buffer_offset;
- bind_sizes[bindpoint] = size;
+ bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
}
-
- // The first binding is reserved for emulation values
- const GLuint ubo_base_binding = base_bindings.cbuf + 1;
- glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
- bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
}
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings) {
- // TODO(Rodrigo): Use ARB_multi_bind here
const auto& entries = shader->GetShaderEntries().global_memory_entries;
-
- for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
- const auto& entry = entries[bindpoint];
- const u32 current_bindpoint = base_bindings.gmem + bindpoint;
- const auto& region = global_cache.GetGlobalRegion(entry, stage);
-
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
+ for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+ const auto& entry{entries[bindpoint]};
+ const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+ bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
+ static_cast<GLsizeiptr>(region->GetSizeInBytes()));
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 30f3e8acb..e4c64ae71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -12,15 +12,12 @@
#include <optional>
#include <tuple>
#include <utility>
-#include <vector>
#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -29,10 +26,9 @@
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
+#include "video_core/renderer_opengl/utils.h"
namespace Core {
class System;
@@ -50,8 +46,7 @@ struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
- explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
- ScreenInfo& info);
+ explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
~RasterizerOpenGL() override;
void DrawArrays() override;
@@ -214,7 +209,6 @@ private:
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
- Core::Frontend::EmuWindow& emu_window;
Core::System& system;
ScreenInfo& screen_info;
@@ -236,6 +230,9 @@ private:
PrimitiveAssembler primitive_assembler{buffer_cache};
GLint uniform_buffer_alignment;
+ BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
+ BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
+
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 57329cd61..9026a9452 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -13,9 +13,9 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
-#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
@@ -55,7 +55,7 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
}
}
-void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
+void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
gpu_addr = gpu_addr_;
@@ -222,7 +222,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
- u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+ u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
SurfaceParams params{};
@@ -266,6 +266,10 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
+ if (!params.is_tiled) {
+ const u32 bpp = params.GetFormatBpp() / 8;
+ params.pitch = config.width * bpp;
+ }
params.height = config.height;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
@@ -562,8 +566,14 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
}
CachedSurface::CachedSurface(const SurfaceParams& params)
- : params{params}, gl_target{SurfaceTargetToGL(params.target)},
- cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
+ : RasterizerCacheObject{params.host_ptr}, params{params},
+ gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {
+
+ const auto optional_cpu_addr{
+ Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
+ ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
+ cpu_addr = *optional_cpu_addr;
+
texture.Create(gl_target);
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
@@ -603,20 +613,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
ApplyTextureDefaults(texture.handle, params.max_mip_level);
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
-
- // Clamp size to mapped GPU memory region
- // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
- // R32F render buffer. We do not yet know if this is a game bug or something else, but this
- // check is necessary to prevent flushing from overwriting unmapped memory.
-
- auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
- if (cached_size_in_bytes > max_size) {
- LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
- cached_size_in_bytes = max_size;
- }
-
- cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr);
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -670,8 +666,8 @@ void CachedSurface::FlushGLBuffer() {
gl_buffer[0].resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
+ glPixelStorei(GL_PACK_ALIGNMENT, align);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(!tuple.compressed);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -716,8 +712,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
+ glPixelStorei(GL_UNPACK_ALIGNMENT, align);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
@@ -925,7 +921,7 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
- if (params.gpu_addr == 0 || params.height * params.width == 0) {
+ if (!params.IsValid()) {
return {};
}
@@ -941,7 +937,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
- UnregisterSurface(surface);
+ Unregister(surface);
Register(new_surface);
if (new_surface->IsUploaded()) {
RegisterReinterpretSurface(new_surface);
@@ -949,7 +945,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
- UnregisterSurface(surface);
+ Unregister(surface);
}
}
@@ -980,11 +976,11 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
const auto& init_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- Tegra::GPUVAddr address{init_params.gpu_addr};
+ GPUVAddr address{init_params.gpu_addr};
const std::size_t layer_size{dst_params.LayerMemorySize()};
for (u32 layer = 0; layer < dst_params.depth; layer++) {
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
- const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
+ const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
if (!copy) {
continue;
@@ -1244,10 +1240,9 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
return {};
}
-static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params,
- u32 mipmap) {
+static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
const std::size_t size{params.LayerMemorySize()};
- Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
+ GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
for (u32 i = 0; i < params.depth; i++) {
if (start == addr) {
return {i};
@@ -1304,12 +1299,12 @@ static bool IsReinterpretInvalidSecond(const Surface render_surface,
bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
Surface intersect) {
if (IsReinterpretInvalid(triggering_surface, intersect)) {
- UnregisterSurface(intersect);
+ Unregister(intersect);
return false;
}
if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
- UnregisterSurface(intersect);
+ Unregister(intersect);
return false;
}
FlushObject(intersect);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 9366f47f2..db280dbb3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -5,13 +5,13 @@
#pragma once
#include <array>
-#include <map>
#include <memory>
#include <string>
-#include <unordered_set>
+#include <tuple>
#include <vector>
#include "common/alignment.h"
+#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
@@ -109,6 +109,11 @@ struct SurfaceParams {
return size;
}
+ /// Returns true if the parameters constitute a valid rasterizer surface.
+ bool IsValid() const {
+ return gpu_addr && host_ptr && height && width;
+ }
+
/// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
/// mipmaps.
std::size_t LayerMemorySize() const {
@@ -201,6 +206,13 @@ struct SurfaceParams {
return bd;
}
+ u32 RowAlign(u32 mip_level) const {
+ const u32 m_width = MipWidth(mip_level);
+ const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
+ const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
+ return (1U << l2);
+ }
+
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
@@ -210,7 +222,7 @@ struct SurfaceParams {
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
- u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+ u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
@@ -232,7 +244,7 @@ struct SurfaceParams {
}
/// Initializes parameters for caching, should be called after everything has been initialized
- void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
+ void InitCacheParameters(GPUVAddr gpu_addr);
std::string TargetName() const {
switch (target) {
@@ -297,7 +309,7 @@ struct SurfaceParams {
bool srgb_conversion;
// Parameters used for caching
u8* host_ptr;
- Tegra::GPUVAddr gpu_addr;
+ GPUVAddr gpu_addr;
std::size_t size_in_bytes;
std::size_t size_in_bytes_gl;
@@ -533,13 +545,17 @@ private:
return nullptr;
}
+ void Register(const Surface& object) override {
+ RasterizerCache<Surface>::Register(object);
+ }
+
/// Unregisters an object from the cache
- void UnregisterSurface(const Surface& object) {
+ void Unregister(const Surface& object) override {
if (object->IsReinterpreted()) {
auto interval = GetReinterpretInterval(object);
reinterpreted_surfaces.erase(interval);
}
- Unregister(object);
+ RasterizerCache<Surface>::Unregister(object);
}
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1ed740877..99f67494c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,13 +6,12 @@
#include "common/assert.h"
#include "common/hash.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/shader_ir.h"
@@ -32,7 +31,7 @@ struct UnspecializedShader {
namespace {
/// Gets the address for the specified shader stage program
-Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
@@ -41,6 +40,10 @@ Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(const u8* host_ptr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+ ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+ std::fill(program_code.begin(), program_code.end(), 0);
+ return program_code;
+ });
std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
return program_code;
}
@@ -215,9 +218,9 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
- : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
- program_type{program_type}, disk_cache{disk_cache},
- precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
+ : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
+ unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
+ precompiled_programs{precompiled_programs} {
const std::size_t code_size = CalculateProgramSize(program_code);
const std::size_t code_size_b =
@@ -245,9 +248,9 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
GLShader::ProgramResult result, u8* host_ptr)
- : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type},
- disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
- host_ptr} {
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
+ program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
+ precompiled_programs} {
code = std::move(result.first);
entries = result.second;
@@ -486,7 +489,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
}
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
+ const GPUVAddr program_addr{GetShaderAddress(program)};
// Look up shader in the cache based on address
const auto& host_ptr{memory_manager.GetPointer(program_addr)};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index fd1c85115..0cf8e0b3d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,21 +5,20 @@
#pragma once
#include <array>
+#include <atomic>
#include <memory>
#include <set>
#include <tuple>
#include <unordered_map>
+#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
-#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace Core {
class System;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 11d1169f0..28e490b3c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -21,6 +21,8 @@
namespace OpenGL::GLShader {
+namespace {
+
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
@@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
using Operation = const OperationNode&;
+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+
+struct TextureAoffi {};
+using TextureArgument = std::pair<Type, Node>;
+using TextureIR = std::variant<TextureAoffi, TextureArgument>;
+
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
-enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
-
class ShaderWriter {
public:
void AddExpression(std::string_view text) {
@@ -69,10 +75,10 @@ public:
shader_source += '\n';
}
- std::string GenerateTemporal() {
- std::string temporal = "tmp";
- temporal += std::to_string(temporal_index++);
- return temporal;
+ std::string GenerateTemporary() {
+ std::string temporary = "tmp";
+ temporary += std::to_string(temporary_index++);
+ return temporary;
}
std::string GetResult() {
@@ -87,11 +93,11 @@ private:
}
std::string shader_source;
- u32 temporal_index = 1;
+ u32 temporary_index = 1;
};
/// Generates code to use for a swizzle operation.
-static std::string GetSwizzle(u32 elem) {
+std::string GetSwizzle(u32 elem) {
ASSERT(elem <= 3);
std::string swizzle = ".";
swizzle += "xyzw"[elem];
@@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
}
/// Translate topology
-static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
+std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
case Tegra::Shader::OutputTopology::PointList:
return "points";
@@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
}
/// Returns true if an object has to be treated as precise
-static bool IsPrecise(Operation operand) {
+bool IsPrecise(Operation operand) {
const auto& meta = operand.GetMeta();
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
@@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
return false;
}
-static bool IsPrecise(Node node) {
+bool IsPrecise(Node node) {
if (const auto operation = std::get_if<OperationNode>(node)) {
return IsPrecise(*operation);
}
@@ -426,9 +432,14 @@ private:
std::string Visit(Node node) {
if (const auto operation = std::get_if<OperationNode>(node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
+ if (operation_index >= operation_decompilers.size()) {
+ UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
+ return {};
+ }
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
- UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
+ UNREACHABLE_MSG("Undefined operation: {}", operation_index);
+ return {};
}
return (this->*decompiler)(*operation);
@@ -540,9 +551,8 @@ private:
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
- const std::string final_offset = code.GenerateTemporal();
- code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " +
- std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';');
+ const std::string final_offset = code.GenerateTemporary();
+ code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);");
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
final_offset, final_offset);
@@ -587,9 +597,9 @@ private:
// There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
- const std::string temporal = code.GenerateTemporal();
- code.AddLine(precise + "float " + temporal + " = " + value + ';');
- return temporal;
+ const std::string temporary = code.GenerateTemporary();
+ code.AddLine(precise + "float " + temporary + " = " + value + ';');
+ return temporary;
}
std::string VisitOperand(Operation operation, std::size_t operand_index) {
@@ -601,9 +611,9 @@ private:
return Visit(operand);
}
- const std::string temporal = code.GenerateTemporal();
- code.AddLine("float " + temporal + " = " + Visit(operand) + ';');
- return temporal;
+ const std::string temporary = code.GenerateTemporary();
+ code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
+ return temporary;
}
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
@@ -718,8 +728,8 @@ private:
result_type));
}
- std::string GenerateTexture(Operation operation, const std::string& func,
- const std::vector<std::pair<Type, Node>>& extras) {
+ std::string GenerateTexture(Operation operation, const std::string& function_suffix,
+ const std::vector<TextureIR>& extras) {
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -729,11 +739,11 @@ private:
const bool has_array = meta->sampler.IsArray();
const bool has_shadow = meta->sampler.IsShadow();
- std::string expr = func;
- expr += '(';
- expr += GetSampler(meta->sampler);
- expr += ", ";
-
+ std::string expr = "texture" + function_suffix;
+ if (!meta->aoffi.empty()) {
+ expr += "Offset";
+ }
+ expr += '(' + GetSampler(meta->sampler) + ", ";
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
@@ -751,36 +761,74 @@ private:
}
expr += ')';
- for (const auto& extra_pair : extras) {
- const auto [type, operand] = extra_pair;
- if (operand == nullptr) {
- continue;
+ for (const auto& variant : extras) {
+ if (const auto argument = std::get_if<TextureArgument>(&variant)) {
+ expr += GenerateTextureArgument(*argument);
+ } else if (std::get_if<TextureAoffi>(&variant)) {
+ expr += GenerateTextureAoffi(meta->aoffi);
+ } else {
+ UNREACHABLE();
}
- expr += ", ";
+ }
- switch (type) {
- case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
- // Inline the string as an immediate integer in GLSL (some extra arguments are
- // required to be constant)
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
- expr += "ftoi(" + Visit(operand) + ')';
- }
- break;
- case Type::Float:
- expr += Visit(operand);
- break;
- default: {
- const auto type_int = static_cast<u32>(type);
- UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
- expr += '0';
- break;
+ return expr + ')';
+ }
+
+ std::string GenerateTextureArgument(TextureArgument argument) {
+ const auto [type, operand] = argument;
+ if (operand == nullptr) {
+ return {};
+ }
+
+ std::string expr = ", ";
+ switch (type) {
+ case Type::Int:
+ if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ // Inline the string as an immediate integer in GLSL (some extra arguments are
+ // required to be constant)
+ expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else {
+ expr += "ftoi(" + Visit(operand) + ')';
}
+ break;
+ case Type::Float:
+ expr += Visit(operand);
+ break;
+ default: {
+ const auto type_int = static_cast<u32>(type);
+ UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
+ expr += '0';
+ break;
+ }
+ }
+ return expr;
+ }
+
+ std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
+ if (aoffi.empty()) {
+ return {};
+ }
+ constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
+ std::string expr = ", ";
+ expr += coord_constructors.at(aoffi.size() - 1);
+ expr += '(';
+
+ for (std::size_t index = 0; index < aoffi.size(); ++index) {
+ const auto operand{aoffi.at(index)};
+ if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
+ // to be constant by the standard).
+ expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else {
+ expr += "ftoi(" + Visit(operand) + ')';
+ }
+ if (index + 1 < aoffi.size()) {
+ expr += ", ";
}
}
+ expr += ')';
- return expr + ')';
+ return expr;
}
std::string Assign(Operation operation) {
@@ -1159,7 +1207,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
+ std::string expr = GenerateTexture(
+ operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1170,7 +1219,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
+ std::string expr = GenerateTexture(
+ operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1182,7 +1232,8 @@ private:
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
+ return GenerateTexture(operation, "Gather",
+ {TextureArgument{type, meta->component}, TextureAoffi{}}) +
GetSwizzle(meta->element);
}
@@ -1196,11 +1247,12 @@ private:
switch (meta->element) {
case 0:
case 1:
- return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element);
+ return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
+ GetSwizzle(meta->element) + "))";
case 2:
return "0";
case 3:
- return "textureQueryLevels(" + sampler + ')';
+ return "itof(textureQueryLevels(" + sampler + "))";
}
UNREACHABLE();
return "0";
@@ -1211,8 +1263,8 @@ private:
ASSERT(meta);
if (meta->element < 2) {
- return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
- " * vec2(256))" + GetSwizzle(meta->element) + "))";
+ return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
+ GetSwizzle(meta->element) + "))";
}
return "0";
}
@@ -1565,6 +1617,8 @@ private:
ShaderWriter code;
};
+} // Anonymous namespace
+
std::string GetCommonDeclarations() {
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 72aca4938..4e04ab2f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,7 +5,6 @@
#pragma once
#include <array>
-#include <set>
#include <string>
#include <utility>
#include <vector>
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 82fc4d44b..8a43eb157 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -4,7 +4,6 @@
#include <cstring>
#include <fmt/format.h>
-#include <lz4.h>
#include "common/assert.h"
#include "common/common_paths.h"
@@ -12,6 +11,7 @@
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
+#include "common/zstd_compression.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
@@ -49,39 +49,6 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
return hash;
}
-template <typename T>
-std::vector<u8> CompressData(const T* source, std::size_t source_size) {
- if (source_size > LZ4_MAX_INPUT_SIZE) {
- // Source size exceeds LZ4 maximum input size
- return {};
- }
- const auto source_size_int = static_cast<int>(source_size);
- const int max_compressed_size = LZ4_compressBound(source_size_int);
- std::vector<u8> compressed(max_compressed_size);
- const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
- reinterpret_cast<char*>(compressed.data()),
- source_size_int, max_compressed_size);
- if (compressed_size <= 0) {
- // Compression failed
- return {};
- }
- compressed.resize(compressed_size);
- return compressed;
-}
-
-std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
- std::vector<u8> uncompressed(uncompressed_size);
- const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
- reinterpret_cast<char*>(uncompressed.data()),
- static_cast<int>(compressed.size()),
- static_cast<int>(uncompressed.size()));
- if (static_cast<int>(uncompressed_size) != size_check) {
- // Decompression failed
- return {};
- }
- return uncompressed;
-}
-
} // namespace
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
@@ -292,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- dump.binary = DecompressData(compressed_binary, binary_length);
+ dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
if (dump.binary.empty()) {
return {};
}
@@ -321,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
- const std::vector<u8> code = DecompressData(compressed_code, code_size);
+ const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
if (code.empty()) {
return {};
}
@@ -507,7 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
if (!IsUsable())
return;
- const std::vector<u8> compressed_code{CompressData(code.data(), code.size())};
+ const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
+ reinterpret_cast<const u8*>(code.data()), code.size())};
if (compressed_code.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
unique_identifier);
@@ -537,7 +505,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
- const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
+ const std::vector<u8> compressed_binary =
+ Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
+
if (compressed_binary.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
usage.unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 7d96649af..8763d9c71 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <fmt/format.h>
-#include "common/assert.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index fba8e681b..fad346b48 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,12 +4,9 @@
#pragma once
-#include <array>
-#include <string>
#include <vector>
#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 6a30c28d2..eaf3e03a0 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,15 +2,15 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "core/core.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL::GLShader {
-void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- const auto& regs = gpu.regs;
- const auto& state = gpu.state;
+using Tegra::Engines::Maxwell3D;
+
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+ const auto& regs = maxwell.regs;
+ const auto& state = maxwell.state;
// TODO(bunnei): Support more than one viewport
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
@@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
u32 func = static_cast<u32>(regs.alpha_test_func);
// Normalize the gl variants of opCompare to be the same as the normal variants
- u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never);
+ const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
if (func >= op_gl_variant_base) {
func = func - op_gl_variant_base + 1U;
}
@@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
// Assign in which stage the position has to be flipped
// (the last stage before the fragment shader).
- if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
- flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+ constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+ if (maxwell.regs.shader_config[geometry_index].enable) {
+ flip_stage = geometry_index;
} else {
flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 4970aafed..37dcfefdb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,14 +12,13 @@
namespace OpenGL::GLShader {
-using Tegra::Engines::Maxwell3D;
-
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-// Not following that rule will cause problems on some AMD drivers.
+/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+/// Not following that rule will cause problems on some AMD drivers.
struct MaxwellUniformData {
- void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
+ void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
+
alignas(16) GLvec4 viewport_flip;
struct alignas(16) {
GLuint instance_id;
@@ -63,7 +62,6 @@ public:
UpdatePipeline();
state.draw.shader_program = 0;
state.draw.program_pipeline = pipeline.handle;
- state.geometry_shaders.enabled = (gs != 0);
}
private:
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 9419326a3..52d569a1b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -10,16 +10,62 @@
namespace OpenGL {
-OpenGLState OpenGLState::cur_state;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+OpenGLState OpenGLState::cur_state;
bool OpenGLState::s_rgb_used;
+namespace {
+
+template <typename T>
+bool UpdateValue(T& current_value, const T new_value) {
+ const bool changed = current_value != new_value;
+ current_value = new_value;
+ return changed;
+}
+
+template <typename T1, typename T2>
+bool UpdateTie(T1 current_value, const T2 new_value) {
+ const bool changed = current_value != new_value;
+ current_value = new_value;
+ return changed;
+}
+
+void Enable(GLenum cap, bool enable) {
+ if (enable) {
+ glEnable(cap);
+ } else {
+ glDisable(cap);
+ }
+}
+
+void Enable(GLenum cap, GLuint index, bool enable) {
+ if (enable) {
+ glEnablei(cap, index);
+ } else {
+ glDisablei(cap, index);
+ }
+}
+
+void Enable(GLenum cap, bool& current_value, bool new_value) {
+ if (UpdateValue(current_value, new_value))
+ Enable(cap, new_value);
+}
+
+void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
+ if (UpdateValue(current_value, new_value))
+ Enable(cap, index, new_value);
+}
+
+} // namespace
+
OpenGLState::OpenGLState() {
// These all match default OpenGL values
- geometry_shaders.enabled = false;
framebuffer_srgb.enabled = false;
+
multisample_control.alpha_to_coverage = false;
multisample_control.alpha_to_one = false;
+
cull.enabled = false;
cull.mode = GL_BACK;
cull.front_face = GL_CCW;
@@ -30,14 +76,15 @@ OpenGLState::OpenGLState() {
primitive_restart.enabled = false;
primitive_restart.index = 0;
+
for (auto& item : color_mask) {
item.red_enabled = GL_TRUE;
item.green_enabled = GL_TRUE;
item.blue_enabled = GL_TRUE;
item.alpha_enabled = GL_TRUE;
}
- stencil.test_enabled = false;
- auto reset_stencil = [](auto& config) {
+
+ const auto ResetStencil = [](auto& config) {
config.test_func = GL_ALWAYS;
config.test_ref = 0;
config.test_mask = 0xFFFFFFFF;
@@ -46,8 +93,10 @@ OpenGLState::OpenGLState() {
config.action_depth_pass = GL_KEEP;
config.action_stencil_fail = GL_KEEP;
};
- reset_stencil(stencil.front);
- reset_stencil(stencil.back);
+ stencil.test_enabled = false;
+ ResetStencil(stencil.front);
+ ResetStencil(stencil.back);
+
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
@@ -61,6 +110,7 @@ OpenGLState::OpenGLState() {
item.scissor.width = 0;
item.scissor.height = 0;
}
+
for (auto& item : blend) {
item.enabled = true;
item.rgb_equation = GL_FUNC_ADD;
@@ -70,11 +120,14 @@ OpenGLState::OpenGLState() {
item.src_a_func = GL_ONE;
item.dst_a_func = GL_ZERO;
}
+
independant_blend.enabled = false;
+
blend_color.red = 0.0f;
blend_color.green = 0.0f;
blend_color.blue = 0.0f;
blend_color.alpha = 0.0f;
+
logic_op.enabled = false;
logic_op.operation = GL_COPY;
@@ -91,9 +144,12 @@ OpenGLState::OpenGLState() {
clip_distance = {};
point.size = 1;
+
fragment_color_clamp.enabled = false;
+
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
+
polygon_offset.fill_enable = false;
polygon_offset.line_enable = false;
polygon_offset.point_enable = false;
@@ -103,260 +159,255 @@ OpenGLState::OpenGLState() {
}
void OpenGLState::ApplyDefaultState() {
+ glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glDisable(GL_PRIMITIVE_RESTART);
glDisable(GL_STENCIL_TEST);
- glEnable(GL_BLEND);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_SCISSOR_TEST);
}
+void OpenGLState::ApplyFramebufferState() const {
+ if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
+ }
+ if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
+ }
+}
+
+void OpenGLState::ApplyVertexArrayState() const {
+ if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
+ glBindVertexArray(draw.vertex_array);
+ }
+}
+
+void OpenGLState::ApplyShaderProgram() const {
+ if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
+ glUseProgram(draw.shader_program);
+ }
+}
+
+void OpenGLState::ApplyProgramPipeline() const {
+ if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
+ glBindProgramPipeline(draw.program_pipeline);
+ }
+}
+
+void OpenGLState::ApplyClipDistances() const {
+ for (std::size_t i = 0; i < clip_distance.size(); ++i) {
+ Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
+ clip_distance[i]);
+ }
+}
+
+void OpenGLState::ApplyPointSize() const {
+ if (UpdateValue(cur_state.point.size, point.size)) {
+ glPointSize(point.size);
+ }
+}
+
+void OpenGLState::ApplyFragmentColorClamp() const {
+ if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
+ fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
+ }
+}
+
+void OpenGLState::ApplyMultisample() const {
+ Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
+ multisample_control.alpha_to_coverage);
+ Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
+ multisample_control.alpha_to_one);
+}
+
+void OpenGLState::ApplyDepthClamp() const {
+ if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
+ depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
+ return;
+ }
+ cur_state.depth_clamp = depth_clamp;
+
+ UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
+ "Unimplemented Depth Clamp Separation!");
+
+ Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
+}
+
void OpenGLState::ApplySRgb() const {
- if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
- if (framebuffer_srgb.enabled) {
- // Track if sRGB is used
- s_rgb_used = true;
- glEnable(GL_FRAMEBUFFER_SRGB);
- } else {
- glDisable(GL_FRAMEBUFFER_SRGB);
- }
+ if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
+ return;
+ cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
+ if (framebuffer_srgb.enabled) {
+ // Track if sRGB is used
+ s_rgb_used = true;
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ } else {
+ glDisable(GL_FRAMEBUFFER_SRGB);
}
}
void OpenGLState::ApplyCulling() const {
- if (cull.enabled != cur_state.cull.enabled) {
- if (cull.enabled) {
- glEnable(GL_CULL_FACE);
- } else {
- glDisable(GL_CULL_FACE);
- }
- }
+ Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
- if (cull.mode != cur_state.cull.mode) {
+ if (UpdateValue(cur_state.cull.mode, cull.mode)) {
glCullFace(cull.mode);
}
- if (cull.front_face != cur_state.cull.front_face) {
+ if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
glFrontFace(cull.front_face);
}
}
void OpenGLState::ApplyColorMask() const {
- if (independant_blend.enabled) {
- for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- const auto& updated = color_mask[i];
- const auto& current = cur_state.color_mask[i];
- if (updated.red_enabled != current.red_enabled ||
- updated.green_enabled != current.green_enabled ||
- updated.blue_enabled != current.blue_enabled ||
- updated.alpha_enabled != current.alpha_enabled) {
- glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
- updated.blue_enabled, updated.alpha_enabled);
- }
- }
- } else {
- const auto& updated = color_mask[0];
- const auto& current = cur_state.color_mask[0];
+ for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+ const auto& updated = color_mask[i];
+ auto& current = cur_state.color_mask[i];
if (updated.red_enabled != current.red_enabled ||
updated.green_enabled != current.green_enabled ||
updated.blue_enabled != current.blue_enabled ||
updated.alpha_enabled != current.alpha_enabled) {
- glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
- updated.alpha_enabled);
+ current = updated;
+ glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
+ updated.blue_enabled, updated.alpha_enabled);
}
}
}
void OpenGLState::ApplyDepth() const {
- if (depth.test_enabled != cur_state.depth.test_enabled) {
- if (depth.test_enabled) {
- glEnable(GL_DEPTH_TEST);
- } else {
- glDisable(GL_DEPTH_TEST);
- }
- }
+ Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
- if (depth.test_func != cur_state.depth.test_func) {
+ if (cur_state.depth.test_func != depth.test_func) {
+ cur_state.depth.test_func = depth.test_func;
glDepthFunc(depth.test_func);
}
- if (depth.write_mask != cur_state.depth.write_mask) {
+ if (cur_state.depth.write_mask != depth.write_mask) {
+ cur_state.depth.write_mask = depth.write_mask;
glDepthMask(depth.write_mask);
}
}
void OpenGLState::ApplyPrimitiveRestart() const {
- if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
- if (primitive_restart.enabled) {
- glEnable(GL_PRIMITIVE_RESTART);
- } else {
- glDisable(GL_PRIMITIVE_RESTART);
- }
- }
+ Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
- if (primitive_restart.index != cur_state.primitive_restart.index) {
+ if (cur_state.primitive_restart.index != primitive_restart.index) {
+ cur_state.primitive_restart.index = primitive_restart.index;
glPrimitiveRestartIndex(primitive_restart.index);
}
}
void OpenGLState::ApplyStencilTest() const {
- if (stencil.test_enabled != cur_state.stencil.test_enabled) {
- if (stencil.test_enabled) {
- glEnable(GL_STENCIL_TEST);
- } else {
- glDisable(GL_STENCIL_TEST);
- }
- }
-
- const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
- if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
- config.test_mask != prev_config.test_mask) {
+ Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
+
+ const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
+ if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
+ current.test_mask != config.test_mask) {
+ current.test_func = config.test_func;
+ current.test_ref = config.test_ref;
+ current.test_mask = config.test_mask;
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
- if (config.action_depth_fail != prev_config.action_depth_fail ||
- config.action_depth_pass != prev_config.action_depth_pass ||
- config.action_stencil_fail != prev_config.action_stencil_fail) {
+ if (current.action_depth_fail != config.action_depth_fail ||
+ current.action_depth_pass != config.action_depth_pass ||
+ current.action_stencil_fail != config.action_stencil_fail) {
+ current.action_depth_fail = config.action_depth_fail;
+ current.action_depth_pass = config.action_depth_pass;
+ current.action_stencil_fail = config.action_stencil_fail;
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
- if (config.write_mask != prev_config.write_mask) {
+ if (current.write_mask != config.write_mask) {
+ current.write_mask = config.write_mask;
glStencilMaskSeparate(face, config.write_mask);
}
};
ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
-// Viewport does not affects glClearBuffer so emulate viewport using scissor test
-void OpenGLState::EmulateViewportWithScissor() {
- auto& current = viewports[0];
- if (current.scissor.enabled) {
- const GLint left = std::max(current.x, current.scissor.x);
- const GLint right =
- std::max(current.x + current.width, current.scissor.x + current.scissor.width);
- const GLint bottom = std::max(current.y, current.scissor.y);
- const GLint top =
- std::max(current.y + current.height, current.scissor.y + current.scissor.height);
- current.scissor.x = std::max(left, 0);
- current.scissor.y = std::max(bottom, 0);
- current.scissor.width = std::max(right - left, 0);
- current.scissor.height = std::max(top - bottom, 0);
- } else {
- current.scissor.enabled = true;
- current.scissor.x = current.x;
- current.scissor.y = current.y;
- current.scissor.width = current.width;
- current.scissor.height = current.height;
- }
-}
void OpenGLState::ApplyViewport() const {
- if (geometry_shaders.enabled) {
- for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports);
- i++) {
- const auto& current = cur_state.viewports[i];
- const auto& updated = viewports[i];
- if (updated.x != current.x || updated.y != current.y ||
- updated.width != current.width || updated.height != current.height) {
- glViewportIndexedf(
- i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
- static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height));
- }
- if (updated.depth_range_near != current.depth_range_near ||
- updated.depth_range_far != current.depth_range_far) {
- glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
- }
-
- if (updated.scissor.enabled != current.scissor.enabled) {
- if (updated.scissor.enabled) {
- glEnablei(GL_SCISSOR_TEST, i);
- } else {
- glDisablei(GL_SCISSOR_TEST, i);
- }
- }
-
- if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
- updated.scissor.width != current.scissor.width ||
- updated.scissor.height != current.scissor.height) {
- glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
- updated.scissor.height);
- }
- }
- } else {
- const auto& current = cur_state.viewports[0];
- const auto& updated = viewports[0];
- if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
- updated.height != current.height) {
- glViewport(updated.x, updated.y, updated.width, updated.height);
- }
-
- if (updated.depth_range_near != current.depth_range_near ||
- updated.depth_range_far != current.depth_range_far) {
- glDepthRange(updated.depth_range_near, updated.depth_range_far);
+ for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
+ const auto& updated = viewports[i];
+ auto& current = cur_state.viewports[i];
+
+ if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
+ current.height != updated.height) {
+ current.x = updated.x;
+ current.y = updated.y;
+ current.width = updated.width;
+ current.height = updated.height;
+ glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
+ static_cast<GLfloat>(updated.width),
+ static_cast<GLfloat>(updated.height));
}
-
- if (updated.scissor.enabled != current.scissor.enabled) {
- if (updated.scissor.enabled) {
- glEnable(GL_SCISSOR_TEST);
- } else {
- glDisable(GL_SCISSOR_TEST);
- }
+ if (current.depth_range_near != updated.depth_range_near ||
+ current.depth_range_far != updated.depth_range_far) {
+ current.depth_range_near = updated.depth_range_near;
+ current.depth_range_far = updated.depth_range_far;
+ glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
- if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
- updated.scissor.width != current.scissor.width ||
- updated.scissor.height != current.scissor.height) {
- glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
- updated.scissor.height);
+ Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
+
+ if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
+ current.scissor.width != updated.scissor.width ||
+ current.scissor.height != updated.scissor.height) {
+ current.scissor.x = updated.scissor.x;
+ current.scissor.y = updated.scissor.y;
+ current.scissor.width = updated.scissor.width;
+ current.scissor.height = updated.scissor.height;
+ glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
+ updated.scissor.height);
}
}
}
void OpenGLState::ApplyGlobalBlending() const {
- const Blend& current = cur_state.blend[0];
const Blend& updated = blend[0];
- if (updated.enabled != current.enabled) {
- if (updated.enabled) {
- glEnable(GL_BLEND);
- } else {
- glDisable(GL_BLEND);
- }
- }
- if (!updated.enabled) {
- return;
- }
- if (updated.src_rgb_func != current.src_rgb_func ||
- updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
- updated.dst_a_func != current.dst_a_func) {
+ Blend& current = cur_state.blend[0];
+
+ Enable(GL_BLEND, current.enabled, updated.enabled);
+
+ if (current.src_rgb_func != updated.src_rgb_func ||
+ current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
+ current.dst_a_func != updated.dst_a_func) {
+ current.src_rgb_func = updated.src_rgb_func;
+ current.dst_rgb_func = updated.dst_rgb_func;
+ current.src_a_func = updated.src_a_func;
+ current.dst_a_func = updated.dst_a_func;
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
- if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
+ if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
+ current.rgb_equation = updated.rgb_equation;
+ current.a_equation = updated.a_equation;
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
}
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
const Blend& updated = blend[target];
- const Blend& current = cur_state.blend[target];
- if (updated.enabled != current.enabled || force) {
- if (updated.enabled) {
- glEnablei(GL_BLEND, static_cast<GLuint>(target));
- } else {
- glDisablei(GL_BLEND, static_cast<GLuint>(target));
- }
+ Blend& current = cur_state.blend[target];
+
+ if (current.enabled != updated.enabled || force) {
+ current.enabled = updated.enabled;
+ Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
}
- if (updated.src_rgb_func != current.src_rgb_func ||
- updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
- updated.dst_a_func != current.dst_a_func) {
+ if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
+ current.dst_a_func),
+ std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+ updated.dst_a_func))) {
glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
- if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
+ if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
+ std::tie(updated.rgb_equation, updated.a_equation))) {
glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
@@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
void OpenGLState::ApplyBlending() const {
if (independant_blend.enabled) {
- for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- ApplyTargetBlending(i,
- independant_blend.enabled != cur_state.independant_blend.enabled);
+ const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
+ for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
+ ApplyTargetBlending(target, force);
}
} else {
ApplyGlobalBlending();
}
- if (blend_color.red != cur_state.blend_color.red ||
- blend_color.green != cur_state.blend_color.green ||
- blend_color.blue != cur_state.blend_color.blue ||
- blend_color.alpha != cur_state.blend_color.alpha) {
+ cur_state.independant_blend.enabled = independant_blend.enabled;
+
+ if (UpdateTie(
+ std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
+ cur_state.blend_color.blue, cur_state.blend_color.alpha),
+ std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
}
}
void OpenGLState::ApplyLogicOp() const {
- if (logic_op.enabled != cur_state.logic_op.enabled) {
- if (logic_op.enabled) {
- glEnable(GL_COLOR_LOGIC_OP);
- } else {
- glDisable(GL_COLOR_LOGIC_OP);
- }
- }
+ Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
- if (logic_op.operation != cur_state.logic_op.operation) {
+ if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
glLogicOp(logic_op.operation);
}
}
void OpenGLState::ApplyPolygonOffset() const {
- const bool fill_enable_changed =
- polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
- const bool line_enable_changed =
- polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
- const bool point_enable_changed =
- polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
- const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
- const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
- const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
-
- if (fill_enable_changed) {
- if (polygon_offset.fill_enable) {
- glEnable(GL_POLYGON_OFFSET_FILL);
- } else {
- glDisable(GL_POLYGON_OFFSET_FILL);
- }
- }
-
- if (line_enable_changed) {
- if (polygon_offset.line_enable) {
- glEnable(GL_POLYGON_OFFSET_LINE);
- } else {
- glDisable(GL_POLYGON_OFFSET_LINE);
- }
- }
-
- if (point_enable_changed) {
- if (polygon_offset.point_enable) {
- glEnable(GL_POLYGON_OFFSET_POINT);
- } else {
- glDisable(GL_POLYGON_OFFSET_POINT);
- }
- }
-
- if (factor_changed || units_changed || clamp_changed) {
+ Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
+ polygon_offset.fill_enable);
+ Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
+ polygon_offset.line_enable);
+ Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
+ polygon_offset.point_enable);
+
+ if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
+ cur_state.polygon_offset.clamp),
+ std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
} else {
- glPolygonOffset(polygon_offset.factor, polygon_offset.units);
UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
"Unimplemented Depth polygon offset clamp.");
+ glPolygonOffset(polygon_offset.factor, polygon_offset.units);
}
}
}
@@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
+ std::array<GLuint, Maxwell::NumTextureSamplers> textures;
for (std::size_t i = 0; i < std::size(texture_units); ++i) {
const auto& texture_unit = texture_units[i];
- const auto& cur_state_texture_unit = cur_state.texture_units[i];
+ auto& cur_state_texture_unit = cur_state.texture_units[i];
textures[i] = texture_unit.texture;
-
- if (textures[i] != cur_state_texture_unit.texture) {
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+ if (cur_state_texture_unit.texture == textures[i])
+ continue;
+ cur_state_texture_unit.texture = textures[i];
+ if (!has_delta) {
+ first = i;
+ has_delta = true;
}
+ last = i;
}
-
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
textures.data() + first);
@@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
+ std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
+
for (std::size_t i = 0; i < std::size(samplers); ++i) {
+ if (cur_state.texture_units[i].sampler == texture_units[i].sampler)
+ continue;
+ cur_state.texture_units[i].sampler = texture_units[i].sampler;
samplers[i] = texture_units[i].sampler;
- if (samplers[i] != cur_state.texture_units[i].sampler) {
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+ if (!has_delta) {
+ first = i;
+ has_delta = true;
}
+ last = i;
}
if (has_delta) {
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
@@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const {
}
}
-void OpenGLState::ApplyFramebufferState() const {
- if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
- glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
- }
- if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
- }
-}
-
-void OpenGLState::ApplyVertexArrayState() const {
- if (draw.vertex_array != cur_state.draw.vertex_array) {
- glBindVertexArray(draw.vertex_array);
- }
-}
-
-void OpenGLState::ApplyDepthClamp() const {
- if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
- depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
- return;
- }
- UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
- "Unimplemented Depth Clamp Separation!");
-
- if (depth_clamp.far_plane || depth_clamp.near_plane) {
- glEnable(GL_DEPTH_CLAMP);
- } else {
- glDisable(GL_DEPTH_CLAMP);
- }
-}
-
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexArrayState();
-
- // Shader program
- if (draw.shader_program != cur_state.draw.shader_program) {
- glUseProgram(draw.shader_program);
- }
-
- // Program pipeline
- if (draw.program_pipeline != cur_state.draw.program_pipeline) {
- glBindProgramPipeline(draw.program_pipeline);
- }
- // Clip distance
- for (std::size_t i = 0; i < clip_distance.size(); ++i) {
- if (clip_distance[i] != cur_state.clip_distance[i]) {
- if (clip_distance[i]) {
- glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
- } else {
- glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
- }
- }
- }
- // Point
- if (point.size != cur_state.point.size) {
- glPointSize(point.size);
- }
- if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
- glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
- fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
- }
- if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
- if (multisample_control.alpha_to_coverage) {
- glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
- } else {
- glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
- }
- }
- if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
- if (multisample_control.alpha_to_one) {
- glEnable(GL_SAMPLE_ALPHA_TO_ONE);
- } else {
- glDisable(GL_SAMPLE_ALPHA_TO_ONE);
- }
- }
+ ApplyShaderProgram();
+ ApplyProgramPipeline();
+ ApplyClipDistances();
+ ApplyPointSize();
+ ApplyFragmentColorClamp();
+ ApplyMultisample();
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
@@ -574,7 +531,28 @@ void OpenGLState::Apply() const {
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
- cur_state = *this;
+}
+
+void OpenGLState::EmulateViewportWithScissor() {
+ auto& current = viewports[0];
+ if (current.scissor.enabled) {
+ const GLint left = std::max(current.x, current.scissor.x);
+ const GLint right =
+ std::max(current.x + current.width, current.scissor.x + current.scissor.width);
+ const GLint bottom = std::max(current.y, current.scissor.y);
+ const GLint top =
+ std::max(current.y + current.height, current.scissor.y + current.scissor.height);
+ current.scissor.x = std::max(left, 0);
+ current.scissor.y = std::max(bottom, 0);
+ current.scissor.width = std::max(right - left, 0);
+ current.scissor.height = std::max(top - bottom, 0);
+ } else {
+ current.scissor.enabled = true;
+ current.scissor.x = current.x;
+ current.scissor.y = current.y;
+ current.scissor.width = current.width;
+ current.scissor.height = current.height;
+ }
}
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 9e1eda5b1..41418a7b8 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -54,10 +54,6 @@ public:
} depth_clamp; // GL_DEPTH_CLAMP
struct {
- bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
- } geometry_shaders;
-
- struct {
bool enabled; // GL_CULL_FACE
GLenum mode; // GL_CULL_FACE_MODE
GLenum front_face; // GL_FRONT_FACE
@@ -184,34 +180,26 @@ public:
static OpenGLState GetCurState() {
return cur_state;
}
+
static bool GetsRGBUsed() {
return s_rgb_used;
}
+
static void ClearsRGBUsed() {
s_rgb_used = false;
}
+
/// Apply this state as the current OpenGL state
void Apply() const;
- /// Apply only the state affecting the framebuffer
+
void ApplyFramebufferState() const;
- /// Apply only the state affecting the vertex array
void ApplyVertexArrayState() const;
- /// Set the initial OpenGL state
- static void ApplyDefaultState();
- /// Resets any references to the given resource
- OpenGLState& UnbindTexture(GLuint handle);
- OpenGLState& ResetSampler(GLuint handle);
- OpenGLState& ResetProgram(GLuint handle);
- OpenGLState& ResetPipeline(GLuint handle);
- OpenGLState& ResetVertexArray(GLuint handle);
- OpenGLState& ResetFramebuffer(GLuint handle);
- void EmulateViewportWithScissor();
-
-private:
- static OpenGLState cur_state;
- // Workaround for sRGB problems caused by
- // QT not supporting srgb output
- static bool s_rgb_used;
+ void ApplyShaderProgram() const;
+ void ApplyProgramPipeline() const;
+ void ApplyClipDistances() const;
+ void ApplyPointSize() const;
+ void ApplyFragmentColorClamp() const;
+ void ApplyMultisample() const;
void ApplySRgb() const;
void ApplyCulling() const;
void ApplyColorMask() const;
@@ -227,6 +215,26 @@ private:
void ApplySamplers() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
+
+ /// Set the initial OpenGL state
+ static void ApplyDefaultState();
+
+ /// Resets any references to the given resource
+ OpenGLState& UnbindTexture(GLuint handle);
+ OpenGLState& ResetSampler(GLuint handle);
+ OpenGLState& ResetProgram(GLuint handle);
+ OpenGLState& ResetPipeline(GLuint handle);
+ OpenGLState& ResetVertexArray(GLuint handle);
+ OpenGLState& ResetFramebuffer(GLuint handle);
+
+ /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
+ void EmulateViewportWithScissor();
+
+private:
+ static OpenGLState cur_state;
+
+ // Workaround for sRGB problems caused by QT not supporting srgb output
+ static bool s_rgb_used;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 5e3d862c6..d69cba9c3 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,7 +5,6 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
-#include <cstring>
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
@@ -266,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() {
}
// Initialize sRGB Usage
OpenGLState::ClearsRGBUsed();
- rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
+ rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index d84634cb3..84a987371 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -5,11 +5,39 @@
#include <string>
#include <fmt/format.h>
#include <glad/glad.h>
+#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
+BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
+
+BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
+
+void BindBuffersRangePushBuffer::Setup(GLuint first_) {
+ first = first_;
+ buffers.clear();
+ offsets.clear();
+ sizes.clear();
+}
+
+void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
+ buffers.push_back(buffer);
+ offsets.push_back(offset);
+ sizes.push_back(size);
+}
+
+void BindBuffersRangePushBuffer::Bind() const {
+ const std::size_t count{buffers.size()};
+ DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
+ if (count == 0) {
+ return;
+ }
+ glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
+ sizes.data());
+}
+
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
if (!GLAD_GL_KHR_debug) {
return; // We don't need to throw an error as this is just for debugging
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 1fcb6fc11..aef45c9dc 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -5,11 +5,31 @@
#pragma once
#include <string>
+#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
namespace OpenGL {
+class BindBuffersRangePushBuffer {
+public:
+ BindBuffersRangePushBuffer(GLenum target);
+ ~BindBuffersRangePushBuffer();
+
+ void Setup(GLuint first_);
+
+ void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
+
+ void Bind() const;
+
+private:
+ GLenum target;
+ GLuint first;
+ std::vector<GLuint> buffers;
+ std::vector<GLintptr> offsets;
+ std::vector<GLsizeiptr> sizes;
+};
+
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
} // namespace OpenGL \ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 95eab3fec..02a9f5ecb 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -10,6 +10,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "core/memory.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -19,8 +20,8 @@ namespace Vulkan {
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
std::size_t alignment, u8* host_ptr)
- : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
- host_ptr} {}
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+ alignment{alignment} {}
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
@@ -39,8 +40,7 @@ VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
VKBufferCache::~VKBufferCache() = default;
-u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
- bool cache) {
+u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 8b415744b..08b786aad 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -68,8 +68,7 @@ public:
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
- u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
- bool cache = true);
+ u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index a1e117443..13c46e5b8 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -21,7 +21,7 @@ public:
CommandBufferPool(const VKDevice& device)
: VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
- void Allocate(std::size_t begin, std::size_t end) {
+ void Allocate(std::size_t begin, std::size_t end) override {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const u32 graphics_family = device.GetGraphicsFamily();
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index 5bfe4cead..08ee86fa6 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -97,7 +97,7 @@ private:
class VKFenceWatch final : public VKResource {
public:
explicit VKFenceWatch();
- ~VKFenceWatch();
+ ~VKFenceWatch() override;
/// Waits for the fence to be released.
void Wait();
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
new file mode 100644
index 000000000..08279e562
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -0,0 +1,210 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/frontend/framebuffer_layout.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+
+namespace Vulkan {
+
+namespace {
+vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
+ if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
+ return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
+ }
+ const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
+ return format.format == vk::Format::eB8G8R8A8Unorm &&
+ format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
+ });
+ return found != formats.end() ? *found : formats[0];
+}
+
+vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) {
+ // Mailbox doesn't lock the application like fifo (vsync), prefer it
+ const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) {
+ return mode == vk::PresentModeKHR::eMailbox;
+ });
+ return found != modes.end() ? *found : vk::PresentModeKHR::eFifo;
+}
+
+vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
+ u32 height) {
+ constexpr auto undefined_size{std::numeric_limits<u32>::max()};
+ if (capabilities.currentExtent.width != undefined_size) {
+ return capabilities.currentExtent;
+ }
+ vk::Extent2D extent = {width, height};
+ extent.width = std::max(capabilities.minImageExtent.width,
+ std::min(capabilities.maxImageExtent.width, extent.width));
+ extent.height = std::max(capabilities.minImageExtent.height,
+ std::min(capabilities.maxImageExtent.height, extent.height));
+ return extent;
+}
+} // namespace
+
+VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
+ : surface{surface}, device{device} {}
+
+VKSwapchain::~VKSwapchain() = default;
+
+void VKSwapchain::Create(u32 width, u32 height) {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ const auto physical_device = device.GetPhysical();
+
+ const vk::SurfaceCapabilitiesKHR capabilities{
+ physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
+ if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
+ return;
+ }
+
+ dev.waitIdle(dld);
+ Destroy();
+
+ CreateSwapchain(capabilities, width, height);
+ CreateSemaphores();
+ CreateImageViews();
+
+ fences.resize(image_count, nullptr);
+}
+
+void VKSwapchain::AcquireNextImage() {
+ const auto dev{device.GetLogical()};
+ const auto& dld{device.GetDispatchLoader()};
+ dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
+ *present_semaphores[frame_index], {}, &image_index, dld);
+
+ if (auto& fence = fences[image_index]; fence) {
+ fence->Wait();
+ fence->Release();
+ fence = nullptr;
+ }
+}
+
+bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
+ const vk::Semaphore present_semaphore{*present_semaphores[frame_index]};
+ const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore};
+ const u32 wait_semaphore_count{render_semaphore ? 2U : 1U};
+ const auto& dld{device.GetDispatchLoader()};
+ const auto present_queue{device.GetPresentQueue()};
+ bool recreated = false;
+
+ const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1,
+ &swapchain.get(), &image_index, {});
+ switch (const auto result = present_queue.presentKHR(&present_info, dld); result) {
+ case vk::Result::eSuccess:
+ break;
+ case vk::Result::eErrorOutOfDateKHR:
+ if (current_width > 0 && current_height > 0) {
+ Create(current_width, current_height);
+ recreated = true;
+ }
+ break;
+ default:
+ LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!",
+ vk::to_string(result));
+ UNREACHABLE();
+ }
+
+ ASSERT(fences[image_index] == nullptr);
+ fences[image_index] = &fence;
+ frame_index = (frame_index + 1) % image_count;
+ return recreated;
+}
+
+bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
+ // TODO(Rodrigo): Handle framebuffer pixel format changes
+ return framebuffer.width != current_width || framebuffer.height != current_height;
+}
+
+void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
+ u32 height) {
+ const auto dev{device.GetLogical()};
+ const auto& dld{device.GetDispatchLoader()};
+ const auto physical_device{device.GetPhysical()};
+
+ const std::vector<vk::SurfaceFormatKHR> formats{
+ physical_device.getSurfaceFormatsKHR(surface, dld)};
+
+ const std::vector<vk::PresentModeKHR> present_modes{
+ physical_device.getSurfacePresentModesKHR(surface, dld)};
+
+ const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
+ const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
+ extent = ChooseSwapExtent(capabilities, width, height);
+
+ current_width = extent.width;
+ current_height = extent.height;
+
+ u32 requested_image_count{capabilities.minImageCount + 1};
+ if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
+ requested_image_count = capabilities.maxImageCount;
+ }
+
+ vk::SwapchainCreateInfoKHR swapchain_ci(
+ {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace,
+ extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {},
+ capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false,
+ {});
+
+ const u32 graphics_family{device.GetGraphicsFamily()};
+ const u32 present_family{device.GetPresentFamily()};
+ const std::array<u32, 2> queue_indices{graphics_family, present_family};
+ if (graphics_family != present_family) {
+ swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent;
+ swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
+ swapchain_ci.pQueueFamilyIndices = queue_indices.data();
+ } else {
+ swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
+ }
+
+ swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
+
+ images = dev.getSwapchainImagesKHR(*swapchain, dld);
+ image_count = static_cast<u32>(images.size());
+ image_format = surface_format.format;
+}
+
+void VKSwapchain::CreateSemaphores() {
+ const auto dev{device.GetLogical()};
+ const auto& dld{device.GetDispatchLoader()};
+
+ present_semaphores.resize(image_count);
+ for (std::size_t i = 0; i < image_count; i++) {
+ present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld);
+ }
+}
+
+void VKSwapchain::CreateImageViews() {
+ const auto dev{device.GetLogical()};
+ const auto& dld{device.GetDispatchLoader()};
+
+ image_views.resize(image_count);
+ for (std::size_t i = 0; i < image_count; i++) {
+ const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D,
+ image_format, {},
+ {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1});
+ image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld);
+ }
+}
+
+void VKSwapchain::Destroy() {
+ frame_index = 0;
+ present_semaphores.clear();
+ framebuffers.clear();
+ image_views.clear();
+ swapchain.reset();
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
new file mode 100644
index 000000000..2ad84f185
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -0,0 +1,92 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Layout {
+struct FramebufferLayout;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+
+class VKSwapchain {
+public:
+ explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device);
+ ~VKSwapchain();
+
+ /// Creates (or recreates) the swapchain with a given size.
+ void Create(u32 width, u32 height);
+
+ /// Acquires the next image in the swapchain, waits as needed.
+ void AcquireNextImage();
+
+ /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be
+ /// recreated. Takes responsability for the ownership of fence.
+ bool Present(vk::Semaphore render_semaphore, VKFence& fence);
+
+ /// Returns true when the framebuffer layout has changed.
+ bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
+
+ const vk::Extent2D& GetSize() const {
+ return extent;
+ }
+
+ u32 GetImageCount() const {
+ return image_count;
+ }
+
+ u32 GetImageIndex() const {
+ return image_index;
+ }
+
+ vk::Image GetImageIndex(u32 index) const {
+ return images[index];
+ }
+
+ vk::ImageView GetImageViewIndex(u32 index) const {
+ return *image_views[index];
+ }
+
+ vk::Format GetImageFormat() const {
+ return image_format;
+ }
+
+private:
+ void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
+ void CreateSemaphores();
+ void CreateImageViews();
+
+ void Destroy();
+
+ const vk::SurfaceKHR surface;
+ const VKDevice& device;
+
+ UniqueSwapchainKHR swapchain;
+
+ u32 image_count{};
+ std::vector<vk::Image> images;
+ std::vector<UniqueImageView> image_views;
+ std::vector<UniqueFramebuffer> framebuffers;
+ std::vector<VKFence*> fences;
+ std::vector<UniqueSemaphore> present_semaphores;
+
+ u32 image_index{};
+ u32 frame_index{};
+
+ vk::Format image_format{};
+ vk::Extent2D extent{};
+
+ u32 current_width{};
+ u32 current_height{};
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a99ae19bf..a775b402b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -7,7 +7,9 @@
#include <fmt/format.h>
#include "common/assert.h"
+#include "common/bit_field.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
- UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
-
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
+ const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
- bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
+ bb, instr,
+ GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
break;
}
case OpCode::Id::TEXS: {
@@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
- UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
@@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
- WriteTexInstructionFloat(bb, instr,
- GetTld4Code(instr, texture_type, depth_compare, is_array));
+ const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+ WriteTexInstructionFloat(
+ bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
break;
}
case OpCode::Id::TLD4S: {
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
+ MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
@@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
}
@@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset) {
+ Node array, Node depth_compare, u32 bias_offset,
+ std::vector<Node> aoffi) {
const bool is_array = array;
const bool is_shadow = depth_compare;
@@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
- MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
+ MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
}
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, bool depth_compare, bool is_array) {
- const bool lod_bias_enabled =
- (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
+ TextureProcessMode process_mode, bool depth_compare, bool is_array,
+ bool is_aoffi) {
+ const bool lod_bias_enabled{
+ (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
+
+ u64 parameter_register = instr.gpr20.Value();
+ if (lod_bias_enabled) {
+ ++parameter_register;
+ }
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
const Node array = is_array ? GetRegister(array_register) : nullptr;
+ std::vector<Node> aoffi;
+ if (is_aoffi) {
+ aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
+ }
+
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
- const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
- dc = GetRegister(depth_register);
+ dc = GetRegister(parameter_register++);
}
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
dc = GetRegister(depth_register);
}
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
- bool is_array) {
+ bool is_array, bool is_aoffi) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
- for (size_t i = 0; i < coord_count; ++i)
+ for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
+ }
+
+ u64 parameter_register = instr.gpr20.Value();
+ std::vector<Node> aoffi;
+ if (is_aoffi) {
+ aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+ }
+
+ Node dc{};
+ if (depth_compare) {
+ dc = GetRegister(parameter_register++);
+ }
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
+ MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
@@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
return {coord_count, total_coord_count};
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
+ bool is_tld4) {
+ const auto [coord_offsets, size, wrap_value,
+ diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
+ if (is_tld4) {
+ return {{0, 8, 16}, 6, 32, 64};
+ } else {
+ return {{0, 4, 8}, 4, 8, 16};
+ }
+ }();
+ const u32 mask = (1U << size) - 1;
+
+ std::vector<Node> aoffi;
+ aoffi.reserve(coord_count);
+
+ const auto aoffi_immediate{
+ TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
+ if (!aoffi_immediate) {
+ // Variable access, not supported on AMD.
+ LOG_WARNING(HW_GPU,
+ "AOFFI constant folding failed, some hardware might have graphical issues");
+ for (std::size_t coord = 0; coord < coord_count; ++coord) {
+ const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
+ const Node condition =
+ Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
+ const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
+ aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
+ }
+ return aoffi;
+ }
+
+ for (std::size_t coord = 0; coord < coord_count; ++coord) {
+ s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
+ if (value >= wrap_value) {
+ value -= diff_value;
+ }
+ aoffi.push_back(Immediate(value));
+ }
+ return aoffi;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index c34843307..db15c0718 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -29,39 +29,55 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
const bool is_signed_b = instr.xmad.sign_b == 1;
const bool is_signed_c = is_signed_a;
- auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
+ auto [is_merge, is_psl, is_high_b, mode, op_b,
+ op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::XMAD_CR:
return {instr.xmad.merge_56,
+ instr.xmad.product_shift_left_second,
+ instr.xmad.high_b,
+ instr.xmad.mode_cbf,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RR:
- return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+ return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
+ instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RC:
- return {false, GetRegister(instr.gpr39),
+ return {false,
+ false,
+ instr.xmad.high_b,
+ instr.xmad.mode_cbf,
+ GetRegister(instr.gpr39),
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::XMAD_IMM:
- return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
+ return {instr.xmad.merge_37,
+ instr.xmad.product_shift_left,
+ false,
+ instr.xmad.mode,
+ Immediate(static_cast<u32>(instr.xmad.imm20_16)),
GetRegister(instr.gpr39)};
}
UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
- return {false, Immediate(0), Immediate(0)};
+ return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
}();
op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
const Node original_b = op_b;
- op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);
+ op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);
// TODO(Rodrigo): Use an appropiate sign for this operation
Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
- if (instr.xmad.product_shift_left) {
+ if (is_psl) {
product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
}
+ SetTemporal(bb, 0, product);
+ product = GetTemporal(0);
const Node original_c = op_c;
+ const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
op_c = [&]() {
- switch (instr.xmad.mode) {
+ switch (set_mode) {
case Tegra::Shader::XmadMode::None:
return original_c;
case Tegra::Shader::XmadMode::CLo:
@@ -80,8 +96,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
}
}();
+ SetTemporal(bb, 1, op_c);
+ op_c = GetTemporal(1);
+
// TODO(Rodrigo): Use an appropiate sign for this operation
Node sum = Operation(OperationCode::IAdd, product, op_c);
+ SetTemporal(bb, 2, sum);
+ sum = GetTemporal(2);
if (is_merge) {
const Node a = BitfieldExtract(sum, 0, 16);
const Node b =
@@ -95,4 +116,4 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 5bc3a3900..4888998d3 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -7,6 +7,7 @@
#include <array>
#include <cstring>
#include <map>
+#include <optional>
#include <set>
#include <string>
#include <tuple>
@@ -290,6 +291,7 @@ struct MetaTexture {
const Sampler& sampler;
Node array{};
Node depth_compare{};
+ std::vector<Node> aoffi;
Node bias{};
Node lod{};
Node component{};
@@ -741,14 +743,14 @@ private:
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array);
+ bool is_array, bool is_aoffi);
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
bool is_array);
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool depth_compare, bool is_array);
+ bool depth_compare, bool is_array, bool is_aoffi);
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool is_array);
@@ -757,9 +759,11 @@ private:
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
+ std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
+
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset);
+ Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
u64 byte_height);
@@ -773,6 +777,8 @@ private:
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
+ std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
template <typename... T>
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 33b071747..4505667ff 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -6,6 +6,7 @@
#include <utility>
#include <variant>
+#include "common/common_types.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -14,7 +15,7 @@ namespace {
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
- const Node node = code[cursor];
+ const Node node = code.at(cursor);
if (const auto operation = std::get_if<OperationNode>(node)) {
if (operation->GetCode() == operation_code)
return {node, cursor};
@@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
return nullptr;
}
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+ // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
+ // that it uses as operand
+ const auto [found, found_cursor] =
+ TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
+ if (!found) {
+ return {};
+ }
+ if (const auto immediate = std::get_if<ImmediateNode>(found)) {
+ return immediate->GetValue();
+ }
+ return {};
+}
+
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) {
for (; cursor >= 0; --cursor) {
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
index 5e439f036..82050bd51 100644
--- a/src/video_core/textures/convert.cpp
+++ b/src/video_core/textures/convert.cpp
@@ -10,6 +10,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "video_core/surface.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/convert.h"
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
index 07cd8b5da..12542e71c 100644
--- a/src/video_core/textures/convert.h
+++ b/src/video_core/textures/convert.h
@@ -5,7 +5,10 @@
#pragma once
#include "common/common_types.h"
-#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+enum class PixelFormat;
+}
namespace Tegra::Texture {
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 93ecc6e31..bea0d5bc2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -7,9 +7,7 @@
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
-#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "video_core/memory_manager.h"
namespace Tegra::Texture {