aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt28
-rw-r--r--src/video_core/command_processor.cpp149
-rw-r--r--src/video_core/command_processor.h37
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp64
-rw-r--r--src/video_core/debug_utils/debug_utils.h163
-rw-r--r--src/video_core/engines/fermi_2d.cpp13
-rw-r--r--src/video_core/engines/fermi_2d.h22
-rw-r--r--src/video_core/engines/maxwell_3d.cpp343
-rw-r--r--src/video_core/engines/maxwell_3d.h508
-rw-r--r--src/video_core/engines/maxwell_compute.cpp13
-rw-r--r--src/video_core/engines/maxwell_compute.h22
-rw-r--r--src/video_core/gpu.cpp25
-rw-r--r--src/video_core/gpu.h106
-rw-r--r--src/video_core/memory_manager.cpp110
-rw-r--r--src/video_core/memory_manager.h49
-rw-r--r--src/video_core/rasterizer_interface.h63
-rw-r--r--src/video_core/renderer_base.cpp7
-rw-r--r--src/video_core/renderer_base.h39
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp349
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h172
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp1352
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h360
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h85
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp58
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h27
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h66
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp154
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h8
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp125
-rw-r--r--src/video_core/renderer_opengl/gl_state.h33
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp182
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h34
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp229
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h12
-rw-r--r--src/video_core/textures/decoders.cpp105
-rw-r--r--src/video_core/textures/decoders.h26
-rw-r--r--src/video_core/textures/texture.h61
-rw-r--r--src/video_core/utils.h114
-rw-r--r--src/video_core/video_core.cpp2
-rw-r--r--src/video_core/video_core.h4
41 files changed, 5040 insertions, 299 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 69f2b4afd..3dab81769 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,13 +1,41 @@
add_library(video_core STATIC
+ command_processor.cpp
+ command_processor.h
+ debug_utils/debug_utils.cpp
+ debug_utils/debug_utils.h
+ engines/fermi_2d.cpp
+ engines/fermi_2d.h
+ engines/maxwell_3d.cpp
+ engines/maxwell_3d.h
+ engines/maxwell_compute.cpp
+ engines/maxwell_compute.h
+ gpu.cpp
+ gpu.h
+ memory_manager.cpp
+ memory_manager.h
+ rasterizer_interface.h
renderer_base.cpp
renderer_base.h
+ renderer_opengl/gl_rasterizer.cpp
+ renderer_opengl/gl_rasterizer.h
+ renderer_opengl/gl_rasterizer_cache.cpp
+ renderer_opengl/gl_rasterizer_cache.h
renderer_opengl/gl_resource_manager.h
+ renderer_opengl/gl_shader_decompiler.cpp
+ renderer_opengl/gl_shader_decompiler.h
+ renderer_opengl/gl_shader_gen.cpp
+ renderer_opengl/gl_shader_gen.h
renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state.cpp
renderer_opengl/gl_state.h
+ renderer_opengl/gl_stream_buffer.cpp
+ renderer_opengl/gl_stream_buffer.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
+ textures/decoders.cpp
+ textures/decoders.h
+ textures/texture.h
utils.h
video_core.cpp
video_core.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
new file mode 100644
index 000000000..d4cdb4ab2
--- /dev/null
+++ b/src/video_core/command_processor.cpp
@@ -0,0 +1,149 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <utility>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/microprofile.h"
+#include "common/vector_math.h"
+#include "core/memory.h"
+#include "core/tracer/recorder.h"
+#include "video_core/command_processor.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_compute.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_base.h"
+#include "video_core/video_core.h"
+
+namespace Tegra {
+
+/// Method ids that GPU::WriteReg treats specially instead of forwarding them to the engine bound
+/// to the subchannel.
+enum class BufferMethods {
+ /// Binds the subchannel to the engine id passed as the value.
+ BindObject = 0,
+ /// No dedicated handling in GPU::WriteReg; it ends up in the generic "not implemented" path.
+ SetGraphMacroCode = 0x45,
+ /// Appends one code word to the macro that is currently being uploaded.
+ SetGraphMacroCodeArg = 0x46,
+ /// Starts the upload of a new macro; the value selects the macro entry.
+ SetGraphMacroEntry = 0x47,
+ /// Exclusive upper bound: every method id below this value is considered a buffer method.
+ CountBufferMethods = 0x100,
+};
+
+/**
+ * Handles one method write taken from the command stream.
+ *
+ * Macro upload and engine binding methods are consumed here; every other method at or above
+ * BufferMethods::CountBufferMethods is forwarded to the engine bound to the subchannel.
+ * @param method Method (register) id being written.
+ * @param subchannel Subchannel the write was issued on.
+ * @param value Value to write.
+ * @param remaining_params Number of arguments of the same command that still follow this one.
+ */
+void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
+    LOG_WARNING(HW_GPU, "Processing method %08X on subchannel %u value %08X remaining params %u",
+                method, subchannel, value, remaining_params);
+
+    switch (method) {
+    case static_cast<u32>(BufferMethods::SetGraphMacroEntry): {
+        // A new macro upload begins: remember its entry and drop any stale code words.
+        LOG_DEBUG(HW_GPU, "Uploading GPU macro %08X", value);
+        current_macro_entry = value;
+        current_macro_code.clear();
+        return;
+    }
+    case static_cast<u32>(BufferMethods::SetGraphMacroCodeArg): {
+        // Collect another code word of the macro being uploaded.
+        current_macro_code.push_back(value);
+
+        // Once the last word of the command has arrived, hand the code to the 3D engine.
+        if (remaining_params == 0) {
+            maxwell_3d->SubmitMacroCode(current_macro_entry, std::move(current_macro_code));
+            current_macro_entry = InvalidGraphMacroEntry;
+            current_macro_code.clear();
+        }
+        return;
+    }
+    case static_cast<u32>(BufferMethods::BindObject): {
+        // Associate the subchannel with the requested engine id; rebinding is not expected.
+        LOG_DEBUG(HW_GPU, "Binding subchannel %u to engine %u", subchannel, value);
+        ASSERT(bound_engines.find(subchannel) == bound_engines.end());
+        bound_engines[subchannel] = static_cast<EngineID>(value);
+        return;
+    }
+    default:
+        break;
+    }
+
+    if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+        // TODO(Subv): Research and implement these methods.
+        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+        return;
+    }
+
+    // Everything else is an engine register write, which requires a bound engine.
+    ASSERT(bound_engines.find(subchannel) != bound_engines.end());
+
+    switch (bound_engines[subchannel]) {
+    case EngineID::FERMI_TWOD_A:
+        fermi_2d->WriteReg(method, value);
+        break;
+    case EngineID::MAXWELL_B:
+        maxwell_3d->WriteReg(method, value, remaining_params);
+        break;
+    case EngineID::MAXWELL_COMPUTE_B:
+        maxwell_compute->WriteReg(method, value);
+        break;
+    default:
+        UNIMPLEMENTED();
+    }
+}
+
+/**
+ * Walks a command list and issues a WriteReg call for every argument it contains.
+ * @param address GPU virtual address of the first command word.
+ * @param size Length of the list, counted in 32-bit command words.
+ */
+void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
+    // TODO(Subv): PhysicalToVirtualAddress is a misnomer, it converts a GPU VAddr into an
+    // application VAddr.
+    const VAddr head_address = memory_manager->PhysicalToVirtualAddress(address);
+    const auto end_address = head_address + size * sizeof(CommandHeader);
+    VAddr current_addr = head_address;
+
+    // Consumes the next word of the list as the argument of a single register write.
+    const auto submit_next_word = [&](u32 method, u32 subchannel, u32 remaining_params) {
+        WriteReg(method, subchannel, Memory::Read32(current_addr), remaining_params);
+        current_addr += sizeof(u32);
+    };
+
+    while (current_addr < end_address) {
+        const CommandHeader header = {Memory::Read32(current_addr)};
+        current_addr += sizeof(u32);
+
+        const u32 method = header.method.Value();
+        const u32 subchannel = header.subchannel.Value();
+        const u32 arg_count = header.arg_count.Value();
+
+        switch (header.mode.Value()) {
+        case SubmissionMode::IncreasingOld:
+        case SubmissionMode::Increasing:
+            // Every argument targets the method following the previous one.
+            for (u32 index = 0; index < arg_count; ++index) {
+                submit_next_word(method + index, subchannel, arg_count - index - 1);
+            }
+            break;
+        case SubmissionMode::NonIncreasingOld:
+        case SubmissionMode::NonIncreasing:
+            // All arguments target the very same method.
+            for (u32 index = 0; index < arg_count; ++index) {
+                submit_next_word(method, subchannel, arg_count - index - 1);
+            }
+            break;
+        case SubmissionMode::IncreaseOnce:
+            ASSERT(header.arg_count.Value() >= 1);
+
+            // The first argument goes to the given method, all remaining ones to the method
+            // right after it.
+            submit_next_word(method, subchannel, arg_count - 1);
+            for (u32 index = 1; index < arg_count; ++index) {
+                submit_next_word(method + 1, subchannel, arg_count - index - 1);
+            }
+            break;
+        case SubmissionMode::Inline:
+            // The register value is stored in the bits 16-28 as an immediate
+            WriteReg(method, subchannel, header.inline_data.Value(), 0);
+            break;
+        default:
+            UNIMPLEMENTED();
+        }
+    }
+}
+
+} // namespace Tegra
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
new file mode 100644
index 000000000..f7214ffec
--- /dev/null
+++ b/src/video_core/command_processor.h
@@ -0,0 +1,37 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+
+/// Value of the mode field of a CommandHeader; selects how GPU::ProcessCommandList maps the
+/// arguments of a command to method ids.
+enum class SubmissionMode : u32 {
+ /// Handled exactly like Increasing.
+ IncreasingOld = 0,
+ /// Argument i is written to method + i.
+ Increasing = 1,
+ /// Handled exactly like NonIncreasing.
+ NonIncreasingOld = 2,
+ /// Every argument is written to the same method.
+ NonIncreasing = 3,
+ /// The value is taken from the header itself (inline_data); no argument words follow.
+ Inline = 4,
+ /// The first argument is written to method, all following ones to method + 1.
+ IncreaseOnce = 5
+};
+
+/// A single 32-bit command header word, decoded through overlapping bit fields.
+union CommandHeader {
+ /// Raw header word.
+ u32 hex;
+
+ /// Bits 0-12: method id the command starts at.
+ BitField<0, 13, u32> method;
+ /// Bits 13-15: subchannel the command is issued on.
+ BitField<13, 3, u32> subchannel;
+
+ // The next two fields alias the same bits (16-28). GPU::ProcessCommandList reads inline_data
+ // for SubmissionMode::Inline and arg_count for every other mode.
+ BitField<16, 13, u32> arg_count;
+ BitField<16, 13, u32> inline_data;
+
+ /// Bits 29-31: submission mode.
+ BitField<29, 3, SubmissionMode> mode;
+};
+// The header is constructed directly from a word read out of memory, so it has to be exactly
+// one u32 wide.
+static_assert(std::is_standard_layout<CommandHeader>::value == true,
+ "CommandHeader does not use standard layout");
+static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
+
+} // namespace Tegra
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
new file mode 100644
index 000000000..22d44aab2
--- /dev/null
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -0,0 +1,64 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <condition_variable>
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <map>
+#include <mutex>
+#include <string>
+
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/color.h"
+#include "common/common_types.h"
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "common/vector_math.h"
+#include "video_core/debug_utils/debug_utils.h"
+
+namespace Tegra {
+
+/// Records the breakpoint that was hit, notifies every registered observer and then blocks the
+/// calling thread until at_breakpoint is cleared again (see Resume()).
+void DebugContext::DoOnEvent(Event event, void* data) {
+    std::unique_lock<std::mutex> lock(breakpoint_mutex);
+
+    // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
+    // show on debug widgets
+
+    // TODO: Should stop the CPU thread here once we multithread emulation.
+
+    active_breakpoint = event;
+    at_breakpoint = true;
+
+    // Let every observer know which breakpoint was reached.
+    for (BreakPointObserver* observer : breakpoint_observers) {
+        observer->OnMaxwellBreakPointHit(event, data);
+    }
+
+    // Sleep until some other thread clears at_breakpoint through Resume().
+    resume_from_breakpoint.wait(lock, [this] { return !at_breakpoint; });
+}
+
+/// Notifies every registered observer, clears the breakpoint state and wakes the thread that is
+/// waiting inside DoOnEvent().
+void DebugContext::Resume() {
+    std::unique_lock<std::mutex> lock(breakpoint_mutex);
+
+    // Announce the upcoming resume to all observers.
+    for (BreakPointObserver* observer : breakpoint_observers) {
+        observer->OnMaxwellResume();
+    }
+
+    // Allow the waiting thread (i.e. OnEvent()) to continue.
+    at_breakpoint = false;
+
+    // Release the mutex before notifying, as the woken thread has to reacquire it.
+    lock.unlock();
+    resume_from_breakpoint.notify_one();
+}
+
+} // namespace Tegra
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
new file mode 100644
index 000000000..bbba8e380
--- /dev/null
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -0,0 +1,163 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <condition_variable>
+#include <iterator>
+#include <list>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "common/vector_math.h"
+
+namespace Tegra {
+
+/**
+ * Breakpoint support for the GPU emulation: the emulation core reports events through OnEvent(),
+ * and registered BreakPointObservers are notified whenever an event with an enabled breakpoint
+ * occurs.
+ */
+class DebugContext {
+public:
+    enum class Event {
+        FirstEvent = 0,
+
+        MaxwellCommandLoaded = FirstEvent,
+        MaxwellCommandProcessed,
+        IncomingPrimitiveBatch,
+        FinishedPrimitiveBatch,
+
+        NumEvents
+    };
+
+    /**
+     * Inherit from this class to be notified of events registered to some debug context.
+     * Most importantly this is used for our debugger GUI.
+     *
+     * To implement event handling, override the OnMaxwellBreakPointHit and OnMaxwellResume methods.
+     * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state
+     * access
+     * @todo Evaluate an alternative interface, in which there is only one managing observer and
+     * multiple child observers running (by design) on the same thread.
+     */
+    class BreakPointObserver {
+    public:
+        /// Constructs the object such that it observes events of the given DebugContext.
+        BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
+            : context_weak(debug_context) {
+            std::lock_guard<std::mutex> lock(debug_context->breakpoint_mutex);
+            debug_context->breakpoint_observers.push_back(this);
+        }
+
+        /// Unregisters the observer from its context, if the context is still alive.
+        virtual ~BreakPointObserver() {
+            auto context = context_weak.lock();
+            if (!context)
+                return;
+
+            bool was_last_observer = false;
+            {
+                std::lock_guard<std::mutex> lock(context->breakpoint_mutex);
+                context->breakpoint_observers.remove(this);
+                was_last_observer = context->breakpoint_observers.empty();
+            }
+
+            // If we are the last observer to be destroyed, tell the debugger context that
+            // it is free to continue. In particular, this is required for a proper yuzu
+            // shutdown, when the emulation thread is waiting at a breakpoint.
+            // Resume() locks breakpoint_mutex itself and std::mutex is not recursive, so the
+            // lock above must already have been released at this point.
+            if (was_last_observer)
+                context->Resume();
+        }
+
+        /**
+         * Action to perform when a breakpoint was reached.
+         * @param event Type of event which triggered the breakpoint
+         * @param data Optional data pointer (if unused, this is a nullptr)
+         * @note This function will perform nothing unless it is overridden in the child class.
+         */
+        virtual void OnMaxwellBreakPointHit(Event event, void* data) {}
+
+        /**
+         * Action to perform when emulation is resumed from a breakpoint.
+         * @note This function will perform nothing unless it is overridden in the child class.
+         */
+        virtual void OnMaxwellResume() {}
+
+    protected:
+        /**
+         * Weak context pointer. This need not be valid, so when requesting a shared_ptr via
+         * context_weak.lock(), always compare the result against nullptr.
+         */
+        std::weak_ptr<DebugContext> context_weak;
+    };
+
+    /**
+     * Simple structure defining a breakpoint state
+     */
+    struct BreakPoint {
+        bool enabled = false;
+    };
+
+    /**
+     * Static constructor used to create a shared_ptr of a DebugContext.
+     */
+    static std::shared_ptr<DebugContext> Construct() {
+        return std::shared_ptr<DebugContext>(new DebugContext);
+    }
+
+    /**
+     * Used by the emulation core when a given event has happened. If a breakpoint has been set
+     * for this event, OnEvent calls the event handlers of the registered breakpoint observers.
+     * The current thread then is halted until Resume() is called from another thread (or until
+     * emulation is stopped).
+     * @param event Event which has happened
+     * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until
+     * Resume() is called.
+     */
+    void OnEvent(Event event, void* data) {
+        // This check is left in the header to allow the compiler to inline it.
+        if (!breakpoints[static_cast<int>(event)].enabled)
+            return;
+        // For the rest of event handling, call a separate function.
+        DoOnEvent(event, data);
+    }
+
+    /// Out-of-line part of OnEvent(): notifies the observers and blocks until resumed.
+    void DoOnEvent(Event event, void* data);
+
+    /**
+     * Resume from the current breakpoint.
+     * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock.
+     * Calling from any other thread is safe.
+     */
+    void Resume();
+
+    /**
+     * Delete all set breakpoints and resume emulation.
+     */
+    void ClearBreakpoints() {
+        for (auto& bp : breakpoints) {
+            bp.enabled = false;
+        }
+        Resume();
+    }
+
+    // TODO: Evaluate if access to these members should be hidden behind a public interface.
+    std::array<BreakPoint, static_cast<int>(Event::NumEvents)> breakpoints;
+    /// Event of the most recently hit breakpoint. Initialized so that reading it before any
+    /// breakpoint was hit yields a defined value.
+    Event active_breakpoint = Event::FirstEvent;
+    bool at_breakpoint = false;
+
+private:
+    /**
+     * Private default constructor to make sure people always construct this through Construct()
+     * instead.
+     */
+    DebugContext() = default;
+
+    /// Mutex protecting current breakpoint state and the observer list.
+    std::mutex breakpoint_mutex;
+
+    /// Used by OnEvent to wait for resumption.
+    std::condition_variable resume_from_breakpoint;
+
+    /// List of registered observers
+    std::list<BreakPointObserver*> breakpoint_observers;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
new file mode 100644
index 000000000..7aab163dc
--- /dev/null
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -0,0 +1,13 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/engines/fermi_2d.h"
+
+namespace Tegra {
+namespace Engines {
+
+/// Accepts a register write for the 2D engine. The body is empty, so the write is discarded.
+void Fermi2D::WriteReg(u32 method, u32 value) {}
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
new file mode 100644
index 000000000..8967ddede
--- /dev/null
+++ b/src/video_core/engines/fermi_2d.h
@@ -0,0 +1,22 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+namespace Engines {
+
+/// Engine class receiving the register writes that GPU::WriteReg dispatches for
+/// EngineID::FERMI_TWOD_A. It holds no state.
+class Fermi2D final {
+public:
+ Fermi2D() = default;
+ ~Fermi2D() = default;
+
+ /// Write the value to the register identified by method.
+ void WriteReg(u32 method, u32 value);
+};
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
new file mode 100644
index 000000000..986165c6d
--- /dev/null
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -0,0 +1,343 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cinttypes>
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/debug_utils/debug_utils.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/textures/decoders.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra {
+namespace Engines {
+
+/// First register id that is actually a Macro call.
+constexpr u32 MacroRegistersStart = 0xE00;
+
+/// Hand-written replacements for known macros, keyed by the method id that triggers the macro.
+/// Each entry holds a name string (presumably for diagnostics; it is not read in this file), the
+/// number of parameters the macro expects and the member function that CallMacroMethod invokes.
+const std::unordered_map<u32, Maxwell3D::MethodInfo> Maxwell3D::method_handlers = {
+ {0xE1A, {"BindTextureInfoBuffer", 1, &Maxwell3D::BindTextureInfoBuffer}},
+ {0xE24, {"SetShader", 5, &Maxwell3D::SetShader}},
+ {0xE2A, {"BindStorageBuffer", 1, &Maxwell3D::BindStorageBuffer}},
+};
+
+/// Creates the engine and stores the memory manager it uses to translate GPU addresses.
+Maxwell3D::Maxwell3D(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+
+/// Stores the code of an uploaded macro under the method id that later triggers the macro.
+void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) {
+    // Entries are spaced two method ids apart, starting at MacroRegistersStart.
+    const u32 macro_method = entry * 2 + MacroRegistersStart;
+    uploaded_macros[macro_method] = std::move(code);
+}
+
+/**
+ * Runs the hand-written handler that stands in for the macro triggered by the given method.
+ * @param method Method id of the macro to run.
+ * @param parameters Parameters collected for this macro call.
+ */
+void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
+    // TODO(Subv): Write an interpreter for the uploaded macro code instead of relying on
+    // hand-written handlers.
+
+    // A macro can only be called once its code has been submitted.
+    ASSERT_MSG(uploaded_macros.find(method) != uploaded_macros.end(), "Macro %08X was not uploaded",
+               method);
+
+    const auto handler_entry = method_handlers.find(method);
+    ASSERT_MSG(handler_entry != method_handlers.end(), "Unhandled method call %08X", method);
+
+    // The handler expects an exact number of parameters.
+    const MethodInfo& info = handler_entry->second;
+    ASSERT(info.arguments == parameters.size());
+
+    (this->*info.handler)(parameters);
+
+    // The call is finished: forget the active macro and the parameters gathered for it.
+    executing_macro = 0;
+    macro_params.clear();
+}
+
+/**
+ * Handles a register write directed at the 3D engine.
+ *
+ * Writes to methods at or above MacroRegistersStart collect macro parameters and, once the last
+ * parameter of the command has arrived, run the macro. All other writes are stored in the
+ * register array and may trigger additional processing depending on the register.
+ * @param method Register index being written.
+ * @param value Value to write.
+ * @param remaining_params Number of arguments of the same command that still follow this one.
+ */
+void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
+ ASSERT_MSG(method < Regs::NUM_REGS,
+ "Invalid Maxwell3D register, increase the size of the Regs structure");
+
+ auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
+
+ // It is an error to write to a register other than the current macro's ARG register before it
+ // has finished execution.
+ if (executing_macro != 0) {
+ ASSERT(method == executing_macro + 1);
+ }
+
+ // Methods after 0xE00 are special, they're actually triggers for some microcode that was
+ // uploaded to the GPU during initialization.
+ if (method >= MacroRegistersStart) {
+ // We're trying to execute a macro
+ if (executing_macro == 0) {
+ // A macro call must begin by writing the macro method's register, not its argument.
+ ASSERT_MSG((method % 2) == 0,
+ "Can't start macro execution by writing to the ARGS register");
+ executing_macro = method;
+ }
+
+ macro_params.push_back(value);
+
+ // Call the macro when there are no more parameters in the command buffer
+ if (remaining_params == 0) {
+ CallMacroMethod(executing_macro, macro_params);
+ }
+ // Macro writes are never stored in the register array.
+ return;
+ }
+
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
+ }
+
+ // Store the value first so that the handlers below already observe the updated register.
+ regs.reg_array[method] = value;
+
+// Converts the byte offset of a Regs field into its index within reg_array.
+#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
+
+ switch (method) {
+ case MAXWELL3D_REG_INDEX(code_address.code_address_high):
+ case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
+ // Note: For some reason games (like Puyo Puyo Tetris) seem to write 0 to the CODE_ADDRESS
+ // register, we do not currently know if that's intended or a bug, so we assert it lest
+ // stuff breaks in other places (like the shader address calculation).
+ ASSERT_MSG(regs.code_address.CodeAddress() == 0, "Unexpected CODE_ADDRESS register value.");
+ break;
+ }
+ // A write to any of the cb_data registers stores the value into the current const buffer.
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
+ ProcessCBData(value);
+ break;
+ }
+ // Each cb_bind entry belongs to one shader stage; writing its config performs the bind.
+ case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
+ ProcessCBBind(Regs::ShaderStage::Vertex);
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(cb_bind[1].raw_config): {
+ ProcessCBBind(Regs::ShaderStage::TesselationControl);
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(cb_bind[2].raw_config): {
+ ProcessCBBind(Regs::ShaderStage::TesselationEval);
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(cb_bind[3].raw_config): {
+ ProcessCBBind(Regs::ShaderStage::Geometry);
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(cb_bind[4].raw_config): {
+ ProcessCBBind(Regs::ShaderStage::Fragment);
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): {
+ DrawArrays();
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(query.query_get): {
+ ProcessQueryGet();
+ break;
+ }
+ default:
+ break;
+ }
+
+#undef MAXWELL3D_REG_INDEX
+
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
+ }
+}
+
+/// Handles a write to the query_get register according to the query mode stored in it.
+void Maxwell3D::ProcessQueryGet() {
+    // The query address is a GPU VAddr, so it has to be converted to an application VAddr
+    // before memory can be written through it.
+    const GPUVAddr sequence_address = regs.query.QueryAddress();
+    const VAddr address = memory_manager.PhysicalToVirtualAddress(sequence_address);
+
+    const auto mode = regs.query.query_get.mode.Value();
+    if (mode == Regs::QueryMode::Write) {
+        // Store the current query sequence at the requested address.
+        const u32 sequence = regs.query.query_sequence;
+        Memory::Write32(address, sequence);
+        return;
+    }
+
+    UNIMPLEMENTED_MSG("Query mode %u not implemented", static_cast<u32>(mode));
+}
+
+/// Reacts to a draw request. Nothing is drawn; only the debugger events surrounding a primitive
+/// batch are raised.
+void Maxwell3D::DrawArrays() {
+    LOG_WARNING(HW_GPU, "Game requested a DrawArrays, ignoring");
+
+    auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
+    if (!debug_context) {
+        return;
+    }
+
+    // Report the start and the end of the (skipped) batch back to back.
+    debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
+    debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+}
+
+/**
+ * Macro handler: selects the texture info buffer of a shader stage as the current const buffer.
+ *
+ * Parameters description:
+ *     [0] = Shader stage, usually 4 for FragmentShader
+ */
+void Maxwell3D::BindTextureInfoBuffer(const std::vector<u32>& parameters) {
+    const u32 stage_index = parameters[0];
+
+    // Perform the same operations as the real macro code.
+    // The address register is widened before shifting so that no upper bits are lost.
+    const GPUVAddr buffer_address =
+        static_cast<GPUVAddr>(regs.tex_info_buffers.address[stage_index]) << 8;
+    const u32 buffer_size = regs.tex_info_buffers.size[stage_index];
+
+    auto& const_buffer = regs.const_buffer;
+    const_buffer.cb_size = buffer_size;
+    const_buffer.cb_address_high = buffer_address >> 32;
+    const_buffer.cb_address_low = buffer_address & 0xFFFFFFFF;
+}
+
+/**
+ * Macro handler: records a shader program and binds its const buffer.
+ *
+ * Parameters description:
+ *     [0] = Shader Program.
+ *     [1] = Unknown, presumably the shader id.
+ *     [2] = Offset to the start of the shader, after the 0x30 bytes header.
+ *     [3] = Shader Stage.
+ *     [4] = Const Buffer Address >> 8.
+ */
+void Maxwell3D::SetShader(const std::vector<u32>& parameters) {
+    auto shader_program = static_cast<Regs::ShaderProgram>(parameters[0]);
+    // TODO(Subv): This address is probably an offset from the CODE_ADDRESS register.
+    GPUVAddr address = parameters[2];
+    auto shader_stage = static_cast<Regs::ShaderStage>(parameters[3]);
+    // Widen before shifting: shifting the 32-bit parameter directly would drop its upper 8 bits
+    // and cb_address_high below would always end up as 0.
+    GPUVAddr cb_address = static_cast<GPUVAddr>(parameters[4]) << 8;
+
+    auto& shader = state.shader_programs[static_cast<size_t>(shader_program)];
+    shader.program = shader_program;
+    shader.stage = shader_stage;
+    shader.address = address;
+
+    // Perform the same operations as the real macro code.
+    // TODO(Subv): Early exit if register 0xD1C + shader_program contains the same as params[1].
+    auto& shader_regs = regs.shader_config[static_cast<size_t>(shader_program)];
+    shader_regs.start_id = address;
+    // TODO(Subv): Write params[1] to register 0xD1C + shader_program.
+    // TODO(Subv): Write params[2] to register 0xD22 + shader_program.
+
+    // Note: This value is hardcoded in the macro's code.
+    static constexpr u32 DefaultCBSize = 0x10000;
+    regs.const_buffer.cb_size = DefaultCBSize;
+    regs.const_buffer.cb_address_high = cb_address >> 32;
+    regs.const_buffer.cb_address_low = cb_address & 0xFFFFFFFF;
+
+    // Write a hardcoded 0x11 to CB_BIND, this binds the current const buffer to buffer c1[] in the
+    // shader. It's likely that these are the constants for the shader.
+    regs.cb_bind[static_cast<size_t>(shader_stage)].valid.Assign(1);
+    regs.cb_bind[static_cast<size_t>(shader_stage)].index.Assign(1);
+
+    ProcessCBBind(shader_stage);
+}
+
+/**
+ * Macro handler: selects the storage buffer info block as the current const buffer and moves
+ * the write position to the requested offset.
+ *
+ * Parameters description:
+ *     [0] = Buffer offset >> 2
+ */
+void Maxwell3D::BindStorageBuffer(const std::vector<u32>& parameters) {
+    // Note: This value is hardcoded in the macro's code.
+    static constexpr u32 DefaultCBSize = 0x5F00;
+
+    const u32 buffer_offset = parameters[0] << 2;
+
+    // Perform the same operations as the real macro code.
+    auto& const_buffer = regs.const_buffer;
+    const_buffer.cb_size = DefaultCBSize;
+
+    const GPUVAddr info_address = regs.ssbo_info.BufferAddress();
+    const_buffer.cb_address_high = info_address >> 32;
+    const_buffer.cb_address_low = info_address & 0xFFFFFFFF;
+
+    const_buffer.cb_pos = buffer_offset;
+}
+
+/// Binds the buffer currently described by the const_buffer registers to the slot selected by
+/// the cb_bind register of the given shader stage.
+void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
+    const auto stage_index = static_cast<size_t>(stage);
+
+    auto& bind_data = regs.cb_bind[stage_index];
+    auto& target_buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
+
+    // Copy the bind request and the current const buffer description into the slot.
+    target_buffer.enabled = bind_data.valid.Value() != 0;
+    target_buffer.index = bind_data.index;
+    target_buffer.address = regs.const_buffer.BufferAddress();
+    target_buffer.size = regs.const_buffer.cb_size;
+}
+
+/// Stores one word in the current const buffer at the current write position and advances the
+/// position by the size of that word.
+void Maxwell3D::ProcessCBData(u32 value) {
+    auto& const_buffer = regs.const_buffer;
+
+    const GPUVAddr buffer_address = const_buffer.BufferAddress();
+    ASSERT(buffer_address != 0);
+
+    // The word about to be written has to fit inside the buffer.
+    ASSERT(const_buffer.cb_pos + sizeof(u32) <= const_buffer.cb_size);
+
+    const VAddr address =
+        memory_manager.PhysicalToVirtualAddress(buffer_address + const_buffer.cb_pos);
+    Memory::Write32(address, value);
+
+    // Move on to the next word.
+    const_buffer.cb_pos = const_buffer.cb_pos + 4;
+}
+
+/**
+ * Collects the TIC entries of all textures referenced by the texture info const buffer of a
+ * shader stage.
+ * @param stage Shader stage whose textures are requested.
+ * @returns The TIC entries, in the order their handles appear in the texture info buffer.
+ */
+std::vector<Texture::TICEntry> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) {
+    std::vector<Texture::TICEntry> textures;
+
+    auto& shader_stage = state.shader_stages[static_cast<size_t>(stage)];
+    auto& tex_info_buffer = shader_stage.const_buffers[regs.tex_cb_index];
+    ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
+
+    GPUVAddr tic_base_address = regs.tic.TICAddress();
+
+    GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
+
+    // Offset into the texture constbuffer where the texture info begins.
+    static constexpr size_t TextureInfoOffset = 0x20;
+
+    // Every 32-bit word from that offset up to the end of the buffer is read as a texture handle.
+    for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
+         current_texture < tex_info_buffer_end; current_texture += 4) {
+
+        Texture::TextureHandle tex_info{
+            Memory::Read32(memory_manager.PhysicalToVirtualAddress(current_texture))};
+
+        // Handles with both ids equal to zero are skipped.
+        if (tex_info.tic_id != 0 || tex_info.tsc_id != 0) {
+            GPUVAddr tic_address_gpu =
+                tic_base_address + tex_info.tic_id * sizeof(Texture::TICEntry);
+            VAddr tic_address_cpu = memory_manager.PhysicalToVirtualAddress(tic_address_gpu);
+
+            Texture::TICEntry tic_entry;
+            Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+
+            auto r_type = tic_entry.r_type.Value();
+            auto g_type = tic_entry.g_type.Value();
+            auto b_type = tic_entry.b_type.Value();
+            auto a_type = tic_entry.a_type.Value();
+
+            // TODO(Subv): Different data types for separate components are not supported
+            ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
+
+            textures.push_back(tic_entry);
+        }
+    }
+
+    return textures;
+}
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
new file mode 100644
index 000000000..441cc0c19
--- /dev/null
+++ b/src/video_core/engines/maxwell_3d.h
@@ -0,0 +1,508 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <unordered_map>
+#include <vector>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra {
+namespace Engines {
+
+class Maxwell3D final {
+public:
+ explicit Maxwell3D(MemoryManager& memory_manager);
+ ~Maxwell3D() = default;
+
+ /// Register structure of the Maxwell3D engine.
+ /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
+ struct Regs {
+ static constexpr size_t NUM_REGS = 0xE36;
+
+ static constexpr size_t NumRenderTargets = 8;
+ static constexpr size_t NumViewports = 16;
+ static constexpr size_t NumCBData = 16;
+ static constexpr size_t NumVertexArrays = 32;
+ static constexpr size_t NumVertexAttributes = 32;
+ static constexpr size_t MaxShaderProgram = 6;
+ static constexpr size_t MaxShaderStage = 5;
+ // Maximum number of const buffers per shader stage.
+ static constexpr size_t MaxConstBuffers = 16;
+
+ enum class QueryMode : u32 {
+ Write = 0,
+ Sync = 1,
+ };
+
+ enum class ShaderProgram : u32 {
+ VertexA = 0,
+ VertexB = 1,
+ TesselationControl = 2,
+ TesselationEval = 3,
+ Geometry = 4,
+ Fragment = 5,
+ };
+
+ enum class ShaderStage : u32 {
+ Vertex = 0,
+ TesselationControl = 1,
+ TesselationEval = 2,
+ Geometry = 3,
+ Fragment = 4,
+ };
+
+ enum class VertexSize : u32 {
+ Size_32_32_32_32 = 0x01,
+ Size_32_32_32 = 0x02,
+ Size_16_16_16_16 = 0x03,
+ Size_32_32 = 0x04,
+ Size_16_16_16 = 0x05,
+ Size_8_8_8_8 = 0x0a,
+ Size_16_16 = 0x0f,
+ Size_32 = 0x12,
+ Size_8_8_8 = 0x13,
+ Size_8_8 = 0x18,
+ Size_16 = 0x1b,
+ Size_8 = 0x1d,
+ Size_10_10_10_2 = 0x30,
+ Size_11_11_10 = 0x31,
+ };
+
+        /// Returns the textual name of a vertex attribute size (the enumerator name without
+        /// its "Size_" prefix). Values outside the enum hit UNIMPLEMENTED() and yield an
+        /// empty string.
+        static std::string VertexSizeToString(VertexSize vertex_size) {
+            const char* name = nullptr;
+
+            switch (vertex_size) {
+            case VertexSize::Size_32_32_32_32:
+                name = "32_32_32_32";
+                break;
+            case VertexSize::Size_32_32_32:
+                name = "32_32_32";
+                break;
+            case VertexSize::Size_16_16_16_16:
+                name = "16_16_16_16";
+                break;
+            case VertexSize::Size_32_32:
+                name = "32_32";
+                break;
+            case VertexSize::Size_16_16_16:
+                name = "16_16_16";
+                break;
+            case VertexSize::Size_8_8_8_8:
+                name = "8_8_8_8";
+                break;
+            case VertexSize::Size_16_16:
+                name = "16_16";
+                break;
+            case VertexSize::Size_32:
+                name = "32";
+                break;
+            case VertexSize::Size_8_8_8:
+                name = "8_8_8";
+                break;
+            case VertexSize::Size_8_8:
+                name = "8_8";
+                break;
+            case VertexSize::Size_16:
+                name = "16";
+                break;
+            case VertexSize::Size_8:
+                name = "8";
+                break;
+            case VertexSize::Size_10_10_10_2:
+                name = "10_10_10_2";
+                break;
+            case VertexSize::Size_11_11_10:
+                name = "11_11_10";
+                break;
+            }
+
+            if (name == nullptr) {
+                // Not one of the known enumerators.
+                UNIMPLEMENTED();
+                return {};
+            }
+            return name;
+        }
+
+ enum class VertexType : u32 {
+ SignedNorm = 1,
+ UnsignedNorm = 2,
+ SignedInt = 3,
+ UnsignedInt = 4,
+ UnsignedScaled = 5,
+ SignedScaled = 6,
+ Float = 7,
+ };
+
+        /// Returns the enumerator name of a vertex attribute type. Values outside the enum
+        /// hit UNIMPLEMENTED() and yield an empty string.
+        static std::string VertexTypeToString(VertexType vertex_type) {
+            const char* name = nullptr;
+
+            switch (vertex_type) {
+            case VertexType::SignedNorm:
+                name = "SignedNorm";
+                break;
+            case VertexType::UnsignedNorm:
+                name = "UnsignedNorm";
+                break;
+            case VertexType::SignedInt:
+                name = "SignedInt";
+                break;
+            case VertexType::UnsignedInt:
+                name = "UnsignedInt";
+                break;
+            case VertexType::UnsignedScaled:
+                name = "UnsignedScaled";
+                break;
+            case VertexType::SignedScaled:
+                name = "SignedScaled";
+                break;
+            case VertexType::Float:
+                name = "Float";
+                break;
+            }
+
+            if (name == nullptr) {
+                // Not one of the known enumerators.
+                UNIMPLEMENTED();
+                return {};
+            }
+            return name;
+        }
+
+ enum class PrimitiveTopology : u32 {
+ Points = 0x0,
+ Lines = 0x1,
+ LineLoop = 0x2,
+ LineStrip = 0x3,
+ Triangles = 0x4,
+ TriangleStrip = 0x5,
+ TriangleFan = 0x6,
+ Quads = 0x7,
+ QuadStrip = 0x8,
+ Polygon = 0x9,
+ LinesAdjacency = 0xa,
+ LineStripAdjacency = 0xb,
+ TrianglesAdjacency = 0xc,
+ TriangleStripAdjacency = 0xd,
+ Patches = 0xe,
+ };
+
+ union {
+ struct {
+ INSERT_PADDING_WORDS(0x200);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ u32 horiz;
+ u32 vert;
+ u32 format;
+ u32 block_dimensions;
+ u32 array_mode;
+ u32 layer_stride;
+ u32 base_layer;
+ INSERT_PADDING_WORDS(7);
+
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } rt[NumRenderTargets];
+
+ INSERT_PADDING_WORDS(0x80);
+
+ struct {
+ union {
+ BitField<0, 16, u32> x;
+ BitField<16, 16, u32> width;
+ };
+ union {
+ BitField<0, 16, u32> y;
+ BitField<16, 16, u32> height;
+ };
+ float depth_range_near;
+ float depth_range_far;
+ } viewport[NumViewports];
+
+ INSERT_PADDING_WORDS(0x1D);
+
+ struct {
+ u32 first;
+ u32 count;
+ } vertex_buffer;
+
+ INSERT_PADDING_WORDS(0x99);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ u32 format;
+ u32 block_dimensions;
+ u32 layer_stride;
+
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } zeta;
+
+ INSERT_PADDING_WORDS(0x5B);
+
+ union {
+ BitField<0, 5, u32> buffer;
+ BitField<6, 1, u32> constant;
+ BitField<7, 14, u32> offset;
+ BitField<21, 6, VertexSize> size;
+ BitField<27, 3, VertexType> type;
+ BitField<31, 1, u32> bgra;
+ } vertex_attrib_format[NumVertexAttributes];
+
+ INSERT_PADDING_WORDS(0xF);
+
+ struct {
+ union {
+ BitField<0, 4, u32> count;
+ };
+ } rt_control;
+
+ INSERT_PADDING_WORDS(0xCF);
+
+ struct {
+ u32 tsc_address_high;
+ u32 tsc_address_low;
+ u32 tsc_limit;
+
+ GPUVAddr TSCAddress() const {
+ return static_cast<GPUVAddr>(
+ (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
+ }
+ } tsc;
+
+ INSERT_PADDING_WORDS(0x3);
+
+ struct {
+ u32 tic_address_high;
+ u32 tic_address_low;
+ u32 tic_limit;
+
+ GPUVAddr TICAddress() const {
+ return static_cast<GPUVAddr>(
+ (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
+ }
+ } tic;
+
+ INSERT_PADDING_WORDS(0x22);
+
+ struct {
+ u32 code_address_high;
+ u32 code_address_low;
+
+ GPUVAddr CodeAddress() const {
+ return static_cast<GPUVAddr>(
+ (static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
+ }
+ } code_address;
+ INSERT_PADDING_WORDS(1);
+
+ struct {
+ u32 vertex_end_gl;
+ union {
+ u32 vertex_begin_gl;
+ BitField<0, 16, PrimitiveTopology> topology;
+ };
+ } draw;
+
+ INSERT_PADDING_WORDS(0x139);
+ struct {
+ u32 query_address_high;
+ u32 query_address_low;
+ u32 query_sequence;
+ union {
+ u32 raw;
+ BitField<0, 2, QueryMode> mode;
+ BitField<4, 1, u32> fence;
+ BitField<12, 4, u32> unit;
+ } query_get;
+
+ GPUVAddr QueryAddress() const {
+ return static_cast<GPUVAddr>(
+ (static_cast<GPUVAddr>(query_address_high) << 32) | query_address_low);
+ }
+ } query;
+
+ INSERT_PADDING_WORDS(0x3C);
+
+ struct {
+ union {
+ BitField<0, 12, u32> stride;
+ BitField<12, 1, u32> enable;
+ };
+ u32 start_high;
+ u32 start_low;
+ u32 divisor;
+
+ GPUVAddr StartAddress() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) |
+ start_low);
+ }
+ } vertex_array[NumVertexArrays];
+
+ INSERT_PADDING_WORDS(0x40);
+
+ struct {
+ u32 limit_high;
+ u32 limit_low;
+
+ GPUVAddr LimitAddress() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
+ limit_low);
+ }
+ } vertex_array_limit[NumVertexArrays];
+
+ struct {
+ union {
+ BitField<0, 1, u32> enable;
+ BitField<4, 4, ShaderProgram> program;
+ };
+ u32 start_id;
+ INSERT_PADDING_WORDS(1);
+ u32 gpr_alloc;
+ ShaderStage type;
+ INSERT_PADDING_WORDS(9);
+ } shader_config[MaxShaderProgram];
+
+ INSERT_PADDING_WORDS(0x8C);
+
+ struct {
+ u32 cb_size;
+ u32 cb_address_high;
+ u32 cb_address_low;
+ u32 cb_pos;
+ u32 cb_data[NumCBData];
+
+ GPUVAddr BufferAddress() const {
+ return static_cast<GPUVAddr>(
+ (static_cast<GPUVAddr>(cb_address_high) << 32) | cb_address_low);
+ }
+ } const_buffer;
+
+ INSERT_PADDING_WORDS(0x10);
+
+ struct {
+ union {
+ u32 raw_config;
+ BitField<0, 1, u32> valid;
+ BitField<4, 5, u32> index;
+ };
+ INSERT_PADDING_WORDS(7);
+ } cb_bind[MaxShaderStage];
+
+ INSERT_PADDING_WORDS(0x56);
+
+ u32 tex_cb_index;
+
+ INSERT_PADDING_WORDS(0x395);
+
+ struct {
+ /// Compressed address of a buffer that holds information about bound SSBOs.
+ /// This address is usually bound to c0 in the shaders.
+ u32 buffer_address;
+
+ GPUVAddr BufferAddress() const {
+ return static_cast<GPUVAddr>(buffer_address) << 8;
+ }
+ } ssbo_info;
+
+ INSERT_PADDING_WORDS(0x11);
+
+ struct {
+ u32 address[MaxShaderStage];
+ u32 size[MaxShaderStage];
+ } tex_info_buffers;
+
+ INSERT_PADDING_WORDS(0x102);
+ };
+ std::array<u32, NUM_REGS> reg_array;
+ };
+ } regs{};
+
+ static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
+
+ /// Engine-side bookkeeping stored next to the raw register file (`regs`).
+ /// NOTE(review): presumably filled in by the CB_BIND / SetShader handlers declared below;
+ /// their implementations are not visible here - confirm.
+ struct State {
+ /// Describes one const buffer slot of a shader stage.
+ struct ConstBufferInfo {
+ GPUVAddr address; ///< GPU address of the start of the buffer
+ u32 index;
+ u32 size; ///< GetStageTextures() treats [address, address + size) as the buffer range
+ bool enabled; ///< GetStageTextures() asserts this for the texture info buffer
+ };
+
+ /// Describes one shader program slot.
+ struct ShaderProgramInfo {
+ Regs::ShaderStage stage;
+ Regs::ShaderProgram program;
+ GPUVAddr address;
+ };
+
+ /// Per-stage state: one ConstBufferInfo for each of the MaxConstBuffers slots.
+ struct ShaderStageInfo {
+ std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers;
+ };
+
+ /// Indexed by Regs::ShaderStage (see GetStageTextures()).
+ std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
+ /// One entry per Regs::ShaderProgram slot - presumably indexed by that enum; confirm.
+ std::array<ShaderProgramInfo, Regs::MaxShaderProgram> shader_programs;
+ };
+
+ State state{};
+
+ /// Write the value to the register identified by method.
+ void WriteReg(u32 method, u32 value, u32 remaining_params);
+
+ /// Uploads the code for a GPU macro program associated with the specified entry.
+ void SubmitMacroCode(u32 entry, std::vector<u32> code);
+
+ /// Returns a list of enabled textures for the specified shader stage.
+ std::vector<Texture::TICEntry> GetStageTextures(Regs::ShaderStage stage);
+
+private:
+ MemoryManager& memory_manager;
+
+ std::unordered_map<u32, std::vector<u32>> uploaded_macros;
+
+ /// Macro method that is currently being executed / being fed parameters.
+ u32 executing_macro = 0;
+ /// Parameters that have been submitted to the macro call so far.
+ std::vector<u32> macro_params;
+
+ /**
+ * Call a macro on this engine.
+ * @param method Method to call
+ * @param parameters Arguments to the method call
+ */
+ void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
+
+ /// Handles a write to the QUERY_GET register.
+ void ProcessQueryGet();
+
+ /// Handles a write to the CB_DATA[i] register.
+ void ProcessCBData(u32 value);
+
+ /// Handles a write to the CB_BIND register.
+ void ProcessCBBind(Regs::ShaderStage stage);
+
+ /// Handles a write to the VERTEX_END_GL register, triggering a draw.
+ void DrawArrays();
+
+ /// Method call handlers
+ void BindTextureInfoBuffer(const std::vector<u32>& parameters);
+ void SetShader(const std::vector<u32>& parameters);
+ void BindStorageBuffer(const std::vector<u32>& parameters);
+
+ struct MethodInfo {
+ const char* name;
+ u32 arguments;
+ void (Maxwell3D::*handler)(const std::vector<u32>& parameters);
+ };
+
+ static const std::unordered_map<u32, MethodInfo> method_handlers;
+};
+
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(rt, 0x200);
+ASSERT_REG_POSITION(viewport, 0x300);
+ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(zeta, 0x3F8);
+ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
+ASSERT_REG_POSITION(rt_control, 0x487);
+ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(code_address, 0x582);
+ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(query, 0x6C0);
+ASSERT_REG_POSITION(vertex_array[0], 0x700);
+ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
+ASSERT_REG_POSITION(shader_config[0], 0x800);
+ASSERT_REG_POSITION(const_buffer, 0x8E0);
+ASSERT_REG_POSITION(cb_bind[0], 0x904);
+ASSERT_REG_POSITION(tex_cb_index, 0x982);
+ASSERT_REG_POSITION(ssbo_info, 0xD18);
+ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
+ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);
+
+#undef ASSERT_REG_POSITION
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
new file mode 100644
index 000000000..e4e5f9e5e
--- /dev/null
+++ b/src/video_core/engines/maxwell_compute.cpp
@@ -0,0 +1,13 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/engines/maxwell_compute.h"
+
+namespace Tegra {
+namespace Engines {
+
+// Stub: the body is empty, so writes to compute engine registers are currently discarded.
+void MaxwellCompute::WriteReg(u32 method, u32 value) {}
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/maxwell_compute.h
new file mode 100644
index 000000000..7262e1bcb
--- /dev/null
+++ b/src/video_core/engines/maxwell_compute.h
@@ -0,0 +1,22 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+namespace Engines {
+
+/// Front-end object for the compute engine. It holds no state; the WriteReg() definition in
+/// maxwell_compute.cpp has an empty body.
+class MaxwellCompute final {
+public:
+ MaxwellCompute() = default;
+ ~MaxwellCompute() = default;
+
+ /// Write the value to the register identified by method.
+ void WriteReg(u32 method, u32 value);
+};
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
new file mode 100644
index 000000000..9463cd5d6
--- /dev/null
+++ b/src/video_core/gpu.cpp
@@ -0,0 +1,25 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_compute.h"
+#include "video_core/gpu.h"
+
+namespace Tegra {
+
+GPU::GPU() {
+ memory_manager = std::make_unique<MemoryManager>();
+ maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
+ fermi_2d = std::make_unique<Engines::Fermi2D>();
+ maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
+}
+
+GPU::~GPU() = default;
+
+// Read-only access to the 3D engine owned by this GPU (maxwell_3d is created in the constructor).
+const Tegra::Engines::Maxwell3D& GPU::Get3DEngine() const {
+ return *maxwell_3d;
+}
+
+} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
new file mode 100644
index 000000000..8183b12e9
--- /dev/null
+++ b/src/video_core/gpu.h
@@ -0,0 +1,106 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include "common/common_types.h"
+#include "core/hle/service/nvflinger/buffer_queue.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+
+enum class RenderTargetFormat {
+ RGBA8_UNORM = 0xD5,
+};
+
+class DebugContext;
+
+/**
+ * Struct describing framebuffer configuration
+ */
+struct FramebufferConfig {
+    enum class PixelFormat : u32 {
+        ABGR8 = 1,
+    };
+
+    /**
+     * Returns the number of bytes per pixel.
+     * Values outside PixelFormat hit UNREACHABLE() and yield 0.
+     */
+    static u32 BytesPerPixel(PixelFormat format) {
+        switch (format) {
+        case PixelFormat::ABGR8:
+            return 4;
+        }
+
+        UNREACHABLE();
+        // Do not fall off the end of a non-void function if UNREACHABLE() returns.
+        return 0;
+    }
+
+    VAddr address;
+    u32 offset;
+    u32 width;
+    u32 height;
+    u32 stride;
+    PixelFormat pixel_format;
+
+    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
+    TransformFlags transform_flags;
+};
+
+namespace Engines {
+class Fermi2D;
+class Maxwell3D;
+class MaxwellCompute;
+} // namespace Engines
+
+enum class EngineID {
+ FERMI_TWOD_A = 0x902D, // 2D Engine
+ MAXWELL_B = 0xB197, // 3D Engine
+ MAXWELL_COMPUTE_B = 0xB1C0,
+ KEPLER_INLINE_TO_MEMORY_B = 0xA140,
+ MAXWELL_DMA_COPY_A = 0xB0B5,
+};
+
+/// Top-level GPU object: owns the GPU memory manager and one instance of each engine
+/// (3D, 2D and compute), all created in the constructor.
+class GPU final {
+public:
+ GPU();
+ ~GPU();
+
+ /// Processes a command list stored at the specified address in GPU memory.
+ void ProcessCommandList(GPUVAddr address, u32 size);
+
+ /// Returns a reference to the Maxwell3D GPU engine.
+ const Engines::Maxwell3D& Get3DEngine() const;
+
+ /// GPU memory manager; the 3D engine is constructed with a reference to it.
+ std::unique_ptr<MemoryManager> memory_manager;
+
+ /// Returns a mutable reference to the Maxwell3D GPU engine.
+ Engines::Maxwell3D& Maxwell3D() {
+ return *maxwell_3d;
+ }
+
+private:
+ /// Initial value of current_macro_entry.
+ /// NOTE(review): presumably means "no macro upload in progress" - confirm in the command processor.
+ static constexpr u32 InvalidGraphMacroEntry = 0xFFFFFFFF;
+
+ /// Writes a single register in the engine bound to the specified subchannel
+ void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
+
+ /// Mapping of command subchannels to their bound engine ids.
+ std::unordered_map<u32, EngineID> bound_engines;
+
+ /// 3D engine
+ std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+ /// 2D engine
+ std::unique_ptr<Engines::Fermi2D> fermi_2d;
+ /// Compute engine
+ std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
+
+ /// Entry of the macro that is currently being uploaded
+ u32 current_macro_entry = InvalidGraphMacroEntry;
+ /// Code being uploaded for the current macro
+ std::vector<u32> current_macro_code;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
new file mode 100644
index 000000000..2789a4ca1
--- /dev/null
+++ b/src/video_core/memory_manager.cpp
@@ -0,0 +1,110 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+
+// Reserves `size` bytes of address space at a free location chosen by FindFreeBlock() and
+// marks every covered page as Allocated. Returns the start of the reserved range.
+PAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
+    boost::optional<PAddr> paddr = FindFreeBlock(size, align);
+    ASSERT(paddr);
+
+    u64 offset = 0;
+    while (offset < size) {
+        PageSlot(*paddr + offset) = static_cast<u64>(PageStatus::Allocated);
+        offset += Memory::PAGE_SIZE;
+    }
+
+    return *paddr;
+}
+
+// Reserves `size` bytes starting at the requested address. If any page of that range is
+// already in use, the request falls back to the overload that picks a free block itself.
+PAddr MemoryManager::AllocateSpace(PAddr paddr, u64 size, u64 align) {
+    // First pass: make sure the whole requested range is still free.
+    u64 probe = 0;
+    while (probe < size) {
+        if (IsPageMapped(paddr + probe)) {
+            return AllocateSpace(size, align);
+        }
+        probe += Memory::PAGE_SIZE;
+    }
+
+    // Second pass: reserve the range page by page.
+    u64 offset = 0;
+    while (offset < size) {
+        PageSlot(paddr + offset) = static_cast<u64>(PageStatus::Allocated);
+        offset += Memory::PAGE_SIZE;
+    }
+
+    return paddr;
+}
+
+// Maps `size` bytes of CPU memory, starting at the page containing `vaddr`, into a free block
+// chosen by FindFreeBlock(). Each page slot stores the CPU address backing that page.
+PAddr MemoryManager::MapBufferEx(VAddr vaddr, u64 size) {
+    const VAddr vaddr_page = vaddr & ~Memory::PAGE_MASK;
+
+    boost::optional<PAddr> paddr = FindFreeBlock(size);
+    ASSERT(paddr);
+
+    u64 offset = 0;
+    while (offset < size) {
+        PageSlot(*paddr + offset) = vaddr_page + offset;
+        offset += Memory::PAGE_SIZE;
+    }
+
+    return *paddr;
+}
+
+// Maps `size` bytes of CPU memory at a caller-chosen address. Every page of the target range
+// must currently be in the Allocated state; otherwise the request falls back to the overload
+// that picks a free block itself.
+PAddr MemoryManager::MapBufferEx(VAddr vaddr, PAddr paddr, u64 size) {
+    const VAddr vaddr_page = vaddr & ~Memory::PAGE_MASK;
+    const PAddr paddr_page = paddr & ~Memory::PAGE_MASK;
+    const u64 allocated = static_cast<u64>(PageStatus::Allocated);
+
+    // First pass: the whole target range has to be reserved but not yet mapped.
+    u64 probe = 0;
+    while (probe < size) {
+        if (PageSlot(paddr_page + probe) != allocated) {
+            return MapBufferEx(vaddr_page, size);
+        }
+        probe += Memory::PAGE_SIZE;
+    }
+
+    // Second pass: record the backing CPU address of every page.
+    u64 offset = 0;
+    while (offset < size) {
+        PageSlot(paddr_page + offset) = vaddr_page + offset;
+        offset += Memory::PAGE_SIZE;
+    }
+
+    return paddr_page;
+}
+
+// Searches the address space below MAX_ADDRESS for `size` bytes of consecutive unmapped pages.
+//
+// `align` is rounded up to a whole number of pages. Whenever the search has to skip past a
+// page that is in use, the next candidate start address is rounded up to that alignment.
+// Returns the start of the block, or an empty optional if no block was found.
+boost::optional<PAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
+    PAddr paddr{};
+    u64 free_space{};
+    align = (align + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
+    if (align == 0) {
+        // An alignment of 0 (or one that wrapped around while rounding) would make the modulo
+        // below divide by zero; fall back to page alignment.
+        align = Memory::PAGE_SIZE;
+    }
+
+    while (paddr + free_space < MAX_ADDRESS) {
+        if (!IsPageMapped(paddr + free_space)) {
+            free_space += Memory::PAGE_SIZE;
+            if (free_space >= size) {
+                return paddr;
+            }
+        } else {
+            // Restart the search right after the page that is in use.
+            paddr += free_space + Memory::PAGE_SIZE;
+            free_space = 0;
+            // Round the new candidate up to the requested alignment. Only an unaligned address
+            // needs adjusting; an already aligned one must be kept as it is.
+            const u64 remainder{paddr % align};
+            if (remainder) {
+                paddr = (paddr - remainder) + align;
+            }
+        }
+    }
+
+    return {};
+}
+
+// Translates an address managed by this class into the CPU address recorded for its page,
+// preserving the offset inside the page. Asserts that the page is not Unmapped.
+VAddr MemoryManager::PhysicalToVirtualAddress(PAddr paddr) {
+    const VAddr base_addr = PageSlot(paddr);
+    ASSERT(base_addr != static_cast<u64>(PageStatus::Unmapped));
+
+    const u64 offset_in_page = paddr & Memory::PAGE_MASK;
+    return base_addr + offset_in_page;
+}
+
+// A page counts as mapped unless its slot holds the Unmapped marker; pages that are only
+// Allocated therefore count as mapped too.
+bool MemoryManager::IsPageMapped(PAddr paddr) {
+    const VAddr slot = PageSlot(paddr);
+    const bool is_unmapped = (slot == static_cast<u64>(PageStatus::Unmapped));
+    return !is_unmapped;
+}
+
+// Returns the page table slot for the page containing `paddr`. The second-level block is
+// created on first access with every slot set to Unmapped.
+VAddr& MemoryManager::PageSlot(PAddr paddr) {
+    const auto table_index = (paddr >> (Memory::PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK;
+    const auto block_index = (paddr >> Memory::PAGE_BITS) & PAGE_BLOCK_MASK;
+
+    auto& block = page_table[table_index];
+    if (!block) {
+        block = std::make_unique<PageBlock>();
+        block->fill(static_cast<u64>(PageStatus::Unmapped));
+    }
+    return (*block)[block_index];
+}
+
+} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
new file mode 100644
index 000000000..47da7acd6
--- /dev/null
+++ b/src/video_core/memory_manager.h
@@ -0,0 +1,49 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include "common/common_types.h"
+#include "core/memory.h"
+
+namespace Tegra {
+
+/// Virtual addresses in the GPU's memory map are 64 bit.
+using GPUVAddr = u64;
+
+/// Page-granular address space manager for the GPU. Each page has a slot in a two-level table
+/// holding either a PageStatus marker or the CPU address backing that page.
+/// NOTE(review): addresses are typed PAddr here, but Maxwell3D passes GPUVAddr values to
+/// PhysicalToVirtualAddress(), so they appear to be GPU addresses - confirm the naming.
+class MemoryManager final {
+public:
+ MemoryManager() = default;
+
+ /// Reserves `size` bytes in a free block found by FindFreeBlock(); returns the block start.
+ PAddr AllocateSpace(u64 size, u64 align);
+ /// Reserves `size` bytes at `paddr`; if any page there is already in use, falls back to
+ /// AllocateSpace(size, align).
+ PAddr AllocateSpace(PAddr paddr, u64 size, u64 align);
+ /// Maps `size` bytes of CPU memory at `vaddr` (rounded down to a page boundary) into a free
+ /// block; returns the block start.
+ PAddr MapBufferEx(VAddr vaddr, u64 size);
+ /// Maps `size` bytes of CPU memory at `vaddr` to `paddr` (both rounded down to a page
+ /// boundary). Every target page must be Allocated; otherwise falls back to
+ /// MapBufferEx(vaddr, size).
+ PAddr MapBufferEx(VAddr vaddr, PAddr paddr, u64 size);
+ /// Returns the CPU address recorded for `paddr`; asserts that its page is not Unmapped.
+ VAddr PhysicalToVirtualAddress(PAddr paddr);
+
+private:
+ /// Finds `size` bytes of consecutive unmapped pages below MAX_ADDRESS, if any.
+ boost::optional<PAddr> FindFreeBlock(u64 size, u64 align = 1);
+ /// True unless the page's slot holds PageStatus::Unmapped.
+ bool IsPageMapped(PAddr paddr);
+ /// Returns the slot of the page containing `paddr`, creating its block on first access.
+ VAddr& PageSlot(PAddr paddr);
+
+ /// Marker values stored in page slots in place of a CPU address.
+ enum class PageStatus : u64 {
+ Unmapped = 0xFFFFFFFFFFFFFFFFULL,
+ Allocated = 0xFFFFFFFFFFFFFFFEULL,
+ };
+
+ /// Exclusive upper bound of the addresses searched by FindFreeBlock().
+ static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
+ // First level: page_table has PAGE_TABLE_SIZE entries.
+ static constexpr u64 PAGE_TABLE_BITS{14};
+ static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
+ static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
+ // Second level: each PageBlock has PAGE_BLOCK_SIZE page slots.
+ static constexpr u64 PAGE_BLOCK_BITS{14};
+ static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS};
+ static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1};
+
+ using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
+ /// Blocks start out null and are allocated lazily by PageSlot().
+ std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
+};
+
+} // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
new file mode 100644
index 000000000..a493e1d60
--- /dev/null
+++ b/src/video_core/rasterizer_interface.h
@@ -0,0 +1,63 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+
+struct ScreenInfo;
+
+namespace VideoCore {
+
+/// Abstract interface between the emulated GPU and a host rasterizer backend. The pure virtual
+/// members must be implemented; the Accelerate* hooks are optional and return false by default.
+class RasterizerInterface {
+public:
+    virtual ~RasterizerInterface() = default;
+
+    /// Draw the current batch of triangles
+    virtual void DrawTriangles() = 0;
+
+    /// Notify rasterizer that the specified Maxwell register has been changed
+    virtual void NotifyMaxwellRegisterChanged(u32 id) = 0;
+
+    /// Notify rasterizer that all caches should be flushed to guest (emulated) memory
+    virtual void FlushAll() = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to guest
+    /// (emulated) memory
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to guest
+    /// (emulated) memory and invalidated
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+
+    /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0
+    /// NOTE(review): `config` is an opaque pointer here; its expected type is not visible.
+    virtual bool AccelerateDisplayTransfer(const void* config) {
+        return false;
+    }
+
+    /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1
+    virtual bool AccelerateTextureCopy(const void* config) {
+        return false;
+    }
+
+    /// Attempt to use a faster method to fill a region
+    virtual bool AccelerateFill(const void* config) {
+        return false;
+    }
+
+    /// Attempt to use a faster method to display the framebuffer to screen
+    virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
+                                   VAddr framebuffer_addr, u32 pixel_stride,
+                                   ScreenInfo& screen_info) {
+        return false;
+    }
+
+    /// Attempt to use a faster method to draw the current batch
+    virtual bool AccelerateDrawBatch(bool is_indexed) {
+        return false;
+    }
+};
+} // namespace VideoCore
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 51e1d45f9..30075b23c 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -5,6 +5,11 @@
#include <atomic>
#include <memory>
#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/video_core.h"
-void RendererBase::RefreshRasterizerSetting() {}
+// Creates the OpenGL rasterizer the first time this is called; later calls keep the
+// existing instance.
+void RendererBase::RefreshRasterizerSetting() {
+    if (rasterizer) {
+        return;
+    }
+    rasterizer = std::make_unique<RasterizerOpenGL>();
+}
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 28893b181..89a960eaf 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -8,6 +8,8 @@
#include <boost/optional.hpp>
#include "common/assert.h"
#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_interface.h"
class EmuWindow;
@@ -16,39 +18,10 @@ public:
/// Used to reference a framebuffer
enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture };
- /**
- * Struct describing framebuffer metadata
- * TODO(bunnei): This struct belongs in the GPU code, but we don't have a good place for it yet.
- */
- struct FramebufferInfo {
- enum class PixelFormat : u32 {
- ABGR8 = 1,
- };
-
- /**
- * Returns the number of bytes per pixel.
- */
- static u32 BytesPerPixel(PixelFormat format) {
- switch (format) {
- case PixelFormat::ABGR8:
- return 4;
- }
-
- UNREACHABLE();
- }
-
- VAddr address;
- u32 offset;
- u32 width;
- u32 height;
- u32 stride;
- PixelFormat pixel_format;
- };
-
virtual ~RendererBase() {}
/// Swap buffers (render frame)
- virtual void SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) = 0;
+ virtual void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) = 0;
/**
* Set the emulator window to use for renderer
@@ -73,12 +46,16 @@ public:
return m_current_frame;
}
+ VideoCore::RasterizerInterface* Rasterizer() const {
+ return rasterizer.get();
+ }
+
void RefreshRasterizerSetting();
protected:
+ std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
private:
- bool opengl_rasterizer_active = false;
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
new file mode 100644
index 000000000..286491b73
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -0,0 +1,349 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <glad/glad.h>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
+#include "common/vector_math.h"
+#include "core/settings.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/renderer_opengl.h"
+
+using PixelFormat = SurfaceParams::PixelFormat;
+using SurfaceType = SurfaceParams::SurfaceType;
+
+MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_FS, "OpenGL", "Fragment Shader Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
+MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
+
+enum class UniformBindings : GLuint { Common, VS, FS };
+
+/// Assigns the uniform block called `name` in program `shader` to the binding point `binding`.
+/// If the program has no such block (glGetUniformBlockIndex returns GL_INVALID_INDEX) nothing
+/// is done. Otherwise the block's GL_UNIFORM_BLOCK_DATA_SIZE is asserted to equal
+/// `expected_size` before the binding is made.
+static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
+                                         size_t expected_size) {
+    const GLuint ub_index = glGetUniformBlockIndex(shader, name);
+    if (ub_index == GL_INVALID_INDEX) {
+        return;
+    }
+
+    GLint ub_size = 0;
+    glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
+    // GLint is signed and size_t is not: reject negative values explicitly and compare in the
+    // unsigned domain so a bogus negative size can never wrap around to a "matching" value.
+    ASSERT_MSG(ub_size >= 0 && static_cast<size_t>(ub_size) == expected_size,
+               "Uniform block size did not match! Got %d, expected %zu",
+               static_cast<int>(ub_size), expected_size);
+    glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
+}
+
+/// Binds the "shader_data", "vs_config" and "fs_config" uniform blocks of `shader` to the
+/// Common, VS and FS binding points respectively, checking each against the size of the
+/// matching RasterizerOpenGL uniform structure.
+static void SetShaderUniformBlockBindings(GLuint shader) {
+    struct BlockDesc {
+        const char* name;
+        UniformBindings binding;
+        size_t size;
+    };
+    const BlockDesc blocks[] = {
+        {"shader_data", UniformBindings::Common, sizeof(RasterizerOpenGL::UniformData)},
+        {"vs_config", UniformBindings::VS, sizeof(RasterizerOpenGL::VSUniformData)},
+        {"fs_config", UniformBindings::FS, sizeof(RasterizerOpenGL::FSUniformData)},
+    };
+    for (const BlockDesc& block : blocks) {
+        SetShaderUniformBlockBinding(shader, block.name, block.binding, block.size);
+    }
+}
+
+RasterizerOpenGL::RasterizerOpenGL() { // Detects GL extensions, then creates the buffers, VAOs, framebuffer and pipeline
+ shader_dirty = true;
+
+ has_ARB_buffer_storage = false;
+ has_ARB_direct_state_access = false;
+ has_ARB_separate_shader_objects = false;
+ has_ARB_vertex_attrib_binding = false;
+
+ GLint ext_num; // filled in by glGetIntegerv below
+ glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
+ for (GLint i = 0; i < ext_num; i++) {
+ std::string extension{reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; // NOTE(review): result is used without a null check
+
+ if (extension == "GL_ARB_buffer_storage") {
+ has_ARB_buffer_storage = true;
+ } else if (extension == "GL_ARB_direct_state_access") {
+ has_ARB_direct_state_access = true;
+ } else if (extension == "GL_ARB_separate_shader_objects") {
+ has_ARB_separate_shader_objects = true;
+ } else if (extension == "GL_ARB_vertex_attrib_binding") {
+ has_ARB_vertex_attrib_binding = true;
+ }
+ }
+
+ // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
+ state.clip_distance[0] = true;
+
+ // Generate VBO, VAO and UBO
+ vertex_buffer = OGLStreamBuffer::MakeBuffer(GLAD_GL_ARB_buffer_storage, GL_ARRAY_BUFFER); // NOTE(review): uses the glad flag here but has_ARB_buffer_storage for stream_buffer below
+ vertex_buffer->Create(VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE / 2);
+ sw_vao.Create();
+ uniform_buffer.Create();
+
+ state.draw.vertex_array = sw_vao.handle;
+ state.draw.vertex_buffer = vertex_buffer->GetHandle();
+ state.draw.uniform_buffer = uniform_buffer.handle;
+ state.Apply();
+
+ glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); // presumably targets uniform_buffer, bound by state.Apply() above - TODO confirm
+ glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); // indexed binding point 0
+
+ uniform_block_data.dirty = true;
+
+ // Create render framebuffer
+ framebuffer.Create();
+
+ if (has_ARB_separate_shader_objects) {
+ hw_vao.Create();
+ hw_vao_enabled_attributes.fill(false);
+
+ stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
+ stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
+ state.draw.vertex_buffer = stream_buffer->GetHandle(); // replaces the vertex_buffer handle stored above
+
+ pipeline.Create();
+ state.draw.program_pipeline = pipeline.handle;
+ state.draw.shader_program = 0;
+ state.draw.vertex_array = hw_vao.handle; // replaces the sw_vao handle stored above
+ state.Apply();
+
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); // the stream buffer also serves as the element array buffer
+
+ vs_uniform_buffer.Create();
+ glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle);
+ glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY);
+ glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); // indexed binding point 1
+ } else {
+ ASSERT_MSG(false, "Unimplemented"); // no fallback exists without GL_ARB_separate_shader_objects
+ }
+
+ accelerate_draw = AccelDraw::Disabled;
+
+ glEnable(GL_BLEND);
+
+ LOG_WARNING(HW_GPU, "Sync fixed function OpenGL state here when ready");
+}
+
+/// Releases the stream buffer, if one was created, after first making it the vertex buffer
+/// recorded in (and applied from) the tracked OpenGL state.
+RasterizerOpenGL::~RasterizerOpenGL() {
+    if (stream_buffer == nullptr) {
+        return;
+    }
+
+    state.draw.vertex_buffer = stream_buffer->GetHandle();
+    state.Apply();
+    stream_buffer->Release();
+}
+
+static constexpr std::array<GLenum, 4> vs_attrib_types{ // GL component types; presumably indexed by VertexAttributeFormat value (enum not visible here - TODO confirm)
+ GL_BYTE, // VertexAttributeFormat::BYTE
+ GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
+ GL_SHORT, // VertexAttributeFormat::SHORT
+ GL_FLOAT // VertexAttributeFormat::FLOAT
+};
+
+void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { // Stub: not implemented yet, is_indexed is unused
+ UNIMPLEMENTED();
+}
+
+void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { // Stub: only opens the "Vertex Array Setup" profiling scope; both parameters are unused
+ MICROPROFILE_SCOPE(OpenGL_VAO);
+ UNIMPLEMENTED();
+}
+
+void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { // Stub: only opens the "Vertex Shader Setup" profiling scope; both parameters are unused
+ MICROPROFILE_SCOPE(OpenGL_VS);
+ UNIMPLEMENTED();
+}
+
+void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { // Stub: only opens the "Fragment Shader Setup" profiling scope; both parameters are unused
+ MICROPROFILE_SCOPE(OpenGL_FS);
+ ASSERT_MSG(false, "Unimplemented"); // NOTE(review): sibling stubs use UNIMPLEMENTED() instead
+}
+
+/// Records whether the pending batch is indexed or not in `accelerate_draw`, then issues the
+/// draw through DrawTriangles().
+/// @param is_indexed selects AccelDraw::Indexed (true) or AccelDraw::Arrays (false).
+/// @return true once the draw has been issued; false (after tripping an assertion) when
+///         GL_ARB_separate_shader_objects is unavailable.
+bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
+    if (has_ARB_separate_shader_objects) {
+        if (is_indexed) {
+            accelerate_draw = AccelDraw::Indexed;
+        } else {
+            accelerate_draw = AccelDraw::Arrays;
+        }
+        DrawTriangles();
+        return true;
+    }
+
+    ASSERT_MSG(false, "Unimplemented");
+    return false;
+}
+
+void RasterizerOpenGL::DrawTriangles() { // Stub: only opens the "Drawing" profiling scope, no draw is issued yet
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
+ UNIMPLEMENTED();
+}
+
+void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} // Intentionally empty: register changes are currently ignored
+
+void RasterizerOpenGL::FlushAll() { // Forwards to res_cache.FlushAll() under the "Cache Mgmt" profiling scope
+ MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ res_cache.FlushAll();
+}
+
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { // Forwards the [addr, addr + size) request to res_cache.FlushRegion()
+ MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ res_cache.FlushRegion(addr, size);
+}
+
+void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { // Forwards to res_cache.InvalidateRegion() with no owning surface
+ MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ res_cache.InvalidateRegion(addr, size, nullptr); // nullptr: third argument's meaning is defined by the cache (not visible here)
+}
+
+void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { // Flushes the region first, then invalidates the same region
+ MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ res_cache.FlushRegion(addr, size);
+ res_cache.InvalidateRegion(addr, size, nullptr);
+}
+
+bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { // Stub: config is unused
+ MICROPROFILE_SCOPE(OpenGL_Blits);
+ ASSERT_MSG(false, "Unimplemented");
+ return true; // NOTE(review): reports success although nothing was done
+}
+
+bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) { // Stub: config is unused
+ ASSERT_MSG(false, "Unimplemented");
+ return true; // NOTE(review): reports success although nothing was done
+}
+
+bool RasterizerOpenGL::AccelerateFill(const void* config) { // Stub: config is unused
+ ASSERT_MSG(false, "Unimplemented");
+ return true; // NOTE(review): reports success although nothing was done
+}
+
+/// Tries to find a cached surface covering the framebuffer so it can be presented directly.
+///
+/// Builds untiled SurfaceParams from the framebuffer description (width clamped to
+/// `pixel_stride`), asks the cache for a surface and sub-rectangle matching them, and on success
+/// stores the surface's texture handle and normalised texture coordinates in `screen_info`.
+///
+/// @param framebuffer      guest framebuffer configuration (width, height, pixel format).
+/// @param framebuffer_addr address of the framebuffer; 0 makes the call fail immediately.
+/// @param pixel_stride     stride of the framebuffer, in pixels.
+/// @param screen_info      receives display_texcoords and display_texture on success.
+/// @return true if `screen_info` was filled in, false if no address or no surface was available.
+bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
+                                         VAddr framebuffer_addr, u32 pixel_stride,
+                                         ScreenInfo& screen_info) {
+    if (framebuffer_addr == 0) {
+        return false;
+    }
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+
+    SurfaceParams lookup;
+    lookup.addr = framebuffer_addr;
+    lookup.width = std::min(framebuffer.width, pixel_stride);
+    lookup.height = framebuffer.height;
+    lookup.stride = pixel_stride;
+    lookup.is_tiled = false;
+    lookup.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
+    lookup.UpdateParams();
+
+    MathUtil::Rectangle<u32> found_rect;
+    Surface found_surface;
+    std::tie(found_surface, found_rect) =
+        res_cache.GetSurfaceSubRect(lookup, ScaleMatch::Ignore, true);
+
+    if (found_surface == nullptr) {
+        return false;
+    }
+
+    const float tex_width = static_cast<float>(found_surface->GetScaledWidth());
+    const float tex_height = static_cast<float>(found_surface->GetScaledHeight());
+
+    // Argument order (bottom, left, top, right) is reproduced exactly from the existing code.
+    screen_info.display_texcoords =
+        MathUtil::Rectangle<float>(static_cast<float>(found_rect.bottom) / tex_height,
+                                   static_cast<float>(found_rect.left) / tex_width,
+                                   static_cast<float>(found_rect.top) / tex_height,
+                                   static_cast<float>(found_rect.right) / tex_width);
+
+    screen_info.display_texture = found_surface->texture.handle;
+
+    return true;
+}
+
+void RasterizerOpenGL::SetShader() {
+ // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to
+ // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell
+ // shaders.
+
+ static constexpr char vertex_shader[] = R"(
+#version 150 core
+
+in vec2 vert_position;
+in vec2 vert_tex_coord;
+out vec2 frag_tex_coord;
+
+void main() {
+ // Multiply input position by the rotscale part of the matrix and then manually translate by
+ // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
+ // to `vec3(vert_position.xy, 1.0)`
+ gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0);
+ frag_tex_coord = vert_tex_coord;
+}
+)";
+
+ static constexpr char fragment_shader[] = R"(
+#version 150 core
+
+in vec2 frag_tex_coord;
+out vec4 color;
+
+uniform sampler2D color_texture;
+
+void main() {
+ color = vec4(1.0, 0.0, 1.0, 0.0);
+}
+)";
+
+ if (current_shader) { // a shader was already selected by an earlier call; nothing to do
+ return;
+ }
+
+ LOG_ERROR(HW_GPU, "Emulated shaders are not supported! Using a passthrough shader.");
+
+ current_shader = &test_shader; // set before creation, so later calls return early above
+ if (has_ARB_separate_shader_objects) {
+ test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); // argument meanings are defined by OGLShader::Create (not visible here)
+ glActiveShaderProgram(pipeline.handle, test_shader.shader.handle);
+ } else {
+ ASSERT_MSG(false, "Unimplemented");
+ }
+
+ state.draw.shader_program = test_shader.shader.handle;
+ state.Apply();
+
+ if (has_ARB_separate_shader_objects) {
+ state.draw.shader_program = 0; // resets the tracked program to 0 again on the pipeline path
+ state.Apply();
+ }
+}
+
+void RasterizerOpenGL::SyncClipEnabled() { // Stub: clip-enable syncing is not implemented
+ ASSERT_MSG(false, "Unimplemented");
+}
+
+void RasterizerOpenGL::SyncClipCoef() { // Stub: clip-coefficient syncing is not implemented
+ ASSERT_MSG(false, "Unimplemented");
+}
+
+void RasterizerOpenGL::SyncCullMode() { // Stub: cull-mode syncing is not implemented
+ ASSERT_MSG(false, "Unimplemented");
+}
+
+void RasterizerOpenGL::SyncDepthScale() { // Stub: depth-scale syncing is not implemented
+ ASSERT_MSG(false, "Unimplemented");
+}
+
+void RasterizerOpenGL::SyncDepthOffset() { // Stub: depth-offset syncing is not implemented
+ ASSERT_MSG(false, "Unimplemented");
+}
+
+void RasterizerOpenGL::SyncBlendEnabled() { // Stub: blend-enable syncing is not implemented
+ ASSERT_MSG(false, "Unimplemented");
+}
+
+void RasterizerOpenGL::SyncBlendFuncs() { // Stub: blend-function syncing is not implemented
+ ASSERT_MSG(false, "Unimplemented");
+}
+
+void RasterizerOpenGL::SyncBlendColor() { // Stub: blend-color syncing is not implemented
+ ASSERT_MSG(false, "Unimplemented");
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
new file mode 100644
index 000000000..b387f383b
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -0,0 +1,172 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include <glad/glad.h>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "common/hash.h"
+#include "common/vector_math.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_stream_buffer.h"
+
+struct ScreenInfo;
+
+/// OpenGL implementation of VideoCore::RasterizerInterface.
+/// Owns the tracked GL state, the surface cache and the GL objects (buffers, vertex arrays,
+/// framebuffer, pipeline) used for drawing. Every data member carries a default initializer so
+/// that no member is left indeterminate on code paths the constructor does not reach.
+class RasterizerOpenGL : public VideoCore::RasterizerInterface {
+public:
+    RasterizerOpenGL();
+    ~RasterizerOpenGL() override;
+
+    void DrawTriangles() override;
+    void NotifyMaxwellRegisterChanged(u32 id) override;
+    void FlushAll() override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    bool AccelerateDisplayTransfer(const void* config) override;
+    bool AccelerateTextureCopy(const void* config) override;
+    bool AccelerateFill(const void* config) override;
+    bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr,
+                           u32 pixel_stride, ScreenInfo& screen_info) override;
+    bool AccelerateDrawBatch(bool is_indexed) override;
+
+    /// OpenGL shader generated for a given Maxwell register state
+    struct MaxwellShader {
+        /// OpenGL shader resource
+        OGLShader shader;
+    };
+
+    /// Cached vertex shader program
+    struct VertexShader {
+        OGLShader shader;
+    };
+
+    /// Cached fragment shader program
+    struct FragmentShader {
+        OGLShader shader;
+    };
+
+    /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
+    // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+    //       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+    //       Not following that rule will cause problems on some AMD drivers.
+    struct UniformData {};
+
+    // static_assert(
+    //    sizeof(UniformData) == 0x460,
+    //    "The size of the UniformData structure has changed, update the structure in the shader");
+    static_assert(sizeof(UniformData) < 16384,
+                  "UniformData structure must be less than 16kb as per the OpenGL spec");
+
+    /// Uniform data for the vertex shader stage (currently empty)
+    struct VSUniformData {};
+    // static_assert(
+    //    sizeof(VSUniformData) == 1856,
+    //    "The size of the VSUniformData structure has changed, update the structure in the
+    //    shader");
+    static_assert(sizeof(VSUniformData) < 16384,
+                  "VSUniformData structure must be less than 16kb as per the OpenGL spec");
+
+    /// Uniform data for the fragment shader stage (currently empty)
+    struct FSUniformData {};
+    // static_assert(
+    //    sizeof(FSUniformData) == 1856,
+    //    "The size of the FSUniformData structure has changed, update the structure in the
+    //    shader");
+    static_assert(sizeof(FSUniformData) < 16384,
+                  "FSUniformData structure must be less than 16kb as per the OpenGL spec");
+
+private:
+    struct SamplerInfo {};
+
+    /// Syncs the clip enabled status to match the guest state
+    void SyncClipEnabled();
+
+    /// Syncs the clip coefficients to match the guest state
+    void SyncClipCoef();
+
+    /// Sets the OpenGL shader in accordance with the current guest state
+    void SetShader();
+
+    /// Syncs the cull mode to match the guest state
+    void SyncCullMode();
+
+    /// Syncs the depth scale to match the guest state
+    void SyncDepthScale();
+
+    /// Syncs the depth offset to match the guest state
+    void SyncDepthOffset();
+
+    /// Syncs the blend enabled status to match the guest state
+    void SyncBlendEnabled();
+
+    /// Syncs the blend functions to match the guest state
+    void SyncBlendFuncs();
+
+    /// Syncs the blend color to match the guest state
+    void SyncBlendColor();
+
+    // Extension availability flags, detected in the constructor
+    bool has_ARB_buffer_storage{};
+    bool has_ARB_direct_state_access{};
+    bool has_ARB_separate_shader_objects{};
+    bool has_ARB_vertex_attrib_binding{};
+
+    OpenGLState state;
+
+    RasterizerCacheOpenGL res_cache;
+
+    /// Shader used for test rendering - to be removed once we have emulated shaders
+    MaxwellShader test_shader{};
+
+    const MaxwellShader* current_shader{};
+    bool shader_dirty{};
+
+    struct {
+        UniformData data;
+        bool dirty;
+    } uniform_block_data = {};
+
+    OGLPipeline pipeline;
+    OGLVertexArray sw_vao;
+    OGLVertexArray hw_vao;
+    /// Value-initialized (all false); the constructor only fills it on the
+    /// separate-shader-objects path.
+    std::array<bool, 16> hw_vao_enabled_attributes{};
+
+    std::array<SamplerInfo, 3> texture_samplers;
+    static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
+    std::unique_ptr<OGLStreamBuffer> vertex_buffer;
+    OGLBuffer uniform_buffer;
+    OGLFramebuffer framebuffer;
+
+    static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024;
+    std::unique_ptr<OGLStreamBuffer> stream_buffer;
+
+    /// Zero until something assigns it; no assignment exists in the visible code.
+    GLsizeiptr vs_input_size{};
+
+    void AnalyzeVertexArray(bool is_indexed);
+    void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
+
+    OGLBuffer vs_uniform_buffer;
+    std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map;
+    std::unordered_map<std::string, VertexShader> vs_shader_cache;
+    OGLShader vs_default_shader;
+
+    void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset);
+
+    OGLBuffer fs_uniform_buffer;
+    std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map;
+    std::unordered_map<std::string, FragmentShader> fs_shader_cache;
+    OGLShader fs_default_shader;
+
+    void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset);
+
+    enum class AccelDraw { Disabled, Arrays, Indexed };
+    /// Draw mode chosen by AccelerateDrawBatch(); Disabled until then.
+    AccelDraw accelerate_draw{AccelDraw::Disabled};
+};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
new file mode 100644
index 000000000..78fa7c051
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -0,0 +1,1352 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <atomic>
+#include <cstring>
+#include <iterator>
+#include <memory>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+#include <boost/optional.hpp>
+#include <boost/range/iterator_range.hpp>
+#include <glad/glad.h>
+#include "common/alignment.h"
+#include "common/bit_field.h"
+#include "common/color.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
+#include "common/vector_math.h"
+#include "core/frontend/emu_window.h"
+#include "core/hle/kernel/vm_manager.h"
+#include "core/memory.h"
+#include "core/settings.h"
+#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
+#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/utils.h"
+#include "video_core/video_core.h"
+
+using SurfaceType = SurfaceParams::SurfaceType;
+using PixelFormat = SurfaceParams::PixelFormat;
+
+struct FormatTuple { // The three format arguments passed to glTexImage2D for a surface
+ GLint internal_format; // passed as glTexImage2D's internalformat
+ GLenum format; // passed as glTexImage2D's pixel data format
+ GLenum type; // passed as glTexImage2D's pixel data type
+};
+
+static constexpr std::array<FormatTuple, 5> fb_format_tuples = {{ // Color formats; GetFormatTuple indexes this by the numeric PixelFormat value
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8
+ {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
+}};
+
+static constexpr std::array<FormatTuple, 4> depth_format_tuples = {{ // Depth formats; GetFormatTuple indexes this by (PixelFormat value - 14)
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
+ {}, // placeholder: no format is listed for this index
+ {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
+}};
+
+static constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; // Fallback returned by GetFormatTuple for non-color, non-depth surface types
+
+/// Returns the OpenGL format triple to use for `pixel_format`.
+/// Color formats index fb_format_tuples by their numeric value; depth and depth-stencil formats
+/// index depth_format_tuples by their numeric value minus 14; every other surface type gets
+/// tex_tuple. Out-of-range indices trip an assertion.
+static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
+    // Numeric offset subtracted from depth formats before indexing depth_format_tuples.
+    constexpr size_t depth_format_base = 14;
+
+    switch (SurfaceParams::GetFormatType(pixel_format)) {
+    case SurfaceType::Color:
+        ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size());
+        return fb_format_tuples[static_cast<unsigned int>(pixel_format)];
+    case SurfaceType::Depth:
+    case SurfaceType::DepthStencil: {
+        const size_t depth_idx = static_cast<size_t>(pixel_format) - depth_format_base;
+        ASSERT(depth_idx < depth_format_tuples.size());
+        return depth_format_tuples[depth_idx];
+    }
+    default:
+        return tex_tuple;
+    }
+}
+
+template <typename Map, typename Interval>
+constexpr auto RangeFromInterval(Map& map, const Interval& interval) { // Wraps map.equal_range(interval) in a boost iterator range so it can be used in a range-for
+ return boost::make_iterator_range(map.equal_range(interval));
+}
+
+static u16 GetResolutionScaleFactor() { // Returns the configured resolution factor, or the window layout's scaling ratio when the setting is zero
+ return static_cast<u16>(!Settings::values.resolution_factor
+ ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio()
+ : Settings::values.resolution_factor);
+}
+
+template <bool morton_to_gl, PixelFormat format>
+static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { // Copies one 8x8 tile between Morton-ordered tile_buffer and row-major gl_buffer; direction set by morton_to_gl
+ constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
+ constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
+ for (u32 y = 0; y < 8; ++y) {
+ for (u32 x = 0; x < 8; ++x) {
+ u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
+ u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; // (7 - y): tile rows are mirrored vertically on the GL side
+ if (morton_to_gl) {
+ if (format == PixelFormat::D24S8) {
+ gl_ptr[0] = tile_ptr[3]; // D24S8: byte 3 of the tile pixel becomes byte 0 on the GL side
+ std::memcpy(gl_ptr + 1, tile_ptr, 3);
+ } else {
+ std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
+ }
+ } else {
+ if (format == PixelFormat::D24S8) {
+ std::memcpy(tile_ptr, gl_ptr + 1, 3); // inverse of the D24S8 byte rotation above
+ tile_ptr[3] = gl_ptr[0];
+ } else {
+ std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
+ }
+ }
+ }
+ }
+}
+
+template <bool morton_to_gl, PixelFormat format>
+static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { // Copies the guest range [start, end) of a tiled surface based at `base` to/from gl_buffer, one 8x8 tile at a time
+ constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
+ constexpr u32 tile_size = bytes_per_pixel * 64; // bytes in one 8x8 tile
+
+ constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
+ static_assert(gl_bytes_per_pixel >= bytes_per_pixel, "");
+ gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; // skip the extra leading bytes when the GL pixel is wider than the guest pixel
+
+ const VAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
+ const VAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
+ const VAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
+
+ ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); // partial tiles are only supported in the GL -> guest direction
+
+ const u64 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel;
+ u32 x = static_cast<u32>((begin_pixel_index % (stride * 8)) / 8); // pixel column of the first tile
+ u32 y = static_cast<u32>((begin_pixel_index / (stride * 8)) * 8); // pixel row of the first tile
+
+ gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; // row is counted from the other end of gl_buffer (height - 8 - y)
+
+ auto glbuf_next_tile = [&] { // advances x, y and gl_buffer to the next tile
+ x = (x + 8) % stride;
+ gl_buffer += 8 * gl_bytes_per_pixel;
+ if (!x) { // wrapped past the end of a tile row
+ y += 8;
+ gl_buffer -= stride * 9 * gl_bytes_per_pixel; // net effect: 8 pixel rows back from the start of the row just finished
+ }
+ };
+
+ u8* tile_buffer = Memory::GetPointer(start);
+
+ if (start < aligned_start && !morton_to_gl) { // partial first tile: convert the whole tile into a scratch buffer, copy only the requested bytes
+ std::array<u8, tile_size> tmp_buf;
+ MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
+ std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start],
+ std::min(aligned_start, end) - start);
+
+ tile_buffer += aligned_start - start;
+ glbuf_next_tile();
+ }
+
+ const u8* const buffer_end = tile_buffer + aligned_end - aligned_start;
+ while (tile_buffer < buffer_end) { // whole tiles
+ MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer);
+ tile_buffer += tile_size;
+ glbuf_next_tile();
+ }
+
+ if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { // partial last tile, handled through a scratch buffer like the first
+ std::array<u8, tile_size> tmp_buf;
+ MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
+ std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end);
+ }
+}
+
+static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> morton_to_gl_fns = { // MortonCopy<true, ...> instantiations; presumably indexed by numeric PixelFormat value (see per-entry numbers), nullptr where no routine exists
+ MortonCopy<true, PixelFormat::RGBA8>, // 0
+ MortonCopy<true, PixelFormat::RGB8>, // 1
+ MortonCopy<true, PixelFormat::RGB5A1>, // 2
+ MortonCopy<true, PixelFormat::RGB565>, // 3
+ MortonCopy<true, PixelFormat::RGBA4>, // 4
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr, // 5 - 13
+ MortonCopy<true, PixelFormat::D16>, // 14
+ nullptr, // 15
+ MortonCopy<true, PixelFormat::D24>, // 16
+ MortonCopy<true, PixelFormat::D24S8> // 17
+};
+
+static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> gl_to_morton_fns = { // MortonCopy<false, ...> instantiations; presumably indexed by numeric PixelFormat value (see per-entry numbers), nullptr where no routine exists
+ MortonCopy<false, PixelFormat::RGBA8>, // 0
+ MortonCopy<false, PixelFormat::RGB8>, // 1
+ MortonCopy<false, PixelFormat::RGB5A1>, // 2
+ MortonCopy<false, PixelFormat::RGB565>, // 3
+ MortonCopy<false, PixelFormat::RGBA4>, // 4
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr, // 5 - 13
+ MortonCopy<false, PixelFormat::D16>, // 14
+ nullptr, // 15
+ MortonCopy<false, PixelFormat::D24>, // 16
+ MortonCopy<false, PixelFormat::D24S8> // 17
+};
+
+// Allocates uninitialized storage of width x height for `texture` using the given format tuple,
+// sets its sampling parameters, and restores the previous binding of texture unit 0 afterwards.
+static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width,
+ u32 height) {
+ OpenGLState cur_state = OpenGLState::GetCurState();
+
+ // Keep track of previous texture bindings
+ GLuint old_tex = cur_state.texture_units[0].texture_2d;
+ cur_state.texture_units[0].texture_2d = texture;
+ cur_state.Apply();
+ glActiveTexture(GL_TEXTURE0);
+
+ glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
+ format_tuple.format, format_tuple.type, nullptr); // nullptr data: storage is allocated but left uninitialized
+
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); // single mip level only
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+ // Restore previous texture bindings
+ cur_state.texture_units[0].texture_2d = old_tex;
+ cur_state.Apply();
+}
+
+static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
+ const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
+ GLuint read_fb_handle, GLuint draw_fb_handle) { // Blits src_rect of src_tex onto dst_rect of dst_tex through the two given framebuffer objects; always returns true
+ OpenGLState state = OpenGLState::GetCurState();
+
+ OpenGLState prev_state = state;
+ SCOPE_EXIT({ prev_state.Apply(); }); // re-applies the caller's GL state when this function exits
+
+ // Make sure textures aren't bound to texture units, since going to bind them to framebuffer
+ // components
+ state.ResetTexture(src_tex);
+ state.ResetTexture(dst_tex);
+
+ state.draw.read_framebuffer = read_fb_handle;
+ state.draw.draw_framebuffer = draw_fb_handle;
+ state.Apply();
+
+ u32 buffers = 0; // stays 0 (nothing is blitted) for surface types not handled below
+
+ if (type == SurfaceType::Color || type == SurfaceType::Texture) { // attach as color, detach depth-stencil
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
+ 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
+ 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+
+ buffers = GL_COLOR_BUFFER_BIT;
+ } else if (type == SurfaceType::Depth) { // attach as depth, detach color and stencil
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+ buffers = GL_DEPTH_BUFFER_BIT;
+ } else if (type == SurfaceType::DepthStencil) { // attach as combined depth-stencil, detach color
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ src_tex, 0);
+
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ dst_tex, 0);
+
+ buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ }
+
+ glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
+ dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
+ buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); // linear filtering for color blits, nearest for depth/stencil
+
+ return true;
+}
+
+static bool FillSurface(const Surface& surface, const u8* fill_data,
+ const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { // Stub: all parameters are unused
+ ASSERT_MSG(false, "Unimplemented");
+ return true; // NOTE(review): reports success although nothing was filled
+}
+
+SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { // Returns a copy of these params resized to the smallest aligned sub-surface that covers `interval`
+ SurfaceParams params = *this;
+ const u32 tiled_size = is_tiled ? 8 : 1; // rows per addressable unit: 8 when tiled, 1 otherwise
+ const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); // bytes in one row (or one row of tiles)
+ VAddr aligned_start =
+ addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes);
+ VAddr aligned_end =
+ addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes);
+
+ if (aligned_end - aligned_start > stride_tiled_bytes) { // spans several rows: keep width/stride, adjust only addr and height
+ params.addr = aligned_start;
+ params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride));
+ } else {
+ // 1 row
+ ASSERT(aligned_end - aligned_start == stride_tiled_bytes);
+ const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); // re-align to a single tile (or single pixel) instead of a full row
+ aligned_start =
+ addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment);
+ aligned_end =
+ addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment);
+ params.addr = aligned_start;
+ params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size);
+ params.stride = params.width;
+ params.height = tiled_size;
+ }
+ params.UpdateParams();
+
+ return params;
+}
+
+SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { // Returns the address interval spanned by `unscaled_rect` within this surface; empty for a zero-sized rect
+ if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) {
+ return {};
+ }
+
+ if (is_tiled) { // convert to tile units: x scaled by 8 (pixels per tile row-run), y divided by 8 (tile rows)
+ unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8;
+ unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8;
+ unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8;
+ unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8;
+ }
+
+ const u32 stride_tiled = !is_tiled ? stride : stride * 8;
+
+ const u32 pixel_offset =
+ stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) +
+ unscaled_rect.left; // tiled surfaces count rows from the opposite edge (height / 8 - top)
+
+ const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); // full rows for all but the last line, plus the width of the last line
+
+ return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)};
+}
+
+MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { // Returns the rectangle that `sub_surface` occupies inside this surface, derived from the address offset between them
+ const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr));
+
+ if (is_tiled) {
+ const int x0 = (begin_pixel_index % (stride * 8)) / 8;
+ const int y0 = (begin_pixel_index / (stride * 8)) * 8;
+ // Top to bottom
+ return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width,
+ height - (y0 + sub_surface.height));
+ }
+
+ const int x0 = begin_pixel_index % stride; // NOTE(review): int locals are mixed with u32 operands here and above
+ const int y0 = begin_pixel_index / stride;
+ // Bottom to top
+ return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0);
+}
+
+/// Returns GetSubRect(sub_surface) with all four edges multiplied by this surface's res_scale.
+MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const {
+    MathUtil::Rectangle<u32> scaled = GetSubRect(sub_surface);
+    scaled.left *= res_scale;
+    scaled.right *= res_scale;
+    scaled.top *= res_scale;
+    scaled.bottom *= res_scale;
+    return scaled;
+}
+
+bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { // True when addr, width, height, stride, pixel_format and is_tiled all match and the format is not Invalid
+ return std::tie(other_surface.addr, other_surface.width, other_surface.height,
+ other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
+ std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
+ pixel_format != PixelFormat::Invalid;
+}
+
+bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { // True when sub_surface can be expressed as a rectangle inside this surface
+ return sub_surface.addr >= addr && sub_surface.end <= end && // address range is contained
+ sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
+ sub_surface.is_tiled == is_tiled &&
+ (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && // starts on a tile (or pixel) boundary
+ (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && // stride may differ only for a single row / single tile row
+ GetSubRect(sub_surface).left + sub_surface.width <= stride; // does not run past the end of a row
+}
+
+bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { // True when the two surfaces share format, tiling and stride, touch or overlap, and are offset by whole rows
+ return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
+ addr <= expanded_surface.end && expanded_surface.addr <= end && // address ranges overlap or are adjacent
+ is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
+ (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
+ BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
+ 0; // distance between the start addresses is a multiple of one row (or tile row)
+}
+
+/// True when the region described by `texcopy_params` can be expressed as a rectangle of
+/// this surface. When `width != stride` the request has gaps between rows and both values
+/// are compared against byte quantities; otherwise the request is one contiguous interval.
+bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
+    const bool encloses_request = addr <= texcopy_params.addr && end >= texcopy_params.end;
+    if (pixel_format == PixelFormat::Invalid || !encloses_request) {
+        return false;
+    }
+
+    // Contiguous request: it must map onto a rectangle of this surface exactly.
+    if (texcopy_params.width == texcopy_params.stride) {
+        return FromInterval(texcopy_params.GetInterval()).GetInterval() ==
+               texcopy_params.GetInterval();
+    }
+
+    // Gapped request: validate alignment and make sure a row never wraps around.
+    const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
+    if ((texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) != 0) {
+        return false;
+    }
+    if (texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) != 0) {
+        return false;
+    }
+    if (texcopy_params.height != 1 && texcopy_params.stride != tile_stride) {
+        return false;
+    }
+    return ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride;
+}
+
+/// True when this surface is a valid fill surface covering `fill_interval`, the interval
+/// forms a rectangle in `dest_surface`, and the fill pattern can be reproduced with
+/// `dest_surface`'s pixel size.
+bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
+                            SurfaceInterval fill_interval) const {
+    if (type != SurfaceType::Fill || !IsRegionValid(fill_interval)) {
+        return false;
+    }
+    // The interval must be within our fill range
+    if (boost::icl::first(fill_interval) < addr || boost::icl::last_next(fill_interval) > end) {
+        return false;
+    }
+    // Make sure the interval is a rectangle in the destination surface
+    if (!(dest_surface.FromInterval(fill_interval).GetInterval() == fill_interval)) {
+        return false;
+    }
+
+    // Pattern size equals the destination pixel size: nothing more to verify.
+    if (fill_size * 8 == dest_surface.GetFormatBpp()) {
+        return true;
+    }
+
+    // Otherwise check that the bits repeat for our fill_size: tile the pattern once per
+    // destination byte-per-pixel and require every pixel-sized slice to equal the first.
+    const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u);
+    std::vector<u8> pattern(fill_size * dest_bytes_per_pixel);
+
+    for (u32 copy = 0; copy < dest_bytes_per_pixel; ++copy) {
+        std::memcpy(&pattern[copy * fill_size], &fill_data[0], fill_size);
+    }
+
+    for (u32 pixel = 0; pixel < fill_size; ++pixel) {
+        const bool same_as_first = std::memcmp(&pattern[dest_bytes_per_pixel * pixel], &pattern[0],
+                                               dest_bytes_per_pixel) == 0;
+        if (!same_as_first) {
+            return false;
+        }
+    }
+
+    // 4 bits per pixel: both nibbles of the byte have to be the same value.
+    if (dest_surface.GetFormatBpp() == 4 && (pattern[0] & 0xF) != (pattern[0] >> 4)) {
+        return false;
+    }
+    return true;
+}
+
+/// True when `copy_interval` of `dest_surface` can be sourced from this surface, either as a
+/// sub-rectangle of it or by replaying its fill pattern. `copy_interval` is asserted to map
+/// onto a rectangle of `dest_surface`.
+bool CachedSurface::CanCopy(const SurfaceParams& dest_surface,
+                            SurfaceInterval copy_interval) const {
+    const SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
+    ASSERT(subrect_params.GetInterval() == copy_interval);
+
+    // Try the sub-rectangle route first; the fill route is only consulted if it fails.
+    return CanSubRect(subrect_params) || CanFill(dest_surface, copy_interval);
+}
+
+// Returns the largest interval of this surface that could be copied from `src_surface`.
+// Only regions of `src_surface` that overlap this surface and are not listed in its
+// `invalid_regions` are considered. Each candidate is shrunk to pixel/tile alignment and then,
+// where possible, to whole rows so that it forms a rectangle. An empty interval is returned
+// when no candidate qualifies.
+SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const {
+ SurfaceInterval result{};
+ // Overlap of both surfaces minus whatever the source has marked invalid
+ const auto valid_regions =
+ SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions;
+ for (auto& valid_interval : valid_regions) {
+ // Shrink inwards to a multiple of one pixel (linear) or one 8x8 tile (tiled), measured
+ // from the start of this surface
+ const SurfaceInterval aligned_interval{
+ addr + Common::AlignUp(boost::icl::first(valid_interval) - addr,
+ BytesInPixels(is_tiled ? 8 * 8 : 1)),
+ addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr,
+ BytesInPixels(is_tiled ? 8 * 8 : 1))};
+
+ // Skip regions smaller than one pixel/tile, or that vanish after alignment
+ if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) ||
+ boost::icl::length(aligned_interval) == 0) {
+ continue;
+ }
+
+ // Get the rectangle within aligned_interval
+ // (stride_bytes is the size of one row, or of one band of 8 rows when tiled)
+ const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1);
+ SurfaceInterval rect_interval{
+ addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes),
+ addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes),
+ };
+ if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) {
+ // 1 row
+ // (the row-aligned bounds crossed over, so the aligned interval lies within one row)
+ rect_interval = aligned_interval;
+ } else if (boost::icl::length(rect_interval) == 0) {
+ // 2 rows that do not make a rectangle, return the larger one
+ const SurfaceInterval row1{boost::icl::first(aligned_interval),
+ boost::icl::first(rect_interval)};
+ const SurfaceInterval row2{boost::icl::first(rect_interval),
+ boost::icl::last_next(aligned_interval)};
+ rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2;
+ }
+
+ // Keep the longest candidate seen so far
+ if (boost::icl::length(rect_interval) > boost::icl::length(result)) {
+ result = rect_interval;
+ }
+ }
+ return result;
+}
+
+/// Copies `copy_interval` from `src_surface` into `dst_surface`, either by replaying a fill
+/// pattern (when the source is a fill surface) or by blitting the matching sub-rectangle.
+/// Callers are expected to have checked CanCopy beforehand; no validation is repeated here.
+void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                                        SurfaceInterval copy_interval) {
+    const SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval);
+    ASSERT(subrect_params.GetInterval() == copy_interval);
+
+    ASSERT(src_surface != dst_surface);
+
+    if (src_surface->type == SurfaceType::Fill) {
+        // FillSurface needs a 4 bytes buffer: build it by cycling through the fill pattern,
+        // starting at the phase that corresponds to the first byte of the copied interval.
+        const u64 fill_offset =
+            (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size;
+        std::array<u8, 4> fill_buffer;
+
+        for (size_t byte = 0; byte < fill_buffer.size(); ++byte) {
+            fill_buffer[byte] =
+                src_surface->fill_data[(fill_offset + byte) % src_surface->fill_size];
+        }
+
+        FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params),
+                    draw_framebuffer.handle);
+        return;
+    }
+
+    if (!src_surface->CanSubRect(subrect_params)) {
+        // Neither copy strategy applies, which contradicts the CanCopy precondition.
+        UNREACHABLE();
+        return;
+    }
+
+    BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params),
+                 dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params),
+                 src_surface->type, read_framebuffer.handle, draw_framebuffer.handle);
+}
+
+MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
+/// Loads surface data from guest memory into the CPU-side `gl_buffer`, allocating the buffer
+/// on first use. Does nothing if the surface's address has no backing pointer. Only
+/// non-tiled colour surfaces are implemented; tiled surfaces hit an "Unimplemented" assert.
+void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
+    ASSERT(type != SurfaceType::Fill);
+
+    u8* const guest_data = Memory::GetPointer(addr);
+    if (guest_data == nullptr) {
+        return;
+    }
+
+    // Lazily allocate the staging buffer sized for the whole surface.
+    if (gl_buffer == nullptr) {
+        gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format);
+        gl_buffer.reset(new u8[gl_buffer_size]);
+    }
+
+    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
+
+    ASSERT(load_start >= addr && load_end <= end);
+    const u64 start_offset = load_start - addr;
+
+    if (is_tiled) {
+        ASSERT_MSG(false, "Unimplemented");
+        return;
+    }
+
+    ASSERT(type == SurfaceType::Color);
+    const u32 bytes_per_pixel{GetFormatBpp() >> 3};
+    VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, guest_data + start_offset,
+                                   &gl_buffer[start_offset], true);
+}
+
+MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
+// Writes the range [flush_start, flush_end) of this surface back to guest memory.
+//  - Fill surfaces repeat `fill_data` over the range.
+//  - Non-tiled surfaces are copied straight out of `gl_buffer`.
+//  - Tiled surfaces go through the per-format `gl_to_morton_fns` converter.
+// Does nothing if the surface's address has no backing pointer.
+void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
+ u8* const dst_buffer = Memory::GetPointer(addr);
+ if (dst_buffer == nullptr)
+ return;
+
+ ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format));
+
+ // TODO: Should probably be done in ::Memory:: and check for other regions too
+ // (LoadGLBuffer() would need the same treatment)
+ // Clamp the range so it does not straddle either end of the VRAM region
+ if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END)
+ flush_end = Memory::VRAM_VADDR_END;
+
+ if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
+ flush_start = Memory::VRAM_VADDR;
+
+ MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
+
+ ASSERT(flush_start >= addr && flush_end <= end);
+ const u64 start_offset = flush_start - addr;
+ const u64 end_offset = flush_end - addr;
+
+ if (type == SurfaceType::Fill) {
+ // The pattern is written starting from the previous multiple of fill_size so it stays
+ // in phase; the bytes between that point and start_offset are saved and restored so
+ // nothing before flush_start is modified.
+ const u64 coarse_start_offset = start_offset - (start_offset % fill_size);
+ const u64 backup_bytes = start_offset % fill_size;
+ std::array<u8, 4> backup_data;
+ if (backup_bytes)
+ std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes);
+
+ for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
+ // The last repetition is truncated so the write stops at end_offset
+ std::memcpy(&dst_buffer[offset], &fill_data[0],
+ std::min(fill_size, end_offset - offset));
+ }
+
+ if (backup_bytes)
+ std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
+ } else if (!is_tiled) {
+ ASSERT(type == SurfaceType::Color);
+ std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
+ } else {
+ // Tiled: dispatch to the converter registered for this pixel format
+ gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
+ flush_start, flush_end);
+ }
+}
+
+MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
+// Uploads the part of `gl_buffer` covered by `rect` (unscaled surface coordinates) into this
+// surface's OpenGL texture. For surfaces with res_scale != 1 the data is first uploaded to a
+// temporary texture of rect's size and then blitted into the scaled texture using the supplied
+// framebuffer handles. Fill surfaces are ignored.
+void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
+ GLuint draw_fb_handle) {
+ if (type == SurfaceType::Fill)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_TextureUL);
+
+ ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format));
+
+ // Load data from memory to the surface
+ GLint x0 = static_cast<GLint>(rect.left);
+ GLint y0 = static_cast<GLint>(rect.bottom);
+ // Byte offset of the rect's first pixel inside gl_buffer (computed before x0/y0 may be reset)
+ size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format);
+
+ const FormatTuple& tuple = GetFormatTuple(pixel_format);
+ GLuint target_tex = texture.handle;
+
+ // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
+ // surface
+ OGLTexture unscaled_tex;
+ if (res_scale != 1) {
+ // The temporary texture only holds the rect, so the upload goes to its origin
+ x0 = 0;
+ y0 = 0;
+
+ unscaled_tex.Create();
+ AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
+ target_tex = unscaled_tex.handle;
+ }
+
+ OpenGLState cur_state = OpenGLState::GetCurState();
+
+ // Temporarily bind the upload target to texture unit 0; restored after the upload
+ GLuint old_tex = cur_state.texture_units[0].texture_2d;
+ cur_state.texture_units[0].texture_2d = target_tex;
+ cur_state.Apply();
+
+ // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
+ ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
+ // Source rows in gl_buffer are `stride` pixels apart
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
+
+ glActiveTexture(GL_TEXTURE0);
+ glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+ &gl_buffer[buffer_offset]);
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+
+ cur_state.texture_units[0].texture_2d = old_tex;
+ cur_state.Apply();
+
+ if (res_scale != 1) {
+ // Blit the temporary 1x texture into the matching scaled region of the real texture
+ auto scaled_rect = rect;
+ scaled_rect.left *= res_scale;
+ scaled_rect.top *= res_scale;
+ scaled_rect.right *= res_scale;
+ scaled_rect.bottom *= res_scale;
+
+ BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle,
+ scaled_rect, type, read_fb_handle, draw_fb_handle);
+ }
+}
+
+MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
+// Reads the part of this surface's OpenGL texture covered by `rect` (unscaled surface
+// coordinates) back into `gl_buffer`, allocating the buffer on first use. For surfaces with
+// res_scale != 1 the scaled texture is first blitted down into a temporary texture of rect's
+// size. The OpenGL state that was current on entry is re-applied on exit. Fill surfaces are
+// ignored.
+void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
+ GLuint draw_fb_handle) {
+ if (type == SurfaceType::Fill)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_TextureDL);
+
+ if (gl_buffer == nullptr) {
+ gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format);
+ gl_buffer.reset(new u8[gl_buffer_size]);
+ }
+
+ OpenGLState state = OpenGLState::GetCurState();
+ OpenGLState prev_state = state;
+ SCOPE_EXIT({ prev_state.Apply(); });
+
+ const FormatTuple& tuple = GetFormatTuple(pixel_format);
+
+ // Ensure no bad interactions with GL_PACK_ALIGNMENT
+ ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
+ // Destination rows in gl_buffer are `stride` pixels apart
+ glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
+ // Byte offset of the rect's first pixel inside gl_buffer
+ size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format);
+
+ // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
+ if (res_scale != 1) {
+ auto scaled_rect = rect;
+ scaled_rect.left *= res_scale;
+ scaled_rect.top *= res_scale;
+ scaled_rect.right *= res_scale;
+ scaled_rect.bottom *= res_scale;
+
+ OGLTexture unscaled_tex;
+ unscaled_tex.Create();
+
+ MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
+ AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
+ BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type,
+ read_fb_handle, draw_fb_handle);
+
+ state.texture_units[0].texture_2d = unscaled_tex.handle;
+ state.Apply();
+
+ // The temporary texture holds exactly the rect, so it is read back in full
+ glActiveTexture(GL_TEXTURE0);
+ glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
+ } else {
+ state.ResetTexture(texture.handle);
+ state.draw.read_framebuffer = read_fb_handle;
+ state.Apply();
+
+ // Attach the texture to the read framebuffer at the attachment point that matches the
+ // surface type, detaching the other attachment points, then read the rect back
+ if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+ texture.handle, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ 0, 0);
+ } else if (type == SurfaceType::Depth) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+ texture.handle, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ } else {
+ // Any remaining surface type is attached as combined depth-stencil
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ texture.handle, 0);
+ }
+ glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
+ static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
+ tuple.format, tuple.type, &gl_buffer[buffer_offset]);
+ }
+
+ glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+}
+
+/// Bit flags selecting which kinds of surface matches FindMatch should look for.
+enum MatchFlags {
+    /// Modifier that can be combined with the other match types: also accept surfaces whose
+    /// region is not valid (such matches require validation before they can be used)
+    Invalid = 1 << 0,
+    /// Surfaces perfectly match
+    Exact = 1 << 1,
+    /// Surface encompasses params
+    SubRect = 1 << 2,
+    /// Surface we can copy from
+    Copy = 1 << 3,
+    /// Surface that can expand params
+    Expand = 1 << 4,
+    /// Surface that will match a display transfer "texture copy" parameters
+    TexCopy = 1 << 5,
+};
+
+/// Combines two flag sets into one.
+constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
+    const int combined_bits = static_cast<int>(lhs) | static_cast<int>(rhs);
+    return static_cast<MatchFlags>(combined_bits);
+}
+
+/// Get the best surface match for the given flags, or nullptr if no cached surface matches.
+///
+/// Every surface registered in `surface_cache` that overlaps `params`' interval is tested
+/// against each match kind enabled in `find_flags`. Among the candidates, preference goes to
+/// (in order): the higher res_scale, a valid surface over an invalid one, and the longer
+/// matched interval.
+///
+/// @param surface_cache      Cache to search
+/// @param params             Surface description to match against
+/// @param match_scale_type   How the candidate's res_scale must relate to params.res_scale
+/// @param validate_interval  Region whose validity is checked (defaults to params' interval);
+///                           it is dereferenced unconditionally when MatchFlags::Copy is set,
+///                           so it must be provided in that case
+template <MatchFlags find_flags>
+Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params,
+ ScaleMatch match_scale_type,
+ boost::optional<SurfaceInterval> validate_interval = boost::none) {
+ // Best candidate found so far and the properties it was ranked by
+ Surface match_surface = nullptr;
+ bool match_valid = false;
+ u32 match_scale = 0;
+ SurfaceInterval match_interval{};
+
+ for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) {
+ for (auto& surface : pair.second) {
+ // Exact demands the same scale; the other modes accept an equal or higher scale
+ bool res_scale_matched = match_scale_type == ScaleMatch::Exact
+ ? (params.res_scale == surface->res_scale)
+ : (params.res_scale <= surface->res_scale);
+ // validity will be checked in GetCopyableInterval
+ bool is_valid =
+ find_flags & MatchFlags::Copy
+ ? true
+ : surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
+
+ // Invalid surfaces are only considered when the caller asked for them
+ if (!(find_flags & MatchFlags::Invalid) && !is_valid)
+ continue;
+
+ // Runs `match_fn` if `check_type` is enabled in find_flags and, on success, records
+ // the surface as the new best match when it ranks higher than the current one
+ auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
+ if (!(find_flags & check_type))
+ return;
+
+ bool matched;
+ SurfaceInterval surface_interval;
+ std::tie(matched, surface_interval) = match_fn();
+ if (!matched)
+ return;
+
+ // A scale mismatch is tolerated only for ScaleMatch::Ignore or fill surfaces
+ if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore &&
+ surface->type != SurfaceType::Fill)
+ return;
+
+ // Found a match, update only if this is better than the previous one
+ auto UpdateMatch = [&] {
+ match_surface = surface;
+ match_valid = is_valid;
+ match_scale = surface->res_scale;
+ match_interval = surface_interval;
+ };
+
+ // 1st criterion: higher resolution scale wins
+ if (surface->res_scale > match_scale) {
+ UpdateMatch();
+ return;
+ } else if (surface->res_scale < match_scale) {
+ return;
+ }
+
+ // 2nd criterion: a valid surface beats an invalid one
+ if (is_valid && !match_valid) {
+ UpdateMatch();
+ return;
+ } else if (is_valid != match_valid) {
+ return;
+ }
+
+ // 3rd criterion: the longer matched interval wins
+ if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) {
+ UpdateMatch();
+ }
+ };
+ IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] {
+ return std::make_pair(surface->ExactMatch(params), surface->GetInterval());
+ });
+ IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] {
+ return std::make_pair(surface->CanSubRect(params), surface->GetInterval());
+ });
+ IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
+ // The copyable interval must overlap the requested one and be copyable from `surface`
+ auto copy_interval =
+ params.FromInterval(*validate_interval).GetCopyableInterval(surface);
+ bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
+ surface->CanCopy(params, copy_interval);
+ return std::make_pair(matched, copy_interval);
+ });
+ IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
+ return std::make_pair(surface->CanExpand(params), surface->GetInterval());
+ });
+ IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
+ return std::make_pair(surface->CanTexCopy(params), surface->GetInterval());
+ });
+ }
+ }
+ return match_surface;
+}
+
+// Creates the GL objects the cache needs: the read/draw framebuffers, an attribute-less VAO,
+// the buffer used by ConvertD24S8toABGR, and the shader program that conversion draws with.
+// The shader's uniform locations are looked up once here.
+RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
+ read_framebuffer.Create();
+ draw_framebuffer.Create();
+
+ attributeless_vao.Create();
+
+ d24s8_abgr_buffer.Create();
+ d24s8_abgr_buffer_size = 0;
+
+ // Vertex shader: picks one of four hard-coded corner positions by gl_VertexID, so no vertex
+ // attributes are required
+ const char* vs_source = R"(
+#version 330 core
+const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
+void main() {
+ gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
+}
+)";
+ // Fragment shader: maps the fragment position inside `viewport` to a texel index of the
+ // buffer texture `tbo` and outputs that texel with its components swizzled (.rabg)
+ const char* fs_source = R"(
+#version 330 core
+
+uniform samplerBuffer tbo;
+uniform vec2 tbo_size;
+uniform vec4 viewport;
+
+out vec4 color;
+
+void main() {
+ vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw;
+ int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x);
+ color = texelFetch(tbo, tbo_offset).rabg;
+}
+)";
+ d24s8_abgr_shader.Create(vs_source, nullptr, fs_source);
+
+ // Bind the program just long enough to point the `tbo` sampler at texture unit 0, then
+ // restore the previously bound program
+ OpenGLState state = OpenGLState::GetCurState();
+ GLuint old_program = state.draw.shader_program;
+ state.draw.shader_program = d24s8_abgr_shader.handle;
+ state.Apply();
+
+ GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo");
+ ASSERT(tbo_u_id != -1);
+ glUniform1i(tbo_u_id, 0);
+
+ state.draw.shader_program = old_program;
+ state.Apply();
+
+ // Cache the locations of the uniforms that are set on every conversion
+ d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size");
+ ASSERT(d24s8_abgr_tbo_size_u_id != -1);
+ d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport");
+ ASSERT(d24s8_abgr_viewport_u_id != -1);
+}
+
+/// Flushes every dirty region back to memory, then unregisters surfaces one at a time until
+/// the cache is empty.
+RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
+    FlushAll();
+
+    while (!surface_cache.empty()) {
+        // Always take the first surface of the first interval; unregistering removes it.
+        const auto& front_surfaces = surface_cache.begin()->second;
+        UnregisterSurface(*front_surfaces.begin());
+    }
+}
+
+/// Blits `src_rect` of `src_surface` onto `dst_rect` of `dst_surface`.
+/// @return false if the two pixel formats are not blittable; otherwise the result of
+///         BlitTextures.
+bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface,
+                                         const MathUtil::Rectangle<u32>& src_rect,
+                                         const Surface& dst_surface,
+                                         const MathUtil::Rectangle<u32>& dst_rect) {
+    const bool formats_compatible = SurfaceParams::CheckFormatsBlittable(
+        src_surface->pixel_format, dst_surface->pixel_format);
+    if (formats_compatible) {
+        return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle,
+                            dst_rect, src_surface->type, read_framebuffer.handle,
+                            draw_framebuffer.handle);
+    }
+    return false;
+}
+
+// Reinterprets `src_rect` of a depth-stencil texture as colour data and draws it into
+// `dst_rect` of `dst_tex`. The depth-stencil texels are read into a pixel buffer object, which
+// is then sampled as an RGBA8 buffer texture by the conversion shader built in the constructor.
+// The OpenGL state that was current on entry is re-applied on exit.
+void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex,
+ const MathUtil::Rectangle<u32>& src_rect,
+ GLuint dst_tex,
+ const MathUtil::Rectangle<u32>& dst_rect) {
+ OpenGLState prev_state = OpenGLState::GetCurState();
+ SCOPE_EXIT({ prev_state.Apply(); });
+
+ // Start from a default-constructed state with only our framebuffers bound
+ OpenGLState state;
+ state.draw.read_framebuffer = read_framebuffer.handle;
+ state.draw.draw_framebuffer = draw_framebuffer.handle;
+ state.Apply();
+
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle);
+
+ // 4 bytes per texel; when the buffer is too small it is reallocated at twice the needed size
+ GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4;
+ if (target_pbo_size > d24s8_abgr_buffer_size) {
+ d24s8_abgr_buffer_size = target_pbo_size * 2;
+ glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY);
+ }
+
+ // Read the source rect into the bound pixel pack buffer (the final argument is an offset
+ // into that buffer, not a client pointer)
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex,
+ 0);
+ glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom),
+ static_cast<GLsizei>(src_rect.GetWidth()),
+ static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
+ 0);
+
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+ // PBO now contains src_tex in RABG format
+ // Set up the conversion draw: shader, attribute-less VAO, viewport covering dst_rect
+ state.draw.shader_program = d24s8_abgr_shader.handle;
+ state.draw.vertex_array = attributeless_vao.handle;
+ state.viewport.x = static_cast<GLint>(dst_rect.left);
+ state.viewport.y = static_cast<GLint>(dst_rect.bottom);
+ state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth());
+ state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight());
+ state.Apply();
+
+ // Expose the buffer to the shader as an RGBA8 buffer texture on unit 0
+ OGLTexture tbo;
+ tbo.Create();
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_BUFFER, tbo.handle);
+ glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle);
+
+ glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()),
+ static_cast<GLfloat>(src_rect.GetHeight()));
+ glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x),
+ static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width),
+ static_cast<GLfloat>(state.viewport.height));
+
+ // Draw a four-vertex triangle strip into dst_tex
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+ glBindTexture(GL_TEXTURE_BUFFER, 0);
+}
+
+/// Returns the cached surface that exactly matches `params`, creating and registering a new
+/// one if none exists. When `load_if_create` is set, the surface's contents for the requested
+/// range are validated before returning.
+/// @return nullptr when `params` has a null address or a zero area.
+Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
+                                          bool load_if_create) {
+    if (params.addr == 0 || params.height * params.width == 0) {
+        return nullptr;
+    }
+    // Use GetSurfaceSubRect instead
+    ASSERT(params.width == params.stride);
+
+    ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0));
+
+    // Check for an exact match in existing surfaces
+    Surface surface =
+        FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
+
+    if (surface == nullptr) {
+        u16 target_res_scale = params.res_scale;
+
+        // Raises target_res_scale to that of an expandable surface matching `find_params`,
+        // if such a surface exists and has a higher scale.
+        const auto adopt_higher_scale = [&](const SurfaceParams& find_params) {
+            const Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
+                surface_cache, find_params, match_res_scale);
+            if (expandable != nullptr && expandable->res_scale > target_res_scale) {
+                target_res_scale = expandable->res_scale;
+            }
+        };
+
+        if (match_res_scale != ScaleMatch::Exact) {
+            // This surface may have a subrect of another surface with a higher res_scale, find
+            // it to adjust our params
+            SurfaceParams find_params = params;
+            adopt_higher_scale(find_params);
+
+            // Keep res_scale when reinterpreting d24s8 -> rgba8
+            if (params.pixel_format == PixelFormat::RGBA8) {
+                find_params.pixel_format = PixelFormat::D24S8;
+                adopt_higher_scale(find_params);
+            }
+        }
+
+        SurfaceParams new_params = params;
+        new_params.res_scale = target_res_scale;
+        surface = CreateSurface(new_params);
+        RegisterSurface(surface);
+    }
+
+    if (load_if_create) {
+        ValidateSurface(surface, params.addr, params.size);
+    }
+
+    return surface;
+}
+
+/// Finds (or creates) a cached surface that contains the region described by `params` and
+/// returns it together with the scaled rectangle that region occupies inside it.
+///
+/// Lookup order:
+///  1. an existing surface that encompasses `params`;
+///  2. failing that only because of res_scale, a fresh surface with the dimensions of the
+///     lower-scale one but `params`' res_scale;
+///  3. an existing surface that can be expanded to cover `params` (a larger replacement is
+///     created and the old one queued in `remove_surfaces`);
+///  4. a brand-new surface via GetSurface.
+///
+/// @param params           Region being requested
+/// @param match_res_scale  How candidate surfaces' res_scale must relate to params.res_scale
+/// @param load_if_create   When true, validate the surface contents for the requested range
+/// @return (surface, scaled sub-rectangle), or (nullptr, empty rectangle) when `params` has a
+///         null address or zero area, or when no surface could be obtained.
+SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params,
+                                                           ScaleMatch match_res_scale,
+                                                           bool load_if_create) {
+    if (params.addr == 0 || params.height * params.width == 0) {
+        return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{});
+    }
+
+    // Attempt to find encompassing surface
+    Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
+                                                                           match_res_scale);
+
+    // Check if FindMatch failed because of res scaling
+    // If that's the case create a new surface with
+    // the dimensions of the lower res_scale surface
+    // to suggest it should not be used again
+    if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) {
+        surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
+                                                                       ScaleMatch::Ignore);
+        if (surface != nullptr) {
+            ASSERT(surface->res_scale < params.res_scale);
+            SurfaceParams new_params = *surface;
+            new_params.res_scale = params.res_scale;
+
+            surface = CreateSurface(new_params);
+            RegisterSurface(surface);
+        }
+    }
+
+    // Tiled surfaces are handled in whole 8x8 tiles, so round the request up accordingly
+    SurfaceParams aligned_params = params;
+    if (params.is_tiled) {
+        aligned_params.height = Common::AlignUp(params.height, 8);
+        aligned_params.width = Common::AlignUp(params.width, 8);
+        aligned_params.stride = Common::AlignUp(params.stride, 8);
+        aligned_params.UpdateParams();
+    }
+
+    // Check for a surface we can expand before creating a new one
+    if (surface == nullptr) {
+        surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params,
+                                                                      match_res_scale);
+        if (surface != nullptr) {
+            aligned_params.width = aligned_params.stride;
+            aligned_params.UpdateParams();
+
+            // The replacement spans the union of the old surface and the request
+            SurfaceParams new_params = *surface;
+            new_params.addr = std::min(aligned_params.addr, surface->addr);
+            new_params.end = std::max(aligned_params.end, surface->end);
+            new_params.size = new_params.end - new_params.addr;
+            new_params.height = static_cast<u32>(
+                new_params.size / aligned_params.BytesInPixels(aligned_params.stride));
+            ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0);
+
+            Surface new_surface = CreateSurface(new_params);
+            DuplicateSurface(surface, new_surface);
+
+            // Delete the expanded surface, this can't be done safely yet
+            // because it may still be in use
+            remove_surfaces.emplace(surface);
+
+            surface = new_surface;
+            RegisterSurface(new_surface);
+        }
+    }
+
+    // No subrect found - create and return a new surface
+    if (surface == nullptr) {
+        SurfaceParams new_params = aligned_params;
+        // Can't have gaps in a surface
+        new_params.width = aligned_params.stride;
+        new_params.UpdateParams();
+        // GetSurface will create the new surface and possibly adjust res_scale if necessary
+        surface = GetSurface(new_params, match_res_scale, load_if_create);
+
+        // GetSurface returns nullptr when the rebuilt parameters have a zero area (e.g. the
+        // request had stride == 0); report "no surface" instead of dereferencing it below.
+        if (surface == nullptr) {
+            return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{});
+        }
+    } else if (load_if_create) {
+        ValidateSurface(surface, aligned_params.addr, aligned_params.size);
+    }
+
+    return std::make_tuple(surface, surface->GetScaledSubRect(params));
+}
+
+/// Not implemented: trips an "Unimplemented" assertion and returns a null surface.
+/// `config` is currently unused.
+Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) {
+    ASSERT_MSG(false, "Unimplemented");
+    return Surface{};
+}
+
+/// Not implemented: invokes UNIMPLEMENTED() and returns a value-initialised tuple.
+/// None of the parameters are currently used.
+SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
+    bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport_rect) {
+    UNIMPLEMENTED();
+    return SurfaceSurfaceRect_Tuple{};
+}
+
+/// Not implemented: trips an "Unimplemented" assertion and returns a null surface.
+/// `config` is currently unused.
+Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) {
+    ASSERT_MSG(false, "Unimplemented");
+    return Surface{};
+}
+
+/// Looks up a cached surface that can serve a display-transfer "texture copy" described by
+/// `params`, validates it for the requested range, and returns it with the scaled rectangle
+/// the copy covers.
+/// @return (nullptr, empty rectangle) when no surface matches.
+SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) {
+    Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>(
+        surface_cache, params, ScaleMatch::Ignore);
+
+    if (match_surface == nullptr) {
+        return std::make_tuple(match_surface, MathUtil::Rectangle<u32>{});
+    }
+
+    ValidateSurface(match_surface, params.addr, params.size);
+
+    SurfaceParams match_subrect;
+    if (params.width == params.stride) {
+        // Contiguous copy: the interval maps directly onto a rectangle of the surface
+        match_subrect = match_surface->FromInterval(params.GetInterval());
+        ASSERT(match_subrect.GetInterval() == params.GetInterval());
+    } else {
+        // Gapped copy: width/stride are converted from bytes to pixels of the matched
+        // surface, accounting for 8-row tiles when the surface is tiled
+        const u32 tiled_size = match_surface->is_tiled ? 8 : 1;
+        match_subrect = params;
+        match_subrect.width =
+            static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size);
+        match_subrect.stride =
+            static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size);
+        match_subrect.height *= tiled_size;
+    }
+
+    return std::make_tuple(match_surface, match_surface->GetScaledSubRect(match_subrect));
+}
+
+/// Copies the contents and bookkeeping of `src_surface` into `dest_surface`, which must fully
+/// contain it: the texture is blitted, the validity information is carried over, and every
+/// dirty region owned by the source is reassigned to the destination.
+void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
+                                             const Surface& dest_surface) {
+    ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end);
+
+    BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface,
+                 dest_surface->GetScaledSubRect(*src_surface));
+
+    // The source's range becomes valid in the destination, except where the source itself
+    // was invalid.
+    const auto src_interval = src_surface->GetInterval();
+    dest_surface->invalid_regions -= src_interval;
+    dest_surface->invalid_regions += src_surface->invalid_regions;
+
+    // Collect first, reassign afterwards: dirty_regions must not change while it is iterated.
+    SurfaceRegions owned_by_src;
+    for (auto& dirty_entry : RangeFromInterval(dirty_regions, src_surface->GetInterval())) {
+        if (dirty_entry.second != src_surface) {
+            continue;
+        }
+        owned_by_src += dirty_entry.first;
+    }
+    for (auto& owned_interval : owned_by_src) {
+        dirty_regions.set({owned_interval, dest_surface});
+    }
+}
+
+// Makes the range [addr, addr + size) of `surface` valid. Each invalid piece of the range is
+// filled, in order of preference, by copying from another cached surface, by reinterpreting a
+// D24S8 surface (only when `surface` is RGBA8), or by loading from memory. The loop runs until
+// no invalid region intersects the requested range.
+void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) {
+ if (size == 0)
+ return;
+
+ const SurfaceInterval validate_interval(addr, addr + size);
+
+ if (surface->type == SurfaceType::Fill) {
+ // Sanity check, fill surfaces will always be valid when used
+ ASSERT(surface->IsRegionValid(validate_interval));
+ return;
+ }
+
+ while (true) {
+ // Pick an invalid region that intersects the requested range; stop when none is left
+ const auto it = surface->invalid_regions.find(validate_interval);
+ if (it == surface->invalid_regions.end())
+ break;
+
+ const auto interval = *it & validate_interval;
+ // Look for a valid surface to copy from
+ SurfaceParams params = surface->FromInterval(interval);
+
+ Surface copy_surface =
+ FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
+ if (copy_surface != nullptr) {
+ // Only the copyable part is validated here; any remainder is handled by a later
+ // iteration
+ SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface);
+ CopySurface(copy_surface, surface, copy_interval);
+ surface->invalid_regions.erase(copy_interval);
+ continue;
+ }
+
+ // D24S8 to RGBA8
+ if (surface->pixel_format == PixelFormat::RGBA8) {
+ // Repeat the search pretending the request is D24S8
+ params.pixel_format = PixelFormat::D24S8;
+ Surface reinterpret_surface =
+ FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
+ if (reinterpret_surface != nullptr) {
+ ASSERT(reinterpret_surface->pixel_format == PixelFormat::D24S8);
+
+ SurfaceInterval convert_interval = params.GetCopyableInterval(reinterpret_surface);
+ SurfaceParams convert_params = surface->FromInterval(convert_interval);
+ auto src_rect = reinterpret_surface->GetScaledSubRect(convert_params);
+ auto dest_rect = surface->GetScaledSubRect(convert_params);
+
+ ConvertD24S8toABGR(reinterpret_surface->texture.handle, src_rect,
+ surface->texture.handle, dest_rect);
+
+ surface->invalid_regions.erase(convert_interval);
+ continue;
+ }
+ }
+
+ // Load data from guest memory
+ // (other surfaces' dirty data for this range is flushed to memory first)
+ FlushRegion(params.addr, params.size);
+ surface->LoadGLBuffer(params.addr, params.end);
+ surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
+ draw_framebuffer.handle);
+ surface->invalid_regions.erase(params.GetInterval());
+ }
+}
+
+/// Writes dirty surface data overlapping [addr, addr + size) back to memory and clears the
+/// corresponding dirty regions. When `flush_surface` is non-null, only regions owned by that
+/// surface are flushed.
+void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) {
+    if (size == 0) {
+        return;
+    }
+
+    const SurfaceInterval flush_interval(addr, addr + size);
+    SurfaceRegions flushed_intervals;
+
+    for (auto& dirty_entry : RangeFromInterval(dirty_regions, flush_interval)) {
+        auto& owner = dirty_entry.second;
+        if (flush_surface != nullptr && owner != flush_surface) {
+            continue;
+        }
+
+        // small sizes imply that this most likely comes from the cpu, flush the entire region
+        // the point is to avoid thousands of small writes every frame if the cpu decides to
+        // access that region, anything higher than 8 you're guaranteed it comes from a service
+        const auto interval = size <= 8 ? dirty_entry.first : dirty_entry.first & flush_interval;
+
+        // Sanity check, this surface is the last one that marked this region dirty
+        ASSERT(owner->IsRegionValid(interval));
+
+        // Fill surfaces have no texture to read back; everything else is downloaded first.
+        if (owner->type != SurfaceType::Fill) {
+            const SurfaceParams params = owner->FromInterval(interval);
+            owner->DownloadGLTexture(owner->GetSubRect(params), read_framebuffer.handle,
+                                     draw_framebuffer.handle);
+        }
+        owner->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval));
+        flushed_intervals += interval;
+    }
+
+    // Reset dirty regions
+    dirty_regions -= flushed_intervals;
+}
+
+/// Flushes every dirty region, i.e. the range starting at address 0 with a length of
+/// Kernel::VMManager::MAX_ADDRESS.
+void RasterizerCacheOpenGL::FlushAll() {
+    const VAddr range_start = 0;
+    const u64 range_size = Kernel::VMManager::MAX_ADDRESS;
+    FlushRegion(range_start, range_size);
+}
+
+// Marks [addr, addr + size) as invalid in every cached surface except `region_owner`, which
+// (when non-null) becomes the owner of the now-dirty region. With a null `region_owner` the
+// dirty bookkeeping for the range is dropped instead. Surfaces queued in `remove_surfaces`
+// (here or earlier) are unregistered at the end.
+void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) {
+ if (size == 0)
+ return;
+
+ const SurfaceInterval invalid_interval(addr, addr + size);
+
+ if (region_owner != nullptr) {
+ ASSERT(region_owner->type != SurfaceType::Texture);
+ ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
+ // Surfaces can't have a gap
+ ASSERT(region_owner->width == region_owner->stride);
+ // The owner holds the up-to-date data, so the range is valid for it
+ region_owner->invalid_regions.erase(invalid_interval);
+ }
+
+ for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) {
+ for (auto& cached_surface : pair.second) {
+ if (cached_surface == region_owner)
+ continue;
+
+ // If cpu is invalidating this region we want to remove it
+ // to (likely) mark the memory pages as uncached
+ if (region_owner == nullptr && size <= 8) {
+ // Write the surface's pending data back before it is dropped
+ FlushRegion(cached_surface->addr, cached_surface->size, cached_surface);
+ remove_surfaces.emplace(cached_surface);
+ continue;
+ }
+
+ // Invalidate only the part of the range that lies inside this surface
+ const auto interval = cached_surface->GetInterval() & invalid_interval;
+ cached_surface->invalid_regions.insert(interval);
+
+ // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures
+ if (cached_surface->type == SurfaceType::Fill &&
+ cached_surface->IsSurfaceFullyInvalid()) {
+ remove_surfaces.emplace(cached_surface);
+ }
+ }
+ }
+
+ if (region_owner != nullptr)
+ dirty_regions.set({invalid_interval, region_owner});
+ else
+ dirty_regions.erase(invalid_interval);
+
+ for (auto& remove_surface : remove_surfaces) {
+ if (remove_surface == region_owner) {
+ // The owner itself is queued for removal: find a surface that encompasses it and
+ // hand its contents over, provided that surface is valid at least everywhere the
+ // owner is; otherwise the owner is kept
+ Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(
+ surface_cache, *region_owner, ScaleMatch::Ignore);
+ ASSERT(expanded_surface);
+
+ if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) {
+ DuplicateSurface(region_owner, expanded_surface);
+ } else {
+ continue;
+ }
+ }
+ UnregisterSurface(remove_surface);
+ }
+
+ remove_surfaces.clear();
+}
+
+/// Creates a new, unregistered surface described by `params`. The surface gets a freshly
+/// allocated OpenGL texture of its scaled dimensions, no CPU-side buffer yet, and its whole
+/// interval marked invalid.
+Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
+    Surface created = std::make_shared<CachedSurface>();
+
+    // Copy the parameters into the SurfaceParams base of the new surface.
+    SurfaceParams& created_params = *created;
+    created_params = params;
+
+    created->texture.Create();
+
+    created->gl_buffer_size = 0;
+    created->invalid_regions.insert(created->GetInterval());
+
+    const auto& format_tuple = GetFormatTuple(created->pixel_format);
+    AllocateSurfaceTexture(created->texture.handle, format_tuple, created->GetScaledWidth(),
+                           created->GetScaledHeight());
+
+    return created;
+}
+
+/// Adds `surface` to the cache and bumps the cached-page count for its range.
+/// Does nothing if the surface is already registered.
+void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
+    if (!surface->registered) {
+        surface->registered = true;
+        surface_cache.add({surface->GetInterval(), SurfaceSet{surface}});
+        UpdatePagesCachedCount(surface->addr, surface->size, 1);
+    }
+}
+
+/// Removes `surface` from the cache and lowers the cached-page count for its range.
+/// Does nothing if the surface is not currently registered.
+void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
+    if (surface->registered) {
+        surface->registered = false;
+        UpdatePagesCachedCount(surface->addr, surface->size, -1);
+        surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}});
+    }
+}
+
+// Placeholder: currently does nothing. RegisterSurface and UnregisterSurface call it with a
+// delta of +1 / -1 for the surface's address range.
+void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+ // Left as a silent no-op; the "Unimplemented" assertion below is intentionally disabled
+ // ASSERT_MSG(false, "Unimplemented");
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
new file mode 100644
index 000000000..14f3cdc38
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -0,0 +1,360 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cassert>
+#include <climits>
+#include <memory>
+#include <set>
+#include <tuple>
+#include <type_traits>
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
+#endif
+#include <boost/icl/interval_map.hpp>
+#include <boost/icl/interval_set.hpp>
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#include <glad/glad.h>
+#include "common/assert.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/math_util.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+struct CachedSurface;
+// Shared-ownership handle to a cached surface, and an ordered set of such handles
+using Surface = std::shared_ptr<CachedSurface>;
+using SurfaceSet = std::set<Surface>;
+
+// Interval containers keyed by VAddr: a plain set of address ranges, a map from address range
+// to a single surface, and a map from address range to the set of surfaces covering it
+using SurfaceRegions = boost::icl::interval_set<VAddr>;
+using SurfaceMap = boost::icl::interval_map<VAddr, Surface>;
+using SurfaceCache = boost::icl::interval_map<VAddr, SurfaceSet>;
+
+// One interval type is used for all three containers; the static_assert enforces that they agree
+using SurfaceInterval = SurfaceCache::interval_type;
+static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
+ std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(),
+ "incorrect interval types");
+
+// Return types bundling one or two surfaces with a rectangle
+using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;
+using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
+
+// NOTE(review): keyed by u32 while the other containers use VAddr; presumably the key is a page
+// index rather than an address - confirm once UpdatePagesCachedCount is implemented
+using PageMap = boost::icl::interval_map<u32, int>;
+
+/// Policy for how a candidate surface's resolution scale must relate to the requested params
+/// (passed as match_res_scale to the surface lookup functions)
+enum class ScaleMatch {
+ Exact, // only accept same res scale
+ Upscale, // only allow higher scale than params
+ Ignore // accept every scaled res
+};
+
+/// Describes where a surface lives in memory (addr/end/size), its dimensions, stride, resolution
+/// scale, tiling flag and pixel format. Carries no OpenGL state of its own.
+struct SurfaceParams {
+    enum class PixelFormat {
+        // First 5 formats are shared between textures and color buffers
+        RGBA8 = 0,
+        RGB8 = 1,
+        RGB5A1 = 2,
+        RGB565 = 3,
+        RGBA4 = 4,
+
+        // Texture-only formats
+        IA8 = 5,
+        RG8 = 6,
+        I8 = 7,
+        A8 = 8,
+        IA4 = 9,
+        I4 = 10,
+        A4 = 11,
+        ETC1 = 12,
+        ETC1A4 = 13,
+
+        // Depth buffer-only formats
+        D16 = 14,
+        // gap
+        D24 = 16,
+        D24S8 = 17,
+
+        Invalid = 255,
+    };
+
+    enum class SurfaceType {
+        Color = 0,
+        Texture = 1,
+        Depth = 2,
+        DepthStencil = 3,
+        Fill = 4,
+        Invalid = 5
+    };
+
+    /// Returns the bits per pixel of the given format. Yields 0 for the unused slot 15 and,
+    /// when the assert is compiled out, for any value outside the table (such as
+    /// PixelFormat::Invalid) instead of reading past the end of the table.
+    static constexpr unsigned int GetFormatBpp(PixelFormat format) {
+        constexpr std::array<unsigned int, 18> bpp_table = {
+            32, // RGBA8
+            24, // RGB8
+            16, // RGB5A1
+            16, // RGB565
+            16, // RGBA4
+            16, // IA8
+            16, // RG8
+            8,  // I8
+            8,  // A8
+            8,  // IA4
+            4,  // I4
+            4,  // A4
+            4,  // ETC1
+            8,  // ETC1A4
+            16, // D16
+            0,
+            24, // D24
+            32, // D24S8
+        };
+
+        const auto index = static_cast<size_t>(format);
+        assert(index < bpp_table.size());
+        // assert() disappears under NDEBUG, so keep the bounds check on the actual access too
+        return index < bpp_table.size() ? bpp_table[index] : 0;
+    }
+    /// Bits per pixel of this surface's own pixel_format
+    unsigned int GetFormatBpp() const {
+        return GetFormatBpp(pixel_format);
+    }
+
+    /// Maps a framebuffer pixel format to the cache's PixelFormat. Only ABGR8 is handled; any
+    /// other value hits UNREACHABLE().
+    static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
+        switch (format) {
+        case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+            return PixelFormat::RGBA8;
+        default:
+            UNREACHABLE();
+        }
+        // Only reached if UNREACHABLE() returns; avoids flowing off the end of a non-void function
+        return PixelFormat::Invalid;
+    }
+
+    /// Returns true when both formats belong to compatible type groups: color/texture with
+    /// color/texture, depth with depth, or depth-stencil with depth-stencil.
+    static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
+        const SurfaceType a_type = GetFormatType(pixel_format_a);
+        const SurfaceType b_type = GetFormatType(pixel_format_b);
+
+        if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
+            (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
+            return true;
+        }
+
+        if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
+            return true;
+        }
+
+        if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
+            return true;
+        }
+
+        return false;
+    }
+
+    /// Classifies a pixel format by its numeric value: 0-4 are Color, 5-13 are Texture, D16/D24
+    /// are Depth, D24S8 is DepthStencil and everything else is Invalid.
+    static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) {
+        if (static_cast<unsigned int>(pixel_format) < 5) {
+            return SurfaceType::Color;
+        }
+
+        if (static_cast<unsigned int>(pixel_format) < 14) {
+            return SurfaceType::Texture;
+        }
+
+        if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
+            return SurfaceType::Depth;
+        }
+
+        if (pixel_format == PixelFormat::D24S8) {
+            return SurfaceType::DepthStencil;
+        }
+
+        return SurfaceType::Invalid;
+    }
+
+    /// Update the params "size", "end" and "type" from the already set "addr", "width", "height"
+    /// and "pixel_format". A stride of 0 is replaced by the width.
+    void UpdateParams() {
+        if (stride == 0) {
+            stride = width;
+        }
+        type = GetFormatType(pixel_format);
+        // NOTE(review): the pixel counts below are computed in u32 and wrap around when height is
+        // 0 (linear) or below 8 (tiled) - confirm callers never pass such dimensions
+        size = !is_tiled ? BytesInPixels(stride * (height - 1) + width)
+                         : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8);
+        end = addr + size;
+    }
+
+    /// Half-open address interval [addr, end) covered by this surface
+    SurfaceInterval GetInterval() const {
+        return SurfaceInterval::right_open(addr, end);
+    }
+
+    // Returns the outer rectangle containing "interval"
+    SurfaceParams FromInterval(SurfaceInterval interval) const;
+
+    SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const;
+
+    // Returns the region of the biggest valid rectangle within interval
+    SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
+
+    /// Width multiplied by the resolution scale
+    u32 GetScaledWidth() const {
+        return width * res_scale;
+    }
+
+    /// Height multiplied by the resolution scale
+    u32 GetScaledHeight() const {
+        return height * res_scale;
+    }
+
+    /// Unscaled rectangle built as {0, height, width, 0}
+    MathUtil::Rectangle<u32> GetRect() const {
+        return {0, height, width, 0};
+    }
+
+    /// Scaled rectangle built as {0, scaled height, scaled width, 0}
+    MathUtil::Rectangle<u32> GetScaledRect() const {
+        return {0, GetScaledHeight(), GetScaledWidth(), 0};
+    }
+
+    /// Converts a byte count into a pixel count for this surface's format. Returns 0 when the
+    /// format has no bits per pixel, rather than dividing by zero.
+    u64 PixelsInBytes(u64 size) const {
+        const unsigned int bpp = GetFormatBpp(pixel_format);
+        return bpp != 0 ? size * CHAR_BIT / bpp : 0;
+    }
+
+    /// Converts a pixel count into a byte count for this surface's format
+    u64 BytesInPixels(u64 pixels) const {
+        return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
+    }
+
+    bool ExactMatch(const SurfaceParams& other_surface) const;
+    bool CanSubRect(const SurfaceParams& sub_surface) const;
+    bool CanExpand(const SurfaceParams& expanded_surface) const;
+    bool CanTexCopy(const SurfaceParams& texcopy_params) const;
+
+    MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
+    MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
+
+    VAddr addr = 0;
+    VAddr end = 0;
+    u64 size = 0;
+
+    u32 width = 0;
+    u32 height = 0;
+    u32 stride = 0;
+    u16 res_scale = 1;
+
+    bool is_tiled = false;
+    PixelFormat pixel_format = PixelFormat::Invalid;
+    SurfaceType type = SurfaceType::Invalid;
+};
+
+/// A SurfaceParams together with the state the cache keeps for it: registration flag, invalid
+/// address ranges, fill data, the OpenGL texture and a CPU-side buffer (gl_buffer).
+struct CachedSurface : SurfaceParams {
+    bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
+    bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const;
+
+    /// True when looking up interval in invalid_regions finds nothing
+    bool IsRegionValid(SurfaceInterval interval) const {
+        return (invalid_regions.find(interval) == invalid_regions.end());
+    }
+
+    /// True when the invalid regions, clipped to this surface's interval, cover all of it
+    bool IsSurfaceFullyInvalid() const {
+        return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval());
+    }
+
+    bool registered = false;
+    SurfaceRegions invalid_regions;
+
+    u64 fill_size = 0; /// Number of bytes to read from fill_data
+    // Value-initialized so the bytes are never read while indeterminate
+    std::array<u8, 4> fill_data{};
+
+    OGLTexture texture;
+
+    /// Bytes per pixel used for the GL-side buffer: 0 for Invalid, 4 for D24 and for every
+    /// texture-only format, otherwise the format's bits per pixel divided by 8.
+    static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) {
+        // OpenGL needs 4 bytes per pixel for D24 since GL_UNSIGNED_INT is used as the type
+        return format == PixelFormat::Invalid
+                   ? 0
+                   : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture)
+                         ? 4
+                         : SurfaceParams::GetFormatBpp(format) / 8;
+    }
+
+    std::unique_ptr<u8[]> gl_buffer;
+    size_t gl_buffer_size = 0;
+
+    // Read/Write data in Switch memory to/from gl_buffer
+    void LoadGLBuffer(VAddr load_start, VAddr load_end);
+    void FlushGLBuffer(VAddr flush_start, VAddr flush_end);
+
+    // Upload/Download data in gl_buffer in/to this surface's texture
+    void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
+                         GLuint draw_fb_handle);
+    void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
+                           GLuint draw_fb_handle);
+};
+
+/// Cache of surfaces backed by OpenGL textures. Keeps the registered surfaces per address range
+/// (surface_cache), the ranges marked dirty together with their owning surface (dirty_regions)
+/// and the surfaces queued for removal (remove_surfaces).
+class RasterizerCacheOpenGL : NonCopyable {
+public:
+ RasterizerCacheOpenGL();
+ ~RasterizerCacheOpenGL();
+
+ /// Blit one surface's texture to another
+ bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect,
+ const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect);
+
+ /// Convert a D24S8 source texture region into an ABGR destination texture region
+ void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect,
+ GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect);
+
+ /// Copy one surface's region to another
+ void CopySurface(const Surface& src_surface, const Surface& dst_surface,
+ SurfaceInterval copy_interval);
+
+ /// Load a texture from guest memory to OpenGL and cache it (if not already cached)
+ Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
+ bool load_if_create);
+
+ /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
+ /// guest memory to OpenGL and caches it (if not already cached)
+ SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
+ bool load_if_create);
+
+ /// Get a surface based on the texture configuration
+ /// NOTE(review): config is an untyped pointer here; presumably a placeholder - confirm
+ Surface GetTextureSurface(const void* config);
+
+ /// Get the color and depth surfaces based on the framebuffer configuration
+ SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
+ const MathUtil::Rectangle<s32>& viewport_rect);
+
+ /// Get a surface that matches the fill config
+ /// NOTE(review): config is an untyped pointer here; presumably a placeholder - confirm
+ Surface GetFillSurface(const void* config);
+
+ /// Get a surface that matches a "texture copy" display transfer config
+ SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
+
+ /// Write any cached resources overlapping the region back to memory (if dirty)
+ void FlushRegion(VAddr addr, u64 size, Surface flush_surface = nullptr);
+
+ /// Mark region as being invalidated by region_owner (nullptr if guest memory)
+ void InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner);
+
+ /// Flush all cached resources tracked by this cache manager
+ void FlushAll();
+
+private:
+ /// Duplicate src_surface into dest_surface
+ void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
+
+ /// Update surface's texture for given region when necessary
+ void ValidateSurface(const Surface& surface, VAddr addr, u64 size);
+
+ /// Create a new surface
+ Surface CreateSurface(const SurfaceParams& params);
+
+ /// Register surface into the cache
+ void RegisterSurface(const Surface& surface);
+
+ /// Remove surface from the cache
+ void UnregisterSurface(const Surface& surface);
+
+ /// Increase/decrease the number of surface in pages touching the specified region
+ void UpdatePagesCachedCount(VAddr addr, u64 size, int delta);
+
+ SurfaceCache surface_cache;
+ PageMap cached_pages;
+ SurfaceMap dirty_regions;
+ SurfaceSet remove_surfaces;
+
+ OGLFramebuffer read_framebuffer;
+ OGLFramebuffer draw_framebuffer;
+
+ // Resources for the D24S8 -> ABGR conversion
+ // NOTE(review): the scalar members below have no in-class initializer; presumably the
+ // constructor sets them - confirm
+ OGLVertexArray attributeless_vao;
+ OGLBuffer d24s8_abgr_buffer;
+ GLsizeiptr d24s8_abgr_buffer_size;
+ OGLShader d24s8_abgr_shader;
+ GLint d24s8_abgr_tbo_size_u_id;
+ GLint d24s8_abgr_viewport_u_id;
+};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 13301ec9f..7da5e74d1 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -36,7 +36,7 @@ public:
if (handle == 0)
return;
glDeleteTextures(1, &handle);
- OpenGLState::ResetTexture(handle);
+ OpenGLState::GetCurState().ResetTexture(handle).Apply();
handle = 0;
}
@@ -69,7 +69,7 @@ public:
if (handle == 0)
return;
glDeleteSamplers(1, &handle);
- OpenGLState::ResetSampler(handle);
+ OpenGLState::GetCurState().ResetSampler(handle).Apply();
handle = 0;
}
@@ -91,10 +91,13 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
- void Create(const char* vert_shader, const char* frag_shader) {
+ void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader,
+ const std::vector<const char*>& feedback_vars = {},
+ bool separable_program = false) {
if (handle != 0)
return;
- handle = GLShader::LoadProgram(vert_shader, frag_shader);
+ handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars,
+ separable_program);
}
/// Deletes the internal OpenGL resource
@@ -102,7 +105,40 @@ public:
if (handle == 0)
return;
glDeleteProgram(handle);
- OpenGLState::ResetProgram(handle);
+ OpenGLState::GetCurState().ResetProgram(handle).Apply();
+ handle = 0;
+ }
+
+ GLuint handle = 0;
+};
+
+class OGLPipeline : private NonCopyable {
+public:
+ OGLPipeline() = default;
+ /// Move constructor: takes over o's handle and leaves o holding 0
+ OGLPipeline(OGLPipeline&& o) {
+ handle = std::exchange<GLuint>(o.handle, 0);
+ }
+ /// Releases the held pipeline (Release() is a no-op when handle is 0)
+ ~OGLPipeline() {
+ Release();
+ }
+    /// Move assignment: releases the pipeline this object currently holds (so it is not leaked),
+    /// then takes over o's handle and leaves o holding 0. Self-assignment is a no-op.
+    OGLPipeline& operator=(OGLPipeline&& o) {
+        if (this != &o) {
+            Release();
+            handle = std::exchange<GLuint>(o.handle, 0);
+        }
+        return *this;
+    }
+
+    /// Generates a program pipeline object and stores its handle, unless one is already held
+    void Create() {
+        if (handle == 0) {
+            glGenProgramPipelines(1, &handle);
+        }
+    }
+
+ /// Deletes the internal OpenGL resource
+ void Release() {
+ if (handle == 0)
+ return;
+ glDeleteProgramPipelines(1, &handle);
+ OpenGLState::GetCurState().ResetPipeline(handle).Apply();
handle = 0;
}
@@ -135,13 +171,46 @@ public:
if (handle == 0)
return;
glDeleteBuffers(1, &handle);
- OpenGLState::ResetBuffer(handle);
+ OpenGLState::GetCurState().ResetBuffer(handle).Apply();
handle = 0;
}
GLuint handle = 0;
};
+/// Move-only owner of an OpenGL fence sync object. The fence is deleted on Release() and on
+/// destruction; a null handle means no fence is held.
+class OGLSync : private NonCopyable {
+public:
+    OGLSync() = default;
+
+    /// Takes over o's fence and leaves o empty
+    OGLSync(OGLSync&& o) : handle(std::exchange(o.handle, nullptr)) {}
+
+    ~OGLSync() {
+        Release();
+    }
+    /// Deletes the fence currently held, then takes over o's fence and leaves o empty
+    OGLSync& operator=(OGLSync&& o) {
+        // Without this guard a self-move would delete our own fence before "taking it over"
+        if (this != &o) {
+            Release();
+            handle = std::exchange(o.handle, nullptr);
+        }
+        return *this;
+    }
+
+    /// Creates a new internal OpenGL resource and stores the handle
+    void Create() {
+        if (handle != nullptr)
+            return;
+        handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+    }
+
+    /// Deletes the internal OpenGL resource
+    void Release() {
+        if (handle == nullptr)
+            return;
+        glDeleteSync(handle);
+        handle = nullptr;
+    }
+
+    // GLsync is a pointer type, so the empty state is nullptr
+    GLsync handle = nullptr;
+};
+
class OGLVertexArray : private NonCopyable {
public:
OGLVertexArray() = default;
@@ -168,7 +237,7 @@ public:
if (handle == 0)
return;
glDeleteVertexArrays(1, &handle);
- OpenGLState::ResetVertexArray(handle);
+ OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
handle = 0;
}
@@ -201,7 +270,7 @@ public:
if (handle == 0)
return;
glDeleteFramebuffers(1, &handle);
- OpenGLState::ResetFramebuffer(handle);
+ OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
handle = 0;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
new file mode 100644
index 000000000..0e0ef18cc
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -0,0 +1,58 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+#include <queue>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+
+namespace Maxwell3D {
+namespace Shader {
+namespace Decompiler {
+
+constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
+
+/// Bundles the arguments of one DecompileProgram call. Every array, std::function and
+/// std::string member is stored as a reference to the constructor argument, so an Impl must not
+/// outlive the objects it was constructed from.
+class Impl {
+public:
+ Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
+ const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset,
+ const std::function<std::string(u32)>& inputreg_getter,
+ const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul,
+ const std::string& emit_cb, const std::string& setemit_cb)
+ : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset),
+ inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter),
+ sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {}
+
+ /// Not implemented yet: hits UNIMPLEMENTED() and returns an empty string
+ std::string Decompile() {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+private:
+ // None of these members is read by the current stub
+ const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code;
+ const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data;
+ u32 main_offset;
+ const std::function<std::string(u32)>& inputreg_getter;
+ const std::function<std::string(u32)>& outputreg_getter;
+ bool sanitize_mul;
+ const std::string& emit_cb;
+ const std::string& setemit_cb;
+};
+
+/// Public entry point of the decompiler: forwards every argument to a temporary Impl and
+/// returns whatever Impl::Decompile() yields.
+std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
+                             const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data,
+                             u32 main_offset,
+                             const std::function<std::string(u32)>& inputreg_getter,
+                             const std::function<std::string(u32)>& outputreg_getter,
+                             bool sanitize_mul, const std::string& emit_cb,
+                             const std::string& setemit_cb) {
+    return Impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter,
+                sanitize_mul, emit_cb, setemit_cb)
+        .Decompile();
+}
+
+} // namespace Decompiler
+} // namespace Shader
+} // namespace Maxwell3D
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
new file mode 100644
index 000000000..02ebfcbe8
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -0,0 +1,27 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <functional>
+#include <string>
+#include "common/common_types.h"
+
+namespace Maxwell3D {
+namespace Shader {
+namespace Decompiler {
+
+/// Number of u32 elements in the program_code array accepted by DecompileProgram
+constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000};
+/// Number of u32 elements in the swizzle_data array accepted by DecompileProgram
+constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000};
+
+/**
+ * Decompiles a shader program and returns the result as a string.
+ * @param program_code Array of program code words
+ * @param swizzle_data Array of swizzle data words
+ * @param main_offset Offset of the program's entry point (presumably an index into
+ *                    program_code - confirm)
+ * @param inputreg_getter Callback mapping a u32 to a string for an input register
+ * @param outputreg_getter Callback mapping a u32 to a string for an output register
+ * @param sanitize_mul Flag forwarded to the decompiler implementation
+ * @param emit_cb Optional string, empty by default
+ * @param setemit_cb Optional string, empty by default
+ * @returns The decompiled program text
+ */
+std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
+                             const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data,
+                             u32 main_offset,
+                             const std::function<std::string(u32)>& inputreg_getter,
+                             const std::function<std::string(u32)>& outputreg_getter,
+                             bool sanitize_mul, const std::string& emit_cb = "",
+                             const std::string& setemit_cb = "");
+
+} // namespace Decompiler
+} // namespace Shader
+} // namespace Maxwell3D
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
new file mode 100644
index 000000000..f242bce1d
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -0,0 +1,20 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+
+namespace GLShader {
+
+/// Not implemented yet: hits UNIMPLEMENTED() and returns an empty string; config is unused
+std::string GenerateVertexShader(const MaxwellVSConfig& config) {
+ UNIMPLEMENTED();
+ return {};
+}
+
+/// Not implemented yet: hits UNIMPLEMENTED() and returns an empty string; config is unused
+std::string GenerateFragmentShader(const MaxwellFSConfig& config) {
+ UNIMPLEMENTED();
+ return {};
+}
+
+} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
new file mode 100644
index 000000000..5101e7d30
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -0,0 +1,66 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstring>
+#include <string>
+#include <type_traits>
+#include "common/hash.h"
+
+namespace GLShader {
+
+/// Vertex attribute identifiers; the enumerators take consecutive values starting at 0
+enum Attributes {
+ ATTRIBUTE_POSITION,
+ ATTRIBUTE_COLOR,
+ ATTRIBUTE_TEXCOORD0,
+ ATTRIBUTE_TEXCOORD1,
+ ATTRIBUTE_TEXCOORD2,
+ ATTRIBUTE_TEXCOORD0_W,
+ ATTRIBUTE_NORMQUAT,
+ ATTRIBUTE_VIEW,
+};
+
+/// Common base of the shader configuration structs; currently has no data members
+struct MaxwellShaderConfigCommon {
+ explicit MaxwellShaderConfigCommon(){};
+};
+
+/// Vertex shader configuration; currently has no data members of its own
+struct MaxwellVSConfig : MaxwellShaderConfigCommon {
+ explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {}
+
+ // Compares the raw object bytes.
+ // NOTE(review): the struct has no members, so the byte(s) compared are never written by
+ // the constructors shown here; once fields are added, padding must be zeroed as well for
+ // this comparison to be reliable - confirm
+ bool operator==(const MaxwellVSConfig& o) const {
+ return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0;
+ };
+};
+
+/// Fragment shader configuration; currently has no data members of its own
+struct MaxwellFSConfig : MaxwellShaderConfigCommon {
+ explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {}
+
+ // Compares the raw object bytes; the same caveat as for MaxwellVSConfig::operator== applies
+ bool operator==(const MaxwellFSConfig& o) const {
+ return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0;
+ };
+};
+
+/// Generate the vertex / fragment shader source for the given configuration
+std::string GenerateVertexShader(const MaxwellVSConfig& config);
+std::string GenerateFragmentShader(const MaxwellFSConfig& config);
+
+} // namespace GLShader
+
+// std::hash specializations for the shader configs; both hash the raw object bytes through
+// Common::ComputeHash64, mirroring the memcmp-based operator== of the config structs
+namespace std {
+
+template <>
+struct hash<GLShader::MaxwellVSConfig> {
+ size_t operator()(const GLShader::MaxwellVSConfig& k) const {
+ return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig));
+ }
+};
+
+template <>
+struct hash<GLShader::MaxwellFSConfig> {
+ size_t operator()(const GLShader::MaxwellFSConfig& k) const {
+ return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig));
+ }
+};
+
+} // namespace std
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 4da241d83..a3ba16761 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,53 +10,85 @@
namespace GLShader {
-GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) {
-
+GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
+ const char* fragment_shader, const std::vector<const char*>& feedback_vars,
+ bool separable_program) {
// Create the shaders
- GLuint vertex_shader_id = glCreateShader(GL_VERTEX_SHADER);
- GLuint fragment_shader_id = glCreateShader(GL_FRAGMENT_SHADER);
+ GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0;
+ GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0;
+ GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0;
GLint result = GL_FALSE;
int info_log_length;
- // Compile Vertex Shader
- LOG_DEBUG(Render_OpenGL, "Compiling vertex shader...");
-
- glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr);
- glCompileShader(vertex_shader_id);
-
- // Check Vertex Shader
- glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result);
- glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
- if (info_log_length > 1) {
- std::vector<char> vertex_shader_error(info_log_length);
- glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]);
- } else {
- LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", &vertex_shader_error[0]);
+ if (vertex_shader) {
+ // Compile Vertex Shader
+ LOG_DEBUG(Render_OpenGL, "Compiling vertex shader...");
+
+ glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr);
+ glCompileShader(vertex_shader_id);
+
+ // Check Vertex Shader
+ glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result);
+ glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
+
+ if (info_log_length > 1) {
+ std::vector<char> vertex_shader_error(info_log_length);
+ glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]);
+ if (result == GL_TRUE) {
+ LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]);
+ } else {
+ LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s",
+ &vertex_shader_error[0]);
+ }
}
}
- // Compile Fragment Shader
- LOG_DEBUG(Render_OpenGL, "Compiling fragment shader...");
-
- glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr);
- glCompileShader(fragment_shader_id);
-
- // Check Fragment Shader
- glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result);
- glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
+ if (geometry_shader) {
+ // Compile Geometry Shader
+ LOG_DEBUG(Render_OpenGL, "Compiling geometry shader...");
+
+ glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr);
+ glCompileShader(geometry_shader_id);
+
+ // Check Geometry Shader
+ glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result);
+ glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
+
+ if (info_log_length > 1) {
+ std::vector<char> geometry_shader_error(info_log_length);
+ glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr,
+ &geometry_shader_error[0]);
+ if (result == GL_TRUE) {
+ LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]);
+ } else {
+ LOG_ERROR(Render_OpenGL, "Error compiling geometry shader:\n%s",
+ &geometry_shader_error[0]);
+ }
+ }
+ }
- if (info_log_length > 1) {
- std::vector<char> fragment_shader_error(info_log_length);
- glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, &fragment_shader_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
- } else {
- LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s",
- &fragment_shader_error[0]);
+ if (fragment_shader) {
+ // Compile Fragment Shader
+ LOG_DEBUG(Render_OpenGL, "Compiling fragment shader...");
+
+ glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr);
+ glCompileShader(fragment_shader_id);
+
+ // Check Fragment Shader
+ glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result);
+ glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
+
+ if (info_log_length > 1) {
+ std::vector<char> fragment_shader_error(info_log_length);
+ glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr,
+ &fragment_shader_error[0]);
+ if (result == GL_TRUE) {
+ LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
+ } else {
+ LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s",
+ &fragment_shader_error[0]);
+ }
}
}
@@ -64,8 +96,25 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) {
LOG_DEBUG(Render_OpenGL, "Linking program...");
GLuint program_id = glCreateProgram();
- glAttachShader(program_id, vertex_shader_id);
- glAttachShader(program_id, fragment_shader_id);
+ if (vertex_shader) {
+ glAttachShader(program_id, vertex_shader_id);
+ }
+ if (geometry_shader) {
+ glAttachShader(program_id, geometry_shader_id);
+ }
+ if (fragment_shader) {
+ glAttachShader(program_id, fragment_shader_id);
+ }
+
+ if (!feedback_vars.empty()) {
+ auto varyings = feedback_vars;
+ glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()),
+ &varyings[0], GL_INTERLEAVED_ATTRIBS);
+ }
+
+ if (separable_program) {
+ glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ }
glLinkProgram(program_id);
@@ -85,13 +134,30 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) {
// If the program linking failed at least one of the shaders was probably bad
if (result == GL_FALSE) {
- LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
- LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
+ if (vertex_shader) {
+ LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
+ }
+ if (geometry_shader) {
+ LOG_ERROR(Render_OpenGL, "Geometry shader:\n%s", geometry_shader);
+ }
+ if (fragment_shader) {
+ LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
+ }
}
ASSERT_MSG(result == GL_TRUE, "Shader not linked");
- glDeleteShader(vertex_shader_id);
- glDeleteShader(fragment_shader_id);
+ if (vertex_shader) {
+ glDetachShader(program_id, vertex_shader_id);
+ glDeleteShader(vertex_shader_id);
+ }
+ if (geometry_shader) {
+ glDetachShader(program_id, geometry_shader_id);
+ glDeleteShader(geometry_shader_id);
+ }
+ if (fragment_shader) {
+ glDetachShader(program_id, fragment_shader_id);
+ glDeleteShader(fragment_shader_id);
+ }
return program_id;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index c66e8acd3..fc7b5e080 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -4,6 +4,7 @@
#pragma once
+#include <vector>
#include <glad/glad.h>
namespace GLShader {
@@ -11,9 +12,12 @@ namespace GLShader {
/**
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
* @param vertex_shader String of the GLSL vertex shader program
+ * @param geometry_shader String of the GLSL geometry shader program
* @param fragment_shader String of the GLSL fragment shader program
* @returns Handle of the newly created OpenGL shader object
*/
-GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader);
+GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
+ const char* fragment_shader, const std::vector<const char*>& feedback_vars = {},
+ bool separable_program = false);
-} // namespace
+} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 5770ae08f..1d396728b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -33,7 +33,7 @@ OpenGLState::OpenGLState() {
stencil.action_depth_pass = GL_KEEP;
stencil.action_stencil_fail = GL_KEEP;
- blend.enabled = false;
+ blend.enabled = true;
blend.rgb_equation = GL_FUNC_ADD;
blend.a_equation = GL_FUNC_ADD;
blend.src_rgb_func = GL_ONE;
@@ -68,6 +68,18 @@ OpenGLState::OpenGLState() {
draw.vertex_buffer = 0;
draw.uniform_buffer = 0;
draw.shader_program = 0;
+ draw.program_pipeline = 0;
+
+ scissor.enabled = false;
+ scissor.x = 0;
+ scissor.y = 0;
+ scissor.width = 0;
+ scissor.height = 0;
+
+ viewport.x = 0;
+ viewport.y = 0;
+ viewport.width = 0;
+ viewport.height = 0;
clip_distance = {};
}
@@ -148,9 +160,6 @@ void OpenGLState::Apply() const {
if (blend.enabled != cur_state.blend.enabled) {
if (blend.enabled) {
glEnable(GL_BLEND);
-
- cur_state.logic_op = GL_COPY;
- glLogicOp(cur_state.logic_op);
glDisable(GL_COLOR_LOGIC_OP);
} else {
glDisable(GL_BLEND);
@@ -196,7 +205,7 @@ void OpenGLState::Apply() const {
// Lighting LUTs
if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
glActiveTexture(TextureUnits::LightingLUT.Enum());
- glBindTexture(GL_TEXTURE_BUFFER, cur_state.lighting_lut.texture_buffer);
+ glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer);
}
// Fog LUT
@@ -263,6 +272,31 @@ void OpenGLState::Apply() const {
glUseProgram(draw.shader_program);
}
+ // Program pipeline
+ if (draw.program_pipeline != cur_state.draw.program_pipeline) {
+ glBindProgramPipeline(draw.program_pipeline);
+ }
+
+ // Scissor test
+ if (scissor.enabled != cur_state.scissor.enabled) {
+ if (scissor.enabled) {
+ glEnable(GL_SCISSOR_TEST);
+ } else {
+ glDisable(GL_SCISSOR_TEST);
+ }
+ }
+
+ if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y ||
+ scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) {
+ glScissor(scissor.x, scissor.y, scissor.width, scissor.height);
+ }
+
+ if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y ||
+ viewport.width != cur_state.viewport.width ||
+ viewport.height != cur_state.viewport.height) {
+ glViewport(viewport.x, viewport.y, viewport.width, viewport.height);
+ }
+
// Clip distance
for (size_t i = 0; i < clip_distance.size(); ++i) {
if (clip_distance[i] != cur_state.clip_distance[i]) {
@@ -277,62 +311,75 @@ void OpenGLState::Apply() const {
cur_state = *this;
}
-void OpenGLState::ResetTexture(GLuint handle) {
- for (auto& unit : cur_state.texture_units) {
+/// Clears every reference to the texture `handle` held by this state object: the texture_2d
+/// binding of each texture unit and the texture_buffer of each LUT. No GL call is made here.
+/// Returns *this so calls can be chained.
+OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
+ for (auto& unit : texture_units) {
if (unit.texture_2d == handle) {
unit.texture_2d = 0;
}
}
- if (cur_state.lighting_lut.texture_buffer == handle)
- cur_state.lighting_lut.texture_buffer = 0;
- if (cur_state.fog_lut.texture_buffer == handle)
- cur_state.fog_lut.texture_buffer = 0;
- if (cur_state.proctex_noise_lut.texture_buffer == handle)
- cur_state.proctex_noise_lut.texture_buffer = 0;
- if (cur_state.proctex_color_map.texture_buffer == handle)
- cur_state.proctex_color_map.texture_buffer = 0;
- if (cur_state.proctex_alpha_map.texture_buffer == handle)
- cur_state.proctex_alpha_map.texture_buffer = 0;
- if (cur_state.proctex_lut.texture_buffer == handle)
- cur_state.proctex_lut.texture_buffer = 0;
- if (cur_state.proctex_diff_lut.texture_buffer == handle)
- cur_state.proctex_diff_lut.texture_buffer = 0;
+ // The LUT texture buffers can hold the same handle, so each one is checked as well.
+ if (lighting_lut.texture_buffer == handle)
+ lighting_lut.texture_buffer = 0;
+ if (fog_lut.texture_buffer == handle)
+ fog_lut.texture_buffer = 0;
+ if (proctex_noise_lut.texture_buffer == handle)
+ proctex_noise_lut.texture_buffer = 0;
+ if (proctex_color_map.texture_buffer == handle)
+ proctex_color_map.texture_buffer = 0;
+ if (proctex_alpha_map.texture_buffer == handle)
+ proctex_alpha_map.texture_buffer = 0;
+ if (proctex_lut.texture_buffer == handle)
+ proctex_lut.texture_buffer = 0;
+ if (proctex_diff_lut.texture_buffer == handle)
+ proctex_diff_lut.texture_buffer = 0;
+ return *this;
}
-void OpenGLState::ResetSampler(GLuint handle) {
- for (auto& unit : cur_state.texture_units) {
+/// Sets the sampler binding of every texture unit in this state object that refers to `handle`
+/// back to 0. No GL call is made here. Returns *this so calls can be chained.
+OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
+ for (auto& unit : texture_units) {
if (unit.sampler == handle) {
unit.sampler = 0;
}
}
+ return *this;
+}
+
+/// Sets draw.shader_program of this state object to 0 if it currently refers to `handle`.
+/// No GL call is made here. Returns *this so calls can be chained.
+OpenGLState& OpenGLState::ResetProgram(GLuint handle) {
+ if (draw.shader_program == handle) {
+ draw.shader_program = 0;
+ }
+ return *this;
}
-void OpenGLState::ResetProgram(GLuint handle) {
- if (cur_state.draw.shader_program == handle) {
- cur_state.draw.shader_program = 0;
+/// Sets draw.program_pipeline of this state object to 0 if it currently refers to `handle`.
+/// No GL call is made here. Returns *this so calls can be chained.
+OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
+ if (draw.program_pipeline == handle) {
+ draw.program_pipeline = 0;
}
+ return *this;
}
-void OpenGLState::ResetBuffer(GLuint handle) {
- if (cur_state.draw.vertex_buffer == handle) {
- cur_state.draw.vertex_buffer = 0;
+/// Sets draw.vertex_buffer and draw.uniform_buffer of this state object to 0 where they refer
+/// to `handle`. No GL call is made here. Returns *this so calls can be chained.
+OpenGLState& OpenGLState::ResetBuffer(GLuint handle) {
+ if (draw.vertex_buffer == handle) {
+ draw.vertex_buffer = 0;
}
- if (cur_state.draw.uniform_buffer == handle) {
- cur_state.draw.uniform_buffer = 0;
+ if (draw.uniform_buffer == handle) {
+ draw.uniform_buffer = 0;
}
+ return *this;
}
-void OpenGLState::ResetVertexArray(GLuint handle) {
- if (cur_state.draw.vertex_array == handle) {
- cur_state.draw.vertex_array = 0;
+/// Sets draw.vertex_array of this state object to 0 if it currently refers to `handle`.
+/// No GL call is made here. Returns *this so calls can be chained.
+OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
+ if (draw.vertex_array == handle) {
+ draw.vertex_array = 0;
}
+ return *this;
}
-void OpenGLState::ResetFramebuffer(GLuint handle) {
- if (cur_state.draw.read_framebuffer == handle) {
- cur_state.draw.read_framebuffer = 0;
+/// Sets draw.read_framebuffer and draw.draw_framebuffer of this state object to 0 where they
+/// refer to `handle`. No GL call is made here. Returns *this so calls can be chained.
+OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) {
+ if (draw.read_framebuffer == handle) {
+ draw.read_framebuffer = 0;
}
- if (cur_state.draw.draw_framebuffer == handle) {
- cur_state.draw.draw_framebuffer = 0;
+ if (draw.draw_framebuffer == handle) {
+ draw.draw_framebuffer = 0;
}
+ return *this;
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 437fe34c4..940575dfa 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -122,27 +122,44 @@ public:
GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
GLuint shader_program; // GL_CURRENT_PROGRAM
+ GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
} draw;
+ // Scissor test state. Apply() toggles GL_SCISSOR_TEST from `enabled` and passes
+ // x/y/width/height to glScissor whenever any of them differs from the current state.
+ struct {
+ bool enabled; // GL_SCISSOR_TEST
+ GLint x;
+ GLint y;
+ GLsizei width;
+ GLsizei height;
+ } scissor;
+
+ // Viewport rectangle. Apply() passes x/y/width/height to glViewport whenever any of them
+ // differs from the current state.
+ struct {
+ GLint x;
+ GLint y;
+ GLsizei width;
+ GLsizei height;
+ } viewport;
+
std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE
OpenGLState();
/// Get the currently active OpenGL state
- static const OpenGLState& GetCurState() {
+ static OpenGLState GetCurState() {
return cur_state;
}
/// Apply this state as the current OpenGL state
void Apply() const;
- /// Resets and unbinds any references to the given resource in the current OpenGL state
- static void ResetTexture(GLuint handle);
- static void ResetSampler(GLuint handle);
- static void ResetProgram(GLuint handle);
- static void ResetBuffer(GLuint handle);
- static void ResetVertexArray(GLuint handle);
- static void ResetFramebuffer(GLuint handle);
+ /// Resets any references to the given resource
+ OpenGLState& ResetTexture(GLuint handle);
+ OpenGLState& ResetSampler(GLuint handle);
+ OpenGLState& ResetProgram(GLuint handle);
+ OpenGLState& ResetPipeline(GLuint handle);
+ OpenGLState& ResetBuffer(GLuint handle);
+ OpenGLState& ResetVertexArray(GLuint handle);
+ OpenGLState& ResetFramebuffer(GLuint handle);
private:
static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
new file mode 100644
index 000000000..a2713e9f0
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -0,0 +1,182 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <deque>
+#include <utility>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_stream_buffer.h"
+
+/// Stream buffer backend that stages writes in a CPU-side vector and uploads them with
+/// glBufferSubData. When a mapping does not fit, the GL storage is re-specified with
+/// glBufferData and writing restarts at offset 0.
+class OrphanBuffer : public OGLStreamBuffer {
+public:
+    explicit OrphanBuffer(GLenum target) : OGLStreamBuffer{target} {}
+    ~OrphanBuffer() override;
+
+private:
+    /// CPU-side staging copy; Map() hands out pointers into it and Unmap() uploads from it.
+    std::vector<u8> data;
+
+    void Create(size_t size, size_t sync_subdivide) override;
+    void Release() override;
+
+    std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
+    void Unmap() override;
+};
+
+/// Stream buffer backend built on immutable storage (glBufferStorage) that stays mapped for
+/// the lifetime of the buffer. Sync objects recorded per region keep Map() from handing out
+/// memory that a fenced earlier range may still be using.
+class StorageBuffer : public OGLStreamBuffer {
+public:
+    explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {}
+    ~StorageBuffer() override;
+
+private:
+    void Create(size_t size, size_t sync_subdivide) override;
+    void Release() override;
+
+    std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
+    void Unmap() override;
+
+    /// A sync object together with the buffer offset at which its region starts.
+    struct Fence {
+        OGLSync sync;
+        size_t offset;
+    };
+    /// Fences for regions written since the last wrap-around, in increasing offset order.
+    std::deque<Fence> head;
+    /// Fences carried over from before the last wrap-around; Map() consumes them as the write
+    /// position reaches their offsets.
+    std::deque<Fence> tail;
+
+    /// Base of the persistent mapping. Null until Create() maps the buffer, so a Map() issued
+    /// too early does not read an indeterminate pointer.
+    u8* mapped_ptr = nullptr;
+};
+
+/// Remembers the GL binding target that all later buffer calls of this object use.
+OGLStreamBuffer::OGLStreamBuffer(GLenum target) : gl_target(target) {}
+
+/// Returns the handle stored in gl_buffer. It is 0 until Create() has generated the buffer
+/// (both backends test `gl_buffer.handle == 0` for "not created").
+GLuint OGLStreamBuffer::GetHandle() const {
+ return gl_buffer.handle;
+}
+
+/// Factory for the two backends: a StorageBuffer when `storage_buffer` is true, an OrphanBuffer
+/// otherwise. The returned object has only its target set; Create() must still be called.
+std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) {
+    if (!storage_buffer) {
+        return std::make_unique<OrphanBuffer>(target);
+    }
+    return std::make_unique<StorageBuffer>(target);
+}
+
+/// Releases the GL buffer through Release() when the object is destroyed.
+OrphanBuffer::~OrphanBuffer() {
+ // Inside the destructor this resolves to OrphanBuffer::Release().
+ Release();
+}
+
+/// (Re)allocates `size` bytes of GL_STREAM_DRAW storage and resizes the CPU-side staging vector
+/// to match; the write position restarts at 0. `sync_subdivide` is not used by this backend.
+/// Map() calls this again whenever a request does not fit.
+void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) {
+    buffer_size = size;
+    buffer_pos = 0;
+    data.resize(size);
+
+    const bool needs_gl_object = gl_buffer.handle == 0;
+    if (needs_gl_object) {
+        gl_buffer.Create();
+        glBindBuffer(gl_target, gl_buffer.handle);
+    }
+
+    // NOTE(review): the buffer is bound only on first creation; on later calls this acts on
+    // whatever is bound to gl_target - confirm callers keep this buffer bound.
+    const auto byte_count = static_cast<GLsizeiptr>(buffer_size);
+    glBufferData(gl_target, byte_count, nullptr, GL_STREAM_DRAW);
+}
+
+/// Releases the GL buffer object. The CPU-side staging vector and the size/position
+/// bookkeeping are left untouched.
+void OrphanBuffer::Release() {
+ gl_buffer.Release();
+}
+
+/// Reserves `size` bytes of the staging vector at the next offset aligned to `alignment`.
+/// If the request does not fit in the remaining space, the buffer is re-created (at least as
+/// large as the request) and the reservation starts at offset 0.
+/// Returns the write pointer and the matching byte offset inside the GL buffer. The data is
+/// uploaded by the following Unmap().
+std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) {
+    buffer_pos = Common::AlignUp(buffer_pos, alignment);
+
+    if (buffer_pos + size > buffer_size) {
+        // Create() resets buffer_pos to 0 and resizes `data`.
+        Create(std::max(buffer_size, size), 0);
+    }
+
+    mapped_size = size;
+    // Pointer arithmetic on data() stays valid when buffer_pos == data.size() (zero-sized
+    // request at the very end, or an empty buffer), where operator[] would be out of range.
+    return std::make_pair(data.data() + buffer_pos, static_cast<GLintptr>(buffer_pos));
+}
+
+/// Uploads the range reserved by the last Map() from the staging vector to the GL buffer with
+/// glBufferSubData, then advances the write position past it.
+void OrphanBuffer::Unmap() {
+    // data() + offset instead of &data[offset]: a zero-sized mapping at the end of the buffer
+    // would otherwise index one past the last element.
+    glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos),
+                    static_cast<GLsizeiptr>(mapped_size), data.data() + buffer_pos);
+    buffer_pos += mapped_size;
+}
+
+/// Unmaps and releases the GL buffer through Release() when the object is destroyed.
+StorageBuffer::~StorageBuffer() {
+ // Inside the destructor this resolves to StorageBuffer::Release().
+ Release();
+}
+
+/// Allocates `size` bytes of immutable storage and maps all of it persistently for writing.
+/// Does nothing when the buffer already exists. `sync_subdivide` is the granularity at which
+/// Map() places fences; values below 1 are raised to 1.
+void StorageBuffer::Create(size_t size, size_t sync_subdivide) {
+    const bool already_created = gl_buffer.handle != 0;
+    if (already_created) {
+        return;
+    }
+
+    buffer_pos = 0;
+    buffer_size = size;
+    buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1);
+
+    gl_buffer.Create();
+    glBindBuffer(gl_target, gl_buffer.handle);
+
+    const auto storage_bytes = static_cast<GLsizeiptr>(buffer_size);
+    const GLbitfield storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
+    // Explicit flushing: Unmap() calls glFlushMappedBufferRange for each written range.
+    const GLbitfield map_flags = storage_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
+
+    glBufferStorage(gl_target, storage_bytes, nullptr, storage_flags);
+    mapped_ptr = static_cast<u8*>(glMapBufferRange(gl_target, 0, storage_bytes, map_flags));
+}
+
+/// Unmaps and releases the GL buffer and drops all pending fences. Does nothing when the
+/// buffer was never created.
+void StorageBuffer::Release() {
+    if (gl_buffer.handle == 0)
+        return;
+
+    // NOTE(review): glUnmapBuffer acts on the buffer bound to gl_target - confirm callers
+    // have this buffer bound when releasing.
+    glUnmapBuffer(gl_target);
+    // The mapping is gone; do not keep a pointer into it.
+    mapped_ptr = nullptr;
+
+    gl_buffer.Release();
+    head.clear();
+    tail.clear();
+}
+
+/// Reserves `size` bytes of the persistently mapped buffer at the next offset aligned to
+/// `alignment`, wrapping to offset 0 when the request does not fit in the remaining space.
+/// If the reserved range reaches into a region recorded in `tail`, this blocks on that
+/// region's sync object before returning.
+/// Returns the write pointer and the matching byte offset inside the GL buffer.
+std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) {
+ ASSERT(size <= buffer_size);
+
+ // Collects the sync object (if any) that has to be waited on before returning.
+ OGLSync sync;
+
+ buffer_pos = Common::AlignUp(buffer_pos, alignment);
+ // Start of the fence subdivision that contains the write position.
+ size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide);
+
+ // Moving into a later subdivision, or about to wrap around: close the most recent head
+ // entry by creating its sync object now.
+ if (!head.empty() &&
+ (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) {
+ ASSERT(head.back().sync.handle == 0);
+ head.back().sync.Create();
+ }
+
+ // Wrap-around: the request does not fit behind the current position.
+ if (buffer_pos + size > buffer_size) {
+ if (!tail.empty()) {
+ // Keep only the last remaining tail sync to wait on; the other entries are dropped.
+ std::swap(sync, tail.back().sync);
+ tail.clear();
+ }
+ // Everything recorded during this pass now lies ahead of the write position.
+ std::swap(tail, head);
+ buffer_pos = 0;
+ effective_offset = 0;
+ }
+
+ // Consume every tail entry whose start lies below the end of the new range. After the loop
+ // `sync` holds the sync of the last entry consumed.
+ // NOTE(review): the sync swapped into each popped Fence is presumably released by the
+ // OGLSync destructor - confirm in gl_resource_manager.h.
+ while (!tail.empty() && buffer_pos + size > tail.front().offset) {
+ std::swap(sync, tail.front().sync);
+ tail.pop_front();
+ }
+
+ // Block (no timeout) until the collected sync object is signalled.
+ if (sync.handle != 0) {
+ glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
+ sync.Release();
+ }
+
+ // Open a head entry for this subdivision unless the last one already covers it. Its sync
+ // object is created by a later Map() call (see the first branch above).
+ if (head.empty() || effective_offset > head.back().offset) {
+ head.emplace_back();
+ head.back().offset = effective_offset;
+ }
+
+ mapped_size = size;
+ return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos));
+}
+
+/// Flushes the range reserved by the last Map() with glFlushMappedBufferRange, then advances
+/// the write position past it.
+void StorageBuffer::Unmap() {
+    const auto flush_offset = static_cast<GLintptr>(buffer_pos);
+    const auto flush_length = static_cast<GLsizeiptr>(mapped_size);
+    glFlushMappedBufferRange(gl_target, flush_offset, flush_length);
+
+    buffer_pos += mapped_size;
+}
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
new file mode 100644
index 000000000..4bc2f52e0
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -0,0 +1,34 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <glad/glad.h>
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+/// Base class for GL buffers that are filled incrementally through Map()/Unmap() pairs.
+/// Concrete backends are obtained from MakeBuffer().
+class OGLStreamBuffer : private NonCopyable {
+public:
+    explicit OGLStreamBuffer(GLenum target);
+    virtual ~OGLStreamBuffer() = default;
+
+    /// Creates the persistently-mapped backend when `storage_buffer` is true, otherwise the
+    /// glBufferData/glBufferSubData backend. Create() must still be called on the result.
+    static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target);
+
+    /// Allocates `size` bytes of GL storage. `sync_subdivide` is the fence granularity used by
+    /// the persistently-mapped backend; the other backend ignores it.
+    virtual void Create(size_t size, size_t sync_subdivide) = 0;
+    /// Frees the GL resources; the base implementation does nothing.
+    virtual void Release() {}
+
+    GLuint GetHandle() const;
+
+    /// Reserves `size` bytes at an offset aligned to `alignment` and returns the pointer to
+    /// write to together with the byte offset inside the GL buffer.
+    virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0;
+    /// Commits the range reserved by the last Map() and advances the write position past it.
+    virtual void Unmap() = 0;
+
+protected:
+    OGLBuffer gl_buffer;
+    GLenum gl_target;
+
+    size_t buffer_pos = 0;            ///< Offset at which the next Map() starts looking
+    size_t buffer_size = 0;           ///< Total size of the GL storage in bytes
+    size_t buffer_sync_subdivide = 0; ///< Fence granularity (set by the storage backend)
+    size_t mapped_size = 0;           ///< Size of the range handed out by the last Map()
+};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 50396b5c1..1a24855d7 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -20,6 +20,7 @@
#include "core/settings.h"
#include "core/tracer/recorder.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/utils.h"
#include "video_core/video_core.h"
static const char vertex_shader[] = R"(
@@ -98,22 +99,22 @@ RendererOpenGL::RendererOpenGL() = default;
RendererOpenGL::~RendererOpenGL() = default;
/// Swap buffers (render frame)
-void RendererOpenGL::SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) {
+void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) {
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.Apply();
- if (framebuffer_info != boost::none) {
- // If framebuffer_info is provided, reload it from memory to a texture
- if (screen_info.texture.width != (GLsizei)framebuffer_info->width ||
- screen_info.texture.height != (GLsizei)framebuffer_info->height ||
- screen_info.texture.pixel_format != framebuffer_info->pixel_format) {
+ if (framebuffer != boost::none) {
+ // If framebuffer is provided, reload it from memory to a texture
+ if (screen_info.texture.width != (GLsizei)framebuffer->width ||
+ screen_info.texture.height != (GLsizei)framebuffer->height ||
+ screen_info.texture.pixel_format != framebuffer->pixel_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
- ConfigureFramebufferTexture(screen_info.texture, *framebuffer_info);
+ ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
}
- LoadFBToScreenInfo(*framebuffer_info, screen_info);
+ LoadFBToScreenInfo(*framebuffer, screen_info);
}
DrawScreens();
@@ -131,162 +132,59 @@ void RendererOpenGL::SwapBuffers(boost::optional<const FramebufferInfo&> framebu
RefreshRasterizerSetting();
}
-static inline u32 MortonInterleave128(u32 x, u32 y) {
- // 128x128 Z-Order coordinate from 2D coordinates
- static constexpr u32 xlut[] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
- 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
- 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
- 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
- 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
- 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
- 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
- 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
- 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
- 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
- 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
- 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
- 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
- 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
- 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
- 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
- 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
- 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
- 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
- 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
- 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
- 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
- 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
- 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
- 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
- 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
- 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
- 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
- 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
- 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
- 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
- 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
- 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
- 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
- };
- static constexpr u32 ylut[] = {
- 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
- 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
- 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
- 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
- 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
- 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
- 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
- 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
- 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
- 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
- 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
- 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
- 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
- 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
- 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
- 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
- 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
- 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
- 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
- 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
- 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
- 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
- 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
- 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
- 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
- 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
- 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
- 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
- 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
- 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
- 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
- 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
- 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
- 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
- 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
- };
- return xlut[x % 128] + ylut[y % 128];
-}
-
-static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
- // Calculates the offset of the position of the pixel in Morton order
- // Framebuffer images are split into 128x128 tiles.
-
- const unsigned int block_height = 128;
- const unsigned int coarse_x = x & ~127;
-
- u32 i = MortonInterleave128(x, y);
-
- const unsigned int offset = coarse_x * block_height;
-
- return (i + offset) * bytes_per_pixel;
-}
-
-static void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel,
- u8* morton_data, u8* gl_data, bool morton_to_gl) {
- u8* data_ptrs[2];
- for (unsigned y = 0; y < height; ++y) {
- for (unsigned x = 0; x < width; ++x) {
- const u32 coarse_y = y & ~127;
- u32 morton_offset =
- GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
- u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
-
- data_ptrs[morton_to_gl] = morton_data + morton_offset;
- data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
-
- memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
- }
- }
-}
-
/**
* Loads framebuffer from emulated memory into the active OpenGL texture.
+ * Rasterizer()->AccelerateDisplay() is tried first; only when it returns false is the
+ * framebuffer copied through gl_framebuffer_data and uploaded with glTexSubImage2D.
*/
-void RendererOpenGL::LoadFBToScreenInfo(const FramebufferInfo& framebuffer_info,
+void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer,
ScreenInfo& screen_info) {
- const u32 bpp{FramebufferInfo::BytesPerPixel(framebuffer_info.pixel_format)};
- const u32 size_in_bytes{framebuffer_info.stride * framebuffer_info.height * bpp};
+ const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
+ // NOTE(review): the product is evaluated in the operands' own type before it is widened to
+ // u64 - confirm stride * height * bytes_per_pixel cannot overflow that type.
+ const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
+ const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
- MortonCopyPixels128(framebuffer_info.width, framebuffer_info.height, bpp, 4,
- Memory::GetPointer(framebuffer_info.address), gl_framebuffer_data.data(),
- true);
+ // TODO(bunnei): The framebuffer region should only be invalidated if it is written to, not
+ // every frame. When we find the right place for this, the below line can be removed.
+ Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
+ Memory::FlushMode::Invalidate);
- LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%llx(%dx%d), fmt %x", size_in_bytes,
- framebuffer_info.address, framebuffer_info.width, framebuffer_info.height,
- (int)framebuffer_info.pixel_format);
+ // Framebuffer orientation handling
+ framebuffer_transform_flags = framebuffer.transform_flags;
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default
// only allows rows to have a memory alignment of 4.
- ASSERT(framebuffer_info.stride % 4 == 0);
+ ASSERT(framebuffer.stride % 4 == 0);
- // Reset the screen info's display texture to its own permanent texture
- screen_info.display_texture = screen_info.texture.resource.handle;
- screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
+ if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride,
+ screen_info)) {
+ // Reset the screen info's display texture to its own permanent texture
+ screen_info.display_texture = screen_info.texture.resource.handle;
+ screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
- Memory::RasterizerFlushRegion(framebuffer_info.address, size_in_bytes);
+ Rasterizer()->FlushRegion(framebuffer_addr, size_in_bytes);
- state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
- state.Apply();
+ // Copy the framebuffer out of emulated memory into gl_framebuffer_data, 4 bytes per pixel
+ // on the GL side.
+ VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4,
+ Memory::GetPointer(framebuffer_addr),
+ gl_framebuffer_data.data(), true);
- glActiveTexture(GL_TEXTURE0);
- glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)framebuffer_info.stride);
+ state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+ state.Apply();
- // Update existing texture
- // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
- // they differ from the LCD resolution.
- // TODO: Applications could theoretically crash Citra here by specifying too large
- // framebuffer sizes. We should make sure that this cannot happen.
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer_info.width, framebuffer_info.height,
- screen_info.texture.gl_format, screen_info.texture.gl_type,
- gl_framebuffer_data.data());
+ glActiveTexture(GL_TEXTURE0);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+ // Update existing texture
+ // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
+ // they differ from the LCD resolution.
+ // TODO: Applications could theoretically crash yuzu here by specifying too large
+ // framebuffer sizes. We should make sure that this cannot happen.
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
+ screen_info.texture.gl_format, screen_info.texture.gl_type,
+ gl_framebuffer_data.data());
- state.texture_units[0].texture_2d = 0;
- state.Apply();
+ // Restore the default row length so later uploads are not affected.
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+
+ state.texture_units[0].texture_2d = 0;
+ state.Apply();
+ }
}
/**
@@ -316,7 +214,7 @@ void RendererOpenGL::InitOpenGLObjects() {
0.0f);
// Link shaders and get variable locations
- shader.Create(vertex_shader, fragment_shader);
+ shader.Create(vertex_shader, nullptr, fragment_shader);
state.draw.shader_program = shader.handle;
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
@@ -370,14 +268,14 @@ void RendererOpenGL::InitOpenGLObjects() {
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
- const FramebufferInfo& framebuffer_info) {
+ const Tegra::FramebufferConfig& framebuffer) {
- texture.width = framebuffer_info.width;
- texture.height = framebuffer_info.height;
+ texture.width = framebuffer.width;
+ texture.height = framebuffer.height;
GLint internal_format;
- switch (framebuffer_info.pixel_format) {
- case FramebufferInfo::PixelFormat::ABGR8:
+ switch (framebuffer.pixel_format) {
+ case Tegra::FramebufferConfig::PixelFormat::ABGR8:
// Use RGBA8 and swap in the fragment shader
internal_format = GL_RGBA;
texture.gl_format = GL_RGBA;
@@ -401,13 +299,26 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w,
float h) {
- auto& texcoords = screen_info.display_texcoords;
+ const auto& texcoords = screen_info.display_texcoords;
+ auto left = texcoords.left;
+ auto right = texcoords.right;
+ if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset)
+ if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) {
+ // Flip the framebuffer vertically
+ left = texcoords.right;
+ right = texcoords.left;
+ } else {
+ // Other transformations are unsupported
+ LOG_CRITICAL(HW_GPU, "unsupported framebuffer_transform_flags=%d",
+ framebuffer_transform_flags);
+ UNIMPLEMENTED();
+ }
std::array<ScreenRectVertex, 4> vertices = {{
- ScreenRectVertex(x, y, texcoords.top, texcoords.right),
- ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right),
- ScreenRectVertex(x, y + h, texcoords.top, texcoords.left),
- ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.left),
+ ScreenRectVertex(x, y, texcoords.top, right),
+ ScreenRectVertex(x + w, y, texcoords.bottom, right),
+ ScreenRectVertex(x, y + h, texcoords.top, left),
+ ScreenRectVertex(x + w, y + h, texcoords.bottom, left),
}};
state.texture_units[0].texture_2d = screen_info.display_texture;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index db6c355a5..29516baf4 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -21,7 +21,7 @@ struct TextureInfo {
GLsizei height;
GLenum gl_format;
GLenum gl_type;
- RendererBase::FramebufferInfo::PixelFormat pixel_format;
+ Tegra::FramebufferConfig::PixelFormat pixel_format;
};
/// Structure used for storing information about the display target for each 3DS screen
@@ -37,7 +37,7 @@ public:
~RendererOpenGL() override;
/// Swap buffers (render frame)
- void SwapBuffers(boost::optional<const FramebufferInfo&> framebuffer_info) override;
+ void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) override;
/**
* Set the emulator window to use for renderer
@@ -53,13 +53,14 @@ public:
private:
void InitOpenGLObjects();
- void ConfigureFramebufferTexture(TextureInfo& texture, const FramebufferInfo& framebuffer_info);
+ void ConfigureFramebufferTexture(TextureInfo& texture,
+ const Tegra::FramebufferConfig& framebuffer);
void DrawScreens();
void DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, float h);
void UpdateFramerate();
// Loads framebuffer from emulated memory into the display information structure
- void LoadFBToScreenInfo(const FramebufferInfo& framebuffer_info, ScreenInfo& screen_info);
+ void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer, ScreenInfo& screen_info);
// Fills active OpenGL texture with the given RGBA color.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
@@ -86,4 +87,7 @@ private:
// Shader attribute input indices
GLuint attrib_position;
GLuint attrib_tex_coord;
+
+ /// Used for transforming the framebuffer orientation
+ Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
};
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
new file mode 100644
index 000000000..2e87281eb
--- /dev/null
+++ b/src/video_core/textures/decoders.cpp
@@ -0,0 +1,105 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include "common/assert.h"
+#include "video_core/textures/decoders.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra {
+namespace Texture {
+
+/**
+ * Computes the byte offset of pixel (x, y) inside a swizzled texture.
+ * The offset is the address of the 512-byte GOB holding the pixel plus the pixel's position
+ * inside that GOB. Formula taken from the Tegra X1 TRM.
+ */
+static u32 GetSwizzleOffset(u32 x, u32 y, u32 image_width, u32 bytes_per_pixel, u32 block_height) {
+    const u32 image_width_in_gobs = image_width * bytes_per_pixel / 64;
+    const u32 rows_per_block = 8 * block_height;
+    const u32 bytes_per_block = 512 * block_height;
+    const u32 x_bytes = x * bytes_per_pixel;
+
+    // Address of the GOB: block row, then block column, then GOB within the block.
+    const u32 block_row_base = (y / rows_per_block) * bytes_per_block * image_width_in_gobs;
+    const u32 block_column_base = (x_bytes / 64) * bytes_per_block;
+    const u32 gob_in_block = (y % rows_per_block / 8) * 512;
+    const u32 gob_address = block_row_base + block_column_base + gob_in_block;
+
+    // Position inside the GOB, interleaving bits of the byte column and the row.
+    const u32 offset_in_gob = ((x_bytes % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
+                              ((x_bytes % 32) / 16) * 32 + (y % 2) * 16 + (x_bytes % 16);
+
+    return gob_address + offset_in_gob;
+}
+
+/**
+ * Copies a width x height image pixel by pixel between swizzled and linear layout.
+ * With `unswizzle` set the data flows from `swizzled_data` to `unswizzled_data`, otherwise
+ * in the opposite direction. Each copy moves `bytes_per_pixel` bytes; linear pixels are
+ * spaced `out_bytes_per_pixel` bytes apart.
+ */
+static void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
+                             u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
+                             u32 block_height) {
+    for (unsigned row = 0; row < height; ++row) {
+        for (unsigned col = 0; col < width; ++col) {
+            const u32 swizzle_offset =
+                GetSwizzleOffset(col, row, width, bytes_per_pixel, block_height);
+            const u32 linear_offset = (col + row * width) * out_bytes_per_pixel;
+
+            u8* const swizzled_pixel = swizzled_data + swizzle_offset;
+            u8* const linear_pixel = unswizzled_data + linear_offset;
+
+            if (unswizzle) {
+                std::memcpy(linear_pixel, swizzled_pixel, bytes_per_pixel);
+            } else {
+                std::memcpy(swizzled_pixel, linear_pixel, bytes_per_pixel);
+            }
+        }
+    }
+}
+
+/**
+ * Returns the size in bytes of one storage unit of `format`: a pixel for A8R8G8B8, a 4x4
+ * tile for DXT1. Unhandled formats report UNIMPLEMENTED and yield 0.
+ */
+u32 BytesPerPixel(TextureFormat format) {
+    switch (format) {
+    case TextureFormat::DXT1:
+        // In this case a 'pixel' actually refers to a 4x4 tile.
+        return 8;
+    case TextureFormat::A8R8G8B8:
+        return 4;
+    default:
+        UNIMPLEMENTED_MSG("Format not implemented");
+        break;
+    }
+
+    // Reached only for unhandled formats. Returning a value keeps the function from flowing
+    // off its end, which is undefined behaviour for a non-void function.
+    return 0;
+}
+
+/**
+ * Reads the swizzled texture stored at `address` and returns it in linear layout, still in
+ * its original format. A block height of 16 is assumed for every texture.
+ */
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
+    static constexpr u32 DefaultBlockHeight = 16;
+
+    u8* const swizzled = Memory::GetPointer(address);
+    const u32 bytes_per_pixel = BytesPerPixel(format);
+
+    std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
+    u8* const linear = unswizzled_data.data();
+
+    if (format == TextureFormat::DXT1) {
+        // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
+        CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, swizzled, linear,
+                         true, DefaultBlockHeight);
+    } else if (format == TextureFormat::A8R8G8B8) {
+        CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, swizzled, linear, true,
+                         DefaultBlockHeight);
+    } else {
+        UNIMPLEMENTED_MSG("Format not implemented");
+    }
+
+    return unswizzled_data;
+}
+
+/**
+ * Placeholder decoder: for the formats handled so far (DXT1, A8R8G8B8) the input bytes are
+ * returned unchanged; any other format reports UNIMPLEMENTED and yields an empty vector.
+ * `width` and `height` are currently unused.
+ */
+std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
+                              u32 height) {
+    std::vector<u8> rgba_data;
+
+    // TODO(Subv): Implement.
+    const bool is_passthrough_format =
+        format == TextureFormat::DXT1 || format == TextureFormat::A8R8G8B8;
+    if (is_passthrough_format) {
+        // TODO(Subv): For the time being just forward the same data without any decoding.
+        rgba_data = texture_data;
+    } else {
+        UNIMPLEMENTED_MSG("Format not implemented");
+    }
+
+    return rgba_data;
+}
+
+} // namespace Texture
+} // namespace Tegra
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
new file mode 100644
index 000000000..0c21694ff
--- /dev/null
+++ b/src/video_core/textures/decoders.h
@@ -0,0 +1,26 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra {
+namespace Texture {
+
+/**
+ * Unswizzles a swizzled texture without changing its format.
+ *
+ * @param address Address of the swizzled texture data.
+ * @param format Format of the texture. For DXT1 the data is unswizzled in units of 4x4 tiles
+ *               rather than individual pixels.
+ * @param width Width of the texture in pixels.
+ * @param height Height of the texture in pixels.
+ * @returns A buffer of width * height * BytesPerPixel(format) bytes holding the linear data.
+ */
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
+
+/**
+ * Decodes an unswizzled texture into an A8R8G8B8 texture.
+ *
+ * NOTE: decoding is not implemented yet; for the supported formats (DXT1 and A8R8G8B8) the
+ * input data is currently returned unchanged.
+ *
+ * @param texture_data Unswizzled texture data.
+ * @param format Format of texture_data.
+ * @param width Width of the texture in pixels.
+ * @param height Height of the texture in pixels.
+ * @returns The decoded texture data; empty if the format is not handled.
+ */
+std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
+ u32 height);
+
+} // namespace Texture
+} // namespace Tegra
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
new file mode 100644
index 000000000..d969bcdd9
--- /dev/null
+++ b/src/video_core/textures/texture.h
@@ -0,0 +1,61 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+namespace Texture {
+
+/// Texture formats handled so far. The enumerator values are the raw values stored in the 7-bit
+/// `format` field of TICEntry (presumably the hardware encoding - TODO confirm).
+enum class TextureFormat : u32 {
+ A8R8G8B8 = 8,
+ DXT1 = 0x24,
+};
+
+/// 32-bit texture handle that packs two indices into a single word.
+/// NOTE(review): "tic"/"tsc" presumably refer to the texture image / texture sampler control
+/// tables - confirm against the users of this type.
+union TextureHandle {
+ u32 raw; // The whole handle as a single word.
+ BitField<0, 20, u32> tic_id; // Bits 0-19.
+ BitField<20, 12, u32> tsc_id; // Bits 20-31.
+};
+
+/// Texture descriptor with a fixed 0x20-byte layout (enforced by the static_assert below).
+/// The byte offsets noted on the fields assume INSERT_PADDING_BYTES(n) reserves exactly n bytes,
+/// which is consistent with the asserted total size.
+struct TICEntry {
+ // Offset 0x00: format word.
+ union {
+ u32 raw;
+ BitField<0, 7, TextureFormat> format; // Bits 0-6: texture format.
+ // NOTE(review): the four fields below are presumably the per-channel component types.
+ BitField<7, 3, u32> r_type;
+ BitField<10, 3, u32> g_type;
+ BitField<13, 3, u32> b_type;
+ BitField<16, 3, u32> a_type;
+ };
+ u32 address_low; // Offset 0x04: bits 0-31 of the texture address.
+ u16 address_high; // Offset 0x08: bits 32-47 of the texture address.
+ INSERT_PADDING_BYTES(6);
+ u16 width_minus_1; // Offset 0x10: texture width, stored minus one.
+ INSERT_PADDING_BYTES(2);
+ u16 height_minus_1; // Offset 0x14: texture height, stored minus one.
+ INSERT_PADDING_BYTES(10);
+
+ /// Returns the texture address assembled from address_high (upper part) and address_low.
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+ }
+
+ /// Returns the texture width (stored value plus one).
+ u32 Width() const {
+ return width_minus_1 + 1;
+ }
+
+ /// Returns the texture height (stored value plus one).
+ u32 Height() const {
+ return height_minus_1 + 1;
+ }
+};
+static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
+
+/// Returns the number of bytes per pixel of the input texture format.
+/// For DXT1 a 'pixel' refers to a whole 4x4 tile, so the returned value (8) is per tile.
+u32 BytesPerPixel(TextureFormat format);
+
+} // namespace Texture
+} // namespace Tegra
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index d8567f314..be0f7e22b 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -49,4 +49,116 @@ static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
return (i + offset) * bytes_per_pixel;
}
-} // namespace
+/**
+ * Returns the index of pixel (x, y) inside its 128x128 tile, obtained by adding the table
+ * entries selected by x % 128 and y % 128.
+ *
+ * NOTE(review): both tables hold 384 entries (the 128-entry pattern appears three times), but
+ * because of the `% 128` below only indices 0..127 are ever read.
+ * NOTE(review): spot checks suggest xlut maps x bits 0,1,2,3,4,5,6 to result bits
+ * 0,1,3,6,11,12,13 and ylut maps y bits 0..6 to result bits 2,4,5,7,8,9,10 - confirm before
+ * replacing the tables with bit arithmetic.
+ */
+static inline u32 MortonInterleave128(u32 x, u32 y) {
+ // 128x128 Z-Order coordinate from 2D coordinates
+ static constexpr u32 xlut[] = {
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
+ 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
+ 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
+ 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
+ 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
+ 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
+ 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
+ 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
+ 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
+ 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
+ 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
+ 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
+ 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
+ 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
+ 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
+ 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
+ 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
+ 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
+ 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
+ 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
+ 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
+ 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
+ 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
+ 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
+ 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
+ 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
+ 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
+ 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
+ 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
+ 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
+ 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
+ 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
+ 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
+ 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
+ };
+ static constexpr u32 ylut[] = {
+ 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
+ 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
+ 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
+ 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
+ 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
+ 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
+ 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
+ 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
+ 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
+ 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
+ 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
+ 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
+ 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
+ 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
+ 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
+ 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
+ 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
+ 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
+ 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
+ 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
+ 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
+ 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
+ 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
+ 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
+ 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
+ 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
+ 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
+ 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
+ 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
+ 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
+ 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
+ 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
+ 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
+ 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
+ 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
+ };
+ // Coordinates are wrapped to the tile, so only the first 128 entries of each table are used.
+ return xlut[x % 128] + ylut[y % 128];
+}
+
+/**
+ * Calculates the byte offset of pixel (x, y) in Morton order, relative to the start of the row
+ * of tiles that contains it.
+ */
+static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
+    // Framebuffer images are split into 128x128 tiles.
+    constexpr u32 tile_dim = 128;
+
+    // x rounded down to its tile boundary, scaled by the tile height.
+    const u32 tile_column_base = (x & ~(tile_dim - 1)) * tile_dim;
+    // Position of the pixel inside its own tile.
+    const u32 index_in_tile = MortonInterleave128(x, y);
+
+    return (index_in_tile + tile_column_base) * bytes_per_pixel;
+}
+
+/**
+ * Copies every pixel of a width x height image between a 128x128-tiled Morton buffer and a
+ * linear buffer whose rows are stored in reverse vertical order.
+ *
+ * When morton_to_gl is true the Morton buffer is the source and gl_data the destination;
+ * otherwise the direction is reversed. bytes_per_pixel bytes are copied per pixel, while
+ * gl_bytes_per_pixel is only used as the pixel stride inside gl_data.
+ */
+static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel,
+                                       u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data,
+                                       bool morton_to_gl) {
+    for (u32 row = 0; row < height; ++row) {
+        // Byte offset of the row of tiles that contains this pixel row.
+        const u32 tile_row_base = (row & ~127u) * width * bytes_per_pixel;
+        // Index of the first pixel of the mirrored row in the linear buffer.
+        const u32 gl_row_start = (height - 1 - row) * width;
+
+        for (u32 col = 0; col < width; ++col) {
+            const u32 morton_offset = GetMortonOffset128(col, row, bytes_per_pixel) + tile_row_base;
+            const u32 gl_offset = (col + gl_row_start) * gl_bytes_per_pixel;
+
+            u8* const morton_pixel = morton_data + morton_offset;
+            u8* const gl_pixel = gl_data + gl_offset;
+
+            if (morton_to_gl) {
+                memcpy(gl_pixel, morton_pixel, bytes_per_pixel);
+            } else {
+                memcpy(morton_pixel, gl_pixel, bytes_per_pixel);
+            }
+        }
+    }
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 106d62562..864691baa 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -39,4 +39,4 @@ void Shutdown() {
LOG_DEBUG(Render, "shutdown OK");
}
-} // namespace
+} // namespace VideoCore
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 0b8785898..37da62436 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -15,6 +15,8 @@ class RendererBase;
namespace VideoCore {
+/// Identifies a renderer backend (software or OpenGL).
+enum class Renderer { Software, OpenGL };
+
extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
extern EmuWindow* g_emu_window; ///< Emu window
@@ -31,4 +33,4 @@ bool Init(EmuWindow* emu_window);
/// Shutdown the video core
void Shutdown();
-} // namespace
+} // namespace VideoCore