Diffstat (limited to 'src/video_core')
25 files changed, 1148 insertions, 565 deletions
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index f7214ffec..a01153e0b 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h @@ -30,8 +30,7 @@ union CommandHeader { BitField<29, 3, SubmissionMode> mode; }; -static_assert(std::is_standard_layout<CommandHeader>::value == true, - "CommandHeader does not use standard layout"); +static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a46ed4bd7..68f91cc75 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -222,6 +222,18 @@ void Maxwell3D::DrawArrays() { debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); } + // Both instance configuration registers can not be set at the same time. + ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, + "Illegal combination of instancing parameters"); + + if (regs.draw.instance_next) { + // Increment the current instance *before* drawing. + state.current_instance += 1; + } else if (!regs.draw.instance_cont) { + // Reset the current instance to 0. + state.current_instance = 0; + } + const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; rasterizer.AccelerateDrawBatch(is_indexed); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0506ac8fe..771eb5abc 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -93,6 +93,7 @@ public: struct VertexAttribute { enum class Size : u32 { + Invalid = 0x0, Size_32_32_32_32 = 0x01, Size_32_32_32 = 0x02, Size_16_16_16_16 = 0x03, @@ -257,6 +258,10 @@ public: bool IsNormalized() const { return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } + + bool IsValid() const { + return size != Size::Invalid; + } }; enum class PrimitiveTopology : u32 { @@ -352,6 +357,27 @@ public: OneMinusConstantColor = 0x62, ConstantAlpha = 0x63, OneMinusConstantAlpha = 0x64, + + // These values are used by Nouveau and some games. + ZeroGL = 0x4000, + OneGL = 0x4001, + SourceColorGL = 0x4300, + OneMinusSourceColorGL = 0x4301, + SourceAlphaGL = 0x4302, + OneMinusSourceAlphaGL = 0x4303, + DestAlphaGL = 0x4304, + OneMinusDestAlphaGL = 0x4305, + DestColorGL = 0x4306, + OneMinusDestColorGL = 0x4307, + SourceAlphaSaturateGL = 0x4308, + ConstantColorGL = 0xc001, + OneMinusConstantColorGL = 0xc002, + ConstantAlphaGL = 0xc003, + OneMinusConstantAlphaGL = 0xc004, + Source1ColorGL = 0xc900, + OneMinusSource1ColorGL = 0xc901, + Source1AlphaGL = 0xc902, + OneMinusSource1AlphaGL = 0xc903, }; u32 separate_alpha; @@ -612,6 +638,8 @@ public: union { u32 vertex_begin_gl; BitField<0, 16, PrimitiveTopology> topology; + BitField<26, 1, u32> instance_next; + BitField<27, 1, u32> instance_cont; }; } draw; @@ -804,6 +832,7 @@ public: }; std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; + u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. 
}; State state{}; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 3d4557b7e..9413a81fb 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -12,6 +12,7 @@ #include <boost/optional.hpp> +#include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" @@ -74,12 +75,14 @@ union Attribute { enum class Index : u64 { Position = 7, Attribute_0 = 8, + Attribute_31 = 39, // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval // shader. TessCoordInstanceIDVertexID = 47, - // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this. - Unknown_63 = 63, + // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment + // shader. It is unknown what the other values contain. + FrontFacing = 63, }; union { @@ -142,6 +145,7 @@ enum class PredCondition : u64 { NotEqual = 5, GreaterEqual = 6, LessThanWithNan = 9, + GreaterThanWithNan = 12, NotEqualWithNan = 13, // TODO(Subv): Other condition types }; @@ -201,11 +205,24 @@ enum class IMinMaxExchange : u64 { XHi = 3, }; +enum class XmadMode : u64 { + None = 0, + CLo = 1, + CHi = 2, + CSfu = 3, + CBcc = 4, +}; + enum class FlowCondition : u64 { Always = 0xF, Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? }; +enum class PredicateResultMode : u64 { + None = 0x0, + NotZero = 0x3, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -246,7 +263,7 @@ union Instruction { BitField<39, 1, u64> invert_a; BitField<40, 1, u64> invert_b; BitField<41, 2, LogicOperation> operation; - BitField<44, 2, u64> unk44; + BitField<44, 2, PredicateResultMode> pred_result_mode; BitField<48, 3, Pred> pred48; } lop; @@ -276,6 +293,10 @@ union Instruction { } alu; union { + BitField<48, 1, u64> negate_b; + } fmul; + + union { BitField<48, 1, u64> is_signed; } shift; @@ -430,16 +451,20 @@ union Instruction { } bool IsComponentEnabled(size_t component) const { - static constexpr std::array<std::array<u32, 8>, 4> mask_lut{ - {{}, - {0x1, 0x2, 0x4, 0x8, 0x3}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}}}; + static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ + {}, + {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, + {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, + {0x7, 0xb, 0xd, 0xe, 0xf}, + }}; size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - return ((1ull << component) & mask_lut[index][component_mask_selector]) != 0; + u32 mask = mask_lut[index][component_mask_selector]; + // A mask of 0 means this instruction uses an unimplemented mask. 
+ ASSERT(mask != 0); + return ((1ull << component) & mask) != 0; } } texs; @@ -458,6 +483,18 @@ union Instruction { } bra; union { + BitField<20, 16, u64> imm20_16; + BitField<36, 1, u64> product_shift_left; + BitField<37, 1, u64> merge_37; + BitField<48, 1, u64> sign_a; + BitField<49, 1, u64> sign_b; + BitField<50, 3, XmadMode> mode; + BitField<52, 1, u64> high_b; + BitField<53, 1, u64> high_a; + BitField<56, 1, u64> merge_56; + } xmad; + + union { BitField<20, 14, u64> offset; BitField<34, 5, u64> index; } cbuf34; @@ -477,8 +514,7 @@ union Instruction { u64 value; }; static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout<Instruction>::value, - "Structure does not have standard layout"); +static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout"); class OpCode { public: @@ -494,6 +530,8 @@ public: LD_A, LD_C, ST_A, + LDG, // Load from global memory + STG, // Store in global memory TEX, TEXQ, // Texture Query TEXS, // Texture Fetch with scalar/non-vec4 source/destinations @@ -595,9 +633,17 @@ public: IntegerSetPredicate, PredicateSetPredicate, Conversion, + Xmad, Unknown, }; + /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be + /// conditionally executed). + static bool IsPredicatedInstruction(Id opcode) { + // TODO(Subv): Add the rest of unpredicated instructions. + return opcode != Id::SSY; + } + class Matcher { public: Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type) @@ -697,6 +743,8 @@ private: INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), + INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), + INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), @@ -777,10 +825,10 @@ private: INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"), + INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), + INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), + INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), + INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), }; #undef INST std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4ff4d71c5..9758adcfd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/assert.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_compute.h" @@ -11,6 +12,15 @@ namespace Tegra { +u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { + switch (format) { + case PixelFormat::ABGR8: + return 4; + } + + UNREACHABLE(); +} + GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { memory_manager = std::make_unique<MemoryManager>(); maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); @@ -34,19 +44,60 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { switch (format) { case RenderTargetFormat::RGBA32_FLOAT: + case RenderTargetFormat::RGBA32_UINT: return 16; + case RenderTargetFormat::RGBA16_UINT: + case RenderTargetFormat::RGBA16_UNORM: case RenderTargetFormat::RGBA16_FLOAT: case RenderTargetFormat::RG32_FLOAT: + case RenderTargetFormat::RG32_UINT: return 8; case RenderTargetFormat::RGBA8_UNORM: + case RenderTargetFormat::RGBA8_SNORM: + case RenderTargetFormat::RGBA8_SRGB: + case RenderTargetFormat::RGBA8_UINT: case RenderTargetFormat::RGB10_A2_UNORM: case RenderTargetFormat::BGRA8_UNORM: + case RenderTargetFormat::RG16_UNORM: + case RenderTargetFormat::RG16_SNORM: + case RenderTargetFormat::RG16_UINT: + case RenderTargetFormat::RG16_SINT: + case RenderTargetFormat::RG16_FLOAT: case RenderTargetFormat::R32_FLOAT: case RenderTargetFormat::R11G11B10_FLOAT: + case RenderTargetFormat::R32_UINT: return 4; + case RenderTargetFormat::R16_UNORM: + case RenderTargetFormat::R16_SNORM: + case RenderTargetFormat::R16_UINT: + case RenderTargetFormat::R16_SINT: + case RenderTargetFormat::R16_FLOAT: + case RenderTargetFormat::RG8_UNORM: + case RenderTargetFormat::RG8_SNORM: + return 2; + case RenderTargetFormat::R8_UNORM: + case RenderTargetFormat::R8_UINT: + return 1; default: UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); } } +u32 DepthFormatBytesPerPixel(DepthFormat format) { + switch (format) { + case DepthFormat::Z32_S8_X24_FLOAT: + return 8; + case DepthFormat::Z32_FLOAT: + case DepthFormat::S8_Z24_UNORM: + case DepthFormat::Z24_X8_UNORM: + case DepthFormat::Z24_S8_UNORM: + case DepthFormat::Z24_C8_UNORM: + return 4; + case DepthFormat::Z16_UNORM: + return 2; + default: + UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format)); + } +} + } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 874eddd78..2697e1c27 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -6,7 +6,6 @@ #include <memory> #include <unordered_map> -#include <vector> #include "common/common_types.h" #include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/memory_manager.h" @@ -21,22 +20,35 @@ enum class RenderTargetFormat : u32 { NONE = 0x0, RGBA32_FLOAT = 0xC0, RGBA32_UINT = 0xC2, + RGBA16_UNORM = 0xC6, + RGBA16_UINT = 0xC9, RGBA16_FLOAT = 0xCA, RG32_FLOAT = 0xCB, + RG32_UINT = 0xCD, BGRA8_UNORM = 0xCF, RGB10_A2_UNORM = 0xD1, RGBA8_UNORM = 0xD5, RGBA8_SRGB = 0xD6, + RGBA8_SNORM = 0xD7, + RGBA8_UINT = 0xD9, RG16_UNORM = 0xDA, RG16_SNORM = 0xDB, RG16_SINT = 0xDC, RG16_UINT = 0xDD, RG16_FLOAT = 0xDE, R11G11B10_FLOAT = 0xE0, + R32_UINT = 0xE4, R32_FLOAT = 0xE5, B5G6R5_UNORM = 0xE8, + RG8_UNORM = 0xEA, + RG8_SNORM = 0xEB, + R16_UNORM = 0xEE, + R16_SNORM = 0xEF, + R16_SINT = 0xF0, + R16_UINT = 0xF1, R16_FLOAT = 0xF2, R8_UNORM = 0xF3, + R8_UINT = 0xF6, }; enum class DepthFormat : u32 { @@ -52,6 +64,9 @@ enum class DepthFormat : u32 { /// Returns the number of bytes per 
pixel of each rendertarget format. u32 RenderTargetBytesPerPixel(RenderTargetFormat format); +/// Returns the number of bytes per pixel of each depth format. +u32 DepthFormatBytesPerPixel(DepthFormat format); + class DebugContext; /** @@ -65,14 +80,7 @@ struct FramebufferConfig { /** * Returns the number of bytes per pixel. */ - static u32 BytesPerPixel(PixelFormat format) { - switch (format) { - case PixelFormat::ABGR8: - return 4; - } - - UNREACHABLE(); - } + static u32 BytesPerPixel(PixelFormat format); VAddr address; u32 offset; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 3ca350243..afd86a83a 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -4,18 +4,23 @@ #include <memory> #include "core/frontend/emu_window.h" +#include "core/settings.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_rasterizer.h" namespace VideoCore { -RendererBase::RendererBase(EmuWindow& window) : render_window{window} {} +RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} { + RefreshBaseSettings(); +} + RendererBase::~RendererBase() = default; -void RendererBase::UpdateCurrentFramebufferLayout() { - const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout(); +void RendererBase::RefreshBaseSettings() { + RefreshRasterizerSetting(); + UpdateCurrentFramebufferLayout(); - render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height); + renderer_settings.use_framelimiter = Settings::values.toggle_framelimit; } void RendererBase::RefreshRasterizerSetting() { @@ -24,4 +29,10 @@ void RendererBase::RefreshRasterizerSetting() { } } +void RendererBase::UpdateCurrentFramebufferLayout() { + const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout(); + + render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height); +} + } // namespace VideoCore diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 235de23a1..d9f16b8e6 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -4,23 +4,26 @@ #pragma once +#include <atomic> #include <memory> #include <boost/optional.hpp> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/rasterizer_interface.h" +namespace Core::Frontend { class EmuWindow; +} namespace VideoCore { +struct RendererSettings { + std::atomic_bool use_framelimiter{false}; +}; + class RendererBase : NonCopyable { public: - /// Used to reference a framebuffer - enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture }; - - explicit RendererBase(EmuWindow& window); + explicit RendererBase(Core::Frontend::EmuWindow& window); virtual ~RendererBase(); /// Swap buffers (render frame) @@ -32,9 +35,6 @@ public: /// Shutdown the renderer virtual void ShutDown() = 0; - /// Updates the framebuffer layout of the contained render window handle. - void UpdateCurrentFramebufferLayout(); - // Getter/setter functions: // ------------------------ @@ -54,13 +54,23 @@ public: return *rasterizer; } - void RefreshRasterizerSetting(); + /// Refreshes the settings common to all renderers + void RefreshBaseSettings(); protected: - EmuWindow& render_window; ///< Reference to the render window handle. + /// Refreshes settings specific to the rasterizer. + void RefreshRasterizerSetting(); + + Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. 
std::unique_ptr<RasterizerInterface> rasterizer; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer + + RendererSettings renderer_settings; + +private: + /// Updates the framebuffer layout of the contained render window handle. + void UpdateCurrentFramebufferLayout(); }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8360feb5d..93eadde7a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -RasterizerOpenGL::RasterizerOpenGL(EmuWindow& window) : emu_window{window} { +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) + : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); state.texture_units[i].sampler = texture_samplers[i].sampler.handle; } - // Create SSBOs - for (size_t stage = 0; stage < ssbos.size(); ++stage) { - for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) { - ssbos[stage][buffer].Create(); - state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle; - } - } - GLint ext_num; glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); for (GLint i = 0; i < ext_num; i++) { const std::string_view extension{ reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; - if (extension == "GL_ARB_buffer_storage") { - has_ARB_buffer_storage = true; - } else if (extension == "GL_ARB_direct_state_access") { + if (extension == "GL_ARB_direct_state_access") { has_ARB_direct_state_access = true; } else if (extension == "GL_ARB_separate_shader_objects") { has_ARB_separate_shader_objects = true; @@ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(EmuWindow& window) : emu_window{window} { hw_vao.Create(); - stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); - stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.draw.vertex_array = hw_vao.handle; state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); - - for (unsigned index = 0; index < uniform_buffers.size(); ++index) { - auto& buffer = uniform_buffers[index]; - buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, - GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); - } + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle()); glEnable(GL_BLEND); + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); + LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); } -RasterizerOpenGL::~RasterizerOpenGL() { - if (stream_buffer != nullptr) { - state.draw.vertex_buffer = stream_buffer->GetHandle(); - state.Apply(); - stream_buffer->Release(); - } -} +RasterizerOpenGL::~RasterizerOpenGL() {} 
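The constructor change above queries GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT because the rework that follows binds per-stage uniform data and const buffers as ranges of the single stream buffer via glBindBufferRange, and GL rejects offsets that are not multiples of that alignment. Below is a minimal, self-contained sketch of the rounding rule as the new AlignBuffer helper applies it; the 256-byte value, the BufferOffset alias and the main() scaffolding are illustrative stand-ins, not code from this change.

// Sketch only: round a mapped stream-buffer write cursor up so the resulting offset is
// legal for glBindBufferRange(GL_UNIFORM_BUFFER, ...). Mirrors the arithmetic of the new
// RasterizerOpenGL::AlignBuffer, but with plain standard types instead of GL typedefs.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <utility>

using BufferOffset = std::ptrdiff_t; // stand-in for GLintptr

static std::pair<std::uint8_t*, BufferOffset> AlignBuffer(std::uint8_t* ptr, BufferOffset offset,
                                                          std::size_t alignment) {
    // Align the offset, not the mapped pointer, then advance the pointer by the same
    // amount so the two stay in sync.
    const auto aligned = static_cast<BufferOffset>(
        (static_cast<std::size_t>(offset) + alignment - 1) / alignment * alignment);
    return {ptr + (aligned - offset), aligned};
}

int main() {
    std::uint8_t backing[4096]{};          // pretend this is the mapped stream buffer
    std::uint8_t* ptr = backing + 100;     // write cursor left after some vertex data
    BufferOffset offset = 100;
    const std::size_t ubo_alignment = 256; // stand-in for the queried driver value

    std::tie(ptr, offset) = AlignBuffer(ptr, offset, ubo_alignment);
    std::printf("next UBO upload starts at offset %td\n", offset); // prints 256
}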
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; state.draw.vertex_array = hw_vao.handle; - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); state.Apply(); // Upload all guest vertex arrays sequentially to our buffer @@ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, ASSERT(end > start); u64 size = end - start + 1; - // Copy vertex array data - Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); + GLintptr vertex_buffer_offset; + std::tie(array_ptr, buffer_offset, vertex_buffer_offset) = + UploadMemory(array_ptr, buffer_offset, start, size); // Bind the vertex array to the buffer at the current offset. - glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); + glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset, + vertex_array.stride); - ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); - - array_ptr += size; - buffer_offset += size; + ASSERT_MSG(vertex_array.divisor == 0, "Instanced vertex arrays are not supported"); } // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. @@ -161,11 +135,16 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, // assume every shader uses them all. for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; + + // Ignore invalid attributes. + if (!attrib.IsValid()) + continue; + + auto& buffer = regs.vertex_array[attrib.buffer]; LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), attrib.offset.Value(), attrib.IsNormalized()); - auto& buffer = regs.vertex_array[attrib.buffer]; ASSERT(buffer.IsEnabled()); glEnableVertexAttribArray(index); @@ -196,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program return program_code; } -void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { - // Helper function for uploading uniform data - const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { - if (has_ARB_direct_state_access) { - glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); - } else { - glBindBuffer(GL_COPY_WRITE_BUFFER, handle); - glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); - } - }; - +std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. 
- u32 current_constbuffer_bindpoint = uniform_buffers.size(); + u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; u32 current_texture_bindpoint = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -223,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { continue; } + std::tie(buffer_ptr, buffer_offset) = + AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment)); + const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu.state.shader_stages[stage]); std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); - // Flush the buffer so that the GPU can see the data we just wrote. - glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo)); - - // Upload uniform data as one UBO per stage - const GLintptr ubo_offset = buffer_offset; - copy_buffer(uniform_buffers[stage].handle, ubo_offset, - sizeof(GLShader::MaxwellUniformData)); + // Bind the buffer + glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset, + sizeof(ubo)); - buffer_ptr += sizeof(GLShader::MaxwellUniformData); - buffer_offset += sizeof(GLShader::MaxwellUniformData); + buffer_ptr += sizeof(ubo); + buffer_offset += sizeof(ubo); GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; GLShader::ShaderEntries shader_resources; @@ -277,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { static_cast<Maxwell::ShaderStage>(stage)); // Configure the const buffers for this shader stage. - current_constbuffer_bindpoint = - SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, - current_constbuffer_bindpoint, shader_resources.const_buffer_entries); + std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers( + buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, + current_constbuffer_bindpoint, shader_resources.const_buffer_entries); // Configure the textures for this shader stage. 
current_texture_bindpoint = @@ -294,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { } shader_program_manager->UseTrivialGeometryShader(); + + return {buffer_ptr, buffer_offset}; } size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -427,6 +397,31 @@ void RasterizerOpenGL::Clear() { } } +std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, + size_t alignment) { + // Align the offset, not the mapped pointer + GLintptr offset_aligned = + static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); + return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned}; +} + +std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr, + GLintptr buffer_offset, + Tegra::GPUVAddr gpu_addr, + size_t size, size_t alignment) { + std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment); + GLintptr uploaded_offset = buffer_offset; + + const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; + const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)}; + Memory::ReadBlock(*cpu_addr, buffer_ptr, size); + + buffer_ptr += size; + buffer_offset += size; + + return {buffer_ptr, buffer_offset, uploaded_offset}; +} + void RasterizerOpenGL::DrawArrays() { if (accelerate_draw == AccelDraw::Disabled) return; @@ -451,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() { const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); state.Apply(); size_t buffer_size = CalculateVertexArraysSize(); @@ -461,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() { } // Uniform space for the 5 shader stages - buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + - sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; + buffer_size = + Common::AlignUp<size_t>(buffer_size, 4) + + (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; + + // Add space for at least 18 constant buffers + buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); u8* buffer_ptr; GLintptr buffer_offset; - std::tie(buffer_ptr, buffer_offset) = - stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); + std::tie(buffer_ptr, buffer_offset, std::ignore) = + stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4); + u8* buffer_ptr_base = buffer_ptr; - u8* offseted_buffer; - std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); - - offseted_buffer = - reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); - buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); + std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); // If indexed mode, copy the index buffer GLintptr index_buffer_offset = 0; if (is_indexed) { - const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; - const boost::optional<VAddr> index_data_addr{ - memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())}; - Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size); - - index_buffer_offset = buffer_offset; - offseted_buffer += index_buffer_size; - buffer_offset += index_buffer_size; + std::tie(buffer_ptr, 
buffer_offset, index_buffer_offset) = UploadMemory( + buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size); } - offseted_buffer = - reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); - buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); - - SetupShaders(offseted_buffer, buffer_offset); + std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset); - stream_buffer->Unmap(); + stream_buffer.Unmap(buffer_ptr - buffer_ptr_base); shader_program_manager->ApplyTo(state); state.Apply(); @@ -642,45 +627,32 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program, - u32 current_bindpoint, - const std::vector<GLShader::ConstBufferEntry>& entries) { +std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( + u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program, + u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) { const auto& gpu = Core::System::GetInstance().GPU(); const auto& maxwell3d = gpu.Maxwell3D(); - // Reset all buffer draw state for this stage. - for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { - buffer.bindpoint = 0; - buffer.enabled = false; - } - // Upload only the enabled buffers from the 16 constbuffers of each shader stage const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& used_buffer = entries[bindpoint]; const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; - auto& buffer_draw_state = - state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; if (!buffer.enabled) { continue; } - buffer_draw_state.enabled = true; - buffer_draw_state.bindpoint = current_bindpoint + bindpoint; - - boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); - size_t size = 0; if (used_buffer.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing - size = buffer.size * sizeof(float); + size = buffer.size; if (size > MaxConstbufferSize) { - LOG_ERROR(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); + LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); size = MaxConstbufferSize; } } else { @@ -693,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr size = Common::AlignUp(size, sizeof(GLvec4)); ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); - std::vector<u8> data(size); - Memory::ReadBlock(*addr, data.data(), data.size()); + GLintptr const_buffer_offset; + std::tie(buffer_ptr, buffer_offset, const_buffer_offset) = + UploadMemory(buffer_ptr, buffer_offset, buffer.address, size, + static_cast<size_t>(uniform_buffer_alignment)); - glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); - glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); - glBindBuffer(GL_UNIFORM_BUFFER, 0); + glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint, + stream_buffer.GetHandle(), const_buffer_offset, size); // Now configure the bindpoint of the buffer inside the shader const std::string buffer_name = used_buffer.GetName(); const GLuint index = glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); if (index != GL_INVALID_INDEX) { - 
glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); + glUniformBlockBinding(program, index, current_bindpoint + bindpoint); } } state.Apply(); - return current_bindpoint + static_cast<u32>(entries.size()); + return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; } u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6d6d85cc1..74307f626 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,6 +7,7 @@ #include <array> #include <cstddef> #include <memory> +#include <tuple> #include <utility> #include <vector> #include <glad/glad.h> @@ -21,12 +22,15 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -class EmuWindow; struct ScreenInfo; +namespace Core::Frontend { +class EmuWindow; +} + class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(EmuWindow& renderer); + explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer); ~RasterizerOpenGL() override; void DrawArrays() override; @@ -97,9 +101,10 @@ private: * @param entries Vector describing the buffers that are actually used in the guest shader. * @returns The next available bindpoint for use in the next shader stage. */ - u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, - u32 current_bindpoint, - const std::vector<GLShader::ConstBufferEntry>& entries); + std::tuple<u8*, GLintptr, u32> SetupConstBuffers( + u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + GLuint program, u32 current_bindpoint, + const std::vector<GLShader::ConstBufferEntry>& entries); /* * Configures the current textures to use for the draw command. 
@@ -136,7 +141,6 @@ private: /// Syncs the blend state to match the guest state void SyncBlendState(); - bool has_ARB_buffer_storage = false; bool has_ARB_direct_state_access = false; bool has_ARB_separate_shader_objects = false; bool has_ARB_vertex_attrib_binding = false; @@ -145,29 +149,31 @@ private: RasterizerCacheOpenGL res_cache; - EmuWindow& emu_window; + Core::Frontend::EmuWindow& emu_window; std::unique_ptr<GLShader::ProgramManager> shader_program_manager; OGLVertexArray sw_vao; OGLVertexArray hw_vao; std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; - std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>, - Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> - ssbos; static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - std::unique_ptr<OGLStreamBuffer> stream_buffer; + OGLStreamBuffer stream_buffer; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; + GLint uniform_buffer_alignment; size_t CalculateVertexArraysSize() const; std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); - std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; + std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); + + std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment); - void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); + std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset, + Tegra::GPUVAddr gpu_addr, size_t size, + size_t alignment = 4); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw = AccelDraw::Disabled; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 9fb734b77..fb7476fb8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -92,13 +92,18 @@ struct FormatTuple { } static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8 - {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U + {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, - false}, // A2B10G10R10 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 + false}, // A2B10G10R10U + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U + {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U + {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, false}, // R11FG11FB10F {GL_RGBA32UI, 
GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI @@ -112,16 +117,20 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN2UNORM {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U + {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // G8R8S {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16UNORM + {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U + {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI + {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI @@ -129,15 +138,21 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U + {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI + + // Depth formats + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, + false}, // Z16 // DepthStencil formats {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, false}, // Z24S8 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, - false}, // S8Z24 - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, - false}, // Z16 + false}, // S8Z24 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, ComponentType::Float, false}, // Z32FS8 }}; @@ -228,35 +243,73 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::ABGR8>, 
MortonCopy<true, PixelFormat::B5G6R5>, - MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, - MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, - MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, - MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>, - MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, - MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, - MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::RG16>, - MortonCopy<true, PixelFormat::RG16F>, MortonCopy<true, PixelFormat::RG16UI>, - MortonCopy<true, PixelFormat::RG16I>, MortonCopy<true, PixelFormat::RG16S>, - MortonCopy<true, PixelFormat::RGB32F>, MortonCopy<true, PixelFormat::SRGBA8>, - MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, + // clang-format off + MortonCopy<true, PixelFormat::ABGR8U>, + MortonCopy<true, PixelFormat::ABGR8S>, + MortonCopy<true, PixelFormat::ABGR8UI>, + MortonCopy<true, PixelFormat::B5G6R5U>, + MortonCopy<true, PixelFormat::A2B10G10R10U>, + MortonCopy<true, PixelFormat::A1B5G5R5U>, + MortonCopy<true, PixelFormat::R8U>, + MortonCopy<true, PixelFormat::R8UI>, + MortonCopy<true, PixelFormat::RGBA16F>, + MortonCopy<true, PixelFormat::RGBA16U>, + MortonCopy<true, PixelFormat::RGBA16UI>, + MortonCopy<true, PixelFormat::R11FG11FB10F>, + MortonCopy<true, PixelFormat::RGBA32UI>, + MortonCopy<true, PixelFormat::DXT1>, + MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::DXN1>, + MortonCopy<true, PixelFormat::DXN2UNORM>, + MortonCopy<true, PixelFormat::DXN2SNORM>, + MortonCopy<true, PixelFormat::BC7U>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4>, + MortonCopy<true, PixelFormat::G8R8U>, + MortonCopy<true, PixelFormat::G8R8S>, + MortonCopy<true, PixelFormat::BGRA8>, + MortonCopy<true, PixelFormat::RGBA32F>, + MortonCopy<true, PixelFormat::RG32F>, + MortonCopy<true, PixelFormat::R32F>, + MortonCopy<true, PixelFormat::R16F>, + MortonCopy<true, PixelFormat::R16U>, + MortonCopy<true, PixelFormat::R16S>, + MortonCopy<true, PixelFormat::R16UI>, + MortonCopy<true, PixelFormat::R16I>, + MortonCopy<true, PixelFormat::RG16>, + MortonCopy<true, PixelFormat::RG16F>, + MortonCopy<true, PixelFormat::RG16UI>, + MortonCopy<true, PixelFormat::RG16I>, + MortonCopy<true, PixelFormat::RG16S>, + MortonCopy<true, PixelFormat::RGB32F>, + MortonCopy<true, PixelFormat::SRGBA8>, + MortonCopy<true, PixelFormat::RG8U>, + MortonCopy<true, PixelFormat::RG8S>, + MortonCopy<true, PixelFormat::RG32UI>, + MortonCopy<true, PixelFormat::R32UI>, + MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::Z16>, + MortonCopy<true, PixelFormat::Z24S8>, + MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32FS8>, + // clang-format on }; static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::ABGR8>, - MortonCopy<false, PixelFormat::B5G6R5>, - 
MortonCopy<false, PixelFormat::A2B10G10R10>, - MortonCopy<false, PixelFormat::A1B5G5R5>, - MortonCopy<false, PixelFormat::R8>, + // clang-format off + MortonCopy<false, PixelFormat::ABGR8U>, + MortonCopy<false, PixelFormat::ABGR8S>, + MortonCopy<false, PixelFormat::ABGR8UI>, + MortonCopy<false, PixelFormat::B5G6R5U>, + MortonCopy<false, PixelFormat::A2B10G10R10U>, + MortonCopy<false, PixelFormat::A1B5G5R5U>, + MortonCopy<false, PixelFormat::R8U>, + MortonCopy<false, PixelFormat::R8UI>, MortonCopy<false, PixelFormat::RGBA16F>, + MortonCopy<false, PixelFormat::RGBA16U>, + MortonCopy<false, PixelFormat::RGBA16UI>, MortonCopy<false, PixelFormat::R11FG11FB10F>, MortonCopy<false, PixelFormat::RGBA32UI>, // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not @@ -269,13 +322,17 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::G8R8>, + MortonCopy<false, PixelFormat::G8R8U>, + MortonCopy<false, PixelFormat::G8R8S>, MortonCopy<false, PixelFormat::BGRA8>, MortonCopy<false, PixelFormat::RGBA32F>, MortonCopy<false, PixelFormat::RG32F>, MortonCopy<false, PixelFormat::R32F>, MortonCopy<false, PixelFormat::R16F>, - MortonCopy<false, PixelFormat::R16UNORM>, + MortonCopy<false, PixelFormat::R16U>, + MortonCopy<false, PixelFormat::R16S>, + MortonCopy<false, PixelFormat::R16UI>, + MortonCopy<false, PixelFormat::R16I>, MortonCopy<false, PixelFormat::RG16>, MortonCopy<false, PixelFormat::RG16F>, MortonCopy<false, PixelFormat::RG16UI>, @@ -283,11 +340,16 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<false, PixelFormat::RG16S>, MortonCopy<false, PixelFormat::RGB32F>, MortonCopy<false, PixelFormat::SRGBA8>, - MortonCopy<false, PixelFormat::Z24S8>, - MortonCopy<false, PixelFormat::S8Z24>, + MortonCopy<false, PixelFormat::RG8U>, + MortonCopy<false, PixelFormat::RG8S>, + MortonCopy<false, PixelFormat::RG32UI>, + MortonCopy<false, PixelFormat::R32UI>, MortonCopy<false, PixelFormat::Z32F>, MortonCopy<false, PixelFormat::Z16>, + MortonCopy<false, PixelFormat::Z24S8>, + MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32FS8>, + // clang-format on }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -407,7 +469,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { } static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { - const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)}; + const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; for (size_t y = 0; y < height; ++y) { for (size_t x = 0; x < width; ++x) { const size_t offset{bpp * (y * width + x)}; @@ -439,7 +501,8 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma ConvertS8Z24ToZ24S8(data, width, height); break; - case PixelFormat::G8R8: + case PixelFormat::G8R8U: + case PixelFormat::G8R8S: // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8. 
ConvertG8R8ToR8G8(data, width, height); break; @@ -730,8 +793,6 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, // Verify surface is compatible for blitting const auto& params{surface->GetSurfaceParams()}; ASSERT(params.type == new_params.type); - ASSERT(params.pixel_format == new_params.pixel_format); - ASSERT(params.component_type == new_params.component_type); // Create a new surface with the new parameters, and blit the previous surface to it Surface new_surface{std::make_shared<CachedSurface>(new_params)}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 829a76dfe..fc8b44219 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -23,45 +23,62 @@ using PageMap = boost::icl::interval_map<u64, int>; struct SurfaceParams { enum class PixelFormat { - ABGR8 = 0, - B5G6R5 = 1, - A2B10G10R10 = 2, - A1B5G5R5 = 3, - R8 = 4, - RGBA16F = 5, - R11FG11FB10F = 6, - RGBA32UI = 7, - DXT1 = 8, - DXT23 = 9, - DXT45 = 10, - DXN1 = 11, // This is also known as BC4 - DXN2UNORM = 12, - DXN2SNORM = 13, - BC7U = 14, - ASTC_2D_4X4 = 15, - G8R8 = 16, - BGRA8 = 17, - RGBA32F = 18, - RG32F = 19, - R32F = 20, - R16F = 21, - R16UNORM = 22, - RG16 = 23, - RG16F = 24, - RG16UI = 25, - RG16I = 26, - RG16S = 27, - RGB32F = 28, - SRGBA8 = 29, + ABGR8U = 0, + ABGR8S = 1, + ABGR8UI = 2, + B5G6R5U = 3, + A2B10G10R10U = 4, + A1B5G5R5U = 5, + R8U = 6, + R8UI = 7, + RGBA16F = 8, + RGBA16U = 9, + RGBA16UI = 10, + R11FG11FB10F = 11, + RGBA32UI = 12, + DXT1 = 13, + DXT23 = 14, + DXT45 = 15, + DXN1 = 16, // This is also known as BC4 + DXN2UNORM = 17, + DXN2SNORM = 18, + BC7U = 19, + ASTC_2D_4X4 = 20, + G8R8U = 21, + G8R8S = 22, + BGRA8 = 23, + RGBA32F = 24, + RG32F = 25, + R32F = 26, + R16F = 27, + R16U = 28, + R16S = 29, + R16UI = 30, + R16I = 31, + RG16 = 32, + RG16F = 33, + RG16UI = 34, + RG16I = 35, + RG16S = 36, + RGB32F = 37, + SRGBA8 = 38, + RG8U = 39, + RG8S = 40, + RG32UI = 41, + R32UI = 42, MaxColorFormat, + // Depth formats + Z32F = 43, + Z16 = 44, + + MaxDepthFormat, + // DepthStencil formats - Z24S8 = 30, - S8Z24 = 31, - Z32F = 32, - Z16 = 33, - Z32FS8 = 34, + Z24S8 = 45, + S8Z24 = 46, + Z32FS8 = 47, MaxDepthStencilFormat, @@ -99,12 +116,17 @@ struct SurfaceParams { return 0; constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ - 1, // ABGR8 - 1, // B5G6R5 - 1, // A2B10G10R10 - 1, // A1B5G5R5 - 1, // R8 + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI 1, // R11FG11FB10F 1, // RGBA32UI 4, // DXT1 @@ -115,13 +137,17 @@ struct SurfaceParams { 4, // DXN2SNORM 4, // BC7U 4, // ASTC_2D_4X4 - 1, // G8R8 + 1, // G8R8U + 1, // G8R8S 1, // BGRA8 1, // RGBA32F 1, // RG32F 1, // R32F 1, // R16F - 1, // R16UNORM + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I 1, // RG16 1, // RG16F 1, // RG16UI @@ -129,10 +155,14 @@ struct SurfaceParams { 1, // RG16S 1, // RGB32F 1, // SRGBA8 - 1, // Z24S8 - 1, // S8Z24 + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI 1, // Z32F 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 1, // Z32FS8 }}; @@ -145,12 +175,17 @@ struct SurfaceParams { return 0; constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ - 32, // ABGR8 - 16, // B5G6R5 - 32, // A2B10G10R10 - 16, // A1B5G5R5 - 8, // R8 + 32, // ABGR8U + 32, // ABGR8S + 32, // ABGR8UI + 16, // B5G6R5U + 32, // A2B10G10R10U + 16, // 
A1B5G5R5U + 8, // R8U + 8, // R8UI 64, // RGBA16F + 64, // RGBA16U + 64, // RGBA16UI 32, // R11FG11FB10F 128, // RGBA32UI 64, // DXT1 @@ -161,13 +196,17 @@ struct SurfaceParams { 128, // DXN2SNORM 128, // BC7U 32, // ASTC_2D_4X4 - 16, // G8R8 + 16, // G8R8U + 16, // G8R8S 32, // BGRA8 128, // RGBA32F 64, // RG32F 32, // R32F 16, // R16F - 16, // R16UNORM + 16, // R16U + 16, // R16S + 16, // R16UI + 16, // R16I 32, // RG16 32, // RG16F 32, // RG16UI @@ -175,10 +214,14 @@ struct SurfaceParams { 32, // RG16S 96, // RGB32F 32, // SRGBA8 - 32, // Z24S8 - 32, // S8Z24 + 16, // RG8U + 16, // RG8S + 64, // RG32UI + 32, // R32UI 32, // Z32F 16, // Z16 + 32, // Z24S8 + 32, // S8Z24 64, // Z32FS8 }}; @@ -214,13 +257,21 @@ struct SurfaceParams { // gamma. case Tegra::RenderTargetFormat::RGBA8_SRGB: case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::ABGR8; + return PixelFormat::ABGR8U; + case Tegra::RenderTargetFormat::RGBA8_SNORM: + return PixelFormat::ABGR8S; + case Tegra::RenderTargetFormat::RGBA8_UINT: + return PixelFormat::ABGR8UI; case Tegra::RenderTargetFormat::BGRA8_UNORM: return PixelFormat::BGRA8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - return PixelFormat::A2B10G10R10; + return PixelFormat::A2B10G10R10U; case Tegra::RenderTargetFormat::RGBA16_FLOAT: return PixelFormat::RGBA16F; + case Tegra::RenderTargetFormat::RGBA16_UNORM: + return PixelFormat::RGBA16U; + case Tegra::RenderTargetFormat::RGBA16_UINT: + return PixelFormat::RGBA16UI; case Tegra::RenderTargetFormat::RGBA32_FLOAT: return PixelFormat::RGBA32F; case Tegra::RenderTargetFormat::RG32_FLOAT: @@ -228,11 +279,13 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return PixelFormat::R11FG11FB10F; case Tegra::RenderTargetFormat::B5G6R5_UNORM: - return PixelFormat::B5G6R5; + return PixelFormat::B5G6R5U; case Tegra::RenderTargetFormat::RGBA32_UINT: return PixelFormat::RGBA32UI; case Tegra::RenderTargetFormat::R8_UNORM: - return PixelFormat::R8; + return PixelFormat::R8U; + case Tegra::RenderTargetFormat::R8_UINT: + return PixelFormat::R8UI; case Tegra::RenderTargetFormat::RG16_FLOAT: return PixelFormat::RG16F; case Tegra::RenderTargetFormat::RG16_UINT: @@ -243,10 +296,26 @@ struct SurfaceParams { return PixelFormat::RG16; case Tegra::RenderTargetFormat::RG16_SNORM: return PixelFormat::RG16S; + case Tegra::RenderTargetFormat::RG8_UNORM: + return PixelFormat::RG8U; + case Tegra::RenderTargetFormat::RG8_SNORM: + return PixelFormat::RG8S; case Tegra::RenderTargetFormat::R16_FLOAT: return PixelFormat::R16F; + case Tegra::RenderTargetFormat::R16_UNORM: + return PixelFormat::R16U; + case Tegra::RenderTargetFormat::R16_SNORM: + return PixelFormat::R16S; + case Tegra::RenderTargetFormat::R16_UINT: + return PixelFormat::R16UI; + case Tegra::RenderTargetFormat::R16_SINT: + return PixelFormat::R16I; case Tegra::RenderTargetFormat::R32_FLOAT: return PixelFormat::R32F; + case Tegra::RenderTargetFormat::R32_UINT: + return PixelFormat::R32UI; + case Tegra::RenderTargetFormat::RG32_UINT: + return PixelFormat::RG32UI; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -258,21 +327,79 @@ struct SurfaceParams { // TODO(Subv): Properly implement this switch (format) { case Tegra::Texture::TextureFormat::A8R8G8B8: - return PixelFormat::ABGR8; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::ABGR8U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::ABGR8S; + case Tegra::Texture::ComponentType::UINT: + 
return PixelFormat::ABGR8UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::B5G6R5: - return PixelFormat::B5G6R5; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::B5G6R5U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::A2B10G10R10: - return PixelFormat::A2B10G10R10; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::A2B10G10R10U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::A1B5G5R5: - return PixelFormat::A1B5G5R5; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::A1B5G5R5U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R8: - return PixelFormat::R8; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::R8U; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R8UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::G8R8: - return PixelFormat::G8R8; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::G8R8U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::G8R8S; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R16_G16_B16_A16: - return PixelFormat::RGBA16F; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::RGBA16U; + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGBA16F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::BF10GF11RF11: - return PixelFormat::R11FG11FB10F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R11FG11FB10F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32_A32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -284,23 +411,53 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32: - return PixelFormat::RG32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RG32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RG32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32: - return PixelFormat::RGB32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGB32F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R16: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::R16F; case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R16UNORM; + 
return PixelFormat::R16U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::R16S; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R16UI; + case Tegra::Texture::ComponentType::SINT: + return PixelFormat::R16I; } LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32: - return PixelFormat::R32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::ZF32: return PixelFormat::Z32F; + case Tegra::Texture::TextureFormat::Z16: + return PixelFormat::Z16; case Tegra::Texture::TextureFormat::Z24S8: return PixelFormat::Z24S8; case Tegra::Texture::TextureFormat::DXT1: @@ -376,9 +533,15 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGB10_A2_UNORM: case Tegra::RenderTargetFormat::R8_UNORM: case Tegra::RenderTargetFormat::RG16_UNORM: + case Tegra::RenderTargetFormat::R16_UNORM: case Tegra::RenderTargetFormat::B5G6R5_UNORM: + case Tegra::RenderTargetFormat::RG8_UNORM: + case Tegra::RenderTargetFormat::RGBA16_UNORM: return ComponentType::UNorm; + case Tegra::RenderTargetFormat::RGBA8_SNORM: case Tegra::RenderTargetFormat::RG16_SNORM: + case Tegra::RenderTargetFormat::R16_SNORM: + case Tegra::RenderTargetFormat::RG8_SNORM: return ComponentType::SNorm; case Tegra::RenderTargetFormat::RGBA16_FLOAT: case Tegra::RenderTargetFormat::R11G11B10_FLOAT: @@ -389,9 +552,16 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R32_FLOAT: return ComponentType::Float; case Tegra::RenderTargetFormat::RGBA32_UINT: + case Tegra::RenderTargetFormat::RGBA16_UINT: case Tegra::RenderTargetFormat::RG16_UINT: + case Tegra::RenderTargetFormat::R8_UINT: + case Tegra::RenderTargetFormat::R16_UINT: + case Tegra::RenderTargetFormat::RG32_UINT: + case Tegra::RenderTargetFormat::R32_UINT: + case Tegra::RenderTargetFormat::RGBA8_UINT: return ComponentType::UInt; case Tegra::RenderTargetFormat::RG16_SINT: + case Tegra::RenderTargetFormat::R16_SINT: return ComponentType::SInt; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); @@ -402,7 +572,7 @@ struct SurfaceParams { static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8; + return PixelFormat::ABGR8U; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -429,6 +599,10 @@ struct SurfaceParams { return SurfaceType::ColorTexture; } + if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) { + return SurfaceType::Depth; + } + if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) { return SurfaceType::DepthStencil; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 32f06f409..57cf9f213 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -141,6 +141,15 @@ private: ExitMethod jmp = Scan(target, end, labels); return exit_method = ParallelExit(no_jmp, jmp); } + case OpCode::Id::SSY: { + // The SSY instruction uses a similar encoding as 
the BRA instruction. + ASSERT_MSG(instr.bra.constant_buffer == 0, + "Constant buffer SSY is not supported"); + u32 target = offset + instr.bra.GetBranchTarget(); + labels.insert(target); + // Continue scanning for an exit method. + break; + } } } } @@ -347,35 +356,43 @@ public: * @param reg The register to use as the source value. */ void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) { - std::string dest = GetOutputAttribute(attribute) + GetSwizzle(elem); + std::string dest = GetOutputAttribute(attribute); std::string src = GetRegisterAsFloat(reg); - shader.AddLine(dest + " = " + src + ';'); + + if (!dest.empty()) { + // Can happen with unknown/unimplemented output attributes, in which case we ignore the + // instruction for now. + shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); + } } /// Generates code representing a uniform (C buffer) register, interpreted as the input type. - std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { + std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type, + Register::Size size = Register::Size::Word) { declr_const_buffers[index].MarkAsUsed(index, offset, stage); std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + std::to_string(offset % 4) + ']'; if (type == GLSLRegister::Type::Float) { - return value; + // Do nothing, default } else if (type == GLSLRegister::Type::Integer) { - return "floatBitsToInt(" + value + ')'; + value = "floatBitsToInt(" + value + ')'; + } else if (type == GLSLRegister::Type::UnsignedInteger) { + value = "floatBitsToUint(" + value + ')'; } else { UNREACHABLE(); } + + return ConvertIntegerSize(value, size); } - std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, + std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str, GLSLRegister::Type type) { - declr_const_buffers[index].MarkAsUsedIndirect(index, stage); - - std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " + - std::to_string(offset) + ") / 4)"; + declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage); - std::string value = - 'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]"; + std::string final_offset = fmt::format("({} + {})", index_str, offset / 4); + std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" + + final_offset + " % 4]"; if (type == GLSLRegister::Type::Float) { return value; @@ -524,21 +541,21 @@ private: // vertex shader, and what's the value of the fourth element when inside a Tess Eval // shader. ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); - return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))"; - case Attribute::Index::Unknown_63: - // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this. - LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63"); - UNREACHABLE(); - break; + return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))"; + case Attribute::Index::FrontFacing: + // TODO(Subv): Find out what the values are for the other elements. + ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); + return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 
1 : 0))"; default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; - if (attribute >= Attribute::Index::Attribute_0) { + if (attribute >= Attribute::Index::Attribute_0 && + attribute <= Attribute::Index::Attribute_31) { declr_input_attribute.insert(attribute); return "input_attribute_" + std::to_string(index); } - LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); + LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", static_cast<u32>(attribute)); UNREACHABLE(); } @@ -560,6 +577,7 @@ private: LOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); UNREACHABLE(); + return {}; } } @@ -689,10 +707,11 @@ private: const std::string& op_a, const std::string& op_b) const { using Tegra::Shader::PredCondition; static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { - {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, - {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, - {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, - {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="}, + {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, + {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, + {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, + {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="}, + {PredCondition::GreaterThanWithNan, ">"}, }; const auto& comparison{PredicateComparisonStrings.find(condition)}; @@ -701,7 +720,8 @@ private: std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; if (condition == PredCondition::LessThanWithNan || - condition == PredCondition::NotEqualWithNan) { + condition == PredCondition::NotEqualWithNan || + condition == PredCondition::GreaterThanWithNan) { predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; } @@ -727,6 +747,30 @@ private: return op->second; } + /** + * Transforms the input string GLSL operand into one that applies the abs() function and negates + * the output if necessary. When both abs and neg are true, the negation will be applied after + * taking the absolute value. + * @param operand The input operand to take the abs() of, negate, or both. + * @param abs Whether to apply the abs() function to the input operand. + * @param neg Whether to negate the input operand. + * @returns String corresponding to the operand after being transformed by the abs() and + * negation operations. + */ + static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) { + std::string result = operand; + + if (abs) { + result = "abs(" + result + ')'; + } + + if (neg) { + result = "-(" + result + ')'; + } + + return result; + } + /* * Returns whether the instruction at the specified offset is a 'sched' instruction. * Sched instructions always appear before a sequence of 3 instructions. 
@@ -740,28 +784,51 @@ private: } void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, - const std::string& op_b) { + const std::string& op_b, + Tegra::Shader::PredicateResultMode predicate_mode, + Tegra::Shader::Pred predicate) { + std::string result{}; switch (logic_op) { case LogicOperation::And: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1); + result = '(' + op_a + " & " + op_b + ')'; break; } case LogicOperation::Or: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1); + result = '(' + op_a + " | " + op_b + ')'; break; } case LogicOperation::Xor: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1); + result = '(' + op_a + " ^ " + op_b + ')'; break; } case LogicOperation::PassB: { - regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1); + result = op_b; break; } default: LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op)); UNREACHABLE(); } + + if (dest != Tegra::Shader::Register::ZeroIndex) { + regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); + } + + using Tegra::Shader::PredicateResultMode; + // Write the predicate value depending on the predicate mode. + switch (predicate_mode) { + case PredicateResultMode::None: + // Do nothing. + return; + case PredicateResultMode::NotZero: + // Set the predicate to true if the result is not zero. + SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0"); + break; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}", + static_cast<u32>(predicate_mode)); + UNREACHABLE(); + } } void WriteTexsInstruction(const Instruction& instr, const std::string& coord, @@ -799,6 +866,33 @@ private: shader.AddLine('}'); } + /* + * Emits code to push the input target address to the SSY address stack, incrementing the stack + * top. + */ + void EmitPushToSSYStack(u32 target) { + shader.AddLine('{'); + ++shader.scope; + shader.AddLine("ssy_stack[ssy_stack_top] = " + std::to_string(target) + "u;"); + shader.AddLine("ssy_stack_top++;"); + --shader.scope; + shader.AddLine('}'); + } + + /* + * Emits code to pop an address from the SSY address stack, setting the jump address to the + * popped address and decrementing the stack top. + */ + void EmitPopFromSSYStack() { + shader.AddLine('{'); + ++shader.scope; + shader.AddLine("ssy_stack_top--;"); + shader.AddLine("jmp_to = ssy_stack[ssy_stack_top];"); + shader.AddLine("break;"); + --shader.scope; + shader.AddLine('}'); + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -828,7 +922,11 @@ private: ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute, "NeverExecute predicate not implemented"); - if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + // Some instructions (like SSY) don't have a predicate field, they are always + // unconditionally executed. 
+ bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId()); + + if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { shader.AddLine("if (" + GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) + ')'); @@ -839,13 +937,6 @@ private: switch (opcode->GetType()) { case OpCode::Type::Arithmetic: { std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); - if (instr.alu.abs_a) { - op_a = "abs(" + op_a + ')'; - } - - if (instr.alu.negate_a) { - op_a = "-(" + op_a + ')'; - } std::string op_b; @@ -860,17 +951,10 @@ private: } } - if (instr.alu.abs_b) { - op_b = "abs(" + op_b + ')'; - } - - if (instr.alu.negate_b) { - op_b = "-(" + op_b + ')'; - } - switch (opcode->GetId()) { case OpCode::Id::MOV_C: case OpCode::Id::MOV_R: { + // MOV does not have neither 'abs' nor 'neg' bits. regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); break; } @@ -878,6 +962,8 @@ private: case OpCode::Id::FMUL_C: case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { + // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. + op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.saturate_d); break; @@ -885,11 +971,14 @@ private: case OpCode::Id::FADD_C: case OpCode::Id::FADD_R: case OpCode::Id::FADD_IMM: { + op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); + op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.saturate_d); break; } case OpCode::Id::MUFU: { + op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); switch (instr.sub_op) { case SubOp::Cos: regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, @@ -929,6 +1018,9 @@ private: case OpCode::Id::FMNMX_C: case OpCode::Id::FMNMX_R: case OpCode::Id::FMNMX_IMM: { + op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); + op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); + std::string condition = GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); std::string parameters = op_a + ',' + op_b; @@ -942,7 +1034,7 @@ private: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { // Currently RRO is only implemented as a register move. - // Usage of `abs_b` and `negate_b` here should also be correct. 
+ op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; @@ -1079,7 +1171,9 @@ private: if (instr.alu.lop32i.invert_b) op_b = "~(" + op_b + ')'; - WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b); + WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, + Tegra::Shader::PredicateResultMode::None, + Tegra::Shader::Pred::UnusedIndex); break; } default: { @@ -1145,16 +1239,14 @@ private: case OpCode::Id::LOP_C: case OpCode::Id::LOP_R: case OpCode::Id::LOP_IMM: { - ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented"); - ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented"); - if (instr.alu.lop.invert_a) op_a = "~(" + op_a + ')'; if (instr.alu.lop.invert_b) op_b = "~(" + op_b + ')'; - WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b); + WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, + instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); break; } case OpCode::Id::IMNMX_C: @@ -1219,8 +1311,6 @@ private: break; } case OpCode::Type::Conversion: { - ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented"); - switch (opcode->GetId()) { case OpCode::Id::I2I_R: { ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); @@ -1232,20 +1322,41 @@ private: op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1, instr.alu.saturate_d, 0, instr.conversion.dest_size); break; } - case OpCode::Id::I2F_R: { + case OpCode::Id::I2F_R: + case OpCode::Id::I2F_C: { ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented"); ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); - std::string op_a = regs.GetRegisterAsInteger( - instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); + + std::string op_a{}; + + if (instr.is_b_gpr) { + op_a = + regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed, + instr.conversion.src_size); + } else { + op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + instr.conversion.is_input_signed + ? 
GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger, + instr.conversion.src_size); + } if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); break; } @@ -1254,6 +1365,14 @@ private: ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); + if (instr.conversion.abs_a) { + op_a = "abs(" + op_a + ')'; + } + + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + switch (instr.conversion.f2f.rounding) { case Tegra::Shader::F2fRoundingOp::None: break; @@ -1276,21 +1395,29 @@ private: break; } - if (instr.conversion.abs_a) { - op_a = "abs(" + op_a + ')'; - } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); break; } - case OpCode::Id::F2I_R: { + case OpCode::Id::F2I_R: + case OpCode::Id::F2I_C: { ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); - std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); + std::string op_a{}; + + if (instr.is_b_gpr) { + op_a = regs.GetRegisterAsFloat(instr.gpr20); + } else { + op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); + } if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + switch (instr.conversion.f2i.rounding) { case Tegra::Shader::F2iRoundingOp::None: break; @@ -1338,11 +1465,16 @@ private: case OpCode::Id::LD_C: { ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented"); + // Add an extra scope and declare the index register inside to prevent + // overwriting it in case it is used as an output of the LD instruction. 
+ shader.AddLine("{"); + ++shader.scope; + + shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + + " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); + std::string op_a = - regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8, - GLSLRegister::Type::Float); - std::string op_b = - regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8, + regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index", GLSLRegister::Type::Float); switch (instr.ld_c.type.Value()) { @@ -1350,16 +1482,22 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); break; - case Tegra::Shader::UniformType::Double: + case Tegra::Shader::UniformType::Double: { + std::string op_b = + regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, + "index", GLSLRegister::Type::Float); regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1); break; - + } default: LOG_CRITICAL(HW_GPU, "Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); UNREACHABLE(); } + + --shader.scope; + shader.AddLine("}"); break; } case OpCode::Id::ST_A: { @@ -1615,6 +1753,99 @@ private: } break; } + case OpCode::Type::Xmad: { + ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented"); + ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented"); + + std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; + std::string op_b; + std::string op_c; + + // TODO(bunnei): Needs to be fixed once op_a or op_b is signed + ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented"); + const bool is_signed{instr.xmad.sign_a == 1}; + + bool is_merge{}; + switch (opcode->GetId()) { + case OpCode::Id::XMAD_CR: { + is_merge = instr.xmad.merge_56; + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + instr.xmad.sign_b ? GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + case OpCode::Id::XMAD_RR: { + is_merge = instr.xmad.merge_37; + op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + case OpCode::Id::XMAD_RC: { + op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b); + op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + is_signed ? 
GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger); + break; + } + case OpCode::Id::XMAD_IMM: { + is_merge = instr.xmad.merge_37; + op_b += std::to_string(instr.xmad.imm20_16); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + + // TODO(bunnei): Ensure this is right with signed operands + if (instr.xmad.high_a) { + op_a = "((" + op_a + ") >> 16)"; + } else { + op_a = "((" + op_a + ") & 0xFFFF)"; + } + + std::string src2 = '(' + op_b + ')'; // Preserve original source 2 + if (instr.xmad.high_b) { + op_b = '(' + src2 + " >> 16)"; + } else { + op_b = '(' + src2 + " & 0xFFFF)"; + } + + std::string product = '(' + op_a + " * " + op_b + ')'; + if (instr.xmad.product_shift_left) { + product = '(' + product + " << 16)"; + } + + switch (instr.xmad.mode) { + case Tegra::Shader::XmadMode::None: + break; + case Tegra::Shader::XmadMode::CLo: + op_c = "((" + op_c + ") & 0xFFFF)"; + break; + case Tegra::Shader::XmadMode::CHi: + op_c = "((" + op_c + ") >> 16)"; + break; + case Tegra::Shader::XmadMode::CBcc: + op_c = "((" + op_c + ") + (" + src2 + "<< 16))"; + break; + default: { + LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}", + static_cast<u32>(instr.xmad.mode.Value())); + UNREACHABLE(); + } + } + + std::string sum{'(' + product + " + " + op_c + ')'}; + if (is_merge) { + sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; + } + + regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1); + break; + } default: { switch (opcode->GetId()) { case OpCode::Id::EXIT: { @@ -1652,7 +1883,15 @@ private: } case OpCode::Id::KIL: { ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + + // Enclose "discard" in a conditional, so that GLSL compilation does not complain + // about unexecuted instructions that may follow this. + shader.AddLine("if (true) {"); + ++shader.scope; shader.AddLine("discard;"); + --shader.scope; + shader.AddLine("}"); + break; } case OpCode::Id::BRA: { @@ -1668,16 +1907,25 @@ private: break; } case OpCode::Id::SSY: { - // The SSY opcode tells the GPU where to re-converge divergent execution paths, we - // can ignore this when generating GLSL code. + // The SSY opcode tells the GPU where to re-converge divergent execution paths, it + // sets the target of the jump that the SYNC instruction will make. The SSY opcode + // has a similar structure to the BRA opcode. + ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported"); + + u32 target = offset + instr.bra.GetBranchTarget(); + EmitPushToSSYStack(target); break; } - case OpCode::Id::SYNC: + case OpCode::Id::SYNC: { + // The SYNC opcode jumps to the address previously set by the SSY opcode ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + EmitPopFromSSYStack(); + break; + } case OpCode::Id::DEPBAR: { - // TODO(Subv): Find out if we actually have to care about these instructions or if + // TODO(Subv): Find out if we actually have to care about this instruction or if // the GLSL compiler takes care of that for us. - LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed"); + LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); break; } default: { @@ -1691,7 +1939,7 @@ private: } // Close the predicate condition scope. 
- if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { --shader.scope; shader.AddLine('}'); } @@ -1742,6 +1990,13 @@ private: } else { labels.insert(subroutine.begin); shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); + + // TODO(Subv): Figure out the actual depth of the SSY stack, for now it seems + // unlikely that shaders will use 20 nested SSYs. + constexpr u32 SSY_STACK_SIZE = 20; + shader.AddLine("uint ssy_stack[" + std::to_string(SSY_STACK_SIZE) + "];"); + shader.AddLine("uint ssy_stack_top = 0u;"); + shader.AddLine("while (true) {"); ++shader.scope; @@ -1757,7 +2012,7 @@ private: u32 compile_end = CompileRange(label, next_label); if (compile_end > next_label && compile_end != PROGRAM_END) { // This happens only when there is a label inside a IF/LOOP block - shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); + shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }"); labels.emplace(compile_end); } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 129c777d1..57e0e1726 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -38,6 +38,7 @@ out vec4 position; layout (std140) uniform vs_config { vec4 viewport_flip; + uvec4 instance_id; }; void main() { @@ -90,6 +91,7 @@ out vec4 color; layout (std140) uniform fs_config { vec4 viewport_flip; + uvec4 instance_id; }; void main() { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 415d42fda..f0886caac 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -37,11 +37,16 @@ void SetShaderUniformBlockBindings(GLuint shader) { } // namespace Impl void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + const auto& regs = gpu.regs; + const auto& state = gpu.state; // TODO(bunnei): Support more than one viewport viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; + + // We only assign the instance to the first component of the vector, the rest is just padding. + instance_id[0] = state.current_instance; } } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 716933a0b..75fa73605 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -24,14 +24,15 @@ void SetShaderUniformBlockBindings(GLuint shader); } // namespace Impl /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at +// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. // Not following that rule will cause problems on some AMD drivers. 
struct MaxwellUniformData { void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); alignas(16) GLvec4 viewport_flip; + alignas(16) GLuvec4 instance_id; }; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 68bacd4c5..1d1975179 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -203,21 +203,6 @@ void OpenGLState::Apply() const { } } - // Constbuffers - for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) { - for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) { - const auto& current = cur_state.draw.const_buffers[stage][buffer_id]; - const auto& new_state = draw.const_buffers[stage][buffer_id]; - - if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || - current.ssbo != new_state.ssbo) { - if (new_state.enabled) { - glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo); - } - } - } - } - // Framebuffer if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 5c7b636e4..bdb02ba25 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -119,12 +119,6 @@ public: GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING GLuint shader_program; // GL_CURRENT_PROGRAM GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING - struct ConstBufferConfig { - bool enabled = false; - GLuint bindpoint; - GLuint ssbo; - }; - std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers; } draw; struct { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index a2713e9f0..03a8ed8b7 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,174 +9,91 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -class OrphanBuffer : public OGLStreamBuffer { -public: - explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} - ~OrphanBuffer() override; - -private: - void Create(size_t size, size_t sync_subdivide) override; - void Release() override; - - std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; - void Unmap() override; - - std::vector<u8> data; -}; - -class StorageBuffer : public OGLStreamBuffer { -public: - explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} - ~StorageBuffer() override; - -private: - void Create(size_t size, size_t sync_subdivide) override; - void Release() override; - - std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; - void Unmap() override; - - struct Fence { - OGLSync sync; - size_t offset; - }; - std::deque<Fence> head; - std::deque<Fence> tail; - - u8* mapped_ptr; -}; - -OGLStreamBuffer::OGLStreamBuffer(GLenum target) { - gl_target = target; -} - -GLuint OGLStreamBuffer::GetHandle() const { - return gl_buffer.handle; -} 
+OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) + : gl_target(target), buffer_size(size) { + gl_buffer.Create(); + glBindBuffer(gl_target, gl_buffer.handle); -std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { - if (storage_buffer) { - return std::make_unique<StorageBuffer>(target); + GLsizeiptr allocate_size = size; + if (target == GL_ARRAY_BUFFER) { + // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer + // read position is near the end and is an out-of-bound access to the vertex buffer. This is + // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the + // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the + // crash. + allocate_size *= 2; } - return std::make_unique<OrphanBuffer>(target); -} -OrphanBuffer::~OrphanBuffer() { - Release(); + if (GLAD_GL_ARB_buffer_storage) { + persistent = true; + coherent = prefer_coherent; + GLbitfield flags = + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + glBufferStorage(gl_target, allocate_size, nullptr, flags); + mapped_ptr = static_cast<u8*>(glMapBufferRange( + gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); + } else { + glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); + } } -void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { - buffer_pos = 0; - buffer_size = size; - data.resize(buffer_size); - - if (gl_buffer.handle == 0) { - gl_buffer.Create(); +OGLStreamBuffer::~OGLStreamBuffer() { + if (persistent) { glBindBuffer(gl_target, gl_buffer.handle); + glUnmapBuffer(gl_target); } - - glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); -} - -void OrphanBuffer::Release() { gl_buffer.Release(); } -std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { - buffer_pos = Common::AlignUp(buffer_pos, alignment); - - if (buffer_pos + size > buffer_size) { - Create(std::max(buffer_size, size), 0); - } - - mapped_size = size; - return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos)); -} - -void OrphanBuffer::Unmap() { - glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), - static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]); - buffer_pos += mapped_size; -} - -StorageBuffer::~StorageBuffer() { - Release(); +GLuint OGLStreamBuffer::GetHandle() const { + return gl_buffer.handle; } -void StorageBuffer::Create(size_t size, size_t sync_subdivide) { - if (gl_buffer.handle != 0) - return; - - buffer_pos = 0; - buffer_size = size; - buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); - - gl_buffer.Create(); - glBindBuffer(gl_target, gl_buffer.handle); - - glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); - mapped_ptr = reinterpret_cast<u8*>( - glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); +GLsizeiptr OGLStreamBuffer::GetSize() const { + return buffer_size; } -void StorageBuffer::Release() { - if (gl_buffer.handle == 0) - return; - - glUnmapBuffer(gl_target); - - gl_buffer.Release(); - head.clear(); - tail.clear(); -} - -std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { +std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { ASSERT(size <= buffer_size); + 
ASSERT(alignment <= buffer_size);
+ mapped_size = size;
- OGLSync sync;
-
- buffer_pos = Common::AlignUp(buffer_pos, alignment);
- size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide);
-
- if (!head.empty() &&
- (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) {
- ASSERT(head.back().sync.handle == 0);
- head.back().sync.Create();
+ if (alignment > 0) {
+ buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
}
+ bool invalidate = false;
if (buffer_pos + size > buffer_size) {
- if (!tail.empty()) {
- std::swap(sync, tail.back().sync);
- tail.clear();
- }
- std::swap(tail, head);
buffer_pos = 0;
- effective_offset = 0;
- }
+ invalidate = true;
- while (!tail.empty() && buffer_pos + size > tail.front().offset) {
- std::swap(sync, tail.front().sync);
- tail.pop_front();
+ if (persistent) {
+ glUnmapBuffer(gl_target);
+ }
}
- if (sync.handle != 0) {
- glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
- sync.Release();
+ if (invalidate || !persistent) {
+ GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
+ (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
+ (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
+ mapped_ptr = static_cast<u8*>(
+ glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
+ mapped_offset = buffer_pos;
}
- if (head.empty() || effective_offset > head.back().offset) {
- head.emplace_back();
- head.back().offset = effective_offset;
+ return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
+}
+
+void OGLStreamBuffer::Unmap(GLsizeiptr size) {
+ ASSERT(size <= mapped_size);
+
+ if (!coherent && size > 0) {
+ glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
}
- mapped_size = size;
- return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos));
-}
+ if (!persistent) {
+ glUnmapBuffer(gl_target);
+ }
-void StorageBuffer::Unmap() {
- glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos),
- static_cast<GLsizeiptr>(mapped_size));
- buffer_pos += mapped_size;
+ buffer_pos += size;
}
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index e78dc5784..45592daaf 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -2,35 +2,41 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#pragma once
-
-#include <memory>
+#include <tuple>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(GLenum target);
- virtual ~OGLStreamBuffer() = default;
-
-public:
- static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target);
-
- virtual void Create(size_t size, size_t sync_subdivide) = 0;
- virtual void Release() {}
+ explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
+ ~OGLStreamBuffer();
GLuint GetHandle() const;
+ GLsizeiptr GetSize() const;
+
+ /*
+ * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
+ * and the optional alignment requirement.
+ * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
+ * The return values are the pointer to the new chunk, the offset within the buffer, + * and the invalidation flag for previous chunks. + * The actual used size must be specified on unmapping the chunk. + */ + std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); - virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; - virtual void Unmap() = 0; + void Unmap(GLsizeiptr size); -protected: +private: OGLBuffer gl_buffer; GLenum gl_target; - size_t buffer_pos = 0; - size_t buffer_size = 0; - size_t buffer_sync_subdivide = 0; - size_t mapped_size = 0; + bool coherent = false; + bool persistent = false; + + GLintptr buffer_pos = 0; + GLsizeiptr buffer_size = 0; + GLintptr mapped_offset = 0; + GLsizeiptr mapped_size = 0; + u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index c439446b1..5d91a0c2f 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,15 +24,25 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; inline GLenum VertexType(Maxwell::VertexAttribute attrib) { switch (attrib.type) { + case Maxwell::VertexAttribute::Type::UnsignedInt: case Maxwell::VertexAttribute::Type::UnsignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_UNSIGNED_SHORT; + case Maxwell::VertexAttribute::Size::Size_32: + case Maxwell::VertexAttribute::Size::Size_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return GL_UNSIGNED_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; } @@ -42,16 +52,25 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return {}; } + case Maxwell::VertexAttribute::Type::SignedInt: case Maxwell::VertexAttribute::Type::SignedNorm: { switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_32_32_32: - return GL_INT; + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_SHORT; + case Maxwell::VertexAttribute::Size::Size_32: + case Maxwell::VertexAttribute::Size::Size_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return GL_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; } @@ -61,9 +80,6 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return {}; } - case Maxwell::VertexAttribute::Type::UnsignedInt: - return GL_UNSIGNED_INT; - case Maxwell::VertexAttribute::Type::Float: return GL_FLOAT; } @@ -91,6 +107,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { case Maxwell::PrimitiveTopology::Points: return GL_POINTS; + 
case Maxwell::PrimitiveTopology::LineStrip: + return GL_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: @@ -129,6 +147,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to // manually mix them. However the shader part of this is not yet implemented. return GL_CLAMP_TO_BORDER; + case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: + return GL_MIRROR_CLAMP_TO_EDGE; } LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); UNREACHABLE(); @@ -156,42 +176,61 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { switch (factor) { case Maxwell::Blend::Factor::Zero: + case Maxwell::Blend::Factor::ZeroGL: return GL_ZERO; case Maxwell::Blend::Factor::One: + case Maxwell::Blend::Factor::OneGL: return GL_ONE; case Maxwell::Blend::Factor::SourceColor: + case Maxwell::Blend::Factor::SourceColorGL: return GL_SRC_COLOR; case Maxwell::Blend::Factor::OneMinusSourceColor: + case Maxwell::Blend::Factor::OneMinusSourceColorGL: return GL_ONE_MINUS_SRC_COLOR; case Maxwell::Blend::Factor::SourceAlpha: + case Maxwell::Blend::Factor::SourceAlphaGL: return GL_SRC_ALPHA; case Maxwell::Blend::Factor::OneMinusSourceAlpha: + case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: return GL_ONE_MINUS_SRC_ALPHA; case Maxwell::Blend::Factor::DestAlpha: + case Maxwell::Blend::Factor::DestAlphaGL: return GL_DST_ALPHA; case Maxwell::Blend::Factor::OneMinusDestAlpha: + case Maxwell::Blend::Factor::OneMinusDestAlphaGL: return GL_ONE_MINUS_DST_ALPHA; case Maxwell::Blend::Factor::DestColor: + case Maxwell::Blend::Factor::DestColorGL: return GL_DST_COLOR; case Maxwell::Blend::Factor::OneMinusDestColor: + case Maxwell::Blend::Factor::OneMinusDestColorGL: return GL_ONE_MINUS_DST_COLOR; case Maxwell::Blend::Factor::SourceAlphaSaturate: + case Maxwell::Blend::Factor::SourceAlphaSaturateGL: return GL_SRC_ALPHA_SATURATE; case Maxwell::Blend::Factor::Source1Color: + case Maxwell::Blend::Factor::Source1ColorGL: return GL_SRC1_COLOR; case Maxwell::Blend::Factor::OneMinusSource1Color: + case Maxwell::Blend::Factor::OneMinusSource1ColorGL: return GL_ONE_MINUS_SRC1_COLOR; case Maxwell::Blend::Factor::Source1Alpha: + case Maxwell::Blend::Factor::Source1AlphaGL: return GL_SRC1_ALPHA; case Maxwell::Blend::Factor::OneMinusSource1Alpha: + case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: return GL_ONE_MINUS_SRC1_ALPHA; case Maxwell::Blend::Factor::ConstantColor: + case Maxwell::Blend::Factor::ConstantColorGL: return GL_CONSTANT_COLOR; case Maxwell::Blend::Factor::OneMinusConstantColor: + case Maxwell::Blend::Factor::OneMinusConstantColorGL: return GL_ONE_MINUS_CONSTANT_COLOR; case Maxwell::Blend::Factor::ConstantAlpha: + case Maxwell::Blend::Factor::ConstantAlphaGL: return GL_CONSTANT_ALPHA; case Maxwell::Blend::Factor::OneMinusConstantAlpha: + case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return GL_ONE_MINUS_CONSTANT_ALPHA; } LOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 899865e3b..bf30eda6d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -18,7 +18,6 @@ #include "core/tracer/recorder.h" #include 
"video_core/renderer_opengl/renderer_opengl.h" #include "video_core/utils.h" -#include "video_core/video_core.h" static const char vertex_shader[] = R"( #version 150 core @@ -92,7 +91,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons return matrix; } -ScopeAcquireGLContext::ScopeAcquireGLContext(EmuWindow& emu_window_) : emu_window{emu_window_} { +ScopeAcquireGLContext::ScopeAcquireGLContext(Core::Frontend::EmuWindow& emu_window_) + : emu_window{emu_window_} { if (Settings::values.use_multi_core) { emu_window.MakeCurrent(); } @@ -103,7 +103,9 @@ ScopeAcquireGLContext::~ScopeAcquireGLContext() { } } -RendererOpenGL::RendererOpenGL(EmuWindow& window) : VideoCore::RendererBase{window} {} +RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window) + : VideoCore::RendererBase{window} {} + RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) @@ -423,7 +425,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum switch (severity) { case GL_DEBUG_SEVERITY_HIGH: - LOG_ERROR(Render_OpenGL, format, str_source, str_type, id, message); + LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message); break; case GL_DEBUG_SEVERITY_MEDIUM: LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 428afa3b7..a5eab6997 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,9 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_state.h" +namespace Core::Frontend { class EmuWindow; +} /// Structure used for storing information about the textures for the Switch screen struct TextureInfo { @@ -34,16 +36,16 @@ struct ScreenInfo { /// Helper class to acquire/release OpenGL context within a given scope class ScopeAcquireGLContext : NonCopyable { public: - explicit ScopeAcquireGLContext(EmuWindow& window); + explicit ScopeAcquireGLContext(Core::Frontend::EmuWindow& window); ~ScopeAcquireGLContext(); private: - EmuWindow& emu_window; + Core::Frontend::EmuWindow& emu_window; }; class RendererOpenGL : public VideoCore::RendererBase { public: - explicit RendererOpenGL(EmuWindow& window); + explicit RendererOpenGL(Core::Frontend::EmuWindow& window); ~RendererOpenGL() override; /// Swap buffers (render frame) diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 5085ef96b..6780d1c16 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -9,9 +9,7 @@ namespace VideoCore { -std::atomic<bool> g_toggle_framelimit_enabled; - -std::unique_ptr<RendererBase> CreateRenderer(EmuWindow& emu_window) { +std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) { return std::make_unique<RendererOpenGL>(emu_window); } diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 7c01c0b8d..f79f85dfe 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -4,27 +4,22 @@ #pragma once -#include <atomic> #include <memory> +namespace Core::Frontend { class EmuWindow; +} namespace VideoCore { class RendererBase; -enum class Renderer { Software, OpenGL }; - -// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from -// qt ui) -extern std::atomic<bool> g_toggle_framelimit_enabled; - /** * Creates a renderer instance. 
*
* @note The returned renderer instance is simply allocated. Its Init()
* function still needs to be called to fully complete its setup.
*/
-std::unique_ptr<RendererBase> CreateRenderer(EmuWindow& emu_window);
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window);

} // namespace VideoCore
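
The OGLStreamBuffer rewrite above replaces the OrphanBuffer/StorageBuffer hierarchy with a single, optionally persistently mapped ring buffer. As a reading aid, here is a minimal usage sketch under stated assumptions: it presumes a current OpenGL context and the gl_stream_buffer.h interface from this change, and the UploadVertices helper and its 4-byte alignment are made up for illustration, not part of the commit.

// Minimal sketch of the new Map/Unmap calling convention.
#include <cstring>
#include <tuple>
#include <vector>

#include <glad/glad.h>

#include "video_core/renderer_opengl/gl_stream_buffer.h"

GLintptr UploadVertices(OGLStreamBuffer& buffer, const std::vector<u8>& vertices) {
    u8* write_ptr;
    GLintptr offset;
    bool invalidated;
    std::tie(write_ptr, offset, invalidated) =
        buffer.Map(static_cast<GLsizeiptr>(vertices.size()), 4);

    if (invalidated) {
        // The buffer wrapped around: offsets handed out by earlier Map calls are no
        // longer valid, so any state that cached them must be rebuilt by the caller.
    }

    std::memcpy(write_ptr, vertices.data(), vertices.size());

    // Report how many bytes were actually written; with a non-coherent persistent
    // mapping this triggers glFlushMappedBufferRange internally.
    buffer.Unmap(static_cast<GLsizeiptr>(vertices.size()));

    // The returned byte offset is what glVertexAttribPointer / glDrawElements
    // style calls would consume.
    return offset;
}

Note that the fence-based synchronization the removed StorageBuffer performed is now delegated to the mapping flags: wrap-around remaps with GL_MAP_INVALIDATE_BUFFER_BIT, while in-buffer allocations use GL_MAP_UNSYNCHRONIZED_BIT or the persistent mapping, so callers must treat the invalidation flag as a signal to reupload anything they still reference.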

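For readers following the new XMAD handling in gl_shader_decompiler.cpp, the arithmetic emitted into the generated GLSL can be summarized by the scalar model below. This is a sketch, not decompiler code: XmadFlags and XmadModel are made-up names, only the unsigned case is modelled (the decompiler asserts when either source is signed), and the CSfu mode, which the decompiler also leaves unhandled, is omitted.

// Scalar model of the XMAD lowering: multiply two 16-bit halves, optionally
// shift the product, add a third operand adjusted by the mode, then optionally
// merge the low 16 bits of the sum with the high half of the second source.
#include <cstdint>

enum class XmadMode { None, CLo, CHi, CBcc };

struct XmadFlags {
    bool high_a = false;            // take the upper 16 bits of source A
    bool high_b = false;            // take the upper 16 bits of source B
    bool product_shift_left = false;
    bool merge = false;             // merge_37 / merge_56 in the encoding
    XmadMode mode = XmadMode::None;
};

uint32_t XmadModel(uint32_t a, uint32_t b, uint32_t c, const XmadFlags& f) {
    const uint32_t src2 = b; // unmodified second source, reused by CBcc and merge
    a = f.high_a ? (a >> 16) : (a & 0xFFFF);
    b = f.high_b ? (b >> 16) : (b & 0xFFFF);

    uint32_t product = a * b;
    if (f.product_shift_left) {
        product <<= 16;
    }

    switch (f.mode) {
    case XmadMode::None:
        break;
    case XmadMode::CLo:
        c &= 0xFFFF;
        break;
    case XmadMode::CHi:
        c >>= 16;
        break;
    case XmadMode::CBcc:
        c += src2 << 16;
        break;
    }

    uint32_t sum = product + c;
    if (f.merge) {
        sum = (sum & 0xFFFF) | (src2 << 16);
    }
    return sum;
}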