diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 11 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 21 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 72 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 21 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 138 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 44 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 7 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 6 |
13 files changed, 311 insertions, 114 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 3bca16364..dfbf80abd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -398,27 +398,6 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } -bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const { - // The Vertex stage is always enabled. - if (stage == Regs::ShaderStage::Vertex) - return true; - - switch (stage) { - case Regs::ShaderStage::TesselationControl: - return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationControl)] - .enable != 0; - case Regs::ShaderStage::TesselationEval: - return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationEval)] - .enable != 0; - case Regs::ShaderStage::Geometry: - return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Geometry)].enable != 0; - case Regs::ShaderStage::Fragment: - return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Fragment)].enable != 0; - } - - UNREACHABLE(); -} - void Maxwell3D::ProcessClearBuffers() { ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && regs.clear_buffers.R == regs.clear_buffers.B && diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 5a7cf0107..6f0170ff7 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -379,6 +379,14 @@ public: } }; + bool IsShaderConfigEnabled(size_t index) const { + // The VertexB is always enabled. + if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) { + return true; + } + return shader_config[index].enable != 0; + } + union { struct { INSERT_PADDING_WORDS(0x45); @@ -780,9 +788,6 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const; - /// Returns whether the specified shader stage is enabled or not. - bool IsShaderStageEnabled(Regs::ShaderStage stage) const; - private: std::unordered_map<u32, std::vector<u32>> uploaded_macros; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 2bc1782ad..65fa1495f 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -142,6 +142,7 @@ enum class PredCondition : u64 { GreaterThan = 4, NotEqual = 5, GreaterEqual = 6, + LessThanWithNan = 9, NotEqualWithNan = 13, // TODO(Subv): Other condition types }; @@ -201,6 +202,11 @@ enum class IMinMaxExchange : u64 { XHi = 3, }; +enum class FlowCondition : u64 { + Always = 0xF, + Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -298,6 +304,13 @@ union Instruction { } iadd32i; union { + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<56, 1, u64> negate_a; + BitField<57, 1, u64> abs_b; + } fadd32i; + + union { BitField<20, 8, u64> shift_position; BitField<28, 8, u64> shift_length; BitField<48, 1, u64> negate_b; @@ -309,6 +322,10 @@ union Instruction { } bfe; union { + BitField<0, 5, FlowCondition> cond; + } flow; + + union { BitField<48, 1, u64> negate_b; BitField<49, 1, u64> negate_c; } ffma; @@ -487,6 +504,7 @@ public: FADD_C, FADD_R, FADD_IMM, + FADD32I, FMUL_C, FMUL_R, FMUL_IMM, @@ -679,13 +697,14 @@ private: INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("001100101-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), + INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), + INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ea138d402..eecbc5ff0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -15,6 +15,7 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" +#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" @@ -22,6 +23,7 @@ #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/video_core.h" using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PixelFormat = SurfaceParams::PixelFormat; @@ -181,6 +183,19 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, return {array_ptr, buffer_offset}; } +static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) { + auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); + + // Fetch program code from memory + GLShader::ProgramCode program_code; + auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; + const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; + Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + + return program_code; +} + void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { // Helper function for uploading uniform data const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { @@ -193,26 +208,23 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { }; auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); - ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. u32 current_constbuffer_bindpoint = uniform_buffers.size(); u32 current_texture_bindpoint = 0; - for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; - const auto& stage = index - 1; // Stage indices are 0 - 5 - - const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage)); - // Skip stages that are not enabled - if (!is_enabled) { + if (!gpu.regs.IsShaderConfigEnabled(index)) { continue; } + const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 + GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu.state.shader_stages[stage]); std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); @@ -228,16 +240,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { buffer_ptr += sizeof(GLShader::MaxwellUniformData); buffer_offset += sizeof(GLShader::MaxwellUniformData); - // Fetch program code from memory - GLShader::ProgramCode program_code; - const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; - const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; - Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); - GLShader::ShaderSetup setup{std::move(program_code)}; - + GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; GLShader::ShaderEntries shader_resources; switch (program) { + case Maxwell::ShaderProgram::VertexA: { + // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. + // Conventional HW does not support this, so we combine VertexA and VertexB into one + // stage here. + setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB)); + GLShader::MaxwellVSConfig vs_config{setup}; + shader_resources = + shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + break; + } + case Maxwell::ShaderProgram::VertexB: { GLShader::MaxwellVSConfig vs_config{setup}; shader_resources = @@ -268,6 +285,12 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, current_texture_bindpoint, shader_resources.texture_samplers); + + // When VertexA is enabled, we have dual vertex shaders + if (program == Maxwell::ShaderProgram::VertexA) { + // VertexB was combined with VertexA, so we skip the VertexB iteration + index++; + } } shader_program_manager->UseTrivialGeometryShader(); @@ -301,9 +324,6 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c bool using_depth_fb) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - // Sync the depth test state before configuring the framebuffer surfaces. - SyncDepthTestState(); - // TODO(bunnei): Implement this const bool has_stencil = false; @@ -368,11 +388,20 @@ void RasterizerOpenGL::Clear() { if (regs.clear_buffers.Z) { clear_mask |= GL_DEPTH_BUFFER_BIT; use_depth_fb = true; + + // Always enable the depth write when clearing the depth buffer. The depth write mask is + // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true. + state.depth.test_enabled = true; + state.depth.write_mask = GL_TRUE; + state.depth.test_func = GL_ALWAYS; + state.Apply(); } if (clear_mask == 0) return; + ScopeAcquireGLContext acquire_context; + auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(use_color_fb, use_depth_fb); @@ -399,9 +428,12 @@ void RasterizerOpenGL::DrawArrays() { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + ScopeAcquireGLContext acquire_context; + auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(true, regs.zeta.Address() != 0); + SyncDepthTestState(); SyncBlendState(); SyncCullMode(); @@ -605,9 +637,6 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr auto& gpu = Core::System::GetInstance().GPU(); auto& maxwell3d = gpu.Get3DEngine(); - ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage), - "Attempted to upload constbuffer of disabled shader stage"); - // Reset all buffer draw state for this stage. for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { buffer.bindpoint = 0; @@ -674,9 +703,6 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, auto& gpu = Core::System::GetInstance().GPU(); auto& maxwell3d = gpu.Get3DEngine(); - ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage), - "Attempted to upload textures of disabled shader stage"); - ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), "Exceeded the number of active textures."); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 323ff7408..c171c4c5b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -105,6 +105,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8 // DepthStencil formats {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, @@ -112,6 +113,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, false}, // S8Z24 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, + false}, // Z16 }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { @@ -194,8 +197,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::Z24S8>, + MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::Z16>, }; static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), @@ -215,10 +219,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::ABGR8>, + nullptr, + MortonCopy<false, PixelFormat::G8R8>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32F>, + MortonCopy<false, PixelFormat::Z16>, }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -271,9 +277,10 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { S8Z24 input_pixel{}; Z24S8 output_pixel{}; + const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; for (size_t y = 0; y < height; ++y) { for (size_t x = 0; x < width; ++x) { - const size_t offset{y * width + x}; + const size_t offset{bpp * (y * width + x)}; std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); output_pixel.s8.Assign(input_pixel.s8); output_pixel.z24.Assign(input_pixel.z24); @@ -281,6 +288,19 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { } } } + +static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { + const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)}; + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + const size_t offset{bpp * (y * width + x)}; + const u8 temp{data[offset]}; + data[offset] = data[offset + 1]; + data[offset + 1] = temp; + } + } +} + /** * Helper function to perform software conversion (as needed) when loading a buffer from Switch * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with @@ -301,6 +321,11 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. ConvertS8Z24ToZ24S8(data, width, height); break; + + case PixelFormat::G8R8: + // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8. + ConvertG8R8ToR8G8(data, width, height); + break; } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 1bedae992..718c45ce1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -37,13 +37,15 @@ struct SurfaceParams { DXN1 = 11, // This is also known as BC4 BC7U = 12, ASTC_2D_4X4 = 13, + G8R8 = 14, MaxColorFormat, // DepthStencil formats - Z24S8 = 14, - S8Z24 = 15, - Z32F = 16, + Z24S8 = 15, + S8Z24 = 16, + Z32F = 17, + Z16 = 18, MaxDepthStencilFormat, @@ -95,9 +97,11 @@ struct SurfaceParams { 4, // DXN1 4, // BC7U 4, // ASTC_2D_4X4 + 1, // G8R8 1, // Z24S8 1, // S8Z24 1, // Z32F + 1, // Z16 }}; ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); @@ -123,9 +127,11 @@ struct SurfaceParams { 64, // DXN1 128, // BC7U 32, // ASTC_2D_4X4 + 16, // G8R8 32, // Z24S8 32, // S8Z24 32, // Z32F + 16, // Z16 }}; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -143,6 +149,8 @@ struct SurfaceParams { return PixelFormat::Z24S8; case Tegra::DepthFormat::Z32_FLOAT: return PixelFormat::Z32F; + case Tegra::DepthFormat::Z16_UNORM: + return PixelFormat::Z16; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -181,6 +189,8 @@ struct SurfaceParams { return PixelFormat::A1B5G5R5; case Tegra::Texture::TextureFormat::R8: return PixelFormat::R8; + case Tegra::Texture::TextureFormat::G8R8: + return PixelFormat::G8R8; case Tegra::Texture::TextureFormat::R16_G16_B16_A16: return PixelFormat::RGBA16F; case Tegra::Texture::TextureFormat::BF10GF11RF11: @@ -218,6 +228,8 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::A1B5G5R5; case PixelFormat::R8: return Tegra::Texture::TextureFormat::R8; + case PixelFormat::G8R8: + return Tegra::Texture::TextureFormat::G8R8; case PixelFormat::RGBA16F: return Tegra::Texture::TextureFormat::R16_G16_B16_A16; case PixelFormat::R11FG11FB10F: @@ -249,6 +261,8 @@ struct SurfaceParams { return Tegra::DepthFormat::Z24_S8_UNORM; case PixelFormat::Z32F: return Tegra::DepthFormat::Z32_FLOAT; + case PixelFormat::Z16: + return Tegra::DepthFormat::Z16_UNORM; default: UNREACHABLE(); } @@ -295,6 +309,7 @@ struct SurfaceParams { static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) { switch (format) { + case Tegra::DepthFormat::Z16_UNORM: case Tegra::DepthFormat::S8_Z24_UNORM: case Tegra::DepthFormat::Z24_S8_UNORM: return ComponentType::UNorm; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 5914077e8..5fae95788 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -42,13 +42,14 @@ enum class ExitMethod { struct Subroutine { /// Generates a name suitable for GLSL source code. std::string GetName() const { - return "sub_" + std::to_string(begin) + '_' + std::to_string(end); + return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix; } - u32 begin; ///< Entry point of the subroutine. - u32 end; ///< Return point of the subroutine. - ExitMethod exit_method; ///< Exit method of the subroutine. - std::set<u32> labels; ///< Addresses refereced by JMP instructions. + u32 begin; ///< Entry point of the subroutine. + u32 end; ///< Return point of the subroutine. + const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name + ExitMethod exit_method; ///< Exit method of the subroutine. + std::set<u32> labels; ///< Addresses refereced by JMP instructions. bool operator<(const Subroutine& rhs) const { return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); @@ -58,11 +59,11 @@ struct Subroutine { /// Analyzes shader code and produces a set of subroutines. class ControlFlowAnalyzer { public: - ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) + ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix) : program_code(program_code) { // Recursively finds all subroutines. - const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); + const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix); if (program_main.exit_method != ExitMethod::AlwaysEnd) throw DecompileFail("Program does not always end"); } @@ -77,12 +78,12 @@ private: std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; /// Adds and analyzes a new subroutine if it is not added yet. - const Subroutine& AddSubroutine(u32 begin, u32 end) { - auto iter = subroutines.find(Subroutine{begin, end}); + const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) { + auto iter = subroutines.find(Subroutine{begin, end, suffix}); if (iter != subroutines.end()) return *iter; - Subroutine subroutine{begin, end}; + Subroutine subroutine{begin, end, suffix}; subroutine.exit_method = Scan(begin, end, subroutine.labels); if (subroutine.exit_method == ExitMethod::Undetermined) throw DecompileFail("Recursive function detected"); @@ -191,7 +192,8 @@ public: UnsignedInteger, }; - GLSLRegister(size_t index, ShaderWriter& shader) : index{index}, shader{shader} {} + GLSLRegister(size_t index, ShaderWriter& shader, const std::string& suffix) + : index{index}, shader{shader}, suffix{suffix} {} /// Gets the GLSL type string for a register static std::string GetTypeString(Type type) { @@ -216,7 +218,7 @@ public: /// Returns a GLSL string representing the current state of the register const std::string GetActiveString() { declr_type.insert(active_type); - return GetPrefixString(active_type) + std::to_string(index); + return GetPrefixString(active_type) + std::to_string(index) + '_' + suffix; } /// Returns true if the active type is a float @@ -251,6 +253,7 @@ private: ShaderWriter& shader; Type active_type{Type::Float}; std::set<Type> declr_type; + const std::string& suffix; }; /** @@ -262,8 +265,8 @@ private: class GLSLRegisterManager { public: GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, - const Maxwell3D::Regs::ShaderStage& stage) - : shader{shader}, declarations{declarations}, stage{stage} { + const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix) + : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} { BuildRegisterList(); } @@ -430,12 +433,12 @@ public: } /// Add declarations for registers - void GenerateDeclarations() { + void GenerateDeclarations(const std::string& suffix) { for (const auto& reg : regs) { for (const auto& type : reg.DeclaredTypes()) { declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' + - GLSLRegister::GetPrefixString(type) + - std::to_string(reg.GetIndex()) + " = 0;"); + reg.GetPrefixString(type) + std::to_string(reg.GetIndex()) + + '_' + suffix + " = 0;"); } } declarations.AddNewLine(); @@ -558,7 +561,7 @@ private: /// Build the GLSL register list. void BuildRegisterList() { for (size_t index = 0; index < Register::NumRegisters; ++index) { - regs.emplace_back(index, shader); + regs.emplace_back(index, shader, suffix); } } @@ -620,16 +623,17 @@ private: std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; std::vector<SamplerEntry> used_samplers; const Maxwell3D::Regs::ShaderStage& stage; + const std::string& suffix; }; class GLSLGenerator { public: GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, - u32 main_offset, Maxwell3D::Regs::ShaderStage stage) + u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) : subroutines(subroutines), program_code(program_code), main_offset(main_offset), - stage(stage) { + stage(stage), suffix(suffix) { - Generate(); + Generate(suffix); } std::string GetShaderCode() { @@ -644,7 +648,7 @@ public: private: /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { - auto iter = subroutines.find(Subroutine{begin, end}); + auto iter = subroutines.find(Subroutine{begin, end, suffix}); ASSERT(iter != subroutines.end()); return *iter; } @@ -689,7 +693,7 @@ private: // Can't assign to the constant predicate. ASSERT(pred != static_cast<u64>(Pred::UnusedIndex)); - std::string variable = 'p' + std::to_string(pred); + std::string variable = 'p' + std::to_string(pred) + '_' + suffix; shader.AddLine(variable + " = " + value + ';'); declr_predicates.insert(std::move(variable)); } @@ -707,7 +711,7 @@ private: if (index == static_cast<u64>(Pred::UnusedIndex)) variable = "true"; else - variable = 'p' + std::to_string(index); + variable = 'p' + std::to_string(index) + '_' + suffix; if (negate) { return "!(" + variable + ')'; @@ -728,10 +732,10 @@ private: const std::string& op_a, const std::string& op_b) const { using Tegra::Shader::PredCondition; static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { - {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, - {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, - {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, - {PredCondition::NotEqualWithNan, "!="}, + {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, + {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, + {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, + {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="}, }; const auto& comparison{PredicateComparisonStrings.find(condition)}; @@ -739,7 +743,8 @@ private: "Unknown predicate comparison operation"); std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; - if (condition == PredCondition::NotEqualWithNan) { + if (condition == PredCondition::LessThanWithNan || + condition == PredCondition::NotEqualWithNan) { predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; } @@ -968,6 +973,29 @@ private: regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1); break; } + case OpCode::Id::FADD32I: { + std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + std::string op_b = GetImmediate32(instr); + + if (instr.fadd32i.abs_a) { + op_a = "abs(" + op_a + ')'; + } + + if (instr.fadd32i.negate_a) { + op_a = "-(" + op_a + ')'; + } + + if (instr.fadd32i.abs_b) { + op_b = "abs(" + op_b + ')'; + } + + if (instr.fadd32i.negate_b) { + op_b = "-(" + op_b + ')'; + } + + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); + break; + } } break; } @@ -1616,16 +1644,32 @@ private: shader.AddLine("color.a = " + regs.GetRegisterAsFloat(3) + ';'); } - shader.AddLine("return true;"); - if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, otherwise - // we have to account for the possibility of the condition not being met, so - // continue processing the next instruction. - offset = PROGRAM_END - 1; + switch (instr.flow.cond) { + case Tegra::Shader::FlowCondition::Always: + shader.AddLine("return true;"); + if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { + // If this is an unconditional exit then just end processing here, + // otherwise we have to account for the possibility of the condition + // not being met, so continue processing the next instruction. + offset = PROGRAM_END - 1; + } + break; + + case Tegra::Shader::FlowCondition::Fcsm_Tr: + // TODO(bunnei): What is this used for? If we assume this conditon is not + // satisifed, dual vertex shaders in Farming Simulator make more sense + LOG_CRITICAL(HW_GPU, "Skipping unknown FlowCondition::Fcsm_Tr"); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unhandled flow condition: {}", + static_cast<u32>(instr.flow.cond.Value())); + UNREACHABLE(); } break; } case OpCode::Id::KIL: { + ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); shader.AddLine("discard;"); break; } @@ -1646,8 +1690,9 @@ private: // can ignore this when generating GLSL code. break; } - case OpCode::Id::DEPBAR: - case OpCode::Id::SYNC: { + case OpCode::Id::SYNC: + ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + case OpCode::Id::DEPBAR: { // TODO(Subv): Find out if we actually have to care about these instructions or if // the GLSL compiler takes care of that for us. LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed"); @@ -1687,7 +1732,7 @@ private: return program_counter; } - void Generate() { + void Generate(const std::string& suffix) { // Add declarations for all subroutines for (const auto& subroutine : subroutines) { shader.AddLine("bool " + subroutine.GetName() + "();"); @@ -1695,7 +1740,7 @@ private: shader.AddNewLine(); // Add the main entry point - shader.AddLine("bool exec_shader() {"); + shader.AddLine("bool exec_" + suffix + "() {"); ++shader.scope; CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); --shader.scope; @@ -1758,7 +1803,7 @@ private: /// Add declarations for registers void GenerateDeclarations() { - regs.GenerateDeclarations(); + regs.GenerateDeclarations(suffix); for (const auto& pred : declr_predicates) { declarations.AddLine("bool " + pred + " = false;"); @@ -1771,27 +1816,30 @@ private: const ProgramCode& program_code; const u32 main_offset; Maxwell3D::Regs::ShaderStage stage; + const std::string& suffix; ShaderWriter shader; ShaderWriter declarations; - GLSLRegisterManager regs{shader, declarations, stage}; + GLSLRegisterManager regs{shader, declarations, stage, suffix}; // Declarations std::set<std::string> declr_predicates; }; // namespace Decompiler std::string GetCommonDeclarations() { - std::string declarations = "bool exec_shader();\n"; + std::string declarations; declarations += "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4))); + declarations += '\n'; return declarations; } boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, - Maxwell3D::Regs::ShaderStage stage) { + Maxwell3D::Regs::ShaderStage stage, + const std::string& suffix) { try { - auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); - GLSLGenerator generator(subroutines, program_code, main_offset, stage); + auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines(); + GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix); return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; } catch (const DecompileFail& exception) { LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 382c76b7a..7610dad3a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -20,7 +20,8 @@ using Tegra::Engines::Maxwell3D; std::string GetCommonDeclarations(); boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, - Maxwell3D::Regs::ShaderStage stage); + Maxwell3D::Regs::ShaderStage stage, + const std::string& suffix); } // namespace Decompiler } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index c1e6fac9f..129c777d1 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -17,10 +17,17 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConf std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); + out += "bool exec_vertex();\n"; + + if (setup.IsDualProgram()) { + out += "bool exec_vertex_b();\n"; + } + + ProgramResult program = + Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Vertex, "vertex") + .get_value_or({}); - ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, - Maxwell3D::Regs::ShaderStage::Vertex) - .get_value_or({}); out += R"( out gl_PerVertex { @@ -34,7 +41,14 @@ layout (std140) uniform vs_config { }; void main() { - exec_shader(); + exec_vertex(); +)"; + + if (setup.IsDualProgram()) { + out += " exec_vertex_b();"; + } + + out += R"( // Viewport can be flipped, which is unsupported by glViewport position.xy *= viewport_flip.xy; @@ -44,8 +58,19 @@ void main() { // For now, this is here to bring order in lieu of proper emulation position.w = 1.0; } + )"; + out += program.first; + + if (setup.IsDualProgram()) { + ProgramResult program_b = + Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") + .get_value_or({}); + out += program_b.first; + } + return {out, program.second}; } @@ -53,12 +78,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); + out += "bool exec_fragment();\n"; - ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, - Maxwell3D::Regs::ShaderStage::Fragment) - .get_value_or({}); + ProgramResult program = + Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Fragment, "fragment") + .get_value_or({}); out += R"( - in vec4 position; out vec4 color; @@ -67,7 +93,7 @@ layout (std140) uniform fs_config { }; void main() { - exec_shader(); + exec_fragment(); } )"; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index ed890e0f9..4729ce0fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -115,21 +115,48 @@ struct ShaderEntries { using ProgramResult = std::pair<std::string, ShaderEntries>; struct ShaderSetup { - ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} + ShaderSetup(const ProgramCode& program_code) { + program.code = program_code; + } + + struct { + ProgramCode code; + ProgramCode code_b; // Used for dual vertex shaders + } program; - ProgramCode program_code; bool program_code_hash_dirty = true; u64 GetProgramCodeHash() { if (program_code_hash_dirty) { - program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash = GetNewHash(); program_code_hash_dirty = false; } return program_code_hash; } + /// Used in scenarios where we have a dual vertex shaders + void SetProgramB(const ProgramCode& program_b) { + program.code_b = program_b; + has_program_b = true; + } + + bool IsDualProgram() const { + return has_program_b; + } + private: + u64 GetNewHash() const { + if (has_program_b) { + // Compute hash over dual shader programs + return Common::ComputeHash64(&program, sizeof(program)); + } else { + // Compute hash over a single shader program + return Common::ComputeHash64(&program.code, program.code.size()); + } + } + u64 program_code_hash{}; + bool has_program_b{}; }; struct MaxwellShaderConfigCommon { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 00841e937..1930fa6ef 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -92,11 +92,24 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons return matrix; } +ScopeAcquireGLContext::ScopeAcquireGLContext() { + if (Settings::values.use_multi_core) { + VideoCore::g_emu_window->MakeCurrent(); + } +} +ScopeAcquireGLContext::~ScopeAcquireGLContext() { + if (Settings::values.use_multi_core) { + VideoCore::g_emu_window->DoneCurrent(); + } +} + RendererOpenGL::RendererOpenGL() = default; RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) { + ScopeAcquireGLContext acquire_context; + Core::System::GetInstance().perf_stats.EndSystemFrame(); // Maintain the rasterizer's state as a priority @@ -418,7 +431,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum /// Initialize the renderer bool RendererOpenGL::Init() { - render_window->MakeCurrent(); + ScopeAcquireGLContext acquire_context; if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 21f0d298c..fd0267cf5 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -31,6 +31,13 @@ struct ScreenInfo { TextureInfo texture; }; +/// Helper class to acquire/release OpenGL context within a given scope +class ScopeAcquireGLContext : NonCopyable { +public: + ScopeAcquireGLContext(); + ~ScopeAcquireGLContext(); +}; + class RendererOpenGL : public RendererBase { public: RendererOpenGL(); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index b3937b2fe..be18aa299 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -62,6 +62,7 @@ u32 BytesPerPixel(TextureFormat format) { return 4; case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: + case TextureFormat::G8R8: return 2; case TextureFormat::R8: return 1; @@ -77,6 +78,8 @@ u32 BytesPerPixel(TextureFormat format) { static u32 DepthBytesPerPixel(DepthFormat format) { switch (format) { + case DepthFormat::Z16_UNORM: + return 2; case DepthFormat::S8_Z24_UNORM: case DepthFormat::Z24_S8_UNORM: case DepthFormat::Z32_FLOAT: @@ -110,6 +113,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: case TextureFormat::R8: + case TextureFormat::G8R8: case TextureFormat::R16_G16_B16_A16: case TextureFormat::R32_G32_B32_A32: case TextureFormat::BF10GF11RF11: @@ -133,6 +137,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); switch (format) { + case DepthFormat::Z16_UNORM: case DepthFormat::S8_Z24_UNORM: case DepthFormat::Z24_S8_UNORM: case DepthFormat::Z32_FLOAT: @@ -164,6 +169,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: case TextureFormat::R8: + case TextureFormat::G8R8: case TextureFormat::BF10GF11RF11: case TextureFormat::R32_G32_B32_A32: // TODO(Subv): For the time being just forward the same data without any decoding. |
