diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 56 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 140 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 3 |
5 files changed, 115 insertions, 134 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 546e86532..8360feb5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -332,8 +332,6 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c // TODO(bunnei): Implement this const bool has_stencil = false; - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; @@ -346,9 +344,10 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c Surface depth_surface; MathUtil::Rectangle<u32> surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); - MathUtil::Rectangle<u32> draw_rect{ + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; + const MathUtil::Rectangle<u32> draw_rect{ static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, surfaces_rect.left, surfaces_rect.right)), // Left static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, @@ -812,9 +811,7 @@ void RasterizerOpenGL::SyncClipCoef() { void RasterizerOpenGL::SyncCullMode() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - // TODO(bunnei): Enable the below once more things work - until then, this may hide regressions - // state.cull.enabled = regs.cull.enabled != 0; - state.cull.enabled = false; + state.cull.enabled = regs.cull.enabled != 0; if (state.cull.enabled) { state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 114d35ce6..15a33ed9b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -122,6 +122,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI + {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI @@ -183,6 +186,21 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const { return {0, actual_height, width, 0}; } +/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN +static bool IsFormatBCn(PixelFormat format) { + switch (format) { + case PixelFormat::DXT1: + case PixelFormat::DXT23: + case PixelFormat::DXT45: + case PixelFormat::DXN1: + case PixelFormat::DXN2SNORM: + case PixelFormat::DXN2UNORM: + case PixelFormat::BC7U: + return true; + } + return false; +} + template <bool morton_to_gl, PixelFormat format> void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, Tegra::GPUVAddr addr) { @@ -191,16 +209,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { - std::vector<u8> data; - if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { - data = Tegra::Texture::UnswizzleTexture( - *gpu.memory_manager->GpuToCpuAddress(addr), - SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - } else { - data = Tegra::Texture::UnswizzleDepthTexture( - *gpu.memory_manager->GpuToCpuAddress(addr), - SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height); - } + // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual + // pixel values. + const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; + const std::vector<u8> data = + Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size, + bytes_per_pixel, stride, height, block_height); const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; gl_buffer.assign(data.begin(), data.begin() + size_to_copy); } else { @@ -228,13 +242,14 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::RG16>, - MortonCopy<true, PixelFormat::RG16F>, MortonCopy<true, PixelFormat::RG16UI>, - MortonCopy<true, PixelFormat::RG16I>, MortonCopy<true, PixelFormat::RG16S>, - MortonCopy<true, PixelFormat::RGB32F>, MortonCopy<true, PixelFormat::SRGBA8>, - MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, - MortonCopy<true, PixelFormat::Z32FS8>, + MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::R16S>, + MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>, + MortonCopy<true, PixelFormat::RG16>, MortonCopy<true, PixelFormat::RG16F>, + MortonCopy<true, PixelFormat::RG16UI>, MortonCopy<true, PixelFormat::RG16I>, + MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>, + MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::Z24S8>, + MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z32FS8>, }; static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), @@ -265,6 +280,9 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<false, PixelFormat::R32F>, MortonCopy<false, PixelFormat::R16F>, MortonCopy<false, PixelFormat::R16UNORM>, + MortonCopy<false, PixelFormat::R16S>, + MortonCopy<false, PixelFormat::R16UI>, + MortonCopy<false, PixelFormat::R16I>, MortonCopy<false, PixelFormat::RG16>, MortonCopy<false, PixelFormat::RG16F>, MortonCopy<false, PixelFormat::RG16UI>, @@ -611,8 +629,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu return GetSurface(SurfaceParams::CreateForTexture(config)); } -SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, + bool using_depth_fb) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; // TODO(bunnei): This is hard corded to use just the first render buffer diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 26e2ee203..e24ba8cfe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -46,22 +46,25 @@ struct SurfaceParams { R32F = 20, R16F = 21, R16UNORM = 22, - RG16 = 23, - RG16F = 24, - RG16UI = 25, - RG16I = 26, - RG16S = 27, - RGB32F = 28, - SRGBA8 = 29, + R16S = 23, + R16UI = 24, + R16I = 25, + RG16 = 26, + RG16F = 27, + RG16UI = 28, + RG16I = 29, + RG16S = 30, + RGB32F = 31, + SRGBA8 = 32, MaxColorFormat, // DepthStencil formats - Z24S8 = 30, - S8Z24 = 31, - Z32F = 32, - Z16 = 33, - Z32FS8 = 34, + Z24S8 = 33, + S8Z24 = 34, + Z32F = 35, + Z16 = 36, + Z32FS8 = 37, MaxDepthStencilFormat, @@ -122,6 +125,9 @@ struct SurfaceParams { 1, // R32F 1, // R16F 1, // R16UNORM + 1, // R16S + 1, // R16UI + 1, // R16I 1, // RG16 1, // RG16F 1, // RG16UI @@ -168,6 +174,9 @@ struct SurfaceParams { 32, // R32F 16, // R16F 16, // R16UNORM + 16, // R16S + 16, // R16UI + 16, // R16I 32, // RG16 32, // RG16F 32, // RG16UI @@ -245,6 +254,14 @@ struct SurfaceParams { return PixelFormat::RG16S; case Tegra::RenderTargetFormat::R16_FLOAT: return PixelFormat::R16F; + case Tegra::RenderTargetFormat::R16_UNORM: + return PixelFormat::R16UNORM; + case Tegra::RenderTargetFormat::R16_SNORM: + return PixelFormat::R16S; + case Tegra::RenderTargetFormat::R16_UINT: + return PixelFormat::R16UI; + case Tegra::RenderTargetFormat::R16_SINT: + return PixelFormat::R16I; case Tegra::RenderTargetFormat::R32_FLOAT: return PixelFormat::R32F; default: @@ -293,6 +310,12 @@ struct SurfaceParams { return PixelFormat::R16F; case Tegra::Texture::ComponentType::UNORM: return PixelFormat::R16UNORM; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::R16S; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R16UI; + case Tegra::Texture::ComponentType::SINT: + return PixelFormat::R16I; } LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); @@ -348,92 +371,6 @@ struct SurfaceParams { } } - static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { - // TODO(Subv): Properly implement this - switch (format) { - case PixelFormat::ABGR8: - case PixelFormat::SRGBA8: - return Tegra::Texture::TextureFormat::A8R8G8B8; - case PixelFormat::B5G6R5: - return Tegra::Texture::TextureFormat::B5G6R5; - case PixelFormat::A2B10G10R10: - return Tegra::Texture::TextureFormat::A2B10G10R10; - case PixelFormat::A1B5G5R5: - return Tegra::Texture::TextureFormat::A1B5G5R5; - case PixelFormat::R8: - return Tegra::Texture::TextureFormat::R8; - case PixelFormat::G8R8: - return Tegra::Texture::TextureFormat::G8R8; - case PixelFormat::RGBA16F: - return Tegra::Texture::TextureFormat::R16_G16_B16_A16; - case PixelFormat::R11FG11FB10F: - return Tegra::Texture::TextureFormat::BF10GF11RF11; - case PixelFormat::RGBA32UI: - return Tegra::Texture::TextureFormat::R32_G32_B32_A32; - case PixelFormat::DXT1: - return Tegra::Texture::TextureFormat::DXT1; - case PixelFormat::DXT23: - return Tegra::Texture::TextureFormat::DXT23; - case PixelFormat::DXT45: - return Tegra::Texture::TextureFormat::DXT45; - case PixelFormat::DXN1: - return Tegra::Texture::TextureFormat::DXN1; - case PixelFormat::DXN2UNORM: - case PixelFormat::DXN2SNORM: - return Tegra::Texture::TextureFormat::DXN2; - case PixelFormat::BC7U: - return Tegra::Texture::TextureFormat::BC7U; - case PixelFormat::ASTC_2D_4X4: - return Tegra::Texture::TextureFormat::ASTC_2D_4X4; - case PixelFormat::BGRA8: - // TODO(bunnei): This is fine for unswizzling (since we just need the right component - // sizes), but could be a bug if we used this function in different ways. - return Tegra::Texture::TextureFormat::A8R8G8B8; - case PixelFormat::RGBA32F: - return Tegra::Texture::TextureFormat::R32_G32_B32_A32; - case PixelFormat::RGB32F: - return Tegra::Texture::TextureFormat::R32_G32_B32; - case PixelFormat::RG32F: - return Tegra::Texture::TextureFormat::R32_G32; - case PixelFormat::R32F: - return Tegra::Texture::TextureFormat::R32; - case PixelFormat::R16F: - case PixelFormat::R16UNORM: - return Tegra::Texture::TextureFormat::R16; - case PixelFormat::Z32F: - return Tegra::Texture::TextureFormat::ZF32; - case PixelFormat::Z24S8: - return Tegra::Texture::TextureFormat::Z24S8; - case PixelFormat::RG16F: - case PixelFormat::RG16: - case PixelFormat::RG16UI: - case PixelFormat::RG16I: - case PixelFormat::RG16S: - return Tegra::Texture::TextureFormat::R16_G16; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) { - switch (format) { - case PixelFormat::S8Z24: - return Tegra::DepthFormat::S8_Z24_UNORM; - case PixelFormat::Z24S8: - return Tegra::DepthFormat::Z24_S8_UNORM; - case PixelFormat::Z32F: - return Tegra::DepthFormat::Z32_FLOAT; - case PixelFormat::Z16: - return Tegra::DepthFormat::Z16_UNORM; - case PixelFormat::Z32FS8: - return Tegra::DepthFormat::Z32_S8_X24_FLOAT; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { // TODO(Subv): Implement more component types switch (type) { @@ -462,9 +399,11 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGB10_A2_UNORM: case Tegra::RenderTargetFormat::R8_UNORM: case Tegra::RenderTargetFormat::RG16_UNORM: + case Tegra::RenderTargetFormat::R16_UNORM: case Tegra::RenderTargetFormat::B5G6R5_UNORM: return ComponentType::UNorm; case Tegra::RenderTargetFormat::RG16_SNORM: + case Tegra::RenderTargetFormat::R16_SNORM: return ComponentType::SNorm; case Tegra::RenderTargetFormat::RGBA16_FLOAT: case Tegra::RenderTargetFormat::R11G11B10_FLOAT: @@ -476,8 +415,10 @@ struct SurfaceParams { return ComponentType::Float; case Tegra::RenderTargetFormat::RGBA32_UINT: case Tegra::RenderTargetFormat::RG16_UINT: + case Tegra::RenderTargetFormat::R16_UINT: return ComponentType::UInt; case Tegra::RenderTargetFormat::RG16_SINT: + case Tegra::RenderTargetFormat::R16_SINT: return ComponentType::SInt; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); @@ -634,8 +575,7 @@ public: Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - const MathUtil::Rectangle<s32>& viewport); + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); /// Flushes the surface to Switch memory void FlushSurface(const Surface& surface); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 32f06f409..8954deb81 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -141,6 +141,15 @@ private: ExitMethod jmp = Scan(target, end, labels); return exit_method = ParallelExit(no_jmp, jmp); } + case OpCode::Id::SSY: { + // The SSY instruction uses a similar encoding as the BRA instruction. + ASSERT_MSG(instr.bra.constant_buffer == 0, + "Constant buffer SSY is not supported"); + u32 target = offset + instr.bra.GetBranchTarget(); + labels.insert(target); + // Continue scanning for an exit method. + break; + } } } } @@ -828,7 +837,11 @@ private: ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute, "NeverExecute predicate not implemented"); - if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + // Some instructions (like SSY) don't have a predicate field, they are always + // unconditionally executed. + bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId()); + + if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { shader.AddLine("if (" + GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) + ')'); @@ -1668,16 +1681,25 @@ private: break; } case OpCode::Id::SSY: { - // The SSY opcode tells the GPU where to re-converge divergent execution paths, we - // can ignore this when generating GLSL code. + // The SSY opcode tells the GPU where to re-converge divergent execution paths, it + // sets the target of the jump that the SYNC instruction will make. The SSY opcode + // has a similar structure to the BRA opcode. + ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported"); + + u32 target = offset + instr.bra.GetBranchTarget(); + shader.AddLine("ssy_target = " + std::to_string(target) + "u;"); break; } - case OpCode::Id::SYNC: + case OpCode::Id::SYNC: { + // The SYNC opcode jumps to the address previously set by the SSY opcode ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + shader.AddLine("{ jmp_to = ssy_target; break; }"); + break; + } case OpCode::Id::DEPBAR: { - // TODO(Subv): Find out if we actually have to care about these instructions or if + // TODO(Subv): Find out if we actually have to care about this instruction or if // the GLSL compiler takes care of that for us. - LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed"); + LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); break; } default: { @@ -1691,7 +1713,7 @@ private: } // Close the predicate condition scope. - if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { --shader.scope; shader.AddLine('}'); } @@ -1742,6 +1764,7 @@ private: } else { labels.insert(subroutine.begin); shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); + shader.AddLine("uint ssy_target = 0u;"); shader.AddLine("while (true) {"); ++shader.scope; @@ -1757,7 +1780,7 @@ private: u32 compile_end = CompileRange(label, next_label); if (compile_end > next_label && compile_end != PROGRAM_END) { // This happens only when there is a label inside a IF/LOOP block - shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); + shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }"); labels.emplace(compile_end); } diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 43be69dd1..c439446b1 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -45,6 +45,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Type::SignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return GL_INT; + case Maxwell::VertexAttribute::Size::Size_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: |
