diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 24 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 73 |
3 files changed, 97 insertions, 18 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ac3e4bf27..d6048f639 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -49,17 +49,19 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8 + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 + {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); if (type == SurfaceType::ColorTexture) { ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - // For now only UNORM components are supported - ASSERT(component_type == ComponentType::UNorm); + // For now only UNORM components are supported, or RGBA16F which is type FLOAT + ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F); return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { // TODO(Subv): Implement depth formats @@ -110,8 +112,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: morton_to_gl_fns = { MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, - MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::DXT1>, - MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, + MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, }; static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, @@ -123,7 +126,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: MortonCopy<false, PixelFormat::A2B10G10R10>, MortonCopy<false, PixelFormat::A1B5G5R5>, MortonCopy<false, PixelFormat::R8>, - // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported + MortonCopy<false, PixelFormat::RGBA16F>, + // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported + nullptr, nullptr, nullptr, nullptr, @@ -928,7 +933,8 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc // Use GetSurfaceSubRect instead ASSERT(params.width == params.stride); - ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); + ASSERT(!params.is_tiled || + (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0)); // Check for an exact match in existing surfaces Surface surface = diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index fc09f108c..6f08678ab 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -59,9 +59,11 @@ struct SurfaceParams { A2B10G10R10 = 2, A1B5G5R5 = 3, R8 = 4, - DXT1 = 5, - DXT23 = 6, - DXT45 = 7, + RGBA16F = 5, + DXT1 = 6, + DXT23 = 7, + DXT45 = 8, + DXN1 = 9, // This is also known as BC4 Max, Invalid = 255, @@ -102,9 +104,11 @@ struct SurfaceParams { 1, // A2B10G10R10 1, // A1B5G5R5 1, // R8 + 2, // RGBA16F 4, // DXT1 4, // DXT23 4, // DXT45 + 4, // DXN1 }}; ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); @@ -124,9 +128,11 @@ struct SurfaceParams { 32, // A2B10G10R10 16, // A1B5G5R5 8, // R8 + 64, // RGBA16F 64, // DXT1 128, // DXT23 128, // DXT45 + 64, // DXN1 }}; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -143,6 +149,8 @@ struct SurfaceParams { return PixelFormat::ABGR8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: return PixelFormat::A2B10G10R10; + case Tegra::RenderTargetFormat::RGBA16_FLOAT: + return PixelFormat::RGBA16F; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -172,12 +180,16 @@ struct SurfaceParams { return PixelFormat::A1B5G5R5; case Tegra::Texture::TextureFormat::R8: return PixelFormat::R8; + case Tegra::Texture::TextureFormat::R16_G16_B16_A16: + return PixelFormat::RGBA16F; case Tegra::Texture::TextureFormat::DXT1: return PixelFormat::DXT1; case Tegra::Texture::TextureFormat::DXT23: return PixelFormat::DXT23; case Tegra::Texture::TextureFormat::DXT45: return PixelFormat::DXT45; + case Tegra::Texture::TextureFormat::DXN1: + return PixelFormat::DXN1; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -197,12 +209,16 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::A1B5G5R5; case PixelFormat::R8: return Tegra::Texture::TextureFormat::R8; + case PixelFormat::RGBA16F: + return Tegra::Texture::TextureFormat::R16_G16_B16_A16; case PixelFormat::DXT1: return Tegra::Texture::TextureFormat::DXT1; case PixelFormat::DXT23: return Tegra::Texture::TextureFormat::DXT23; case PixelFormat::DXT45: return Tegra::Texture::TextureFormat::DXT45; + case PixelFormat::DXN1: + return Tegra::Texture::TextureFormat::DXN1; default: UNREACHABLE(); } @@ -226,6 +242,8 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGBA8_SRGB: case Tegra::RenderTargetFormat::RGB10_A2_UNORM: return ComponentType::UNorm; + case Tegra::RenderTargetFormat::RGBA16_FLOAT: + return ComponentType::Float; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 70ddea643..bb5209a7e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -299,7 +299,7 @@ public: * are stored as floats, so this may require conversion. * @param reg The destination register to use. * @param elem The element to use for the operation. - * @param attribute The input attibute to use as the source value. + * @param attribute The input attribute to use as the source value. */ void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) { std::string dest = GetRegisterAsFloat(reg); @@ -451,6 +451,12 @@ private: switch (attribute) { case Attribute::Index::Position: return "position"; + case Attribute::Index::TessCoordInstanceIDVertexID: + // TODO(Subv): Find out what the values are for the first two elements when inside a + // vertex shader, and what's the value of the fourth element when inside a Tess Eval + // shader. + ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); + return "vec4(0, 0, gl_InstanceID, gl_VertexID)"; default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; @@ -786,8 +792,13 @@ private: 1, 1); break; } - case OpCode::Id::RRO: { - NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction"); + case OpCode::Id::RRO_C: + case OpCode::Id::RRO_R: + case OpCode::Id::RRO_IMM: { + // Currently RRO is only implemented as a register move. + // Usage of `abs_b` and `negate_b` here should also be correct. + regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); + NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; } default: { @@ -885,10 +896,10 @@ private: instr.gpr0); break; } - case OpCode::Id::TEXS: { + case OpCode::Id::TEX: { ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); - const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const std::string sampler = GetSampler(instr.sampler); const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; // Add an extra scope and declare the texture coords inside to prevent overwriting @@ -897,8 +908,52 @@ private: ++shader.scope; shader.AddLine(coord); const std::string texture = "texture(" + sampler + ", coords)"; - for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) { - regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, elem); + + size_t dest_elem{}; + for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); + ++dest_elem; + } + --shader.scope; + shader.AddLine("}"); + break; + } + case OpCode::Id::TEXS: { + ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + // Add an extra scope and declare the texture coords inside to prevent + // overwriting them in case they are used as outputs of the texs instruction. + shader.AddLine("{"); + ++shader.scope; + shader.AddLine(coord); + const std::string texture = "texture(" + sampler + ", coords)"; + + // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes + // into gpr28+0 and gpr28+1 + size_t offset{}; + + for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) { + for (unsigned elem = 0; elem < 2; ++elem) { + if (!instr.texs.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(dest, elem + offset, texture, 1, 4, false, elem); + } + + if (!instr.texs.HasTwoDestinations()) { + // Skip the second destination + break; + } + + offset += 2; } --shader.scope; shader.AddLine("}"); @@ -955,8 +1010,8 @@ private: '(' + predicate + ") " + combiner + " (" + second_pred + ')'); if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled SetPredicate(instr.fsetp.pred0, "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); } |
