aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp18
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h24
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp73
3 files changed, 97 insertions, 18 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ac3e4bf27..d6048f639 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -49,17 +49,19 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
+ {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
if (type == SurfaceType::ColorTexture) {
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
- // For now only UNORM components are supported
- ASSERT(component_type == ComponentType::UNorm);
+ // For now only UNORM components are supported, or RGBA16F which is type FLOAT
+ ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F);
return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
// TODO(Subv): Implement depth formats
@@ -110,8 +112,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
morton_to_gl_fns = {
MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
- MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::DXT1>,
- MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
+ MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
+ MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
+ MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -123,7 +126,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
MortonCopy<false, PixelFormat::A2B10G10R10>,
MortonCopy<false, PixelFormat::A1B5G5R5>,
MortonCopy<false, PixelFormat::R8>,
- // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
+ MortonCopy<false, PixelFormat::RGBA16F>,
+ // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
+ nullptr,
nullptr,
nullptr,
nullptr,
@@ -928,7 +933,8 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc
// Use GetSurfaceSubRect instead
ASSERT(params.width == params.stride);
- ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0));
+ ASSERT(!params.is_tiled ||
+ (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0));
// Check for an exact match in existing surfaces
Surface surface =
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index fc09f108c..6f08678ab 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -59,9 +59,11 @@ struct SurfaceParams {
A2B10G10R10 = 2,
A1B5G5R5 = 3,
R8 = 4,
- DXT1 = 5,
- DXT23 = 6,
- DXT45 = 7,
+ RGBA16F = 5,
+ DXT1 = 6,
+ DXT23 = 7,
+ DXT45 = 8,
+ DXN1 = 9, // This is also known as BC4
Max,
Invalid = 255,
@@ -102,9 +104,11 @@ struct SurfaceParams {
1, // A2B10G10R10
1, // A1B5G5R5
1, // R8
+ 2, // RGBA16F
4, // DXT1
4, // DXT23
4, // DXT45
+ 4, // DXN1
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -124,9 +128,11 @@ struct SurfaceParams {
32, // A2B10G10R10
16, // A1B5G5R5
8, // R8
+ 64, // RGBA16F
64, // DXT1
128, // DXT23
128, // DXT45
+ 64, // DXN1
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -143,6 +149,8 @@ struct SurfaceParams {
return PixelFormat::ABGR8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
return PixelFormat::A2B10G10R10;
+ case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+ return PixelFormat::RGBA16F;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -172,12 +180,16 @@ struct SurfaceParams {
return PixelFormat::A1B5G5R5;
case Tegra::Texture::TextureFormat::R8:
return PixelFormat::R8;
+ case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
+ return PixelFormat::RGBA16F;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
return PixelFormat::DXT23;
case Tegra::Texture::TextureFormat::DXT45:
return PixelFormat::DXT45;
+ case Tegra::Texture::TextureFormat::DXN1:
+ return PixelFormat::DXN1;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -197,12 +209,16 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::A1B5G5R5;
case PixelFormat::R8:
return Tegra::Texture::TextureFormat::R8;
+ case PixelFormat::RGBA16F:
+ return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
case PixelFormat::DXT1:
return Tegra::Texture::TextureFormat::DXT1;
case PixelFormat::DXT23:
return Tegra::Texture::TextureFormat::DXT23;
case PixelFormat::DXT45:
return Tegra::Texture::TextureFormat::DXT45;
+ case PixelFormat::DXN1:
+ return Tegra::Texture::TextureFormat::DXN1;
default:
UNREACHABLE();
}
@@ -226,6 +242,8 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGBA8_SRGB:
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
return ComponentType::UNorm;
+ case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+ return ComponentType::Float;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 70ddea643..bb5209a7e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -299,7 +299,7 @@ public:
* are stored as floats, so this may require conversion.
* @param reg The destination register to use.
* @param elem The element to use for the operation.
- * @param attribute The input attibute to use as the source value.
+ * @param attribute The input attribute to use as the source value.
*/
void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) {
std::string dest = GetRegisterAsFloat(reg);
@@ -451,6 +451,12 @@ private:
switch (attribute) {
case Attribute::Index::Position:
return "position";
+ case Attribute::Index::TessCoordInstanceIDVertexID:
+ // TODO(Subv): Find out what the values are for the first two elements when inside a
+ // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+ // shader.
+ ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
+ return "vec4(0, 0, gl_InstanceID, gl_VertexID)";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -786,8 +792,13 @@ private:
1, 1);
break;
}
- case OpCode::Id::RRO: {
- NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction");
+ case OpCode::Id::RRO_C:
+ case OpCode::Id::RRO_R:
+ case OpCode::Id::RRO_IMM: {
+ // Currently RRO is only implemented as a register move.
+ // Usage of `abs_b` and `negate_b` here should also be correct.
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
+ NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default: {
@@ -885,10 +896,10 @@ private:
instr.gpr0);
break;
}
- case OpCode::Id::TEXS: {
+ case OpCode::Id::TEX: {
ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+ const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string sampler = GetSampler(instr.sampler);
const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
// Add an extra scope and declare the texture coords inside to prevent overwriting
@@ -897,8 +908,52 @@ private:
++shader.scope;
shader.AddLine(coord);
const std::string texture = "texture(" + sampler + ", coords)";
- for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
- regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, elem);
+
+ size_t dest_elem{};
+ for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
+ if (!instr.tex.IsComponentEnabled(elem)) {
+ // Skip disabled components
+ continue;
+ }
+ regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
+ ++dest_elem;
+ }
+ --shader.scope;
+ shader.AddLine("}");
+ break;
+ }
+ case OpCode::Id::TEXS: {
+ ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+ const std::string sampler = GetSampler(instr.sampler);
+ const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ // Add an extra scope and declare the texture coords inside to prevent
+ // overwriting them in case they are used as outputs of the texs instruction.
+ shader.AddLine("{");
+ ++shader.scope;
+ shader.AddLine(coord);
+ const std::string texture = "texture(" + sampler + ", coords)";
+
+ // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes
+ // into gpr28+0 and gpr28+1
+ size_t offset{};
+
+ for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
+ for (unsigned elem = 0; elem < 2; ++elem) {
+ if (!instr.texs.IsComponentEnabled(elem)) {
+ // Skip disabled components
+ continue;
+ }
+ regs.SetRegisterToFloat(dest, elem + offset, texture, 1, 4, false, elem);
+ }
+
+ if (!instr.texs.HasTwoDestinations()) {
+ // Skip the second destination
+ break;
+ }
+
+ offset += 2;
}
--shader.scope;
shader.AddLine("}");
@@ -955,8 +1010,8 @@ private:
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
- // enabled
+ // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+ // if enabled
SetPredicate(instr.fsetp.pred0,
"!(" + predicate + ") " + combiner + " (" + second_pred + ')');
}