aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/command_processor.h3
-rw-r--r--src/video_core/engines/maxwell_3d.cpp12
-rw-r--r--src/video_core/engines/maxwell_3d.h29
-rw-r--r--src/video_core/engines/shader_bytecode.h78
-rw-r--r--src/video_core/gpu.cpp51
-rw-r--r--src/video_core/gpu.h26
-rw-r--r--src/video_core/renderer_base.cpp19
-rw-r--r--src/video_core/renderer_base.h30
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp207
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h32
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp145
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h314
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp407
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h5
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_state.h6
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp201
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h42
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h49
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp10
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h8
-rw-r--r--src/video_core/video_core.cpp4
-rw-r--r--src/video_core/video_core.h11
25 files changed, 1148 insertions, 565 deletions
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index f7214ffec..a01153e0b 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -30,8 +30,7 @@ union CommandHeader {
BitField<29, 3, SubmissionMode> mode;
};
-static_assert(std::is_standard_layout<CommandHeader>::value == true,
- "CommandHeader does not use standard layout");
+static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a46ed4bd7..68f91cc75 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -222,6 +222,18 @@ void Maxwell3D::DrawArrays() {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
}
+ // The two instance configuration bits are mutually exclusive: they must never both be set.
+ ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
+ "Illegal combination of instancing parameters");
+
+ if (regs.draw.instance_next) {
+ // Increment the current instance *before* drawing.
+ state.current_instance += 1;
+ } else if (!regs.draw.instance_cont) {
+ // Reset the current instance to 0.
+ state.current_instance = 0;
+ }
+
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
rasterizer.AccelerateDrawBatch(is_indexed);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0506ac8fe..771eb5abc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -93,6 +93,7 @@ public:
struct VertexAttribute {
enum class Size : u32 {
+ Invalid = 0x0,
Size_32_32_32_32 = 0x01,
Size_32_32_32 = 0x02,
Size_16_16_16_16 = 0x03,
@@ -257,6 +258,10 @@ public:
bool IsNormalized() const {
return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
}
+
+ /// Returns true unless this attribute's size field is Size::Invalid.
+ bool IsValid() const {
+ return size != Size::Invalid;
+ }
};
enum class PrimitiveTopology : u32 {
@@ -352,6 +357,27 @@ public:
OneMinusConstantColor = 0x62,
ConstantAlpha = 0x63,
OneMinusConstantAlpha = 0x64,
+
+ // These values are used by Nouveau and some games.
+ ZeroGL = 0x4000,
+ OneGL = 0x4001,
+ SourceColorGL = 0x4300,
+ OneMinusSourceColorGL = 0x4301,
+ SourceAlphaGL = 0x4302,
+ OneMinusSourceAlphaGL = 0x4303,
+ DestAlphaGL = 0x4304,
+ OneMinusDestAlphaGL = 0x4305,
+ DestColorGL = 0x4306,
+ OneMinusDestColorGL = 0x4307,
+ SourceAlphaSaturateGL = 0x4308,
+ ConstantColorGL = 0xc001,
+ OneMinusConstantColorGL = 0xc002,
+ ConstantAlphaGL = 0xc003,
+ OneMinusConstantAlphaGL = 0xc004,
+ Source1ColorGL = 0xc900,
+ OneMinusSource1ColorGL = 0xc901,
+ Source1AlphaGL = 0xc902,
+ OneMinusSource1AlphaGL = 0xc903,
};
u32 separate_alpha;
@@ -612,6 +638,8 @@ public:
union {
u32 vertex_begin_gl;
BitField<0, 16, PrimitiveTopology> topology;
+ BitField<26, 1, u32> instance_next;
+ BitField<27, 1, u32> instance_cont;
};
} draw;
@@ -804,6 +832,7 @@ public:
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
+ u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
};
State state{};
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 3d4557b7e..9413a81fb 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -12,6 +12,7 @@
#include <boost/optional.hpp>
+#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -74,12 +75,14 @@ union Attribute {
enum class Index : u64 {
Position = 7,
Attribute_0 = 8,
+ Attribute_31 = 39,
// This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
// shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
// shader.
TessCoordInstanceIDVertexID = 47,
- // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
- Unknown_63 = 63,
+ // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
+ // shader. It is unknown what the other values contain.
+ FrontFacing = 63,
};
union {
@@ -142,6 +145,7 @@ enum class PredCondition : u64 {
NotEqual = 5,
GreaterEqual = 6,
LessThanWithNan = 9,
+ GreaterThanWithNan = 12,
NotEqualWithNan = 13,
// TODO(Subv): Other condition types
};
@@ -201,11 +205,24 @@ enum class IMinMaxExchange : u64 {
XHi = 3,
};
+/// Values of the `mode` bit field of the XMAD instruction encoding (see Instruction::xmad).
+enum class XmadMode : u64 {
+ None = 0,
+ CLo = 1,
+ CHi = 2,
+ CSfu = 3,
+ CBcc = 4,
+};
+
enum class FlowCondition : u64 {
Always = 0xF,
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
};
+/// Values of the `pred_result_mode` bit field of the LOP instruction encoding (see
+/// Instruction::lop).
+enum class PredicateResultMode : u64 {
+ None = 0x0,
+ NotZero = 0x3,
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -246,7 +263,7 @@ union Instruction {
BitField<39, 1, u64> invert_a;
BitField<40, 1, u64> invert_b;
BitField<41, 2, LogicOperation> operation;
- BitField<44, 2, u64> unk44;
+ BitField<44, 2, PredicateResultMode> pred_result_mode;
BitField<48, 3, Pred> pred48;
} lop;
@@ -276,6 +293,10 @@ union Instruction {
} alu;
union {
+ BitField<48, 1, u64> negate_b;
+ } fmul;
+
+ union {
BitField<48, 1, u64> is_signed;
} shift;
@@ -430,16 +451,20 @@ union Instruction {
}
bool IsComponentEnabled(size_t component) const {
- static constexpr std::array<std::array<u32, 8>, 4> mask_lut{
- {{},
- {0x1, 0x2, 0x4, 0x8, 0x3},
- {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
- {0x7, 0xb, 0xd, 0xe, 0xf}}};
+ static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
+ {},
+ {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
+ {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
+ {0x7, 0xb, 0xd, 0xe, 0xf},
+ }};
size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
- return ((1ull << component) & mask_lut[index][component_mask_selector]) != 0;
+ u32 mask = mask_lut[index][component_mask_selector];
+ // A mask of 0 means this instruction uses an unimplemented mask.
+ ASSERT(mask != 0);
+ return ((1ull << component) & mask) != 0;
}
} texs;
@@ -458,6 +483,18 @@ union Instruction {
} bra;
union {
+ BitField<20, 16, u64> imm20_16;
+ BitField<36, 1, u64> product_shift_left;
+ BitField<37, 1, u64> merge_37;
+ BitField<48, 1, u64> sign_a;
+ BitField<49, 1, u64> sign_b;
+ BitField<50, 3, XmadMode> mode;
+ BitField<52, 1, u64> high_b;
+ BitField<53, 1, u64> high_a;
+ BitField<56, 1, u64> merge_56;
+ } xmad;
+
+ union {
BitField<20, 14, u64> offset;
BitField<34, 5, u64> index;
} cbuf34;
@@ -477,8 +514,7 @@ union Instruction {
u64 value;
};
static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
-static_assert(std::is_standard_layout<Instruction>::value,
- "Structure does not have standard layout");
+static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout");
class OpCode {
public:
@@ -494,6 +530,8 @@ public:
LD_A,
LD_C,
ST_A,
+ LDG, // Load from global memory
+ STG, // Store in global memory
TEX,
TEXQ, // Texture Query
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
@@ -595,9 +633,17 @@ public:
IntegerSetPredicate,
PredicateSetPredicate,
Conversion,
+ Xmad,
Unknown,
};
+ /// Returns whether an opcode has an execution predicate field (i.e. whether it can be
+ /// conditionally executed). Currently every opcode except SSY is reported as predicated.
+ static bool IsPredicatedInstruction(Id opcode) {
+ // TODO(Subv): Add the rest of the unpredicated instructions.
+ return opcode != Id::SSY;
+ }
+
class Matcher {
public:
Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type)
@@ -697,6 +743,8 @@ private:
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+ INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
+ INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
@@ -777,10 +825,10 @@ private:
INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
- INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
- INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
- INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"),
- INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"),
+ INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
+ INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
+ INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
+ INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
};
#undef INST
std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4ff4d71c5..9758adcfd 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/assert.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
@@ -11,6 +12,15 @@
namespace Tegra {
+/// Returns the number of bytes occupied by one pixel of the given framebuffer pixel format.
+u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
+    switch (format) {
+    case PixelFormat::ABGR8:
+        return 4;
+    }
+
+    // Every handled format returns from the switch above, so getting here means `format` holds a
+    // value that is not a known PixelFormat.
+    UNREACHABLE();
+    // NOTE(review): presumably UNREACHABLE() can return in some build configurations - keep an
+    // explicit return so control never flows off the end of this non-void function.
+    return 0;
+}
+
GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
memory_manager = std::make_unique<MemoryManager>();
maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
@@ -34,19 +44,60 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
switch (format) {
case RenderTargetFormat::RGBA32_FLOAT:
+ case RenderTargetFormat::RGBA32_UINT:
return 16;
+ case RenderTargetFormat::RGBA16_UINT:
+ case RenderTargetFormat::RGBA16_UNORM:
case RenderTargetFormat::RGBA16_FLOAT:
case RenderTargetFormat::RG32_FLOAT:
+ case RenderTargetFormat::RG32_UINT:
return 8;
case RenderTargetFormat::RGBA8_UNORM:
+ case RenderTargetFormat::RGBA8_SNORM:
+ case RenderTargetFormat::RGBA8_SRGB:
+ case RenderTargetFormat::RGBA8_UINT:
case RenderTargetFormat::RGB10_A2_UNORM:
case RenderTargetFormat::BGRA8_UNORM:
+ case RenderTargetFormat::RG16_UNORM:
+ case RenderTargetFormat::RG16_SNORM:
+ case RenderTargetFormat::RG16_UINT:
+ case RenderTargetFormat::RG16_SINT:
+ case RenderTargetFormat::RG16_FLOAT:
case RenderTargetFormat::R32_FLOAT:
case RenderTargetFormat::R11G11B10_FLOAT:
+ case RenderTargetFormat::R32_UINT:
return 4;
+ case RenderTargetFormat::R16_UNORM:
+ case RenderTargetFormat::R16_SNORM:
+ case RenderTargetFormat::R16_UINT:
+ case RenderTargetFormat::R16_SINT:
+ case RenderTargetFormat::R16_FLOAT:
+ case RenderTargetFormat::RG8_UNORM:
+ case RenderTargetFormat::RG8_SNORM:
+ return 2;
+ case RenderTargetFormat::R8_UNORM:
+ case RenderTargetFormat::R8_UINT:
+ return 1;
default:
UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
}
}
+/// Returns the number of bytes occupied by one pixel of the given depth buffer format.
+/// Formats without a case below are reported through UNIMPLEMENTED_MSG and fall back to 1.
+u32 DepthFormatBytesPerPixel(DepthFormat format) {
+    switch (format) {
+    case DepthFormat::Z32_S8_X24_FLOAT:
+        return 8;
+    case DepthFormat::Z32_FLOAT:
+    case DepthFormat::S8_Z24_UNORM:
+    case DepthFormat::Z24_X8_UNORM:
+    case DepthFormat::Z24_S8_UNORM:
+    case DepthFormat::Z24_C8_UNORM:
+        return 4;
+    case DepthFormat::Z16_UNORM:
+        return 2;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format));
+        // NOTE(review): presumably UNIMPLEMENTED_MSG can return - give the function a defined
+        // result instead of flowing off the end of a non-void function. A non-zero value keeps
+        // size computations based on it from degenerating to zero.
+        return 1;
+    }
+}
+
} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 874eddd78..2697e1c27 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,7 +6,6 @@
#include <memory>
#include <unordered_map>
-#include <vector>
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/memory_manager.h"
@@ -21,22 +20,35 @@ enum class RenderTargetFormat : u32 {
NONE = 0x0,
RGBA32_FLOAT = 0xC0,
RGBA32_UINT = 0xC2,
+ RGBA16_UNORM = 0xC6,
+ RGBA16_UINT = 0xC9,
RGBA16_FLOAT = 0xCA,
RG32_FLOAT = 0xCB,
+ RG32_UINT = 0xCD,
BGRA8_UNORM = 0xCF,
RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
RGBA8_SRGB = 0xD6,
+ RGBA8_SNORM = 0xD7,
+ RGBA8_UINT = 0xD9,
RG16_UNORM = 0xDA,
RG16_SNORM = 0xDB,
RG16_SINT = 0xDC,
RG16_UINT = 0xDD,
RG16_FLOAT = 0xDE,
R11G11B10_FLOAT = 0xE0,
+ R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
B5G6R5_UNORM = 0xE8,
+ RG8_UNORM = 0xEA,
+ RG8_SNORM = 0xEB,
+ R16_UNORM = 0xEE,
+ R16_SNORM = 0xEF,
+ R16_SINT = 0xF0,
+ R16_UINT = 0xF1,
R16_FLOAT = 0xF2,
R8_UNORM = 0xF3,
+ R8_UINT = 0xF6,
};
enum class DepthFormat : u32 {
@@ -52,6 +64,9 @@ enum class DepthFormat : u32 {
/// Returns the number of bytes per pixel of each rendertarget format.
u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
+/// Returns the number of bytes per pixel of each depth format.
+u32 DepthFormatBytesPerPixel(DepthFormat format);
+
class DebugContext;
/**
@@ -65,14 +80,7 @@ struct FramebufferConfig {
/**
* Returns the number of bytes per pixel.
*/
- static u32 BytesPerPixel(PixelFormat format) {
- switch (format) {
- case PixelFormat::ABGR8:
- return 4;
- }
-
- UNREACHABLE();
- }
+ static u32 BytesPerPixel(PixelFormat format);
VAddr address;
u32 offset;
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 3ca350243..afd86a83a 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -4,18 +4,23 @@
#include <memory>
#include "core/frontend/emu_window.h"
+#include "core/settings.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace VideoCore {
-RendererBase::RendererBase(EmuWindow& window) : render_window{window} {}
+RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} {
+ RefreshBaseSettings();
+}
+
RendererBase::~RendererBase() = default;
-void RendererBase::UpdateCurrentFramebufferLayout() {
- const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout();
+void RendererBase::RefreshBaseSettings() {
+ RefreshRasterizerSetting();
+ UpdateCurrentFramebufferLayout();
- render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
+ renderer_settings.use_framelimiter = Settings::values.toggle_framelimit;
}
void RendererBase::RefreshRasterizerSetting() {
@@ -24,4 +29,10 @@ void RendererBase::RefreshRasterizerSetting() {
}
}
+void RendererBase::UpdateCurrentFramebufferLayout() {
+    // Read the window's current layout and hand its dimensions back to the window's own
+    // layout update.
+    const auto& current_layout = render_window.GetFramebufferLayout();
+    const auto width = current_layout.width;
+    const auto height = current_layout.height;
+
+    render_window.UpdateCurrentFramebufferLayout(width, height);
+}
+
} // namespace VideoCore
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 235de23a1..d9f16b8e6 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -4,23 +4,26 @@
#pragma once
+#include <atomic>
#include <memory>
#include <boost/optional.hpp>
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
+namespace Core::Frontend {
class EmuWindow;
+}
namespace VideoCore {
+/// Settings held by RendererBase; use_framelimiter is refreshed from
+/// Settings::values.toggle_framelimit in RendererBase::RefreshBaseSettings().
+struct RendererSettings {
+ std::atomic_bool use_framelimiter{false};
+};
+
class RendererBase : NonCopyable {
public:
- /// Used to reference a framebuffer
- enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture };
-
- explicit RendererBase(EmuWindow& window);
+ explicit RendererBase(Core::Frontend::EmuWindow& window);
virtual ~RendererBase();
/// Swap buffers (render frame)
@@ -32,9 +35,6 @@ public:
/// Shutdown the renderer
virtual void ShutDown() = 0;
- /// Updates the framebuffer layout of the contained render window handle.
- void UpdateCurrentFramebufferLayout();
-
// Getter/setter functions:
// ------------------------
@@ -54,13 +54,23 @@ public:
return *rasterizer;
}
- void RefreshRasterizerSetting();
+ /// Refreshes the settings common to all renderers
+ void RefreshBaseSettings();
protected:
- EmuWindow& render_window; ///< Reference to the render window handle.
+ /// Refreshes settings specific to the rasterizer.
+ void RefreshRasterizerSetting();
+
+ Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<RasterizerInterface> rasterizer;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
+
+ RendererSettings renderer_settings;
+
+private:
+ /// Updates the framebuffer layout of the contained render window handle.
+ void UpdateCurrentFramebufferLayout();
};
} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8360feb5d..93eadde7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
-RasterizerOpenGL::RasterizerOpenGL(EmuWindow& window) : emu_window{window} {
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window)
+ : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
- // Create SSBOs
- for (size_t stage = 0; stage < ssbos.size(); ++stage) {
- for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) {
- ssbos[stage][buffer].Create();
- state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle;
- }
- }
-
GLint ext_num;
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
for (GLint i = 0; i < ext_num; i++) {
const std::string_view extension{
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))};
- if (extension == "GL_ARB_buffer_storage") {
- has_ARB_buffer_storage = true;
- } else if (extension == "GL_ARB_direct_state_access") {
+ if (extension == "GL_ARB_direct_state_access") {
has_ARB_direct_state_access = true;
} else if (extension == "GL_ARB_separate_shader_objects") {
has_ARB_separate_shader_objects = true;
@@ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(EmuWindow& window) : emu_window{window} {
hw_vao.Create();
- stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
- stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
- state.draw.vertex_buffer = stream_buffer->GetHandle();
+ state.draw.vertex_buffer = stream_buffer.GetHandle();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.draw.vertex_array = hw_vao.handle;
state.Apply();
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
-
- for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
- auto& buffer = uniform_buffers[index];
- buffer.Create();
- glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
- glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr,
- GL_STREAM_COPY);
- glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle);
- }
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle());
glEnable(GL_BLEND);
+ glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
+
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
}
-RasterizerOpenGL::~RasterizerOpenGL() {
- if (stream_buffer != nullptr) {
- state.draw.vertex_buffer = stream_buffer->GetHandle();
- state.Apply();
- stream_buffer->Release();
- }
-}
+RasterizerOpenGL::~RasterizerOpenGL() {}
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
state.draw.vertex_array = hw_vao.handle;
- state.draw.vertex_buffer = stream_buffer->GetHandle();
+ state.draw.vertex_buffer = stream_buffer.GetHandle();
state.Apply();
// Upload all guest vertex arrays sequentially to our buffer
@@ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
ASSERT(end > start);
u64 size = end - start + 1;
- // Copy vertex array data
- Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
+ GLintptr vertex_buffer_offset;
+ std::tie(array_ptr, buffer_offset, vertex_buffer_offset) =
+ UploadMemory(array_ptr, buffer_offset, start, size);
// Bind the vertex array to the buffer at the current offset.
- glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride);
+ glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
+ vertex_array.stride);
- ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
-
- array_ptr += size;
- buffer_offset += size;
+ ASSERT_MSG(vertex_array.divisor == 0, "Instanced vertex arrays are not supported");
}
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
@@ -161,11 +135,16 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
// assume every shader uses them all.
for (unsigned index = 0; index < 16; ++index) {
auto& attrib = regs.vertex_attrib_format[index];
+
+ // Ignore invalid attributes.
+ if (!attrib.IsValid())
+ continue;
+
+ auto& buffer = regs.vertex_array[attrib.buffer];
LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
attrib.offset.Value(), attrib.IsNormalized());
- auto& buffer = regs.vertex_array[attrib.buffer];
ASSERT(buffer.IsEnabled());
glEnableVertexAttribArray(index);
@@ -196,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program
return program_code;
}
-void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
- // Helper function for uploading uniform data
- const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
- if (has_ARB_direct_state_access) {
- glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
- } else {
- glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
- glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
- }
- };
-
+std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
- u32 current_constbuffer_bindpoint = uniform_buffers.size();
+ u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -223,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
continue;
}
+ std::tie(buffer_ptr, buffer_offset) =
+ AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment));
+
const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
- // Flush the buffer so that the GPU can see the data we just wrote.
- glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo));
-
- // Upload uniform data as one UBO per stage
- const GLintptr ubo_offset = buffer_offset;
- copy_buffer(uniform_buffers[stage].handle, ubo_offset,
- sizeof(GLShader::MaxwellUniformData));
+ // Bind the buffer
+ glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset,
+ sizeof(ubo));
- buffer_ptr += sizeof(GLShader::MaxwellUniformData);
- buffer_offset += sizeof(GLShader::MaxwellUniformData);
+ buffer_ptr += sizeof(ubo);
+ buffer_offset += sizeof(ubo);
GLShader::ShaderSetup setup{GetShaderProgramCode(program)};
GLShader::ShaderEntries shader_resources;
@@ -277,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
static_cast<Maxwell::ShaderStage>(stage));
// Configure the const buffers for this shader stage.
- current_constbuffer_bindpoint =
- SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
- current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
+ std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers(
+ buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
+ current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
// Configure the textures for this shader stage.
current_texture_bindpoint =
@@ -294,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
}
shader_program_manager->UseTrivialGeometryShader();
+
+ return {buffer_ptr, buffer_offset};
}
size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -427,6 +397,31 @@ void RasterizerOpenGL::Clear() {
}
}
+std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset,
+                                                       size_t alignment) {
+    // The alignment applies to the offset, not to the mapped pointer: round the offset up and
+    // advance the pointer by the same number of padding bytes so both keep referring to the
+    // same location.
+    const auto unaligned_offset = static_cast<size_t>(buffer_offset);
+    const auto aligned_offset =
+        static_cast<GLintptr>(Common::AlignUp(unaligned_offset, alignment));
+    const GLintptr padding = aligned_offset - buffer_offset;
+    return {buffer_ptr + padding, aligned_offset};
+}
+
+/// Copies `size` bytes of guest memory starting at `gpu_addr` to `buffer_ptr`, after first
+/// aligning the buffer position to `alignment`.
+/// Returns the advanced pointer, the advanced offset, and the (aligned) offset at which the
+/// uploaded data begins.
+std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr,
+                                                                   GLintptr buffer_offset,
+                                                                   Tegra::GPUVAddr gpu_addr,
+                                                                   size_t size, size_t alignment) {
+    std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment);
+    // Remember where the data starts before the offset is advanced past it.
+    const GLintptr uploaded_offset = buffer_offset;
+
+    const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
+    const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)};
+    // The translation yields an optional; dereferencing an empty one is undefined behaviour, so
+    // report the failure explicitly before reading through it.
+    ASSERT_MSG(cpu_addr, "GPU address is not mapped to a CPU address");
+    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+
+    buffer_ptr += size;
+    buffer_offset += size;
+
+    return {buffer_ptr, buffer_offset, uploaded_offset};
+}
+
void RasterizerOpenGL::DrawArrays() {
if (accelerate_draw == AccelDraw::Disabled)
return;
@@ -451,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() {
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
- state.draw.vertex_buffer = stream_buffer->GetHandle();
+ state.draw.vertex_buffer = stream_buffer.GetHandle();
state.Apply();
size_t buffer_size = CalculateVertexArraysSize();
@@ -461,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() {
}
// Uniform space for the 5 shader stages
- buffer_size = Common::AlignUp<size_t>(buffer_size, 4) +
- sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
+ buffer_size =
+ Common::AlignUp<size_t>(buffer_size, 4) +
+ (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
+
+ // Add space for at least 18 constant buffers
+ buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
u8* buffer_ptr;
GLintptr buffer_offset;
- std::tie(buffer_ptr, buffer_offset) =
- stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4);
+ std::tie(buffer_ptr, buffer_offset, std::ignore) =
+ stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);
+ u8* buffer_ptr_base = buffer_ptr;
- u8* offseted_buffer;
- std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
-
- offseted_buffer =
- reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
- buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
+ std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
// If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
- const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
- const boost::optional<VAddr> index_data_addr{
- memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())};
- Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size);
-
- index_buffer_offset = buffer_offset;
- offseted_buffer += index_buffer_size;
- buffer_offset += index_buffer_size;
+ std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory(
+ buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size);
}
- offseted_buffer =
- reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
- buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
-
- SetupShaders(offseted_buffer, buffer_offset);
+ std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset);
- stream_buffer->Unmap();
+ stream_buffer.Unmap(buffer_ptr - buffer_ptr_base);
shader_program_manager->ApplyTo(state);
state.Apply();
@@ -642,45 +627,32 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
-u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program,
- u32 current_bindpoint,
- const std::vector<GLShader::ConstBufferEntry>& entries) {
+std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(
+ u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program,
+ u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) {
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
- // Reset all buffer draw state for this stage.
- for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) {
- buffer.bindpoint = 0;
- buffer.enabled = false;
- }
-
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& used_buffer = entries[bindpoint];
const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
- auto& buffer_draw_state =
- state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
if (!buffer.enabled) {
continue;
}
- buffer_draw_state.enabled = true;
- buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
-
- boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
-
size_t size = 0;
if (used_buffer.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing
- size = buffer.size * sizeof(float);
+ size = buffer.size;
if (size > MaxConstbufferSize) {
- LOG_ERROR(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
- MaxConstbufferSize);
+ LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
+ MaxConstbufferSize);
size = MaxConstbufferSize;
}
} else {
@@ -693,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
- std::vector<u8> data(size);
- Memory::ReadBlock(*addr, data.data(), data.size());
+ GLintptr const_buffer_offset;
+ std::tie(buffer_ptr, buffer_offset, const_buffer_offset) =
+ UploadMemory(buffer_ptr, buffer_offset, buffer.address, size,
+ static_cast<size_t>(uniform_buffer_alignment));
- glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo);
- glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW);
- glBindBuffer(GL_UNIFORM_BUFFER, 0);
+ glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint,
+ stream_buffer.GetHandle(), const_buffer_offset, size);
// Now configure the bindpoint of the buffer inside the shader
const std::string buffer_name = used_buffer.GetName();
const GLuint index =
glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str());
if (index != GL_INVALID_INDEX) {
- glUniformBlockBinding(program, index, buffer_draw_state.bindpoint);
+ glUniformBlockBinding(program, index, current_bindpoint + bindpoint);
}
}
state.Apply();
- return current_bindpoint + static_cast<u32>(entries.size());
+ return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())};
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 6d6d85cc1..74307f626 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,6 +7,7 @@
#include <array>
#include <cstddef>
#include <memory>
+#include <tuple>
#include <utility>
#include <vector>
#include <glad/glad.h>
@@ -21,12 +22,15 @@
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-class EmuWindow;
struct ScreenInfo;
+namespace Core::Frontend {
+class EmuWindow;
+}
+
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
- explicit RasterizerOpenGL(EmuWindow& renderer);
+ explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer);
~RasterizerOpenGL() override;
void DrawArrays() override;
@@ -97,9 +101,10 @@ private:
* @param entries Vector describing the buffers that are actually used in the guest shader.
* @returns The next available bindpoint for use in the next shader stage.
*/
- u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
- u32 current_bindpoint,
- const std::vector<GLShader::ConstBufferEntry>& entries);
+ std::tuple<u8*, GLintptr, u32> SetupConstBuffers(
+ u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ GLuint program, u32 current_bindpoint,
+ const std::vector<GLShader::ConstBufferEntry>& entries);
/*
* Configures the current textures to use for the draw command.
@@ -136,7 +141,6 @@ private:
/// Syncs the blend state to match the guest state
void SyncBlendState();
- bool has_ARB_buffer_storage = false;
bool has_ARB_direct_state_access = false;
bool has_ARB_separate_shader_objects = false;
bool has_ARB_vertex_attrib_binding = false;
@@ -145,29 +149,31 @@ private:
RasterizerCacheOpenGL res_cache;
- EmuWindow& emu_window;
+ Core::Frontend::EmuWindow& emu_window;
std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
OGLVertexArray sw_vao;
OGLVertexArray hw_vao;
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
- std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>,
- Tegra::Engines::Maxwell3D::Regs::MaxShaderStage>
- ssbos;
static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
- std::unique_ptr<OGLStreamBuffer> stream_buffer;
+ OGLStreamBuffer stream_buffer;
OGLBuffer uniform_buffer;
OGLFramebuffer framebuffer;
+ GLint uniform_buffer_alignment;
size_t CalculateVertexArraysSize() const;
std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);
- std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
+ std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
+
+ std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment);
- void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
+ std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset,
+ Tegra::GPUVAddr gpu_addr, size_t size,
+ size_t alignment = 4);
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 9fb734b77..fb7476fb8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -92,13 +92,18 @@ struct FormatTuple {
}
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8
- {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
+ {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
+ {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
- false}, // A2B10G10R10
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8
+ false}, // A2B10G10R10U
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
+ {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
@@ -112,16 +117,20 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXN2UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // BC7U
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U
+ {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // G8R8S
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
{GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
{GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
{GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
{GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16UNORM
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
+ {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
{GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
@@ -129,15 +138,21 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
+ {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
+
+ // Depth formats
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
+ false}, // Z16
// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
false}, // Z24S8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
- false}, // S8Z24
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
- false}, // Z16
+ false}, // S8Z24
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
ComponentType::Float, false}, // Z32FS8
}};
@@ -228,35 +243,73 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
- MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
- MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
- MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
- MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
- MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
- MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
- MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>,
- MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
- MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>,
- MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>,
- MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>,
- MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::RG16>,
- MortonCopy<true, PixelFormat::RG16F>, MortonCopy<true, PixelFormat::RG16UI>,
- MortonCopy<true, PixelFormat::RG16I>, MortonCopy<true, PixelFormat::RG16S>,
- MortonCopy<true, PixelFormat::RGB32F>, MortonCopy<true, PixelFormat::SRGBA8>,
- MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>,
- MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>,
+ // clang-format off
+ MortonCopy<true, PixelFormat::ABGR8U>,
+ MortonCopy<true, PixelFormat::ABGR8S>,
+ MortonCopy<true, PixelFormat::ABGR8UI>,
+ MortonCopy<true, PixelFormat::B5G6R5U>,
+ MortonCopy<true, PixelFormat::A2B10G10R10U>,
+ MortonCopy<true, PixelFormat::A1B5G5R5U>,
+ MortonCopy<true, PixelFormat::R8U>,
+ MortonCopy<true, PixelFormat::R8UI>,
+ MortonCopy<true, PixelFormat::RGBA16F>,
+ MortonCopy<true, PixelFormat::RGBA16U>,
+ MortonCopy<true, PixelFormat::RGBA16UI>,
+ MortonCopy<true, PixelFormat::R11FG11FB10F>,
+ MortonCopy<true, PixelFormat::RGBA32UI>,
+ MortonCopy<true, PixelFormat::DXT1>,
+ MortonCopy<true, PixelFormat::DXT23>,
+ MortonCopy<true, PixelFormat::DXT45>,
+ MortonCopy<true, PixelFormat::DXN1>,
+ MortonCopy<true, PixelFormat::DXN2UNORM>,
+ MortonCopy<true, PixelFormat::DXN2SNORM>,
+ MortonCopy<true, PixelFormat::BC7U>,
+ MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
+ MortonCopy<true, PixelFormat::G8R8U>,
+ MortonCopy<true, PixelFormat::G8R8S>,
+ MortonCopy<true, PixelFormat::BGRA8>,
+ MortonCopy<true, PixelFormat::RGBA32F>,
+ MortonCopy<true, PixelFormat::RG32F>,
+ MortonCopy<true, PixelFormat::R32F>,
+ MortonCopy<true, PixelFormat::R16F>,
+ MortonCopy<true, PixelFormat::R16U>,
+ MortonCopy<true, PixelFormat::R16S>,
+ MortonCopy<true, PixelFormat::R16UI>,
+ MortonCopy<true, PixelFormat::R16I>,
+ MortonCopy<true, PixelFormat::RG16>,
+ MortonCopy<true, PixelFormat::RG16F>,
+ MortonCopy<true, PixelFormat::RG16UI>,
+ MortonCopy<true, PixelFormat::RG16I>,
+ MortonCopy<true, PixelFormat::RG16S>,
+ MortonCopy<true, PixelFormat::RGB32F>,
+ MortonCopy<true, PixelFormat::SRGBA8>,
+ MortonCopy<true, PixelFormat::RG8U>,
+ MortonCopy<true, PixelFormat::RG8S>,
+ MortonCopy<true, PixelFormat::RG32UI>,
+ MortonCopy<true, PixelFormat::R32UI>,
+ MortonCopy<true, PixelFormat::Z32F>,
+ MortonCopy<true, PixelFormat::Z16>,
+ MortonCopy<true, PixelFormat::Z24S8>,
+ MortonCopy<true, PixelFormat::S8Z24>,
MortonCopy<true, PixelFormat::Z32FS8>,
+ // clang-format on
};
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
- MortonCopy<false, PixelFormat::ABGR8>,
- MortonCopy<false, PixelFormat::B5G6R5>,
- MortonCopy<false, PixelFormat::A2B10G10R10>,
- MortonCopy<false, PixelFormat::A1B5G5R5>,
- MortonCopy<false, PixelFormat::R8>,
+ // clang-format off
+ MortonCopy<false, PixelFormat::ABGR8U>,
+ MortonCopy<false, PixelFormat::ABGR8S>,
+ MortonCopy<false, PixelFormat::ABGR8UI>,
+ MortonCopy<false, PixelFormat::B5G6R5U>,
+ MortonCopy<false, PixelFormat::A2B10G10R10U>,
+ MortonCopy<false, PixelFormat::A1B5G5R5U>,
+ MortonCopy<false, PixelFormat::R8U>,
+ MortonCopy<false, PixelFormat::R8UI>,
MortonCopy<false, PixelFormat::RGBA16F>,
+ MortonCopy<false, PixelFormat::RGBA16U>,
+ MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
// TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not
@@ -269,13 +322,17 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
nullptr,
nullptr,
nullptr,
- MortonCopy<false, PixelFormat::G8R8>,
+ MortonCopy<false, PixelFormat::G8R8U>,
+ MortonCopy<false, PixelFormat::G8R8S>,
MortonCopy<false, PixelFormat::BGRA8>,
MortonCopy<false, PixelFormat::RGBA32F>,
MortonCopy<false, PixelFormat::RG32F>,
MortonCopy<false, PixelFormat::R32F>,
MortonCopy<false, PixelFormat::R16F>,
- MortonCopy<false, PixelFormat::R16UNORM>,
+ MortonCopy<false, PixelFormat::R16U>,
+ MortonCopy<false, PixelFormat::R16S>,
+ MortonCopy<false, PixelFormat::R16UI>,
+ MortonCopy<false, PixelFormat::R16I>,
MortonCopy<false, PixelFormat::RG16>,
MortonCopy<false, PixelFormat::RG16F>,
MortonCopy<false, PixelFormat::RG16UI>,
@@ -283,11 +340,16 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
MortonCopy<false, PixelFormat::RG16S>,
MortonCopy<false, PixelFormat::RGB32F>,
MortonCopy<false, PixelFormat::SRGBA8>,
- MortonCopy<false, PixelFormat::Z24S8>,
- MortonCopy<false, PixelFormat::S8Z24>,
+ MortonCopy<false, PixelFormat::RG8U>,
+ MortonCopy<false, PixelFormat::RG8S>,
+ MortonCopy<false, PixelFormat::RG32UI>,
+ MortonCopy<false, PixelFormat::R32UI>,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
+ MortonCopy<false, PixelFormat::Z24S8>,
+ MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32FS8>,
+ // clang-format on
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -407,7 +469,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
}
static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
- const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)};
+ const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
for (size_t y = 0; y < height; ++y) {
for (size_t x = 0; x < width; ++x) {
const size_t offset{bpp * (y * width + x)};
@@ -439,7 +501,8 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
ConvertS8Z24ToZ24S8(data, width, height);
break;
- case PixelFormat::G8R8:
+ case PixelFormat::G8R8U:
+ case PixelFormat::G8R8S:
// Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8.
ConvertG8R8ToR8G8(data, width, height);
break;
@@ -730,8 +793,6 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
// Verify surface is compatible for blitting
const auto& params{surface->GetSurfaceParams()};
ASSERT(params.type == new_params.type);
- ASSERT(params.pixel_format == new_params.pixel_format);
- ASSERT(params.component_type == new_params.component_type);
// Create a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{std::make_shared<CachedSurface>(new_params)};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 829a76dfe..fc8b44219 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -23,45 +23,62 @@ using PageMap = boost::icl::interval_map<u64, int>;
struct SurfaceParams {
enum class PixelFormat {
- ABGR8 = 0,
- B5G6R5 = 1,
- A2B10G10R10 = 2,
- A1B5G5R5 = 3,
- R8 = 4,
- RGBA16F = 5,
- R11FG11FB10F = 6,
- RGBA32UI = 7,
- DXT1 = 8,
- DXT23 = 9,
- DXT45 = 10,
- DXN1 = 11, // This is also known as BC4
- DXN2UNORM = 12,
- DXN2SNORM = 13,
- BC7U = 14,
- ASTC_2D_4X4 = 15,
- G8R8 = 16,
- BGRA8 = 17,
- RGBA32F = 18,
- RG32F = 19,
- R32F = 20,
- R16F = 21,
- R16UNORM = 22,
- RG16 = 23,
- RG16F = 24,
- RG16UI = 25,
- RG16I = 26,
- RG16S = 27,
- RGB32F = 28,
- SRGBA8 = 29,
+ ABGR8U = 0,
+ ABGR8S = 1,
+ ABGR8UI = 2,
+ B5G6R5U = 3,
+ A2B10G10R10U = 4,
+ A1B5G5R5U = 5,
+ R8U = 6,
+ R8UI = 7,
+ RGBA16F = 8,
+ RGBA16U = 9,
+ RGBA16UI = 10,
+ R11FG11FB10F = 11,
+ RGBA32UI = 12,
+ DXT1 = 13,
+ DXT23 = 14,
+ DXT45 = 15,
+ DXN1 = 16, // This is also known as BC4
+ DXN2UNORM = 17,
+ DXN2SNORM = 18,
+ BC7U = 19,
+ ASTC_2D_4X4 = 20,
+ G8R8U = 21,
+ G8R8S = 22,
+ BGRA8 = 23,
+ RGBA32F = 24,
+ RG32F = 25,
+ R32F = 26,
+ R16F = 27,
+ R16U = 28,
+ R16S = 29,
+ R16UI = 30,
+ R16I = 31,
+ RG16 = 32,
+ RG16F = 33,
+ RG16UI = 34,
+ RG16I = 35,
+ RG16S = 36,
+ RGB32F = 37,
+ SRGBA8 = 38,
+ RG8U = 39,
+ RG8S = 40,
+ RG32UI = 41,
+ R32UI = 42,
MaxColorFormat,
+ // Depth formats
+ Z32F = 43,
+ Z16 = 44,
+
+ MaxDepthFormat,
+
// DepthStencil formats
- Z24S8 = 30,
- S8Z24 = 31,
- Z32F = 32,
- Z16 = 33,
- Z32FS8 = 34,
+ Z24S8 = 45,
+ S8Z24 = 46,
+ Z32FS8 = 47,
MaxDepthStencilFormat,
@@ -99,12 +116,17 @@ struct SurfaceParams {
return 0;
constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
- 1, // ABGR8
- 1, // B5G6R5
- 1, // A2B10G10R10
- 1, // A1B5G5R5
- 1, // R8
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
4, // DXT1
@@ -115,13 +137,17 @@ struct SurfaceParams {
4, // DXN2SNORM
4, // BC7U
4, // ASTC_2D_4X4
- 1, // G8R8
+ 1, // G8R8U
+ 1, // G8R8S
1, // BGRA8
1, // RGBA32F
1, // RG32F
1, // R32F
1, // R16F
- 1, // R16UNORM
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
1, // RG16
1, // RG16F
1, // RG16UI
@@ -129,10 +155,14 @@ struct SurfaceParams {
1, // RG16S
1, // RGB32F
1, // SRGBA8
- 1, // Z24S8
- 1, // S8Z24
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
1, // Z32F
1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
1, // Z32FS8
}};
@@ -145,12 +175,17 @@ struct SurfaceParams {
return 0;
constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
- 32, // ABGR8
- 16, // B5G6R5
- 32, // A2B10G10R10
- 16, // A1B5G5R5
- 8, // R8
+ 32, // ABGR8U
+ 32, // ABGR8S
+ 32, // ABGR8UI
+ 16, // B5G6R5U
+ 32, // A2B10G10R10U
+ 16, // A1B5G5R5U
+ 8, // R8U
+ 8, // R8UI
64, // RGBA16F
+ 64, // RGBA16U
+ 64, // RGBA16UI
32, // R11FG11FB10F
128, // RGBA32UI
64, // DXT1
@@ -161,13 +196,17 @@ struct SurfaceParams {
128, // DXN2SNORM
128, // BC7U
32, // ASTC_2D_4X4
- 16, // G8R8
+ 16, // G8R8U
+ 16, // G8R8S
32, // BGRA8
128, // RGBA32F
64, // RG32F
32, // R32F
16, // R16F
- 16, // R16UNORM
+ 16, // R16U
+ 16, // R16S
+ 16, // R16UI
+ 16, // R16I
32, // RG16
32, // RG16F
32, // RG16UI
@@ -175,10 +214,14 @@ struct SurfaceParams {
32, // RG16S
96, // RGB32F
32, // SRGBA8
- 32, // Z24S8
- 32, // S8Z24
+ 16, // RG8U
+ 16, // RG8S
+ 64, // RG32UI
+ 32, // R32UI
32, // Z32F
16, // Z16
+ 32, // Z24S8
+ 32, // S8Z24
64, // Z32FS8
}};
@@ -214,13 +257,21 @@ struct SurfaceParams {
// gamma.
case Tegra::RenderTargetFormat::RGBA8_SRGB:
case Tegra::RenderTargetFormat::RGBA8_UNORM:
- return PixelFormat::ABGR8;
+ return PixelFormat::ABGR8U;
+ case Tegra::RenderTargetFormat::RGBA8_SNORM:
+ return PixelFormat::ABGR8S;
+ case Tegra::RenderTargetFormat::RGBA8_UINT:
+ return PixelFormat::ABGR8UI;
case Tegra::RenderTargetFormat::BGRA8_UNORM:
return PixelFormat::BGRA8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
- return PixelFormat::A2B10G10R10;
+ return PixelFormat::A2B10G10R10U;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
return PixelFormat::RGBA16F;
+ case Tegra::RenderTargetFormat::RGBA16_UNORM:
+ return PixelFormat::RGBA16U;
+ case Tegra::RenderTargetFormat::RGBA16_UINT:
+ return PixelFormat::RGBA16UI;
case Tegra::RenderTargetFormat::RGBA32_FLOAT:
return PixelFormat::RGBA32F;
case Tegra::RenderTargetFormat::RG32_FLOAT:
@@ -228,11 +279,13 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return PixelFormat::R11FG11FB10F;
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
- return PixelFormat::B5G6R5;
+ return PixelFormat::B5G6R5U;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return PixelFormat::RGBA32UI;
case Tegra::RenderTargetFormat::R8_UNORM:
- return PixelFormat::R8;
+ return PixelFormat::R8U;
+ case Tegra::RenderTargetFormat::R8_UINT:
+ return PixelFormat::R8UI;
case Tegra::RenderTargetFormat::RG16_FLOAT:
return PixelFormat::RG16F;
case Tegra::RenderTargetFormat::RG16_UINT:
@@ -243,10 +296,26 @@ struct SurfaceParams {
return PixelFormat::RG16;
case Tegra::RenderTargetFormat::RG16_SNORM:
return PixelFormat::RG16S;
+ case Tegra::RenderTargetFormat::RG8_UNORM:
+ return PixelFormat::RG8U;
+ case Tegra::RenderTargetFormat::RG8_SNORM:
+ return PixelFormat::RG8S;
case Tegra::RenderTargetFormat::R16_FLOAT:
return PixelFormat::R16F;
+ case Tegra::RenderTargetFormat::R16_UNORM:
+ return PixelFormat::R16U;
+ case Tegra::RenderTargetFormat::R16_SNORM:
+ return PixelFormat::R16S;
+ case Tegra::RenderTargetFormat::R16_UINT:
+ return PixelFormat::R16UI;
+ case Tegra::RenderTargetFormat::R16_SINT:
+ return PixelFormat::R16I;
case Tegra::RenderTargetFormat::R32_FLOAT:
return PixelFormat::R32F;
+ case Tegra::RenderTargetFormat::R32_UINT:
+ return PixelFormat::R32UI;
+ case Tegra::RenderTargetFormat::RG32_UINT:
+ return PixelFormat::RG32UI;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -258,21 +327,79 @@ struct SurfaceParams {
// TODO(Subv): Properly implement this
switch (format) {
case Tegra::Texture::TextureFormat::A8R8G8B8:
- return PixelFormat::ABGR8;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::ABGR8U;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::ABGR8S;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::ABGR8UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::B5G6R5:
- return PixelFormat::B5G6R5;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::B5G6R5U;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::A2B10G10R10:
- return PixelFormat::A2B10G10R10;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::A2B10G10R10U;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::A1B5G5R5:
- return PixelFormat::A1B5G5R5;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::A1B5G5R5U;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::R8:
- return PixelFormat::R8;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::R8U;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::R8UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::G8R8:
- return PixelFormat::G8R8;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::G8R8U;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::G8R8S;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
- return PixelFormat::RGBA16F;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::RGBA16U;
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RGBA16F;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::BF10GF11RF11:
- return PixelFormat::R11FG11FB10F;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::R11FG11FB10F;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
@@ -284,23 +411,53 @@ struct SurfaceParams {
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32:
- return PixelFormat::RG32F;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RG32F;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::RG32UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32_B32:
- return PixelFormat::RGB32F;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RGB32F;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::R16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::R16F;
case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::R16UNORM;
+ return PixelFormat::R16U;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::R16S;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::R16UI;
+ case Tegra::Texture::ComponentType::SINT:
+ return PixelFormat::R16I;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32:
- return PixelFormat::R32F;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::R32F;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::R32UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::ZF32:
return PixelFormat::Z32F;
+ case Tegra::Texture::TextureFormat::Z16:
+ return PixelFormat::Z16;
case Tegra::Texture::TextureFormat::Z24S8:
return PixelFormat::Z24S8;
case Tegra::Texture::TextureFormat::DXT1:
@@ -376,9 +533,15 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
case Tegra::RenderTargetFormat::R8_UNORM:
case Tegra::RenderTargetFormat::RG16_UNORM:
+ case Tegra::RenderTargetFormat::R16_UNORM:
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+ case Tegra::RenderTargetFormat::RG8_UNORM:
+ case Tegra::RenderTargetFormat::RGBA16_UNORM:
return ComponentType::UNorm;
+ case Tegra::RenderTargetFormat::RGBA8_SNORM:
case Tegra::RenderTargetFormat::RG16_SNORM:
+ case Tegra::RenderTargetFormat::R16_SNORM:
+ case Tegra::RenderTargetFormat::RG8_SNORM:
return ComponentType::SNorm;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
@@ -389,9 +552,16 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::R32_FLOAT:
return ComponentType::Float;
case Tegra::RenderTargetFormat::RGBA32_UINT:
+ case Tegra::RenderTargetFormat::RGBA16_UINT:
case Tegra::RenderTargetFormat::RG16_UINT:
+ case Tegra::RenderTargetFormat::R8_UINT:
+ case Tegra::RenderTargetFormat::R16_UINT:
+ case Tegra::RenderTargetFormat::RG32_UINT:
+ case Tegra::RenderTargetFormat::R32_UINT:
+ case Tegra::RenderTargetFormat::RGBA8_UINT:
return ComponentType::UInt;
case Tegra::RenderTargetFormat::RG16_SINT:
+ case Tegra::RenderTargetFormat::R16_SINT:
return ComponentType::SInt;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
@@ -402,7 +572,7 @@ struct SurfaceParams {
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
- return PixelFormat::ABGR8;
+ return PixelFormat::ABGR8U;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -429,6 +599,10 @@ struct SurfaceParams {
return SurfaceType::ColorTexture;
}
+ if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) {
+ return SurfaceType::Depth;
+ }
+
if (static_cast<size_t>(pixel_format) <
static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
return SurfaceType::DepthStencil;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 32f06f409..57cf9f213 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -141,6 +141,15 @@ private:
ExitMethod jmp = Scan(target, end, labels);
return exit_method = ParallelExit(no_jmp, jmp);
}
+ case OpCode::Id::SSY: {
+ // The SSY instruction uses an encoding similar to that of the BRA instruction.
+ ASSERT_MSG(instr.bra.constant_buffer == 0,
+ "Constant buffer SSY is not supported");
+ u32 target = offset + instr.bra.GetBranchTarget();
+ labels.insert(target);
+ // Continue scanning for an exit method.
+ break;
+ }
}
}
}
@@ -347,35 +356,43 @@ public:
* @param reg The register to use as the source value.
*/
void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
- std::string dest = GetOutputAttribute(attribute) + GetSwizzle(elem);
+ std::string dest = GetOutputAttribute(attribute);
std::string src = GetRegisterAsFloat(reg);
- shader.AddLine(dest + " = " + src + ';');
+
+ if (!dest.empty()) {
+ // dest is empty for unknown/unimplemented output attributes; in that case the
+ // instruction is ignored for now.
+ shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
+ }
}
/// Generates code representing a uniform (C buffer) register, interpreted as the input type.
- std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) {
+ std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type,
+ Register::Size size = Register::Size::Word) {
declr_const_buffers[index].MarkAsUsed(index, offset, stage);
std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" +
std::to_string(offset % 4) + ']';
if (type == GLSLRegister::Type::Float) {
- return value;
+ // Do nothing, default
} else if (type == GLSLRegister::Type::Integer) {
- return "floatBitsToInt(" + value + ')';
+ value = "floatBitsToInt(" + value + ')';
+ } else if (type == GLSLRegister::Type::UnsignedInteger) {
+ value = "floatBitsToUint(" + value + ')';
} else {
UNREACHABLE();
}
+
+ return ConvertIntegerSize(value, size);
}
- std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
+ std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str,
GLSLRegister::Type type) {
- declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
-
- std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " +
- std::to_string(offset) + ") / 4)";
+ declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
- std::string value =
- 'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]";
+ std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
+ std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
+ final_offset + " % 4]";
if (type == GLSLRegister::Type::Float) {
return value;
@@ -524,21 +541,21 @@ private:
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
- return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
- case Attribute::Index::Unknown_63:
- // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
- LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63");
- UNREACHABLE();
- break;
+ return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))";
+ case Attribute::Index::FrontFacing:
+ // TODO(Subv): Find out what the values are for the other elements.
+ ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
+ return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
- if (attribute >= Attribute::Index::Attribute_0) {
+ if (attribute >= Attribute::Index::Attribute_0 &&
+ attribute <= Attribute::Index::Attribute_31) {
declr_input_attribute.insert(attribute);
return "input_attribute_" + std::to_string(index);
}
- LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
+ LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", static_cast<u32>(attribute));
UNREACHABLE();
}
@@ -560,6 +577,7 @@ private:
LOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
UNREACHABLE();
+ return {};
}
}
@@ -689,10 +707,11 @@ private:
const std::string& op_a, const std::string& op_b) const {
using Tegra::Shader::PredCondition;
static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
- {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
- {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
- {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
- {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="},
+ {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
+ {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
+ {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
+ {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="},
+ {PredCondition::GreaterThanWithNan, ">"},
};
const auto& comparison{PredicateComparisonStrings.find(condition)};
@@ -701,7 +720,8 @@ private:
std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
if (condition == PredCondition::LessThanWithNan ||
- condition == PredCondition::NotEqualWithNan) {
+ condition == PredCondition::NotEqualWithNan ||
+ condition == PredCondition::GreaterThanWithNan) {
predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
}
@@ -727,6 +747,30 @@ private:
return op->second;
}
+ /**
+ * Transforms the input string GLSL operand into one that applies the abs() function and negates
+ * the output if necessary. When both abs and neg are true, the negation will be applied after
+ * taking the absolute value.
+ * @param operand The input operand to take the abs() of, negate, or both.
+ * @param abs Whether to apply the abs() function to the input operand.
+ * @param neg Whether to negate the input operand.
+ * @returns String corresponding to the operand after being transformed by the abs() and
+ * negation operations.
+ */
+ static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) {
+ std::string result = operand;
+
+ if (abs) {
+ result = "abs(" + result + ')';
+ }
+
+ if (neg) {
+ result = "-(" + result + ')';
+ }
+
+ return result;
+ }
+
/*
* Returns whether the instruction at the specified offset is a 'sched' instruction.
* Sched instructions always appear before a sequence of 3 instructions.
@@ -740,28 +784,51 @@ private:
}
void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
- const std::string& op_b) {
+ const std::string& op_b,
+ Tegra::Shader::PredicateResultMode predicate_mode,
+ Tegra::Shader::Pred predicate) {
+ std::string result{};
switch (logic_op) {
case LogicOperation::And: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
+ result = '(' + op_a + " & " + op_b + ')';
break;
}
case LogicOperation::Or: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
+ result = '(' + op_a + " | " + op_b + ')';
break;
}
case LogicOperation::Xor: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
+ result = '(' + op_a + " ^ " + op_b + ')';
break;
}
case LogicOperation::PassB: {
- regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
+ result = op_b;
break;
}
default:
LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
UNREACHABLE();
}
+
+ if (dest != Tegra::Shader::Register::ZeroIndex) {
+ regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
+ }
+
+ using Tegra::Shader::PredicateResultMode;
+ // Write the predicate value depending on the predicate mode.
+ switch (predicate_mode) {
+ case PredicateResultMode::None:
+ // Do nothing.
+ return;
+ case PredicateResultMode::NotZero:
+ // Set the predicate to true if the result is not zero.
+ SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}",
+ static_cast<u32>(predicate_mode));
+ UNREACHABLE();
+ }
}
void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
@@ -799,6 +866,33 @@ private:
shader.AddLine('}');
}
+ /*
+ * Emits code to push the input target address to the SSY address stack, incrementing the stack
+ * top.
+ */
+ void EmitPushToSSYStack(u32 target) {
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine("ssy_stack[ssy_stack_top] = " + std::to_string(target) + "u;");
+ shader.AddLine("ssy_stack_top++;");
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
+ /*
+ * Emits code to pop an address from the SSY address stack, setting the jump address to the
+ * popped address and decrementing the stack top.
+ */
+ void EmitPopFromSSYStack() {
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine("ssy_stack_top--;");
+ shader.AddLine("jmp_to = ssy_stack[ssy_stack_top];");
+ shader.AddLine("break;");
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
/**
* Compiles a single instruction from Tegra to GLSL.
* @param offset the offset of the Tegra shader instruction.
@@ -828,7 +922,11 @@ private:
ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
"NeverExecute predicate not implemented");
- if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
+ // Some instructions (like SSY) don't have a predicate field, they are always
+ // unconditionally executed.
+ bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId());
+
+ if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
shader.AddLine("if (" +
GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) +
')');
@@ -839,13 +937,6 @@ private:
switch (opcode->GetType()) {
case OpCode::Type::Arithmetic: {
std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- if (instr.alu.abs_a) {
- op_a = "abs(" + op_a + ')';
- }
-
- if (instr.alu.negate_a) {
- op_a = "-(" + op_a + ')';
- }
std::string op_b;
@@ -860,17 +951,10 @@ private:
}
}
- if (instr.alu.abs_b) {
- op_b = "abs(" + op_b + ')';
- }
-
- if (instr.alu.negate_b) {
- op_b = "-(" + op_b + ')';
- }
-
switch (opcode->GetId()) {
case OpCode::Id::MOV_C:
case OpCode::Id::MOV_R: {
+ // MOV has neither 'abs' nor 'neg' bits.
regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
break;
}
@@ -878,6 +962,8 @@ private:
case OpCode::Id::FMUL_C:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
+ // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
+ op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
instr.alu.saturate_d);
break;
@@ -885,11 +971,14 @@ private:
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
+ op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
+ op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
instr.alu.saturate_d);
break;
}
case OpCode::Id::MUFU: {
+ op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
switch (instr.sub_op) {
case SubOp::Cos:
regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
@@ -929,6 +1018,9 @@ private:
case OpCode::Id::FMNMX_C:
case OpCode::Id::FMNMX_R:
case OpCode::Id::FMNMX_IMM: {
+ op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
+ op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
std::string condition =
GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
std::string parameters = op_a + ',' + op_b;
@@ -942,7 +1034,7 @@ private:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
// Currently RRO is only implemented as a register move.
- // Usage of `abs_b` and `negate_b` here should also be correct.
+ op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
@@ -1079,7 +1171,9 @@ private:
if (instr.alu.lop32i.invert_b)
op_b = "~(" + op_b + ')';
- WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
+ WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
+ Tegra::Shader::PredicateResultMode::None,
+ Tegra::Shader::Pred::UnusedIndex);
break;
}
default: {
@@ -1145,16 +1239,14 @@ private:
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {
- ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
- ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
-
if (instr.alu.lop.invert_a)
op_a = "~(" + op_a + ')';
if (instr.alu.lop.invert_b)
op_b = "~(" + op_b + ')';
- WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
+ WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
+ instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
break;
}
case OpCode::Id::IMNMX_C:
@@ -1219,8 +1311,6 @@ private:
break;
}
case OpCode::Type::Conversion: {
- ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
-
switch (opcode->GetId()) {
case OpCode::Id::I2I_R: {
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
@@ -1232,20 +1322,41 @@ private:
op_a = "abs(" + op_a + ')';
}
+ if (instr.conversion.negate_a) {
+ op_a = "-(" + op_a + ')';
+ }
+
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
break;
}
- case OpCode::Id::I2F_R: {
+ case OpCode::Id::I2F_R:
+ case OpCode::Id::I2F_C: {
ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
- std::string op_a = regs.GetRegisterAsInteger(
- instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
+
+ std::string op_a{};
+
+ if (instr.is_b_gpr) {
+ op_a =
+ regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed,
+ instr.conversion.src_size);
+ } else {
+ op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ instr.conversion.is_input_signed
+ ? GLSLRegister::Type::Integer
+ : GLSLRegister::Type::UnsignedInteger,
+ instr.conversion.src_size);
+ }
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
}
+ if (instr.conversion.negate_a) {
+ op_a = "-(" + op_a + ')';
+ }
+
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
break;
}
@@ -1254,6 +1365,14 @@ private:
ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
+ if (instr.conversion.abs_a) {
+ op_a = "abs(" + op_a + ')';
+ }
+
+ if (instr.conversion.negate_a) {
+ op_a = "-(" + op_a + ')';
+ }
+
switch (instr.conversion.f2f.rounding) {
case Tegra::Shader::F2fRoundingOp::None:
break;
@@ -1276,21 +1395,29 @@ private:
break;
}
- if (instr.conversion.abs_a) {
- op_a = "abs(" + op_a + ')';
- }
-
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
break;
}
- case OpCode::Id::F2I_R: {
+ case OpCode::Id::F2I_R:
+ case OpCode::Id::F2I_C: {
ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
- std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
+ std::string op_a{};
+
+ if (instr.is_b_gpr) {
+ op_a = regs.GetRegisterAsFloat(instr.gpr20);
+ } else {
+ op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Float);
+ }
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
}
+ if (instr.conversion.negate_a) {
+ op_a = "-(" + op_a + ')';
+ }
+
switch (instr.conversion.f2i.rounding) {
case Tegra::Shader::F2iRoundingOp::None:
break;
@@ -1338,11 +1465,16 @@ private:
case OpCode::Id::LD_C: {
ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented");
+ // Add an extra scope and declare the index register inside to prevent
+ // overwriting it in case it is used as an output of the LD instruction.
+ shader.AddLine("{");
+ ++shader.scope;
+
+ shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
+ " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
+
std::string op_a =
- regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8,
- GLSLRegister::Type::Float);
- std::string op_b =
- regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8,
+ regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
GLSLRegister::Type::Float);
switch (instr.ld_c.type.Value()) {
@@ -1350,16 +1482,22 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
break;
- case Tegra::Shader::UniformType::Double:
+ case Tegra::Shader::UniformType::Double: {
+ std::string op_b =
+ regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
+ "index", GLSLRegister::Type::Float);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
break;
-
+ }
default:
LOG_CRITICAL(HW_GPU, "Unhandled type: {}",
static_cast<unsigned>(instr.ld_c.type.Value()));
UNREACHABLE();
}
+
+ --shader.scope;
+ shader.AddLine("}");
break;
}
case OpCode::Id::ST_A: {
@@ -1615,6 +1753,99 @@ private:
}
break;
}
+ case OpCode::Type::Xmad: {
+ ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented");
+ ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented");
+
+ std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
+ std::string op_b;
+ std::string op_c;
+
+ // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
+ ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented");
+ const bool is_signed{instr.xmad.sign_a == 1};
+
+ bool is_merge{};
+ switch (opcode->GetId()) {
+ case OpCode::Id::XMAD_CR: {
+ is_merge = instr.xmad.merge_56;
+ op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ instr.xmad.sign_b ? GLSLRegister::Type::Integer
+ : GLSLRegister::Type::UnsignedInteger);
+ op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
+ break;
+ }
+ case OpCode::Id::XMAD_RR: {
+ is_merge = instr.xmad.merge_37;
+ op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b);
+ op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
+ break;
+ }
+ case OpCode::Id::XMAD_RC: {
+ op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b);
+ op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ is_signed ? GLSLRegister::Type::Integer
+ : GLSLRegister::Type::UnsignedInteger);
+ break;
+ }
+ case OpCode::Id::XMAD_IMM: {
+ is_merge = instr.xmad.merge_37;
+ op_b += std::to_string(instr.xmad.imm20_16);
+ op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+
+ // TODO(bunnei): Ensure this is right with signed operands
+ if (instr.xmad.high_a) {
+ op_a = "((" + op_a + ") >> 16)";
+ } else {
+ op_a = "((" + op_a + ") & 0xFFFF)";
+ }
+
+ std::string src2 = '(' + op_b + ')'; // Preserve original source 2
+ if (instr.xmad.high_b) {
+ op_b = '(' + src2 + " >> 16)";
+ } else {
+ op_b = '(' + src2 + " & 0xFFFF)";
+ }
+
+ std::string product = '(' + op_a + " * " + op_b + ')';
+ if (instr.xmad.product_shift_left) {
+ product = '(' + product + " << 16)";
+ }
+
+ switch (instr.xmad.mode) {
+ case Tegra::Shader::XmadMode::None:
+ break;
+ case Tegra::Shader::XmadMode::CLo:
+ op_c = "((" + op_c + ") & 0xFFFF)";
+ break;
+ case Tegra::Shader::XmadMode::CHi:
+ op_c = "((" + op_c + ") >> 16)";
+ break;
+ case Tegra::Shader::XmadMode::CBcc:
+ op_c = "((" + op_c + ") + (" + src2 + "<< 16))";
+ break;
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}",
+ static_cast<u32>(instr.xmad.mode.Value()));
+ UNREACHABLE();
+ }
+ }
+
+ std::string sum{'(' + product + " + " + op_c + ')'};
+ if (is_merge) {
+ sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))";
+ }
+
+ regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1);
+ break;
+ }
default: {
switch (opcode->GetId()) {
case OpCode::Id::EXIT: {
@@ -1652,7 +1883,15 @@ private:
}
case OpCode::Id::KIL: {
ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+
+ // Enclose "discard" in a conditional, so that GLSL compilation does not complain
+ // about unexecuted instructions that may follow this.
+ shader.AddLine("if (true) {");
+ ++shader.scope;
shader.AddLine("discard;");
+ --shader.scope;
+ shader.AddLine("}");
+
break;
}
case OpCode::Id::BRA: {
@@ -1668,16 +1907,25 @@ private:
break;
}
case OpCode::Id::SSY: {
- // The SSY opcode tells the GPU where to re-converge divergent execution paths, we
- // can ignore this when generating GLSL code.
+ // The SSY opcode tells the GPU where to re-converge divergent execution paths; it
+ // sets the target of the jump that the SYNC instruction will make. The SSY opcode
+ // has a similar structure to the BRA opcode.
+ ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
+
+ u32 target = offset + instr.bra.GetBranchTarget();
+ EmitPushToSSYStack(target);
break;
}
- case OpCode::Id::SYNC:
+ case OpCode::Id::SYNC: {
+ // The SYNC opcode jumps to the address previously set by the SSY opcode
ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+ EmitPopFromSSYStack();
+ break;
+ }
case OpCode::Id::DEPBAR: {
- // TODO(Subv): Find out if we actually have to care about these instructions or if
+ // TODO(Subv): Find out if we actually have to care about this instruction or if
// the GLSL compiler takes care of that for us.
- LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed");
+ LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
default: {
@@ -1691,7 +1939,7 @@ private:
}
// Close the predicate condition scope.
- if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
+ if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
--shader.scope;
shader.AddLine('}');
}
@@ -1742,6 +1990,13 @@ private:
} else {
labels.insert(subroutine.begin);
shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
+
+ // TODO(Subv): Figure out the actual depth of the SSY stack; for now it seems
+ // unlikely that shaders will nest more than 20 SSYs.
+ constexpr u32 SSY_STACK_SIZE = 20;
+ shader.AddLine("uint ssy_stack[" + std::to_string(SSY_STACK_SIZE) + "];");
+ shader.AddLine("uint ssy_stack_top = 0u;");
+
shader.AddLine("while (true) {");
++shader.scope;
@@ -1757,7 +2012,7 @@ private:
u32 compile_end = CompileRange(label, next_label);
if (compile_end > next_label && compile_end != PROGRAM_END) {
// This happens only when there is a label inside a IF/LOOP block
- shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }");
+ shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
labels.emplace(compile_end);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 129c777d1..57e0e1726 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -38,6 +38,7 @@ out vec4 position;
layout (std140) uniform vs_config {
vec4 viewport_flip;
+ uvec4 instance_id;
};
void main() {
@@ -90,6 +91,7 @@ out vec4 color;
layout (std140) uniform fs_config {
vec4 viewport_flip;
+ uvec4 instance_id;
};
void main() {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 415d42fda..f0886caac 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -37,11 +37,16 @@ void SetShaderUniformBlockBindings(GLuint shader) {
} // namespace Impl
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
+ const auto& state = gpu.state;
// TODO(bunnei): Support more than one viewport
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
+
+ // We only assign the instance to the first component of the vector, the rest is just padding.
+ instance_id[0] = state.current_instance;
}
} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 716933a0b..75fa73605 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -24,14 +24,15 @@ void SetShaderUniformBlockBindings(GLuint shader);
} // namespace Impl
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
+// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
struct MaxwellUniformData {
void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
alignas(16) GLvec4 viewport_flip;
+ alignas(16) GLuvec4 instance_id;
};
-static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 68bacd4c5..1d1975179 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -203,21 +203,6 @@ void OpenGLState::Apply() const {
}
}
- // Constbuffers
- for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) {
- for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) {
- const auto& current = cur_state.draw.const_buffers[stage][buffer_id];
- const auto& new_state = draw.const_buffers[stage][buffer_id];
-
- if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
- current.ssbo != new_state.ssbo) {
- if (new_state.enabled) {
- glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo);
- }
- }
- }
- }
-
// Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 5c7b636e4..bdb02ba25 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -119,12 +119,6 @@ public:
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
GLuint shader_program; // GL_CURRENT_PROGRAM
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
- struct ConstBufferConfig {
- bool enabled = false;
- GLuint bindpoint;
- GLuint ssbo;
- };
- std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers;
} draw;
struct {
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index a2713e9f0..03a8ed8b7 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -9,174 +9,91 @@
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-class OrphanBuffer : public OGLStreamBuffer {
-public:
- explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {}
- ~OrphanBuffer() override;
-
-private:
- void Create(size_t size, size_t sync_subdivide) override;
- void Release() override;
-
- std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
- void Unmap() override;
-
- std::vector<u8> data;
-};
-
-class StorageBuffer : public OGLStreamBuffer {
-public:
- explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {}
- ~StorageBuffer() override;
-
-private:
- void Create(size_t size, size_t sync_subdivide) override;
- void Release() override;
-
- std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
- void Unmap() override;
-
- struct Fence {
- OGLSync sync;
- size_t offset;
- };
- std::deque<Fence> head;
- std::deque<Fence> tail;
-
- u8* mapped_ptr;
-};
-
-OGLStreamBuffer::OGLStreamBuffer(GLenum target) {
- gl_target = target;
-}
-
-GLuint OGLStreamBuffer::GetHandle() const {
- return gl_buffer.handle;
-}
+OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
+ : gl_target(target), buffer_size(size) {
+ gl_buffer.Create();
+ glBindBuffer(gl_target, gl_buffer.handle);
-std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) {
- if (storage_buffer) {
- return std::make_unique<StorageBuffer>(target);
+ GLsizeiptr allocate_size = size;
+ if (target == GL_ARRAY_BUFFER) {
+ // On AMD GPUs there is a strange crash in indexed drawing. The crash happens when the buffer
+ // read position is near the end and is an out-of-bounds access to the vertex buffer. This is
+ // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
+ // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
+ // crash.
+ allocate_size *= 2;
}
- return std::make_unique<OrphanBuffer>(target);
-}
-OrphanBuffer::~OrphanBuffer() {
- Release();
+ if (GLAD_GL_ARB_buffer_storage) {
+ persistent = true;
+ coherent = prefer_coherent;
+ GLbitfield flags =
+ GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
+ glBufferStorage(gl_target, allocate_size, nullptr, flags);
+ mapped_ptr = static_cast<u8*>(glMapBufferRange(
+ gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
+ } else {
+ glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
+ }
}
-void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) {
- buffer_pos = 0;
- buffer_size = size;
- data.resize(buffer_size);
-
- if (gl_buffer.handle == 0) {
- gl_buffer.Create();
+OGLStreamBuffer::~OGLStreamBuffer() {
+ if (persistent) {
glBindBuffer(gl_target, gl_buffer.handle);
+ glUnmapBuffer(gl_target);
}
-
- glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW);
-}
-
-void OrphanBuffer::Release() {
gl_buffer.Release();
}
-std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) {
- buffer_pos = Common::AlignUp(buffer_pos, alignment);
-
- if (buffer_pos + size > buffer_size) {
- Create(std::max(buffer_size, size), 0);
- }
-
- mapped_size = size;
- return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos));
-}
-
-void OrphanBuffer::Unmap() {
- glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos),
- static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]);
- buffer_pos += mapped_size;
-}
-
-StorageBuffer::~StorageBuffer() {
- Release();
+GLuint OGLStreamBuffer::GetHandle() const {
+ return gl_buffer.handle;
}
-void StorageBuffer::Create(size_t size, size_t sync_subdivide) {
- if (gl_buffer.handle != 0)
- return;
-
- buffer_pos = 0;
- buffer_size = size;
- buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1);
-
- gl_buffer.Create();
- glBindBuffer(gl_target, gl_buffer.handle);
-
- glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr,
- GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
- mapped_ptr = reinterpret_cast<u8*>(
- glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size),
- GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
+GLsizeiptr OGLStreamBuffer::GetSize() const {
+ return buffer_size;
}
-void StorageBuffer::Release() {
- if (gl_buffer.handle == 0)
- return;
-
- glUnmapBuffer(gl_target);
-
- gl_buffer.Release();
- head.clear();
- tail.clear();
-}
-
-std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) {
+std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= buffer_size);
+ ASSERT(alignment <= buffer_size);
+ mapped_size = size;
- OGLSync sync;
-
- buffer_pos = Common::AlignUp(buffer_pos, alignment);
- size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide);
-
- if (!head.empty() &&
- (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) {
- ASSERT(head.back().sync.handle == 0);
- head.back().sync.Create();
+ if (alignment > 0) {
+ buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
}
+ bool invalidate = false;
if (buffer_pos + size > buffer_size) {
- if (!tail.empty()) {
- std::swap(sync, tail.back().sync);
- tail.clear();
- }
- std::swap(tail, head);
buffer_pos = 0;
- effective_offset = 0;
- }
+ invalidate = true;
- while (!tail.empty() && buffer_pos + size > tail.front().offset) {
- std::swap(sync, tail.front().sync);
- tail.pop_front();
+ if (persistent) {
+ glUnmapBuffer(gl_target);
+ }
}
- if (sync.handle != 0) {
- glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
- sync.Release();
+ if (invalidate | !persistent) {
+ GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
+ (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
+ (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
+ mapped_ptr = static_cast<u8*>(
+ glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
+ mapped_offset = buffer_pos;
}
- if (head.empty() || effective_offset > head.back().offset) {
- head.emplace_back();
- head.back().offset = effective_offset;
+ return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
+}
+
+void OGLStreamBuffer::Unmap(GLsizeiptr size) {
+ ASSERT(size <= mapped_size);
+
+ if (!coherent && size > 0) {
+ glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
}
- mapped_size = size;
- return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos));
-}
+ if (!persistent) {
+ glUnmapBuffer(gl_target);
+ }
-void StorageBuffer::Unmap() {
- glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos),
- static_cast<GLsizeiptr>(mapped_size));
- buffer_pos += mapped_size;
+ buffer_pos += size;
}
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index e78dc5784..45592daaf 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -2,35 +2,43 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once

-#include <memory>
+#include <tuple>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(GLenum target);
- virtual ~OGLStreamBuffer() = default;
-
-public:
- static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target);
-
- virtual void Create(size_t size, size_t sync_subdivide) = 0;
- virtual void Release() {}
+ explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
+ ~OGLStreamBuffer();
GLuint GetHandle() const;
+ GLsizeiptr GetSize() const;
+
+ /*
+ * Allocates a linear chunk of at least "size" bytes in the GPU buffer, honoring the
+ * optional alignment requirement.
+ * If the buffer is full, the whole buffer is reallocated, which invalidates old chunks.
+ * Returns a tuple of: the pointer to the new chunk, the chunk's offset within the buffer,
+ * and a flag that is true when previously returned chunks were invalidated.
+ * The size actually used must be passed to Unmap() when unmapping the chunk.
+ */
+ std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
- virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0;
- virtual void Unmap() = 0;
+ void Unmap(GLsizeiptr size);
-protected:
+private:
OGLBuffer gl_buffer;
GLenum gl_target;
- size_t buffer_pos = 0;
- size_t buffer_size = 0;
- size_t buffer_sync_subdivide = 0;
- size_t mapped_size = 0;
+ bool coherent = false;
+ bool persistent = false;
+
+ GLintptr buffer_pos = 0;
+ GLsizeiptr buffer_size = 0;
+ GLintptr mapped_offset = 0;
+ GLsizeiptr mapped_size = 0;
+ u8* mapped_ptr = nullptr;
};
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index c439446b1..5d91a0c2f 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -24,15 +24,25 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
+ case Maxwell::VertexAttribute::Type::UnsignedInt:
case Maxwell::VertexAttribute::Type::UnsignedNorm: {
switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_UNSIGNED_SHORT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
}
@@ -42,16 +52,25 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return {};
}
+ case Maxwell::VertexAttribute::Type::SignedInt:
case Maxwell::VertexAttribute::Type::SignedNorm: {
switch (attrib.size) {
- case Maxwell::VertexAttribute::Size::Size_32_32_32:
- return GL_INT;
+ case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_SHORT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
}
@@ -61,9 +80,6 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return {};
}
- case Maxwell::VertexAttribute::Type::UnsignedInt:
- return GL_UNSIGNED_INT;
-
case Maxwell::VertexAttribute::Type::Float:
return GL_FLOAT;
}
@@ -91,6 +107,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return GL_POINTS;
+ case Maxwell::PrimitiveTopology::LineStrip:
+ return GL_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
@@ -129,6 +147,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
// GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
// manually mix them. However the shader part of this is not yet implemented.
return GL_CLAMP_TO_BORDER;
+ case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
+ return GL_MIRROR_CLAMP_TO_EDGE;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
UNREACHABLE();
@@ -156,42 +176,61 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
switch (factor) {
case Maxwell::Blend::Factor::Zero:
+ case Maxwell::Blend::Factor::ZeroGL:
return GL_ZERO;
case Maxwell::Blend::Factor::One:
+ case Maxwell::Blend::Factor::OneGL:
return GL_ONE;
case Maxwell::Blend::Factor::SourceColor:
+ case Maxwell::Blend::Factor::SourceColorGL:
return GL_SRC_COLOR;
case Maxwell::Blend::Factor::OneMinusSourceColor:
+ case Maxwell::Blend::Factor::OneMinusSourceColorGL:
return GL_ONE_MINUS_SRC_COLOR;
case Maxwell::Blend::Factor::SourceAlpha:
+ case Maxwell::Blend::Factor::SourceAlphaGL:
return GL_SRC_ALPHA;
case Maxwell::Blend::Factor::OneMinusSourceAlpha:
+ case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
return GL_ONE_MINUS_SRC_ALPHA;
case Maxwell::Blend::Factor::DestAlpha:
+ case Maxwell::Blend::Factor::DestAlphaGL:
return GL_DST_ALPHA;
case Maxwell::Blend::Factor::OneMinusDestAlpha:
+ case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
return GL_ONE_MINUS_DST_ALPHA;
case Maxwell::Blend::Factor::DestColor:
+ case Maxwell::Blend::Factor::DestColorGL:
return GL_DST_COLOR;
case Maxwell::Blend::Factor::OneMinusDestColor:
+ case Maxwell::Blend::Factor::OneMinusDestColorGL:
return GL_ONE_MINUS_DST_COLOR;
case Maxwell::Blend::Factor::SourceAlphaSaturate:
+ case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
return GL_SRC_ALPHA_SATURATE;
case Maxwell::Blend::Factor::Source1Color:
+ case Maxwell::Blend::Factor::Source1ColorGL:
return GL_SRC1_COLOR;
case Maxwell::Blend::Factor::OneMinusSource1Color:
+ case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
return GL_ONE_MINUS_SRC1_COLOR;
case Maxwell::Blend::Factor::Source1Alpha:
+ case Maxwell::Blend::Factor::Source1AlphaGL:
return GL_SRC1_ALPHA;
case Maxwell::Blend::Factor::OneMinusSource1Alpha:
+ case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
return GL_ONE_MINUS_SRC1_ALPHA;
case Maxwell::Blend::Factor::ConstantColor:
+ case Maxwell::Blend::Factor::ConstantColorGL:
return GL_CONSTANT_COLOR;
case Maxwell::Blend::Factor::OneMinusConstantColor:
+ case Maxwell::Blend::Factor::OneMinusConstantColorGL:
return GL_ONE_MINUS_CONSTANT_COLOR;
case Maxwell::Blend::Factor::ConstantAlpha:
+ case Maxwell::Blend::Factor::ConstantAlphaGL:
return GL_CONSTANT_ALPHA;
case Maxwell::Blend::Factor::OneMinusConstantAlpha:
+ case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 899865e3b..bf30eda6d 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -18,7 +18,6 @@
#include "core/tracer/recorder.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/utils.h"
-#include "video_core/video_core.h"
static const char vertex_shader[] = R"(
#version 150 core
@@ -92,7 +91,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix;
}
-ScopeAcquireGLContext::ScopeAcquireGLContext(EmuWindow& emu_window_) : emu_window{emu_window_} {
+ScopeAcquireGLContext::ScopeAcquireGLContext(Core::Frontend::EmuWindow& emu_window_)
+ : emu_window{emu_window_} {
if (Settings::values.use_multi_core) {
emu_window.MakeCurrent();
}
@@ -103,7 +103,9 @@ ScopeAcquireGLContext::~ScopeAcquireGLContext() {
}
}
-RendererOpenGL::RendererOpenGL(EmuWindow& window) : VideoCore::RendererBase{window} {}
+RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
+ : VideoCore::RendererBase{window} {}
+
RendererOpenGL::~RendererOpenGL() = default;
/// Swap buffers (render frame)
@@ -423,7 +425,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
switch (severity) {
case GL_DEBUG_SEVERITY_HIGH:
- LOG_ERROR(Render_OpenGL, format, str_source, str_type, id, message);
+ LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_MEDIUM:
LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 428afa3b7..a5eab6997 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,7 +12,9 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
+namespace Core::Frontend {
class EmuWindow;
+}
/// Structure used for storing information about the textures for the Switch screen
struct TextureInfo {
@@ -34,16 +36,16 @@ struct ScreenInfo {
/// Helper class to acquire/release OpenGL context within a given scope
class ScopeAcquireGLContext : NonCopyable {
public:
- explicit ScopeAcquireGLContext(EmuWindow& window);
+ explicit ScopeAcquireGLContext(Core::Frontend::EmuWindow& window);
~ScopeAcquireGLContext();
private:
- EmuWindow& emu_window;
+ Core::Frontend::EmuWindow& emu_window;
};
class RendererOpenGL : public VideoCore::RendererBase {
public:
- explicit RendererOpenGL(EmuWindow& window);
+ explicit RendererOpenGL(Core::Frontend::EmuWindow& window);
~RendererOpenGL() override;
/// Swap buffers (render frame)
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 5085ef96b..6780d1c16 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -9,9 +9,7 @@
namespace VideoCore {
-std::atomic<bool> g_toggle_framelimit_enabled;
-
-std::unique_ptr<RendererBase> CreateRenderer(EmuWindow& emu_window) {
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) {
return std::make_unique<RendererOpenGL>(emu_window);
}
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 7c01c0b8d..f79f85dfe 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -4,27 +4,22 @@
#pragma once
-#include <atomic>
#include <memory>
+namespace Core::Frontend {
class EmuWindow;
+}
namespace VideoCore {
class RendererBase;
-enum class Renderer { Software, OpenGL };
-
-// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from
-// qt ui)
-extern std::atomic<bool> g_toggle_framelimit_enabled;
-
/**
* Creates a renderer instance.
*
* @note The returned renderer instance is simply allocated. Its Init()
* function still needs to be called to fully complete its setup.
*/
-std::unique_ptr<RendererBase> CreateRenderer(EmuWindow& emu_window);
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window);
} // namespace VideoCore