aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp141
-rw-r--r--src/video_core/renderer_opengl/gl_device.h32
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.cpp57
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.h19
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp353
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h32
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp368
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp405
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp78
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h102
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp71
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h19
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_state.h18
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp186
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h22
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp232
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h22
-rw-r--r--src/video_core/renderer_opengl/utils.cpp32
-rw-r--r--src/video_core/renderer_opengl/utils.h18
23 files changed, 1166 insertions, 1128 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c65b24c69..1a2e2a9f7 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,7 +5,10 @@
#include <algorithm>
#include <array>
#include <cstddef>
+#include <cstring>
+#include <optional>
#include <vector>
+
#include <glad/glad.h>
#include "common/logging/log.h"
@@ -17,6 +20,30 @@ namespace OpenGL {
namespace {
+// One uniform block is reserved for emulation purposes
+constexpr u32 ReservedUniformBlocks = 1;
+
+constexpr u32 NumStages = 5;
+
+constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
+ GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
+ GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
+
+constexpr std::array LimitSSBOs = {
+ GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
+ GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
+ GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
+
+constexpr std::array LimitSamplers = {
+ GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TEXTURE_IMAGE_UNITS};
+
+constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
+ GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
+ GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
+ GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
+
template <typename T>
T GetInteger(GLenum pname) {
GLint temporary;
@@ -48,13 +75,73 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
return std::find(images.begin(), images.end(), extension) != images.end();
}
+u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
+ ASSERT(num >= amount);
+ if (limit) {
+ amount = std::min(amount, GetInteger<u32>(*limit));
+ }
+ num -= amount;
+ return std::exchange(base, base + amount);
+}
+
+std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
+ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
+
+ static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4};
+ const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
+ const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
+ const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
+
+ u32 num_ubos = total_ubos - ReservedUniformBlocks;
+ u32 num_ssbos = total_ssbos;
+ u32 num_samplers = total_samplers;
+
+ u32 base_ubo = ReservedUniformBlocks;
+ u32 base_ssbo = 0;
+ u32 base_samplers = 0;
+
+ for (std::size_t i = 0; i < NumStages; ++i) {
+ const std::size_t stage = stage_swizzle[i];
+ bindings[stage] = {
+ Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
+ Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
+ Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
+ }
+
+ u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
+ u32 base_images = 0;
+
+ // Reserve more image bindings on fragment and vertex stages.
+ bindings[4].image =
+ Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
+ bindings[0].image =
+ Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
+
+ // Reserve the other image bindings.
+ const u32 total_extracted_images = num_images / (NumStages - 2);
+ for (std::size_t i = 2; i < NumStages; ++i) {
+ const std::size_t stage = stage_swizzle[i];
+ bindings[stage].image =
+ Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
+ }
+
+ // Compute doesn't care about any of this.
+ bindings[5] = {0, 0, 0, 0};
+
+ return bindings;
+}
+
} // Anonymous namespace
-Device::Device() {
+Device::Device() : base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+ const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
+ const bool is_amd = vendor == "ATI Technologies Inc.";
+ const bool is_intel = vendor == "Intel";
+ const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -62,11 +149,13 @@ Device::Device() {
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
+ has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_variable_aoffi = TestVariableAoffi();
- has_component_indexing_bug = TestComponentIndexingBug();
+ has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
+ has_broken_compute = is_intel_proprietary;
has_fast_buffer_sub_data = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
@@ -79,10 +168,12 @@ Device::Device(std::nullptr_t) {
max_vertex_attributes = 16;
max_varyings = 15;
has_warp_intrinsics = true;
+ has_shader_ballot = true;
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
has_variable_aoffi = true;
has_component_indexing_bug = false;
+ has_broken_compute = false;
has_precise_bug = false;
}
@@ -97,52 +188,6 @@ void main() {
})");
}
-bool Device::TestComponentIndexingBug() {
- const GLchar* COMPONENT_TEST = R"(#version 430 core
-layout (std430, binding = 0) buffer OutputBuffer {
- uint output_value;
-};
-layout (std140, binding = 0) uniform InputBuffer {
- uvec4 input_value[4096];
-};
-layout (location = 0) uniform uint idx;
-void main() {
- output_value = input_value[idx >> 2][idx & 3];
-})";
- const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)};
- SCOPE_EXIT({ glDeleteProgram(shader); });
- glUseProgram(shader);
-
- OGLVertexArray vao;
- vao.Create();
- glBindVertexArray(vao.handle);
-
- constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432};
- OGLBuffer ubo;
- ubo.Create();
- glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW);
- glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle);
-
- OGLBuffer ssbo;
- ssbo.Create();
- glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT);
-
- for (GLuint index = 4; index < 8; ++index) {
- glInvalidateBufferData(ssbo.handle);
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle);
-
- glProgramUniform1ui(shader, 0, index);
- glDrawArrays(GL_POINTS, 0, 1);
-
- GLuint result;
- glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
- if (result != values.at(index)) {
- return true;
- }
- }
- return false;
-}
-
bool Device::TestPreciseBug() {
return !TestProgram(R"(#version 430 core
in vec3 coords;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bf35bd0b6..d73b099d0 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -6,14 +6,32 @@
#include <cstddef>
#include "common/common_types.h"
+#include "video_core/engines/shader_type.h"
namespace OpenGL {
-class Device {
+static constexpr u32 EmulationUniformBlockBinding = 0;
+
+class Device final {
public:
+ struct BaseBindings final {
+ u32 uniform_buffer{};
+ u32 shader_storage_buffer{};
+ u32 sampler{};
+ u32 image{};
+ };
+
explicit Device();
explicit Device(std::nullptr_t);
+ const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
+ return base_bindings[stage_index];
+ }
+
+ const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
+ return GetBaseBindings(static_cast<std::size_t>(shader_type));
+ }
+
std::size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
@@ -34,6 +52,10 @@ public:
return has_warp_intrinsics;
}
+ bool HasShaderBallot() const {
+ return has_shader_ballot;
+ }
+
bool HasVertexViewportLayer() const {
return has_vertex_viewport_layer;
}
@@ -54,25 +76,31 @@ public:
return has_precise_bug;
}
+ bool HasBrokenCompute() const {
+ return has_broken_compute;
+ }
+
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
private:
static bool TestVariableAoffi();
- static bool TestComponentIndexingBug();
static bool TestPreciseBug();
+ std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
bool has_warp_intrinsics{};
+ bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
+ bool has_broken_compute{};
bool has_fast_buffer_sub_data{};
};
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
index a5d69d78d..874ed3c6e 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -3,9 +3,12 @@
// Refer to the license.txt file included.
#include <tuple>
+#include <unordered_map>
+#include <utility>
-#include "common/cityhash.h"
-#include "common/scope_exit.h"
+#include <glad/glad.h>
+
+#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
@@ -13,6 +16,7 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using VideoCore::Surface::SurfaceType;
FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
@@ -35,36 +39,49 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
local_state.draw.draw_framebuffer = framebuffer.handle;
local_state.ApplyFramebufferState();
+ if (key.zeta) {
+ const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
+ const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
+ key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
+ }
+
+ std::size_t num_buffers = 0;
+ std::array<GLenum, Maxwell::NumRenderTargets> targets;
+
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- if (key.colors[index]) {
- key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
- GL_DRAW_FRAMEBUFFER);
+ if (!key.colors[index]) {
+ targets[index] = GL_NONE;
+ continue;
}
+ const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
+ key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
+
+ const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
+ targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
+ num_buffers = index + 1;
}
- if (key.colors_count) {
- glDrawBuffers(key.colors_count, key.color_attachments.data());
+
+ if (num_buffers > 0) {
+ glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
} else {
glDrawBuffer(GL_NONE);
}
- if (key.zeta) {
- key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
- GL_DRAW_FRAMEBUFFER);
- }
-
return framebuffer;
}
-std::size_t FramebufferCacheKey::Hash() const {
- static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct");
- return static_cast<std::size_t>(
- Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
+std::size_t FramebufferCacheKey::Hash() const noexcept {
+ std::size_t hash = std::hash<View>{}(zeta);
+ for (const auto& color : colors) {
+ hash ^= std::hash<View>{}(color);
+ }
+ hash ^= static_cast<std::size_t>(color_attachments) << 16;
+ return hash;
}
-bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
- return std::tie(stencil_enable, colors_count, color_attachments, colors, zeta) ==
- std::tie(rhs.stencil_enable, rhs.colors_count, rhs.color_attachments, rhs.colors,
- rhs.zeta);
+bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
+ return std::tie(colors, zeta, color_attachments) ==
+ std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
index 424344c48..02ec80ae9 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -18,21 +18,24 @@
namespace OpenGL {
-struct alignas(sizeof(u64)) FramebufferCacheKey {
- bool stencil_enable = false;
- u16 colors_count = 0;
+constexpr std::size_t BitsPerAttachment = 4;
- std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{};
- std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
+struct FramebufferCacheKey {
View zeta;
+ std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
+ u32 color_attachments = 0;
- std::size_t Hash() const;
+ std::size_t Hash() const noexcept;
- bool operator==(const FramebufferCacheKey& rhs) const;
+ bool operator==(const FramebufferCacheKey& rhs) const noexcept;
- bool operator!=(const FramebufferCacheKey& rhs) const {
+ bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
+
+ void SetAttachment(std::size_t index, u32 attachment) {
+ color_attachments |= attachment << (BitsPerAttachment * index);
+ }
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e560d70d5..672051102 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -19,9 +19,11 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
+#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -49,8 +51,25 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
-static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
- const GLShader::ConstBufferEntry& entry) {
+namespace {
+
+template <typename Engine, typename Entry>
+Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
+ Tegra::Engines::ShaderType shader_type) {
+ if (entry.IsBindless()) {
+ const Tegra::Texture::TextureHandle tex_handle =
+ engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
+ return engine.GetTextureInfo(tex_handle);
+ }
+ if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
+ return engine.GetStageTexture(shader_type, entry.GetOffset());
+ } else {
+ return engine.GetTexture(entry.GetOffset());
+ }
+}
+
+std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
+ const GLShader::ConstBufferEntry& entry) {
if (!entry.IsIndirect()) {
return entry.GetSize();
}
@@ -64,14 +83,16 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
return buffer.size;
}
+} // Anonymous namespace
+
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
- : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
- system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
+ : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
+ shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info},
+ buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
- clear_framebuffer.Create();
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
CheckExtensions();
@@ -238,12 +259,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = system.GPU().Maxwell3D();
- BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
- const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
+ const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
@@ -251,31 +271,34 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseTrivialGeometryShader();
break;
+ case Maxwell::ShaderProgram::Fragment:
+ shader_program_manager->UseTrivialFragmentShader();
+ break;
default:
break;
}
continue;
}
- const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
-
- GLShader::MaxwellUniformData ubo{};
- ubo.SetFromRegs(gpu, stage);
- const auto [buffer, offset] =
- buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
-
- // Bind the emulation info buffer
- bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
+ // Currently this stages are not supported in the OpenGL backend.
+ // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
+ if (program == Maxwell::ShaderProgram::TesselationControl) {
+ continue;
+ } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+ continue;
+ }
Shader shader{shader_cache.GetStageProgram(program)};
- const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
- SetupDrawConstBuffers(stage_enum, shader);
- SetupDrawGlobalMemory(stage_enum, shader);
- const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
+ // Stage indices are 0 - 5
+ const std::size_t stage = index == 0 ? 0 : index - 1;
+ SetupDrawConstBuffers(stage, shader);
+ SetupDrawGlobalMemory(stage, shader);
+ SetupDrawTextures(stage, shader);
+ SetupDrawImages(stage, shader);
- const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
- const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
+ const ProgramVariant variant(primitive_mode);
+ const auto program_handle = shader->GetHandle(variant);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -304,10 +327,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
- index++;
+ ++index;
}
-
- base_bindings = next_bindings;
}
SyncClipEnabled(clip_distances);
@@ -362,78 +383,58 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
- FramebufferCacheKey fbkey;
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ FramebufferCacheKey key;
+ const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
+ for (std::size_t index = 0; index < colors_count; ++index) {
View color_surface{texture_cache.GetColorBufferSurface(index, true)};
-
- if (color_surface) {
- // Assume that a surface will be written to if it is used as a framebuffer, even
- // if the shader doesn't actually write to it.
- texture_cache.MarkColorBufferInUse(index);
+ if (!color_surface) {
+ continue;
}
+ // Assume that a surface will be written to if it is used as a framebuffer, even
+ // if the shader doesn't actually write to it.
+ texture_cache.MarkColorBufferInUse(index);
- fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
- fbkey.colors[index] = std::move(color_surface);
+ key.SetAttachment(index, regs.rt_control.GetMap(index));
+ key.colors[index] = std::move(color_surface);
}
- fbkey.colors_count = regs.rt_control.count;
if (depth_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
texture_cache.MarkDepthBufferInUse();
-
- fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
- fbkey.zeta = std::move(depth_surface);
+ key.zeta = std::move(depth_surface);
}
texture_cache.GuardRenderTargets(false);
- state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
+ state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
SyncViewport(state);
}
void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
bool using_depth_fb, bool using_stencil_fb) {
+ using VideoCore::Surface::SurfaceType;
+
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
texture_cache.GuardRenderTargets(true);
- View color_surface{};
+ View color_surface;
if (using_color_fb) {
color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
}
- View depth_surface{};
+ View depth_surface;
if (using_depth_fb || using_stencil_fb) {
depth_surface = texture_cache.GetDepthBufferSurface(false);
}
texture_cache.GuardRenderTargets(false);
- current_state.draw.draw_framebuffer = clear_framebuffer.handle;
- current_state.ApplyFramebufferState();
-
- if (color_surface) {
- color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
- } else {
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- }
+ FramebufferCacheKey key;
+ key.colors[0] = color_surface;
+ key.zeta = depth_surface;
- if (depth_surface) {
- const auto& params = depth_surface->GetSurfaceParams();
- switch (params.type) {
- case VideoCore::Surface::SurfaceType::Depth:
- depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- break;
- case VideoCore::Surface::SurfaceType::DepthStencil:
- depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
- break;
- default:
- UNIMPLEMENTED();
- }
- } else {
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
- }
+ current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
+ current_state.ApplyFramebufferState();
}
void RasterizerOpenGL::Clear() {
@@ -516,6 +517,7 @@ void RasterizerOpenGL::Clear() {
ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
SyncViewport(clear_state);
+ SyncRasterizeEnable(clear_state);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -543,6 +545,7 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::DrawPrelude() {
auto& gpu = system.GPU().Maxwell3D();
+ SyncRasterizeEnable(state);
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
@@ -592,8 +595,16 @@ void RasterizerOpenGL::DrawPrelude() {
index_buffer_offset = SetupIndexBuffer();
// Prepare packed bindings.
- bind_ubo_pushbuffer.Setup(0);
- bind_ssbo_pushbuffer.Setup(0);
+ bind_ubo_pushbuffer.Setup();
+ bind_ssbo_pushbuffer.Setup();
+
+ // Setup emulation uniform buffer.
+ GLShader::MaxwellUniformData ubo;
+ ubo.SetFromRegs(gpu);
+ const auto [buffer, offset] =
+ buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
+ bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset,
+ static_cast<GLsizeiptr>(sizeof(ubo)));
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
@@ -726,19 +737,21 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- if (!GLAD_GL_ARB_compute_variable_group_size) {
- LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
- "lack of GL_ARB_compute_variable_group_size");
+ if (device.HasBrokenCompute()) {
return;
}
+ buffer_cache.Acquire();
+
auto kernel = shader_cache.GetComputeKernel(code_addr);
- ProgramVariant variant;
- variant.texture_buffer_usage = SetupComputeTextures(kernel);
+ SetupComputeTextures(kernel);
SetupComputeImages(kernel);
- const auto [program, next_bindings] = kernel->GetProgramHandle(variant);
- state.draw.shader_program = program;
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
+ launch_desc.block_dim_z, launch_desc.shared_alloc,
+ launch_desc.local_pos_alloc);
+ state.draw.shader_program = kernel->GetHandle(variant);
state.draw.program_pipeline = 0;
const std::size_t buffer_size =
@@ -746,8 +759,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);
- bind_ubo_pushbuffer.Setup(0);
- bind_ssbo_pushbuffer.Setup(0);
+ bind_ubo_pushbuffer.Setup();
+ bind_ssbo_pushbuffer.Setup();
SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);
@@ -762,10 +775,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
state.ApplyShaderProgram();
state.ApplyProgramPipeline();
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
- glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
- launch_desc.grid_dim_z, launch_desc.block_dim_x,
- launch_desc.block_dim_y, launch_desc.block_dim_z);
+ glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
}
void RasterizerOpenGL::FlushAll() {}
@@ -821,7 +831,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
const auto surface{
- texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
+ texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
if (!surface) {
return {};
}
@@ -834,7 +844,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
if (params.pixel_format != pixel_format) {
- LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
+ LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
}
screen_info.display_texture = surface->GetTexture();
@@ -843,20 +853,23 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader) {
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
- const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
+ const auto& shader_stage = stages[stage_index];
+
+ u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
for (const auto& entry : shader->GetShaderEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
- SetupConstBuffer(buffer, entry);
+ SetupConstBuffer(binding++, buffer, entry);
}
}
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+
+ u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@@ -864,15 +877,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
buffer.address = config.Address();
buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()];
- SetupConstBuffer(buffer, entry);
+ SetupConstBuffer(binding++, buffer, entry);
}
}
-void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
- bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
+ bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
+ sizeof(float));
return;
}
@@ -883,19 +897,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
device.HasFastBufferSubData());
- bind_ubo_pushbuffer.Push(cbuf, offset, size);
+ bind_ubo_pushbuffer.Push(binding, cbuf, offset, size);
}
-void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader) {
+void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
+ const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
+
+ u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
- SetupGlobalMemory(entry, gpu_addr, size);
+ SetupGlobalMemory(binding++, entry, gpu_addr, size);
}
}
@@ -903,120 +918,82 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
+
+ u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
- SetupGlobalMemory(entry, gpu_addr, size);
+ SetupGlobalMemory(binding++, entry, gpu_addr, size);
}
}
-void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
+void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
- bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
+ bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
}
-TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage,
- const Shader& shader,
- BaseBindings base_bindings) {
+void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& gpu = system.GPU();
- const auto& maxwell3d = gpu.Maxwell3D();
- const auto& entries = shader->GetShaderEntries().samplers;
-
- ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures),
- "Exceeded the number of active textures.");
-
- TextureBufferUsage texture_buffer_usage{0};
-
- for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
- const auto& entry = entries[bindpoint];
- const auto texture = [&] {
- if (!entry.IsBindless()) {
- return maxwell3d.GetStageTexture(stage, entry.GetOffset());
- }
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
- const Tegra::Texture::TextureHandle tex_handle =
- maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
- return maxwell3d.GetTextureInfo(tex_handle);
- }();
-
- if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
- texture_buffer_usage.set(bindpoint);
- }
+ const auto& maxwell3d = system.GPU().Maxwell3D();
+ u32 binding = device.GetBaseBindings(stage_index).sampler;
+ for (const auto& entry : shader->GetShaderEntries().samplers) {
+ const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
+ const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
+ SetupTexture(binding++, texture, entry);
}
-
- return texture_buffer_usage;
}
-TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute();
- const auto& entries = kernel->GetShaderEntries().samplers;
-
- ASSERT_MSG(entries.size() <= std::size(state.textures),
- "Exceeded the number of active textures.");
-
- TextureBufferUsage texture_buffer_usage{0};
-
- for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
- const auto& entry = entries[bindpoint];
- const auto texture = [&] {
- if (!entry.IsBindless()) {
- return compute.GetTexture(entry.GetOffset());
- }
- const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
- Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
- return compute.GetTextureInfo(tex_handle);
- }();
-
- if (SetupTexture(bindpoint, texture, entry)) {
- texture_buffer_usage.set(bindpoint);
- }
+ u32 binding = 0;
+ for (const auto& entry : kernel->GetShaderEntries().samplers) {
+ const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
+ SetupTexture(binding++, texture, entry);
}
-
- return texture_buffer_usage;
}
-bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const GLShader::SamplerEntry& entry) {
- state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
-
const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
if (!view) {
// Can occur when texture addr is null or its memory is unmapped/invalid
+ state.samplers[binding] = 0;
state.textures[binding] = 0;
- return false;
+ return;
}
state.textures[binding] = view->GetTexture();
if (view->GetSurfaceParams().IsBuffer()) {
- return true;
+ return;
}
+ state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
// Apply swizzle to textures that are not buffers.
view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
- return false;
+}
+
+void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
+ const auto& maxwell3d = system.GPU().Maxwell3D();
+ u32 binding = device.GetBaseBindings(stage_index).image;
+ for (const auto& entry : shader->GetShaderEntries().images) {
+ const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
+ const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
+ SetupImage(binding++, tic, entry);
+ }
}
void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
const auto& compute = system.GPU().KeplerCompute();
- const auto& entries = shader->GetShaderEntries().images;
- for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
- const auto& entry = entries[bindpoint];
- const auto tic = [&] {
- if (!entry.IsBindless()) {
- return compute.GetTexture(entry.GetOffset()).tic;
- }
- const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
- Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
- return compute.GetTextureInfo(tex_handle).tic;
- }();
- SetupImage(bindpoint, tic, entry);
+ u32 binding = 0;
+ for (const auto& entry : shader->GetShaderEntries().images) {
+ const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
+ SetupImage(binding++, tic, entry);
}
}
@@ -1055,6 +1032,19 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
+
+ bool flip_y = false;
+ if (regs.viewport_transform[0].scale_y < 0.0) {
+ flip_y = !flip_y;
+ }
+ if (regs.screen_y_control.y_negate != 0) {
+ flip_y = !flip_y;
+ }
+ state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+ state.clip_control.depth_mode =
+ regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
+ ? GL_ZERO_TO_ONE
+ : GL_NEGATIVE_ONE_TO_ONE;
}
void RasterizerOpenGL::SyncClipEnabled(
@@ -1077,28 +1067,14 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- auto& maxwell3d = system.GPU().Maxwell3D();
-
- const auto& regs = maxwell3d.regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
- state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
-
- const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
- regs.viewport_transform[0].scale_y < 0.0f};
-
- // If the GPU is configured to flip the rasterized triangles, then we need to flip the
- // notion of front and back. Note: We flip the triangles when the value of the register is 0
- // because OpenGL already does it for us.
- if (flip_triangles) {
- if (state.cull.front_face == GL_CCW)
- state.cull.front_face = GL_CW;
- else if (state.cull.front_face == GL_CW)
- state.cull.front_face = GL_CCW;
- }
}
+
+ state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
@@ -1162,6 +1138,11 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
}
+void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ current_state.rasterizer_discard = regs.rasterize_enable == 0;
+}
+
void RasterizerOpenGL::SyncColorMask() {
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.color_mask) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bd6fe5c3a..6a27cf497 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -83,42 +83,41 @@ private:
bool using_depth_fb, bool using_stencil_fb);
/// Configures the current constbuffers to use for the draw command.
- void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader);
+ void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
/// Configures the current constbuffers to use for the kernel invocation.
void SetupComputeConstBuffers(const Shader& kernel);
/// Configures a constant buffer.
- void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+ void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
- void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader);
+ void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
/// Configures the current global memory entries to use for the kernel invocation.
void SetupComputeGlobalMemory(const Shader& kernel);
/// Configures a constant buffer.
- void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
+ void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size);
/// Syncs all the state, shaders, render targets and textures setting before a draw call.
void DrawPrelude();
- /// Configures the current textures to use for the draw command. Returns shaders texture buffer
- /// usage.
- TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, BaseBindings base_bindings);
+ /// Configures the current textures to use for the draw command.
+ void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
- /// Configures the textures used in a compute shader. Returns texture buffer usage.
- TextureBufferUsage SetupComputeTextures(const Shader& kernel);
+ /// Configures the textures used in a compute shader.
+ void SetupComputeTextures(const Shader& kernel);
- /// Configures a texture. Returns true when the texture is a texture buffer.
- bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+ /// Configures a texture.
+ void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const GLShader::SamplerEntry& entry);
+ /// Configures images in a graphics shader.
+ void SetupDrawImages(std::size_t stage_index, const Shader& shader);
+
/// Configures images in a compute shader.
void SetupComputeImages(const Shader& shader);
@@ -169,6 +168,9 @@ private:
/// Syncs the point state to match the guest state
void SyncPointState();
+ /// Syncs the rasterizer enable state to match the guest state
+ void SyncRasterizeEnable(OpenGLState& current_state);
+
/// Syncs Color Mask
void SyncColorMask();
@@ -224,8 +226,6 @@ private:
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
-
- OGLFramebuffer clear_framebuffer;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f1b89165d..de742d11c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -8,12 +8,15 @@
#include <thread>
#include <unordered_set>
#include <boost/functional/hash.hpp>
+#include "common/alignment.h"
#include "common/assert.h"
+#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -82,28 +85,26 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
const u8* host_ptr) {
- ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+ ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
ASSERT_OR_EXECUTE(host_ptr != nullptr, {
- std::fill(program_code.begin(), program_code.end(), 0);
- return program_code;
+ std::fill(code.begin(), code.end(), 0);
+ return code;
});
- memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
- program_code.size() * sizeof(u64));
- program_code.resize(CalculateProgramSize(program_code));
- return program_code;
+ memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
+ code.resize(CalculateProgramSize(code));
+ return code;
}
/// Gets the shader type from a Maxwell program type
-constexpr GLenum GetShaderType(ProgramType program_type) {
- switch (program_type) {
- case ProgramType::VertexA:
- case ProgramType::VertexB:
+constexpr GLenum GetGLShaderType(ShaderType shader_type) {
+ switch (shader_type) {
+ case ShaderType::Vertex:
return GL_VERTEX_SHADER;
- case ProgramType::Geometry:
+ case ShaderType::Geometry:
return GL_GEOMETRY_SHADER;
- case ProgramType::Fragment:
+ case ShaderType::Fragment:
return GL_FRAGMENT_SHADER;
- case ProgramType::Compute:
+ case ShaderType::Compute:
return GL_COMPUTE_SHADER;
default:
return GL_NONE;
@@ -111,52 +112,33 @@ constexpr GLenum GetShaderType(ProgramType program_type) {
}
/// Describes primitive behavior on geometry shaders
-constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
+constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
switch (primitive_mode) {
case GL_POINTS:
- return {"points", "Points", 1};
+ return {"points", 1};
case GL_LINES:
case GL_LINE_STRIP:
- return {"lines", "Lines", 2};
+ return {"lines", 2};
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
- return {"lines_adjacency", "LinesAdj", 4};
+ return {"lines_adjacency", 4};
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
- return {"triangles", "Triangles", 3};
+ return {"triangles", 3};
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
- return {"triangles_adjacency", "TrianglesAdj", 6};
+ return {"triangles_adjacency", 6};
default:
- return {"points", "Invalid", 1};
+ return {"points", 1};
}
}
-ProgramType GetProgramType(Maxwell::ShaderProgram program) {
- switch (program) {
- case Maxwell::ShaderProgram::VertexA:
- return ProgramType::VertexA;
- case Maxwell::ShaderProgram::VertexB:
- return ProgramType::VertexB;
- case Maxwell::ShaderProgram::TesselationControl:
- return ProgramType::TessellationControl;
- case Maxwell::ShaderProgram::TesselationEval:
- return ProgramType::TessellationEval;
- case Maxwell::ShaderProgram::Geometry:
- return ProgramType::Geometry;
- case Maxwell::ShaderProgram::Fragment:
- return ProgramType::Fragment;
- }
- UNREACHABLE();
- return {};
-}
-
/// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
+u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
const ProgramCode& code_b) {
u64 unique_identifier = boost::hash_value(code);
- if (program_type == ProgramType::VertexA) {
+ if (is_a) {
// VertexA programs include two programs
boost::hash_combine(unique_identifier, boost::hash_value(code_b));
}
@@ -164,79 +146,74 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
}
/// Creates an unspecialized program from code streams
-std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir,
+std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
const std::optional<ShaderIR>& ir_b) {
- switch (program_type) {
- case ProgramType::VertexA:
- case ProgramType::VertexB:
+ switch (shader_type) {
+ case ShaderType::Vertex:
return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
- case ProgramType::Geometry:
+ case ShaderType::Geometry:
return GLShader::GenerateGeometryShader(device, ir);
- case ProgramType::Fragment:
+ case ShaderType::Fragment:
return GLShader::GenerateFragmentShader(device, ir);
- case ProgramType::Compute:
+ case ShaderType::Compute:
return GLShader::GenerateComputeShader(device, ir);
default:
- UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
+ UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
return {};
}
}
-constexpr const char* GetProgramTypeName(ProgramType program_type) {
- switch (program_type) {
- case ProgramType::VertexA:
- case ProgramType::VertexB:
+constexpr const char* GetShaderTypeName(ShaderType shader_type) {
+ switch (shader_type) {
+ case ShaderType::Vertex:
return "VS";
- case ProgramType::TessellationControl:
- return "TCS";
- case ProgramType::TessellationEval:
- return "TES";
- case ProgramType::Geometry:
+ case ShaderType::TesselationControl:
+ return "HS";
+ case ShaderType::TesselationEval:
+ return "DS";
+ case ShaderType::Geometry:
return "GS";
- case ProgramType::Fragment:
+ case ShaderType::Fragment:
return "FS";
- case ProgramType::Compute:
+ case ShaderType::Compute:
return "CS";
}
return "UNK";
}
-Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) {
+constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
switch (program_type) {
- case ProgramType::VertexA:
- case ProgramType::VertexB:
- return Tegra::Engines::ShaderType::Vertex;
- case ProgramType::TessellationControl:
- return Tegra::Engines::ShaderType::TesselationControl;
- case ProgramType::TessellationEval:
- return Tegra::Engines::ShaderType::TesselationEval;
- case ProgramType::Geometry:
- return Tegra::Engines::ShaderType::Geometry;
- case ProgramType::Fragment:
- return Tegra::Engines::ShaderType::Fragment;
- case ProgramType::Compute:
- return Tegra::Engines::ShaderType::Compute;
- }
- UNREACHABLE();
+ case Maxwell::ShaderProgram::VertexA:
+ case Maxwell::ShaderProgram::VertexB:
+ return ShaderType::Vertex;
+ case Maxwell::ShaderProgram::TesselationControl:
+ return ShaderType::TesselationControl;
+ case Maxwell::ShaderProgram::TesselationEval:
+ return ShaderType::TesselationEval;
+ case Maxwell::ShaderProgram::Geometry:
+ return ShaderType::Geometry;
+ case Maxwell::ShaderProgram::Fragment:
+ return ShaderType::Fragment;
+ }
return {};
}
-std::string GetShaderId(u64 unique_identifier, ProgramType program_type) {
- return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier);
+std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) {
+ return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
}
-Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(
- Core::System& system, ProgramType program_type) {
- if (program_type == ProgramType::Compute) {
+Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system,
+ ShaderType shader_type) {
+ if (shader_type == ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
}
}
-std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) {
- return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type),
- GetConstBufferEngineInterface(system, program_type));
+std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
+ return std::make_unique<ConstBufferLocker>(shader_type,
+ GetConstBufferEngineInterface(system, shader_type));
}
void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
@@ -253,88 +230,66 @@ void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
}
}
-CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,
- const ProgramCode& program_code, const ProgramCode& program_code_b,
- const ProgramVariant& variant, ConstBufferLocker& locker,
+CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type,
+ const ProgramCode& code, const ProgramCode& code_b,
+ ConstBufferLocker& locker, const ProgramVariant& variant,
bool hint_retrievable = false) {
- LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type));
+ LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type));
- const bool is_compute = program_type == ProgramType::Compute;
+ const bool is_compute = shader_type == ShaderType::Compute;
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker);
+ const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
std::optional<ShaderIR> ir_b;
- if (!program_code_b.empty()) {
- ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker);
+ if (!code_b.empty()) {
+ ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
}
const auto entries = GLShader::GetEntries(ir);
- auto base_bindings{variant.base_bindings};
- const auto primitive_mode{variant.primitive_mode};
- const auto texture_buffer_usage{variant.texture_buffer_usage};
-
std::string source = fmt::format(R"(// {}
#version 430 core
#extension GL_ARB_separate_shader_objects : enable
-#extension GL_ARB_shader_viewport_layer_array : enable
-#extension GL_EXT_shader_image_load_formatted : enable
-#extension GL_NV_gpu_shader5 : enable
-#extension GL_NV_shader_thread_group : enable
-#extension GL_NV_shader_thread_shuffle : enable
)",
- GetShaderId(unique_identifier, program_type));
- if (is_compute) {
- source += "#extension GL_ARB_compute_variable_group_size : require\n";
+ GetShaderId(unique_identifier, shader_type));
+ if (device.HasShaderBallot()) {
+ source += "#extension GL_ARB_shader_ballot : require\n";
}
- source += '\n';
-
- if (!is_compute) {
- source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+ if (device.HasVertexViewportLayer()) {
+ source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
}
-
- for (const auto& cbuf : entries.const_buffers) {
- source +=
- fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
+ if (device.HasImageLoadFormatted()) {
+ source += "#extension GL_EXT_shader_image_load_formatted : require\n";
}
- for (const auto& gmem : entries.global_memory_entries) {
- source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
- gmem.GetCbufOffset(), base_bindings.gmem++);
+ if (device.HasWarpIntrinsics()) {
+ source += "#extension GL_NV_gpu_shader5 : require\n"
+ "#extension GL_NV_shader_thread_group : require\n"
+ "#extension GL_NV_shader_thread_shuffle : require\n";
}
- for (const auto& sampler : entries.samplers) {
- source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
- base_bindings.sampler++);
+
+ if (shader_type == ShaderType::Geometry) {
+ const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
+ source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
+ source += fmt::format("layout ({}) in;\n", glsl_topology);
}
- for (const auto& image : entries.images) {
+ if (shader_type == ShaderType::Compute) {
+ if (variant.local_memory_size > 0) {
+ source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
+ Common::AlignUp(variant.local_memory_size, 4) / 4);
+ }
source +=
- fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
- }
+ fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
+ variant.block_x, variant.block_y, variant.block_z);
- // Transform 1D textures to texture samplers by declaring its preprocessor macros.
- for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
- if (!texture_buffer_usage.test(i)) {
- continue;
+ if (variant.shared_memory_size > 0) {
+ // shared_memory_size is described in number of words
+ source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
}
- source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
- }
- if (texture_buffer_usage.any()) {
- source += '\n';
- }
-
- if (program_type == ProgramType::Geometry) {
- const auto [glsl_topology, debug_name, max_vertices] =
- GetPrimitiveDescription(primitive_mode);
-
- source += "layout (" + std::string(glsl_topology) + ") in;\n\n";
- source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
- }
- if (program_type == ProgramType::Compute) {
- source += "layout (local_size_variable) in;\n";
}
source += '\n';
- source += GenerateGLSL(device, program_type, ir, ir_b);
+ source += GenerateGLSL(device, shader_type, ir, ir_b);
OGLShader shader;
- shader.Create(source.c_str(), GetShaderType(program_type));
+ shader.Create(source.c_str(), GetGLShaderType(shader_type));
auto program = std::make_shared<OGLProgram>();
program->Create(true, hint_retrievable, shader.handle);
@@ -357,18 +312,16 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
-CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
- GLShader::ShaderEntries entries, ProgramCode program_code,
- ProgramCode program_code_b)
- : RasterizerCacheObject{params.host_ptr}, system{params.system},
- disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
- unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries},
- program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {
+CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
+ GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
+ : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache},
+ device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier},
+ shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} {
if (!params.precompiled_variants) {
return;
}
for (const auto& pair : *params.precompiled_variants) {
- auto locker = MakeLocker(system, program_type);
+ auto locker = MakeLocker(system, shader_type);
const auto& usage = pair->first;
FillLocker(*locker, usage);
@@ -389,92 +342,83 @@ CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_t
}
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b) {
- params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
- params.unique_identifier, GetProgramType(program_type), program_code, program_code_b));
+ Maxwell::ShaderProgram program_type, ProgramCode code,
+ ProgramCode code_b) {
+ const auto shader_type = GetShaderType(program_type);
+ params.disk_cache.SaveRaw(
+ ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
- ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)));
- const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
+ ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D());
+ const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
- // if (!program_code_b.empty()) {
- // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET);
+ // if (!code_b.empty()) {
+ // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
// }
- return std::shared_ptr<CachedShader>(
- new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir),
- std::move(program_code), std::move(program_code_b)));
+ return std::shared_ptr<CachedShader>(new CachedShader(
+ params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b)));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
params.disk_cache.SaveRaw(
- ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code));
+ ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code));
- ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute);
+ ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute,
+ params.system.GPU().KeplerCompute());
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
return std::shared_ptr<CachedShader>(new CachedShader(
- params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
+ params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
}
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const UnspecializedShader& unspecialized) {
- return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type,
+ return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type,
unspecialized.entries, unspecialized.code,
unspecialized.code_b));
}
-std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
- UpdateVariant();
+GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
+ EnsureValidLockerVariant();
- const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant);
+ const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
auto& program = entry->second;
- if (is_cache_miss) {
- program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b,
- variant, *curr_variant->locker);
- disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker));
-
- LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
+ if (!is_cache_miss) {
+ return program->handle;
}
- auto base_bindings = variant.base_bindings;
- base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
- if (program_type != ProgramType::Compute) {
- base_bindings.cbuf += STAGE_RESERVED_UBOS;
- }
- base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
- base_bindings.sampler += static_cast<u32>(entries.samplers.size());
+ program = BuildShader(device, unique_identifier, shader_type, code, code_b,
+ *curr_locker_variant->locker, variant);
+ disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
- return {program->handle, base_bindings};
+ LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
+ return program->handle;
}
-void CachedShader::UpdateVariant() {
- if (curr_variant && !curr_variant->locker->IsConsistent()) {
- curr_variant = nullptr;
+bool CachedShader::EnsureValidLockerVariant() {
+ const auto previous_variant = curr_locker_variant;
+ if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
+ curr_locker_variant = nullptr;
}
- if (!curr_variant) {
+ if (!curr_locker_variant) {
for (auto& variant : locker_variants) {
if (variant->locker->IsConsistent()) {
- curr_variant = variant.get();
+ curr_locker_variant = variant.get();
}
}
}
- if (!curr_variant) {
+ if (!curr_locker_variant) {
auto& new_variant = locker_variants.emplace_back();
new_variant = std::make_unique<LockerVariant>();
- new_variant->locker = MakeLocker(system, program_type);
- curr_variant = new_variant.get();
+ new_variant->locker = MakeLocker(system, shader_type);
+ curr_locker_variant = new_variant.get();
}
+ return previous_variant == curr_locker_variant;
}
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
const ConstBufferLocker& locker) const {
- ShaderDiskCacheUsage usage;
- usage.unique_identifier = unique_identifier;
- usage.variant = variant;
- usage.keys = locker.GetKeys();
- usage.bound_samplers = locker.GetBoundSamplers();
- usage.bindless_samplers = locker.GetBindlessSamplers();
- return usage;
+ return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(),
+ locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -533,11 +477,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
if (!shader) {
- auto locker{MakeLocker(system, unspecialized.program_type)};
+ auto locker{MakeLocker(system, unspecialized.type)};
FillLocker(*locker, usage);
- shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type,
- unspecialized.code, unspecialized.code_b, usage.variant,
- *locker, true);
+
+ shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
+ unspecialized.code, unspecialized.code_b, *locker,
+ usage.variant, true);
}
std::scoped_lock lock{mutex};
@@ -640,7 +585,7 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
const auto& raw{raws[i]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash{
- GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
+ GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
if (unique_identifier != calculated_hash) {
LOG_ERROR(Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - "
@@ -651,9 +596,9 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
}
const u32 main_offset =
- raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType()));
- const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker);
+ raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+ ConstBufferLocker locker(raw.GetType());
+ const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (raw.HasProgramA()) {
@@ -662,9 +607,9 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
UnspecializedShader unspecialized;
unspecialized.entries = GLShader::GetEntries(ir);
- unspecialized.program_type = raw.GetProgramType();
- unspecialized.code = raw.GetProgramCode();
- unspecialized.code_b = raw.GetProgramCodeB();
+ unspecialized.type = raw.GetType();
+ unspecialized.code = raw.GetCode();
+ unspecialized.code_b = raw.GetCodeB();
unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
if (callback) {
@@ -697,7 +642,8 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
}
- const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b);
+ const auto unique_identifier = GetUniqueIdentifier(
+ GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, precompiled_variants, device,
@@ -725,7 +671,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
// No kernel found - create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
- const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
+ const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})};
const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{system, disk_cache, precompiled_variants, device,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6bd7c9cf1..7b1470db3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -17,6 +17,7 @@
#include <glad/glad.h>
#include "common/common_types.h"
+#include "video_core/engines/shader_type.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -47,7 +48,7 @@ using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
struct UnspecializedShader {
GLShader::ShaderEntries entries;
- ProgramType program_type;
+ Tegra::Engines::ShaderType type;
ProgramCode code;
ProgramCode code_b;
};
@@ -77,7 +78,7 @@ public:
}
std::size_t GetSizeInBytes() const override {
- return program_code.size() * sizeof(u64);
+ return code.size() * sizeof(u64);
}
/// Gets the shader entries for the shader
@@ -86,7 +87,7 @@ public:
}
/// Gets the GL program handle for the shader
- std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
+ GLuint GetHandle(const ProgramVariant& variant);
private:
struct LockerVariant {
@@ -94,11 +95,11 @@ private:
std::unordered_map<ProgramVariant, CachedProgram> programs;
};
- explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
+ explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
GLShader::ShaderEntries entries, ProgramCode program_code,
ProgramCode program_code_b);
- void UpdateVariant();
+ bool EnsureValidLockerVariant();
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
const VideoCommon::Shader::ConstBufferLocker& locker) const;
@@ -110,14 +111,14 @@ private:
VAddr cpu_addr{};
u64 unique_identifier{};
- ProgramType program_type{};
+ Tegra::Engines::ShaderType shader_type{};
GLShader::ShaderEntries entries;
- ProgramCode program_code;
- ProgramCode program_code_b;
+ ProgramCode code;
+ ProgramCode code_b;
- LockerVariant* curr_variant = nullptr;
+ LockerVariant* curr_locker_variant = nullptr;
std::vector<std::unique_ptr<LockerVariant>> locker_variants;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 92ee8459e..f9f7a97b5 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -16,6 +16,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -27,6 +28,7 @@ namespace OpenGL::GLShader {
namespace {
+using Tegra::Engines::ShaderType;
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
@@ -41,11 +43,15 @@ using namespace VideoCommon::Shader;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&;
+class ASTDecompiler;
+class ExprDecompiler;
+
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-struct TextureAoffi {};
+struct TextureOffset {};
+struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureAoffi, TextureArgument>;
+using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -223,7 +229,7 @@ private:
Type type{};
};
-constexpr const char* GetTypeString(Type type) {
+const char* GetTypeString(Type type) {
switch (type) {
case Type::Bool:
return "bool";
@@ -243,7 +249,7 @@ constexpr const char* GetTypeString(Type type) {
}
}
-constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
+const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
switch (image_type) {
case Tegra::Shader::ImageType::Texture1D:
return "1D";
@@ -331,16 +337,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
-constexpr bool IsVertexShader(ProgramType stage) {
- return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
+[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) {
+ return stage == ShaderType::Vertex;
}
-class ASTDecompiler;
-class ExprDecompiler;
-
class GLSLDecompiler final {
public:
- explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
+ explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
std::string suffix)
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
@@ -396,6 +399,7 @@ public:
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
+ DeclareImages();
DeclarePhysicalAttributeReader();
code.AddLine("void execute_{}() {{", suffix);
@@ -427,7 +431,7 @@ private:
}
void DeclareGeometry() {
- if (stage != ProgramType::Geometry) {
+ if (stage != ShaderType::Geometry) {
return;
}
@@ -510,10 +514,14 @@ private:
}
void DeclareLocalMemory() {
- // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
- // specialization time.
- const u64 local_memory_size =
- stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
+ if (stage == ShaderType::Compute) {
+ code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
+ code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
+ code.AddLine("#endif");
+ return;
+ }
+
+ const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
@@ -522,13 +530,6 @@ private:
code.AddNewLine();
}
- void DeclareSharedMemory() {
- if (stage != ProgramType::Compute) {
- return;
- }
- code.AddLine("shared uint {}[];", GetSharedMemory());
- }
-
void DeclareInternalFlags() {
for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
const auto flag_code = static_cast<InternalFlag>(flag);
@@ -578,12 +579,12 @@ private:
const u32 location{GetGenericAttributeIndex(index)};
std::string name{GetInputAttribute(index)};
- if (stage == ProgramType::Geometry) {
+ if (stage == ShaderType::Geometry) {
name = "gs_" + name + "[]";
}
std::string suffix;
- if (stage == ProgramType::Fragment) {
+ if (stage == ShaderType::Fragment) {
const auto input_mode{header.ps.GetAttributeUse(location)};
if (skip_unused && input_mode == AttributeUse::Unused) {
return;
@@ -595,7 +596,7 @@ private:
}
void DeclareOutputAttributes() {
- if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {
+ if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
DeclareOutputAttribute(ToGenericAttribute(i));
}
@@ -620,9 +621,9 @@ private:
}
void DeclareConstantBuffers() {
- for (const auto& entry : ir.GetConstantBuffers()) {
- const auto [index, size] = entry;
- code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index,
+ u32 binding = device.GetBaseBindings(stage).uniform_buffer;
+ for (const auto& [index, cbuf] : ir.GetConstantBuffers()) {
+ code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
GetConstBufferBlock(index));
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
code.AddLine("}};");
@@ -631,9 +632,8 @@ private:
}
void DeclareGlobalMemory() {
- for (const auto& gmem : ir.GetGlobalMemory()) {
- const auto& [base, usage] = gmem;
-
+ u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
+ for (const auto& [base, usage] : ir.GetGlobalMemory()) {
// Since we don't know how the shader will use the shader, hint the driver to disable as
// much optimizations as possible
std::string qualifier = "coherent volatile";
@@ -643,8 +643,8 @@ private:
qualifier += " writeonly";
}
- code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{",
- base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base));
+ code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
+ GetGlobalMemoryBlock(base));
code.AddLine(" uint {}[];", GetGlobalMemory(base));
code.AddLine("}};");
code.AddNewLine();
@@ -652,15 +652,17 @@ private:
}
void DeclareSamplers() {
- const auto& samplers = ir.GetSamplers();
- for (const auto& sampler : samplers) {
- const std::string name{GetSampler(sampler)};
- const std::string description{"layout (binding = SAMPLER_BINDING_" +
- std::to_string(sampler.GetIndex()) + ") uniform"};
+ u32 binding = device.GetBaseBindings(stage).sampler;
+ for (const auto& sampler : ir.GetSamplers()) {
+ const std::string name = GetSampler(sampler);
+ const std::string description = fmt::format("layout (binding = {}) uniform", binding++);
+
std::string sampler_type = [&]() {
+ if (sampler.IsBuffer()) {
+ return "samplerBuffer";
+ }
switch (sampler.GetType()) {
case Tegra::Shader::TextureType::Texture1D:
- // Special cased, read below.
return "sampler1D";
case Tegra::Shader::TextureType::Texture2D:
return "sampler2D";
@@ -680,21 +682,9 @@ private:
sampler_type += "Shadow";
}
- if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) {
- // 1D textures can be aliased to texture buffers, hide the declarations behind a
- // preprocessor flag and use one or the other from the GPU state. This has to be
- // done because shaders don't have enough information to determine the texture type.
- EmitIfdefIsBuffer(sampler);
- code.AddLine("{} samplerBuffer {};", description, name);
- code.AddLine("#else");
- code.AddLine("{} {} {};", description, sampler_type, name);
- code.AddLine("#endif");
- } else {
- // The other texture types (2D, 3D and cubes) don't have this issue.
- code.AddLine("{} {} {};", description, sampler_type, name);
- }
+ code.AddLine("{} {} {};", description, sampler_type, name);
}
- if (!samplers.empty()) {
+ if (!ir.GetSamplers().empty()) {
code.AddNewLine();
}
}
@@ -717,7 +707,7 @@ private:
constexpr u32 element_stride = 4;
const u32 address{generic_base + index * generic_stride + element * element_stride};
- const bool declared = stage != ProgramType::Fragment ||
+ const bool declared = stage != ShaderType::Fragment ||
header.ps.GetAttributeUse(index) != AttributeUse::Unused;
const std::string value =
declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
@@ -734,8 +724,8 @@ private:
}
void DeclareImages() {
- const auto& images{ir.GetImages()};
- for (const auto& image : images) {
+ u32 binding = device.GetBaseBindings(stage).image;
+ for (const auto& image : ir.GetImages()) {
std::string qualifier = "coherent volatile";
if (image.IsRead() && !image.IsWritten()) {
qualifier += " readonly";
@@ -745,10 +735,10 @@ private:
const char* format = image.IsAtomic() ? "r32ui, " : "";
const char* type_declaration = GetImageTypeDeclaration(image.GetType());
- code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format,
- image.GetIndex(), qualifier, type_declaration, GetImage(image));
+ code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
+ qualifier, type_declaration, GetImage(image));
}
- if (!images.empty()) {
+ if (!ir.GetImages().empty()) {
code.AddNewLine();
}
}
@@ -761,6 +751,9 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
+ if (const auto amend_index = operation->GetAmendIndex()) {
+ Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
+ }
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -809,7 +802,7 @@ private:
}
if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
+ UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
return {fmt::format("ReadPhysicalAttribute({})",
@@ -868,18 +861,13 @@ private:
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- if (stage == ProgramType::Compute) {
- LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
- }
return {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
}
if (const auto smem = std::get_if<SmemNode>(&*node)) {
- return {
- fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
- Type::Uint};
+ return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
}
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -887,6 +875,9 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ if (const auto amend_index = conditional->GetAmendIndex()) {
+ Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
+ }
// It's invalid to call conditional on nested nodes, use an operation instead
code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
++code.scope;
@@ -909,7 +900,7 @@ private:
Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
const auto GeometryPass = [&](std::string_view name) {
- if (stage == ProgramType::Geometry && buffer) {
+ if (stage == ShaderType::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
@@ -921,11 +912,11 @@ private:
switch (attribute) {
case Attribute::Index::Position:
switch (stage) {
- case ProgramType::Geometry:
+ case ShaderType::Geometry:
return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
GetSwizzle(element)),
Type::Float};
- case ProgramType::Fragment:
+ case ShaderType::Fragment:
return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
Type::Float};
default:
@@ -959,7 +950,7 @@ private:
return {"0", Type::Int};
case Attribute::Index::FrontFacing:
// TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ProgramType::Fragment);
+ ASSERT(stage == ShaderType::Fragment);
switch (element) {
case 3:
return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
@@ -985,7 +976,7 @@ private:
// be found in fragment shaders, so we disable precise there. There are vertex shaders that
// also fail to build but nobody seems to care about those.
// Note: Only bugged drivers will skip precise.
- const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment;
+ const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
std::string temporary = code.GenerateTemporary();
code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
@@ -1092,7 +1083,7 @@ private:
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
- const std::vector<TextureIR>& extras) {
+ const std::vector<TextureIR>& extras, bool separate_dc = false) {
constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -1105,9 +1096,12 @@ private:
std::string expr = "texture" + function_suffix;
if (!meta->aoffi.empty()) {
expr += "Offset";
+ } else if (!meta->ptp.empty()) {
+ expr += "Offsets";
}
expr += '(' + GetSampler(meta->sampler) + ", ";
- expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
+ expr += coord_constructors.at(count + (has_array ? 1 : 0) +
+ (has_shadow && !separate_dc ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
expr += Visit(operation[i]).AsFloat();
@@ -1120,15 +1114,26 @@ private:
expr += ", float(" + Visit(meta->array).AsInt() + ')';
}
if (has_shadow) {
- expr += ", " + Visit(meta->depth_compare).AsFloat();
+ if (separate_dc) {
+ expr += "), " + Visit(meta->depth_compare).AsFloat();
+ } else {
+ expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
+ }
+ } else {
+ expr += ')';
}
- expr += ')';
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
- } else if (std::holds_alternative<TextureAoffi>(variant)) {
- expr += GenerateTextureAoffi(meta->aoffi);
+ } else if (std::holds_alternative<TextureOffset>(variant)) {
+ if (!meta->aoffi.empty()) {
+ expr += GenerateTextureAoffi(meta->aoffi);
+ } else if (!meta->ptp.empty()) {
+ expr += GenerateTexturePtp(meta->ptp);
+ }
+ } else if (std::holds_alternative<TextureDerivates>(variant)) {
+ expr += GenerateTextureDerivates(meta->derivates);
} else {
UNREACHABLE();
}
@@ -1167,6 +1172,20 @@ private:
return expr;
}
+ std::string ReadTextureOffset(const Node& value) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
+ // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
+ // to be constant by the standard).
+ return std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else if (device.HasVariableAoffi()) {
+ // Avoid using variable AOFFI on unsupported devices.
+ return Visit(value).AsInt();
+ } else {
+ // Insert 0 on devices not supporting variable AOFFI.
+ return "0";
+ }
+ }
+
std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
if (aoffi.empty()) {
return {};
@@ -1177,18 +1196,7 @@ private:
expr += '(';
for (std::size_t index = 0; index < aoffi.size(); ++index) {
- const auto operand{aoffi.at(index)};
- if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
- // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
- // to be constant by the standard).
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else if (device.HasVariableAoffi()) {
- // Avoid using variable AOFFI on unsupported devices.
- expr += Visit(operand).AsInt();
- } else {
- // Insert 0 on devices not supporting variable AOFFI.
- expr += '0';
- }
+ expr += ReadTextureOffset(aoffi.at(index));
if (index + 1 < aoffi.size()) {
expr += ", ";
}
@@ -1198,6 +1206,50 @@ private:
return expr;
}
+ std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
+ static constexpr std::size_t num_vectors = 4;
+ ASSERT(ptp.size() == num_vectors * 2);
+
+ std::string expr = ", ivec2[](";
+ for (std::size_t vector = 0; vector < num_vectors; ++vector) {
+ const bool has_next = vector + 1 < num_vectors;
+ expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
+ ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
+ }
+ expr += ')';
+ return expr;
+ }
+
+ std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
+ if (derivates.empty()) {
+ return {};
+ }
+ constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
+ std::string expr = ", ";
+ const std::size_t components = derivates.size() / 2;
+ std::string dx = coord_constructors.at(components - 1);
+ std::string dy = coord_constructors.at(components - 1);
+ dx += '(';
+ dy += '(';
+
+ for (std::size_t index = 0; index < components; ++index) {
+ const auto operand_x{derivates.at(index * 2)};
+ const auto operand_y{derivates.at(index * 2 + 1)};
+ dx += Visit(operand_x).AsFloat();
+ dy += Visit(operand_y).AsFloat();
+
+ if (index + 1 < components) {
+ dx += ", ";
+ dy += ", ";
+ }
+ }
+ dx += ')';
+ dy += ')';
+ expr += dx + ", " + dy;
+
+ return expr;
+ }
+
std::string BuildIntegerCoordinates(Operation operation) {
constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const std::size_t coords_count{operation.GetOperandsCount()};
@@ -1247,17 +1299,12 @@ private:
}
target = std::move(*output);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- if (stage == ProgramType::Compute) {
- LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
- }
target = {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- ASSERT(stage == ProgramType::Compute);
- target = {
- fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
- Type::Uint};
+ ASSERT(stage == ShaderType::Compute);
+ target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const std::string real = Visit(gmem->GetRealAddress()).AsUint();
const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
@@ -1379,6 +1426,26 @@ private:
return GenerateUnary(operation, "float", Type::Float, type);
}
+ Expression FSwizzleAdd(Operation operation) {
+ const std::string op_a = VisitOperand(operation, 0).AsFloat();
+ const std::string op_b = VisitOperand(operation, 1).AsFloat();
+
+ if (!device.HasShaderBallot()) {
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {fmt::format("{} + {}", op_a, op_b), Type::Float};
+ }
+
+ const std::string instr_mask = VisitOperand(operation, 2).AsUint();
+ const std::string mask = code.GenerateTemporary();
+ code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
+ instr_mask);
+
+ const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
+ const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
+ return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
+ Type::Float};
+ }
+
Expression ICastFloat(Operation operation) {
return GenerateUnary(operation, "int", Type::Int, Type::Float);
}
@@ -1452,6 +1519,11 @@ private:
return GenerateUnary(operation, "bitCount", type, type);
}
+ template <Type type>
+ Expression BitMSB(Operation operation) {
+ return GenerateUnary(operation, "findMSB", type, type);
+ }
+
Expression HNegate(Operation operation) {
const auto GetNegate = [&](std::size_t index) {
return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
@@ -1471,7 +1543,8 @@ private:
}
Expression HCastFloat(Operation operation) {
- return {fmt::format("vec2({})", VisitOperand(operation, 0).AsFloat()), Type::HalfFloat};
+ return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
+ Type::HalfFloat};
}
Expression HUnpack(Operation operation) {
@@ -1645,7 +1718,7 @@ private:
ASSERT(meta);
std::string expr = GenerateTexture(
- operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
+ operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1657,7 +1730,7 @@ private:
ASSERT(meta);
std::string expr = GenerateTexture(
- operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
+ operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1665,13 +1738,18 @@ private:
}
Expression TextureGather(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- return {GenerateTexture(operation, "Gather",
- {TextureArgument{type, meta->component}, TextureAoffi{}}) +
- GetSwizzle(meta->element),
+ const auto type = meta.sampler.IsShadow() ? Type::Float : Type::Int;
+ const bool separate_dc = meta.sampler.IsShadow();
+
+ std::vector<TextureIR> ir;
+ if (meta.sampler.IsShadow()) {
+ ir = {TextureOffset{}};
+ } else {
+ ir = {TextureOffset{}, TextureArgument{type, meta.component}};
+ }
+ return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
Type::Float};
}
@@ -1729,27 +1807,23 @@ private:
expr += ", ";
}
- // Store a copy of the expression without the lod to be used with texture buffers
- std::string expr_buffer = expr;
-
- if (meta->lod) {
+ if (meta->lod && !meta->sampler.IsBuffer()) {
expr += ", ";
expr += Visit(meta->lod).AsInt();
}
expr += ')';
expr += GetSwizzle(meta->element);
- expr_buffer += ')';
- expr_buffer += GetSwizzle(meta->element);
+ return {std::move(expr), Type::Float};
+ }
- const std::string tmp{code.GenerateTemporary()};
- EmitIfdefIsBuffer(meta->sampler);
- code.AddLine("float {} = {};", tmp, expr_buffer);
- code.AddLine("#else");
- code.AddLine("float {} = {};", tmp, expr);
- code.AddLine("#endif");
+ Expression TextureGradient(Operation operation) {
+ const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+ ASSERT(meta);
- return {tmp, Type::Float};
+ std::string expr =
+ GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
+ return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
}
Expression ImageLoad(Operation operation) {
@@ -1817,7 +1891,7 @@ private:
}
void PreExit() {
- if (stage != ProgramType::Fragment) {
+ if (stage != ShaderType::Fragment) {
return;
}
const auto& used_registers = ir.GetRegisters();
@@ -1870,27 +1944,25 @@ private:
}
Expression EmitVertex(Operation operation) {
- ASSERT_MSG(stage == ProgramType::Geometry,
+ ASSERT_MSG(stage == ShaderType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");
-
- // If a geometry shader is attached, it will always flip (it's the last stage before
- // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
- code.AddLine("gl_Position.xy *= viewport_flip.xy;");
code.AddLine("EmitVertex();");
return {};
}
Expression EndPrimitive(Operation operation) {
- ASSERT_MSG(stage == ProgramType::Geometry,
+ ASSERT_MSG(stage == ShaderType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");
-
code.AddLine("EndPrimitive();");
return {};
}
+ Expression InvocationId(Operation operation) {
+ return {"gl_InvocationID", Type::Int};
+ }
+
Expression YNegate(Operation operation) {
- // Config pack's third value is Y_NEGATE's state.
- return {"config_pack[2]", Type::Uint};
+ return {"y_direction", Type::Float};
}
template <u32 element>
@@ -1942,34 +2014,29 @@ private:
return Vote(operation, "allThreadsEqualNV");
}
- template <const std::string_view& func>
- Expression Shuffle(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsFloat();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
- // On a "single-thread" device we are either on the same thread or out of bounds. Both
- // cases return the passed value.
- return {value, Type::Float};
+ Expression ThreadId(Operation operation) {
+ if (!device.HasShaderBallot()) {
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {"0U", Type::Uint};
}
-
- const std::string index = VisitOperand(operation, 1).AsUint();
- const std::string width = VisitOperand(operation, 2).AsUint();
- return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+ return {"gl_SubGroupInvocationARB", Type::Uint};
}
- template <const std::string_view& func>
- Expression InRangeShuffle(Operation operation) {
- const std::string index = VisitOperand(operation, 0).AsUint();
- const std::string width = VisitOperand(operation, 1).AsUint();
- if (!device.HasWarpIntrinsics()) {
- // On a "single-thread" device we are only in bounds when the requested index is 0.
- return {fmt::format("({} == 0U)", index), Type::Bool};
+ Expression ShuffleIndexed(Operation operation) {
+ std::string value = VisitOperand(operation, 0).AsFloat();
+
+ if (!device.HasShaderBallot()) {
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {std::move(value), Type::Float};
}
- const std::string in_range = code.GenerateTemporary();
- code.AddLine("bool {};", in_range);
- code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
- return {in_range, Type::Bool};
+ const std::string index = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
+ }
+
+ Expression MemoryBarrierGL(Operation) {
+ code.AddLine("memoryBarrier();");
+ return {};
}
struct Func final {
@@ -1981,11 +2048,6 @@ private:
static constexpr std::string_view Or = "Or";
static constexpr std::string_view Xor = "Xor";
static constexpr std::string_view Exchange = "Exchange";
-
- static constexpr std::string_view ShuffleIndexed = "shuffleNV";
- static constexpr std::string_view ShuffleUp = "shuffleUpNV";
- static constexpr std::string_view ShuffleDown = "shuffleDownNV";
- static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
};
static constexpr std::array operation_decompilers = {
@@ -2016,6 +2078,7 @@ private:
&GLSLDecompiler::FTrunc,
&GLSLDecompiler::FCastInteger<Type::Int>,
&GLSLDecompiler::FCastInteger<Type::Uint>,
+ &GLSLDecompiler::FSwizzleAdd,
&GLSLDecompiler::Add<Type::Int>,
&GLSLDecompiler::Mul<Type::Int>,
@@ -2037,6 +2100,7 @@ private:
&GLSLDecompiler::BitfieldInsert<Type::Int>,
&GLSLDecompiler::BitfieldExtract<Type::Int>,
&GLSLDecompiler::BitCount<Type::Int>,
+ &GLSLDecompiler::BitMSB<Type::Int>,
&GLSLDecompiler::Add<Type::Uint>,
&GLSLDecompiler::Mul<Type::Uint>,
@@ -2055,6 +2119,7 @@ private:
&GLSLDecompiler::BitfieldInsert<Type::Uint>,
&GLSLDecompiler::BitfieldExtract<Type::Uint>,
&GLSLDecompiler::BitCount<Type::Uint>,
+ &GLSLDecompiler::BitMSB<Type::Uint>,
&GLSLDecompiler::Add<Type::HalfFloat>,
&GLSLDecompiler::Mul<Type::HalfFloat>,
@@ -2118,6 +2183,7 @@ private:
&GLSLDecompiler::TextureQueryDimensions,
&GLSLDecompiler::TextureQueryLod,
&GLSLDecompiler::TexelFetch,
+ &GLSLDecompiler::TextureGradient,
&GLSLDecompiler::ImageLoad,
&GLSLDecompiler::ImageStore,
@@ -2138,6 +2204,7 @@ private:
&GLSLDecompiler::EmitVertex,
&GLSLDecompiler::EndPrimitive,
+ &GLSLDecompiler::InvocationId,
&GLSLDecompiler::YNegate,
&GLSLDecompiler::LocalInvocationId<0>,
&GLSLDecompiler::LocalInvocationId<1>,
@@ -2151,15 +2218,10 @@ private:
&GLSLDecompiler::VoteAny,
&GLSLDecompiler::VoteEqual,
- &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
- &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
- &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
- &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
+ &GLSLDecompiler::ThreadId,
+ &GLSLDecompiler::ShuffleIndexed,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
+ &GLSLDecompiler::MemoryBarrierGL,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2200,10 +2262,6 @@ private:
return "lmem_" + suffix;
}
- std::string GetSharedMemory() const {
- return fmt::format("smem_{}", suffix);
- }
-
std::string GetInternalFlag(InternalFlag flag) const {
constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
"overflow_flag"};
@@ -2221,10 +2279,6 @@ private:
return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
}
- void EmitIfdefIsBuffer(const Sampler& sampler) {
- code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
- }
-
std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
return fmt::format("{}_{}_{}", name, index, suffix);
}
@@ -2243,7 +2297,7 @@ private:
const Device& device;
const ShaderIR& ir;
- const ProgramType stage;
+ const ShaderType stage;
const std::string suffix;
const Header header;
@@ -2492,10 +2546,13 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
bvec2 is_nan2 = isnan(pair2);
return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
}
+
+const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
+const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
)";
}
-std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
+std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
const std::string& suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
decompiler.Decompile();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index b1e75e6cc..7876f48d6 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -10,6 +10,7 @@
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -17,20 +18,8 @@ class ShaderIR;
}
namespace OpenGL {
-
class Device;
-
-enum class ProgramType : u32 {
- VertexA = 0,
- VertexB = 1,
- TessellationControl = 2,
- TessellationEval = 3,
- Geometry = 4,
- Fragment = 5,
- Compute = 6
-};
-
-} // namespace OpenGL
+}
namespace OpenGL::GLShader {
@@ -94,6 +83,6 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
std::string GetCommonDeclarations();
std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- ProgramType stage, const std::string& suffix);
+ Tegra::Engines::ShaderType stage, const std::string& suffix);
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 184a565e6..cf874a09a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cstring>
+
#include <fmt/format.h>
#include "common/assert.h"
@@ -12,50 +13,50 @@
#include "common/logging/log.h"
#include "common/scm_rev.h"
#include "common/zstd_compression.h"
-
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
-
+#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
namespace OpenGL {
+using Tegra::Engines::ShaderType;
using VideoCommon::Shader::BindlessSamplerMap;
using VideoCommon::Shader::BoundSamplerMap;
using VideoCommon::Shader::KeyMap;
namespace {
+using ShaderCacheVersionHash = std::array<u8, 64>;
+
+enum class TransferableEntryKind : u32 {
+ Raw,
+ Usage,
+};
+
struct ConstBufferKey {
- u32 cbuf;
- u32 offset;
- u32 value;
+ u32 cbuf{};
+ u32 offset{};
+ u32 value{};
};
struct BoundSamplerKey {
- u32 offset;
- Tegra::Engines::SamplerDescriptor sampler;
+ u32 offset{};
+ Tegra::Engines::SamplerDescriptor sampler{};
};
struct BindlessSamplerKey {
- u32 cbuf;
- u32 offset;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-using ShaderCacheVersionHash = std::array<u8, 64>;
-
-enum class TransferableEntryKind : u32 {
- Raw,
- Usage,
+ u32 cbuf{};
+ u32 offset{};
+ Tegra::Engines::SamplerDescriptor sampler{};
};
-constexpr u32 NativeVersion = 5;
+constexpr u32 NativeVersion = 11;
// Making sure sizes doesn't change by accident
-static_assert(sizeof(BaseBindings) == 16);
+static_assert(sizeof(ProgramVariant) == 20);
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -66,10 +67,10 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
} // Anonymous namespace
-ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
- ProgramCode program_code, ProgramCode program_code_b)
- : unique_identifier{unique_identifier}, program_type{program_type},
- program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
+ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code,
+ ProgramCode code_b)
+ : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
+ code_b)} {}
ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
@@ -77,42 +78,39 @@ ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
- file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
+ file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
return false;
}
- u32 program_code_size{};
- u32 program_code_size_b{};
- if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) {
+ u32 code_size{};
+ u32 code_size_b{};
+ if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
+ file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
return false;
}
- program_code.resize(program_code_size);
- program_code_b.resize(program_code_size_b);
+ code.resize(code_size);
+ code_b.resize(code_size_b);
- if (file.ReadArray(program_code.data(), program_code_size) != program_code_size)
+ if (file.ReadArray(code.data(), code_size) != code_size)
return false;
- if (HasProgramA() &&
- file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
+ if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
return false;
}
return true;
}
bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
- if (file.WriteObject(unique_identifier) != 1 ||
- file.WriteObject(static_cast<u32>(program_type)) != 1 ||
- file.WriteObject(static_cast<u32>(program_code.size())) != 1 ||
- file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) {
+ if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 ||
+ file.WriteObject(static_cast<u32>(code.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
return false;
}
- if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size())
+ if (file.WriteArray(code.data(), code.size()) != code.size())
return false;
- if (HasProgramA() &&
- file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) {
+ if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
return false;
}
return true;
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index db23ada93..69a2fbdda 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -4,7 +4,6 @@
#pragma once
-#include <bitset>
#include <optional>
#include <string>
#include <tuple>
@@ -19,6 +18,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
+#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/const_buffer_locker.h"
@@ -37,42 +37,42 @@ struct ShaderDiskCacheDump;
using ProgramCode = std::vector<u64>;
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
-using TextureBufferUsage = std::bitset<64>;
-
-/// Allocated bindings used by an OpenGL shader program
-struct BaseBindings {
- u32 cbuf{};
- u32 gmem{};
- u32 sampler{};
- u32 image{};
-
- bool operator==(const BaseBindings& rhs) const {
- return std::tie(cbuf, gmem, sampler, image) ==
- std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
- }
- bool operator!=(const BaseBindings& rhs) const {
- return !operator==(rhs);
- }
-};
-static_assert(std::is_trivially_copyable_v<BaseBindings>);
+/// Describes the different variants a program can be compiled with.
+struct ProgramVariant final {
+ ProgramVariant() = default;
+
+ /// Graphics constructor.
+ explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
+ : primitive_mode{primitive_mode} {}
+
+ /// Compute constructor.
+ explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
+ u32 local_memory_size) noexcept
+ : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
+ shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
-/// Describes the different variants a single program can be compiled.
-struct ProgramVariant {
- BaseBindings base_bindings;
+ // Graphics specific parameters.
GLenum primitive_mode{};
- TextureBufferUsage texture_buffer_usage{};
- bool operator==(const ProgramVariant& rhs) const {
- return std::tie(base_bindings, primitive_mode, texture_buffer_usage) ==
- std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage);
+ // Compute specific parameters.
+ u32 block_x{};
+ u16 block_y{};
+ u16 block_z{};
+ u32 shared_memory_size{};
+ u32 local_memory_size{};
+
+ bool operator==(const ProgramVariant& rhs) const noexcept {
+ return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
+ local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
+ rhs.block_z, rhs.shared_memory_size,
+ rhs.local_memory_size);
}
- bool operator!=(const ProgramVariant& rhs) const {
+ bool operator!=(const ProgramVariant& rhs) const noexcept {
return !operator==(rhs);
}
};
-
static_assert(std::is_trivially_copyable_v<ProgramVariant>);
/// Describes how a shader is used.
@@ -99,21 +99,14 @@ struct ShaderDiskCacheUsage {
namespace std {
template <>
-struct hash<OpenGL::BaseBindings> {
- std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
- return static_cast<std::size_t>(bindings.cbuf) ^
- (static_cast<std::size_t>(bindings.gmem) << 8) ^
- (static_cast<std::size_t>(bindings.sampler) << 16) ^
- (static_cast<std::size_t>(bindings.image) << 24);
- }
-};
-
-template <>
struct hash<OpenGL::ProgramVariant> {
std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
- return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^
- std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^
- (static_cast<std::size_t>(variant.primitive_mode) << 6);
+ return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
+ static_cast<std::size_t>(variant.block_x) ^
+ (static_cast<std::size_t>(variant.block_y) << 32) ^
+ (static_cast<std::size_t>(variant.block_z) << 48) ^
+ (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
+ (static_cast<std::size_t>(variant.local_memory_size) << 36);
}
};
@@ -121,7 +114,7 @@ template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
return static_cast<std::size_t>(usage.unique_identifier) ^
- std::hash<OpenGL::ProgramVariant>()(usage.variant);
+ std::hash<OpenGL::ProgramVariant>{}(usage.variant);
}
};
@@ -132,8 +125,8 @@ namespace OpenGL {
/// Describes a shader how it's used by the guest GPU
class ShaderDiskCacheRaw {
public:
- explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
- ProgramCode program_code, ProgramCode program_code_b = {});
+ explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type,
+ ProgramCode code, ProgramCode code_b = {});
ShaderDiskCacheRaw();
~ShaderDiskCacheRaw();
@@ -146,27 +139,26 @@ public:
}
bool HasProgramA() const {
- return program_type == ProgramType::VertexA;
+ return !code.empty() && !code_b.empty();
}
- ProgramType GetProgramType() const {
- return program_type;
+ Tegra::Engines::ShaderType GetType() const {
+ return type;
}
- const ProgramCode& GetProgramCode() const {
- return program_code;
+ const ProgramCode& GetCode() const {
+ return code;
}
- const ProgramCode& GetProgramCodeB() const {
- return program_code_b;
+ const ProgramCode& GetCodeB() const {
+ return code_b;
}
private:
u64 unique_identifier{};
- ProgramType program_type{};
-
- ProgramCode program_code;
- ProgramCode program_code_b;
+ Tegra::Engines::ShaderType type{};
+ ProgramCode code;
+ ProgramCode code_b;
};
/// Contains an OpenGL dumped binary program
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 0e22eede9..34946fb47 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -2,8 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <string>
+
#include <fmt/format.h>
+
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/shader_ir.h"
@@ -11,6 +16,7 @@
namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D;
+using Tegra::Engines::ShaderType;
using VideoCommon::Shader::CompileDepth;
using VideoCommon::Shader::CompilerSettings;
using VideoCommon::Shader::ProgramCode;
@@ -18,53 +24,40 @@ using VideoCommon::Shader::ShaderIR;
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
std::string out = GetCommonDeclarations();
- out += R"(
-layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
-};
-
-)";
- const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB;
- out += Decompile(device, ir, stage, "vertex");
+ out += fmt::format(R"(
+layout (std140, binding = {}) uniform vs_config {{
+ float y_direction;
+}};
+
+)",
+ EmulationUniformBlockBinding);
+ out += Decompile(device, ir, ShaderType::Vertex, "vertex");
if (ir_b) {
- out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b");
+ out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
}
out += R"(
void main() {
+ gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
execute_vertex();
)";
-
if (ir_b) {
out += " execute_vertex_b();";
}
-
- out += R"(
-
- // Set Position Y direction
- gl_Position.y *= utof(config_pack[2]);
- // Check if the flip stage is VertexB
- // Config pack's second value is flip_stage
- if (config_pack[1] == 1) {
- // Viewport can be flipped, which is unsupported by glViewport
- gl_Position.xy *= viewport_flip.xy;
- }
-}
-)";
+ out += "}\n";
return out;
}
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
- out += R"(
-layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
-};
+ out += fmt::format(R"(
+layout (std140, binding = {}) uniform gs_config {{
+ float y_direction;
+}};
-)";
- out += Decompile(device, ir, ProgramType::Geometry, "geometry");
+)",
+ EmulationUniformBlockBinding);
+ out += Decompile(device, ir, ShaderType::Geometry, "geometry");
out += R"(
void main() {
@@ -76,7 +69,7 @@ void main() {
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
- out += R"(
+ out += fmt::format(R"(
layout (location = 0) out vec4 FragColor0;
layout (location = 1) out vec4 FragColor1;
layout (location = 2) out vec4 FragColor2;
@@ -86,13 +79,13 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
-layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
-};
+layout (std140, binding = {}) uniform fs_config {{
+ float y_direction;
+}};
-)";
- out += Decompile(device, ir, ProgramType::Fragment, "fragment");
+)",
+ EmulationUniformBlockBinding);
+ out += Decompile(device, ir, ShaderType::Fragment, "fragment");
out += R"(
void main() {
@@ -104,7 +97,7 @@ void main() {
std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
- out += Decompile(device, ir, ProgramType::Compute, "compute");
+ out += Decompile(device, ir, ShaderType::Compute, "compute");
out += R"(
void main() {
execute_compute();
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index b05f90f20..75d3fac04 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -40,27 +40,11 @@ void ProgramManager::UpdatePipeline() {
old_state = current_state;
}
-void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
- const auto& state = maxwell.state;
-
- // TODO(bunnei): Support more than one viewport
- viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
- viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
-
- instance_id = state.current_instance;
-
- // Assign in which stage the position has to be flipped
- // (the last stage before the fragment shader).
- constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
- if (maxwell.regs.shader_config[geometry_index].enable) {
- flip_stage = geometry_index;
- } else {
- flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
- }
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
- y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
+ y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 6961e702a..478c165ce 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -18,17 +18,12 @@ namespace OpenGL::GLShader {
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
/// Not following that rule will cause problems on some AMD drivers.
-struct MaxwellUniformData {
- void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
-
- alignas(16) GLvec4 viewport_flip;
- struct alignas(16) {
- GLuint instance_id;
- GLuint flip_stage;
- GLfloat y_direction;
- };
+struct alignas(16) MaxwellUniformData {
+ void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
+
+ GLfloat y_direction;
};
-static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
@@ -55,6 +50,10 @@ public:
current_state.geometry_shader = 0;
}
+ void UseTrivialFragmentShader() {
+ current_state.fragment_shader = 0;
+ }
+
private:
struct PipelineState {
bool operator==(const PipelineState& rhs) const {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index f25148362..df2e2395a 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -182,6 +182,10 @@ void OpenGLState::ApplyCulling() {
}
}
+void OpenGLState::ApplyRasterizerDiscard() {
+ Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
+}
+
void OpenGLState::ApplyColorMask() {
if (!dirty.color_mask) {
return;
@@ -410,15 +414,32 @@ void OpenGLState::ApplyAlphaTest() {
}
}
+void OpenGLState::ApplyClipControl() {
+ if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
+ std::tie(clip_control.origin, clip_control.depth_mode))) {
+ glClipControl(clip_control.origin, clip_control.depth_mode);
+ }
+}
+
void OpenGLState::ApplyTextures() {
- if (const auto update = UpdateArray(cur_state.textures, textures)) {
- glBindTextures(update->first, update->second, textures.data() + update->first);
+ const std::size_t size = std::size(textures);
+ for (std::size_t i = 0; i < size; ++i) {
+ if (UpdateValue(cur_state.textures[i], textures[i])) {
+ // BindTextureUnit doesn't support binding null textures, skip those binds.
+ // TODO(Rodrigo): Stop using null textures
+ if (textures[i] != 0) {
+ glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
+ }
+ }
}
}
void OpenGLState::ApplySamplers() {
- if (const auto update = UpdateArray(cur_state.samplers, samplers)) {
- glBindSamplers(update->first, update->second, samplers.data() + update->first);
+ const std::size_t size = std::size(samplers);
+ for (std::size_t i = 0; i < size; ++i) {
+ if (UpdateValue(cur_state.samplers[i], samplers[i])) {
+ glBindSampler(static_cast<GLuint>(i), samplers[i]);
+ }
}
}
@@ -438,6 +459,7 @@ void OpenGLState::Apply() {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
+ ApplyRasterizerDiscard();
ApplyColorMask();
ApplyDepthClamp();
ApplyViewport();
@@ -453,6 +475,7 @@ void OpenGLState::Apply() {
ApplyImages();
ApplyPolygonOffset();
ApplyAlphaTest();
+ ApplyClipControl();
}
void OpenGLState::EmulateViewportWithScissor() {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index cca25206b..fb180f302 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -48,6 +48,8 @@ public:
GLuint index = 0;
} primitive_restart; // GL_PRIMITIVE_RESTART
+ bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
+
struct ColorMask {
GLboolean red_enabled = GL_TRUE;
GLboolean green_enabled = GL_TRUE;
@@ -56,6 +58,7 @@ public:
};
std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_mask; // GL_COLOR_WRITEMASK
+
struct {
bool test_enabled = false; // GL_STENCIL_TEST
struct {
@@ -96,9 +99,11 @@ public:
GLenum operation = GL_COPY;
} logic_op;
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {};
+ static constexpr std::size_t NumSamplers = 32 * 5;
+ static constexpr std::size_t NumImages = 8 * 5;
+ std::array<GLuint, NumSamplers> textures = {};
+ std::array<GLuint, NumSamplers> samplers = {};
+ std::array<GLuint, NumImages> images = {};
struct {
GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
@@ -146,6 +151,11 @@ public:
std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
+ struct {
+ GLenum origin = GL_LOWER_LEFT;
+ GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
+ } clip_control;
+
OpenGLState();
/// Get the currently active OpenGL state
@@ -167,6 +177,7 @@ public:
void ApplyMultisample();
void ApplySRgb();
void ApplyCulling();
+ void ApplyRasterizerDiscard();
void ApplyColorMask();
void ApplyDepth();
void ApplyPrimitiveRestart();
@@ -182,6 +193,7 @@ public:
void ApplyDepthClamp();
void ApplyPolygonOffset();
void ApplyAlphaTest();
+ void ApplyClipControl();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 55b3e58b2..b790b0ef4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -23,7 +23,6 @@ namespace OpenGL {
using Tegra::Texture::SwizzleSource;
using VideoCore::MortonSwizzleMode;
-using VideoCore::Surface::ComponentType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceCompression;
using VideoCore::Surface::SurfaceTarget;
@@ -40,114 +39,95 @@ struct FormatTuple {
GLint internal_format;
GLenum format;
GLenum type;
- ComponentType component_type;
bool compressed;
};
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
- {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
- false}, // A2B10G10R10U
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
- false}, // R11FG11FB10F
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT1
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT23
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT45
- {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
- {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXN2UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // BC7U
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
- true}, // BC6H_UF16
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
- true}, // BC6H_SF16
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
- {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
- {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
- {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
- {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
- {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
- {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
- {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
- {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
- {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
- {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
- false}, // RGBA8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
- {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
- {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBX16F
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
- {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
+ {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
+ {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
+ {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
+ {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F
+ {GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F
+ {GL_R32F, GL_RED, GL_FLOAT, false}, // R32F
+ {GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U
+ {GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I
+ {GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S
+ {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
+ {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
+ {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
+ {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8
// Compressed sRGB formats
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT1_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT23_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT45_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // BC7U_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, ComponentType::UNorm, false}, // R4G4B4A4U
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, ComponentType::Float, false}, // E5B9G9R9F
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F
// Depth formats
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
- false}, // Z16
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16
// DepthStencil formats
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
- false}, // Z24S8
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
- false}, // S8Z24
- {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
- ComponentType::Float, false}, // Z32FS8
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24
+ {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8
}};
-const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
+const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
return format;
@@ -249,7 +229,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
: VideoCommon::SurfaceBase<View>(gpu_addr, params) {
- const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
+ const auto& tuple{GetFormatTuple(params.pixel_format)};
internal_format = tuple.internal_format;
format = tuple.format;
type = tuple.type;
@@ -451,8 +431,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
texture_view.Create();
const GLuint handle{texture_view.handle};
- const FormatTuple& tuple{
- GetFormatTuple(owner_params.pixel_format, owner_params.component_type)};
+ const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)};
glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
params.num_levels, params.base_layer, params.num_layers);
@@ -509,6 +488,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
OpenGLState state;
state.draw.read_framebuffer = src_framebuffer.handle;
state.draw.draw_framebuffer = dst_framebuffer.handle;
+ state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
state.AllDirty();
state.Apply();
@@ -562,8 +542,8 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
const auto& dst_params = dst_surface->GetSurfaceParams();
UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
- const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
- const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
+ const auto source_format = GetFormatTuple(src_params.pixel_format);
+ const auto dest_format = GetFormatTuple(dst_params.pixel_format);
const std::size_t source_size = src_surface->GetHostSizeInBytes();
const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 9ed738171..ea4f35663 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -120,6 +120,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_POINTS;
case Maxwell::PrimitiveTopology::Lines:
return GL_LINES;
+ case Maxwell::PrimitiveTopology::LineLoop:
+ return GL_LINE_LOOP;
case Maxwell::PrimitiveTopology::LineStrip:
return GL_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
@@ -130,11 +132,23 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::Quads:
return GL_QUADS;
- default:
- LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
- UNREACHABLE();
- return {};
+ case Maxwell::PrimitiveTopology::QuadStrip:
+ return GL_QUAD_STRIP;
+ case Maxwell::PrimitiveTopology::Polygon:
+ return GL_POLYGON;
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ return GL_LINES_ADJACENCY;
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ return GL_LINE_STRIP_ADJACENCY;
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ return GL_TRIANGLES_ADJACENCY;
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ return GL_TRIANGLE_STRIP_ADJACENCY;
+ case Maxwell::PrimitiveTopology::Patches:
+ return GL_PATCHES;
}
+ UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
+ return GL_POINTS;
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 4bbd17b12..bba16afaf 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -24,19 +24,21 @@
namespace OpenGL {
-static const char vertex_shader[] = R"(
-#version 150 core
+namespace {
-in vec2 vert_position;
-in vec2 vert_tex_coord;
-out vec2 frag_tex_coord;
+constexpr char vertex_shader[] = R"(
+#version 430 core
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;
// This is a truncated 3x3 matrix for 2D transformations:
// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
// The third column performs translation.
// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
// implicitly be [0, 0, 1]
-uniform mat3x2 modelview_matrix;
+layout (location = 0) uniform mat3x2 modelview_matrix;
void main() {
// Multiply input position by the rotscale part of the matrix and then manually translate by
@@ -47,34 +49,29 @@ void main() {
}
)";
-static const char fragment_shader[] = R"(
-#version 150 core
+constexpr char fragment_shader[] = R"(
+#version 430 core
-in vec2 frag_tex_coord;
-out vec4 color;
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;
-uniform sampler2D color_texture;
+layout (binding = 0) uniform sampler2D color_texture;
void main() {
- // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
- // support more framebuffer pixel formats.
color = texture(color_texture, frag_tex_coord);
}
)";
-/**
- * Vertex structure that the drawn screen rectangles are composed of.
- */
+constexpr GLint PositionLocation = 0;
+constexpr GLint TexCoordLocation = 1;
+constexpr GLint ModelViewMatrixLocation = 0;
+
struct ScreenRectVertex {
- ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) {
- position[0] = x;
- position[1] = y;
- tex_coord[0] = u;
- tex_coord[1] = v;
- }
+ constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v)
+ : position{{x, y}}, tex_coord{{u, v}} {}
- GLfloat position[2];
- GLfloat tex_coord[2];
+ std::array<GLfloat, 2> position;
+ std::array<GLfloat, 2> tex_coord;
};
/**
@@ -84,18 +81,82 @@ struct ScreenRectVertex {
* The projection part of the matrix is trivial, hence these operations are represented
* by a 3x2 matrix.
*/
-static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
+std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order
// clang-format off
- matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
- matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
+ matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
+ matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
// Last matrix row is implicitly assumed to be [0, 0, 1].
// clang-format on
return matrix;
}
+const char* GetSource(GLenum source) {
+ switch (source) {
+ case GL_DEBUG_SOURCE_API:
+ return "API";
+ case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
+ return "WINDOW_SYSTEM";
+ case GL_DEBUG_SOURCE_SHADER_COMPILER:
+ return "SHADER_COMPILER";
+ case GL_DEBUG_SOURCE_THIRD_PARTY:
+ return "THIRD_PARTY";
+ case GL_DEBUG_SOURCE_APPLICATION:
+ return "APPLICATION";
+ case GL_DEBUG_SOURCE_OTHER:
+ return "OTHER";
+ default:
+ UNREACHABLE();
+ return "Unknown source";
+ }
+}
+
+const char* GetType(GLenum type) {
+ switch (type) {
+ case GL_DEBUG_TYPE_ERROR:
+ return "ERROR";
+ case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
+ return "DEPRECATED_BEHAVIOR";
+ case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
+ return "UNDEFINED_BEHAVIOR";
+ case GL_DEBUG_TYPE_PORTABILITY:
+ return "PORTABILITY";
+ case GL_DEBUG_TYPE_PERFORMANCE:
+ return "PERFORMANCE";
+ case GL_DEBUG_TYPE_OTHER:
+ return "OTHER";
+ case GL_DEBUG_TYPE_MARKER:
+ return "MARKER";
+ default:
+ UNREACHABLE();
+ return "Unknown type";
+ }
+}
+
+void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
+ const GLchar* message, const void* user_param) {
+ const char format[] = "{} {} {}: {}";
+ const char* const str_source = GetSource(source);
+ const char* const str_type = GetType(type);
+
+ switch (severity) {
+ case GL_DEBUG_SEVERITY_HIGH:
+ LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
+ break;
+ case GL_DEBUG_SEVERITY_MEDIUM:
+ LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
+ break;
+ case GL_DEBUG_SEVERITY_NOTIFICATION:
+ case GL_DEBUG_SEVERITY_LOW:
+ LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
+ break;
+ }
+}
+
+} // Anonymous namespace
+
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
@@ -138,9 +199,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
prev_state.Apply();
}
-/**
- * Loads framebuffer from emulated memory into the active OpenGL texture.
- */
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
@@ -158,7 +216,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- const auto host_ptr{Memory::GetPointer(framebuffer_addr)};
+ u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)};
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
// TODO(Rodrigo): Read this from HLE
@@ -181,19 +239,12 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
-/**
- * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can
- * be 1x1 but will stretch across whatever it's rendered on.
- */
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture) {
const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
}
-/**
- * Initializes the OpenGL state and creates persistent objects.
- */
void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f);
@@ -203,10 +254,6 @@ void RendererOpenGL::InitOpenGLObjects() {
state.draw.shader_program = shader.handle;
state.AllDirty();
state.Apply();
- uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
- uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
- attrib_position = glGetAttribLocation(shader.handle, "vert_position");
- attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -217,14 +264,14 @@ void RendererOpenGL::InitOpenGLObjects() {
// Attach vertex data to VAO
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
- glVertexArrayAttribFormat(vertex_array.handle, attrib_position, 2, GL_FLOAT, GL_FALSE,
+ glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, position));
- glVertexArrayAttribFormat(vertex_array.handle, attrib_tex_coord, 2, GL_FLOAT, GL_FALSE,
+ glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, tex_coord));
- glVertexArrayAttribBinding(vertex_array.handle, attrib_position, 0);
- glVertexArrayAttribBinding(vertex_array.handle, attrib_tex_coord, 0);
- glEnableVertexArrayAttrib(vertex_array.handle, attrib_position);
- glEnableVertexArrayAttrib(vertex_array.handle, attrib_tex_coord);
+ glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
+ glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
+ glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
+ glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
sizeof(ScreenRectVertex));
@@ -323,24 +370,26 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
// (e.g. handheld mode) on a 1920x1080 framebuffer.
f32 scale_u = 1.f, scale_v = 1.f;
if (framebuffer_crop_rect.GetWidth() > 0) {
- scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / screen_info.texture.width;
+ scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+ static_cast<f32>(screen_info.texture.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
- scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / screen_info.texture.height;
+ scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+ static_cast<f32>(screen_info.texture.height);
}
- std::array<ScreenRectVertex, 4> vertices = {{
+ const std::array vertices = {
ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v),
ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v),
ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v),
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
- }};
+ };
state.textures[0] = screen_info.display_texture;
state.framebuffer_srgb.enabled = screen_info.display_srgb;
state.AllDirty();
state.Apply();
- glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
+ glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
@@ -349,9 +398,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
state.Apply();
}
-/**
- * Draws the emulated screens to the emulator window.
- */
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
if (renderer_settings.set_background_color) {
// Update background color before drawing
@@ -365,21 +411,17 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
// Set projection matrix
- std::array<GLfloat, 3 * 2> ortho_matrix =
- MakeOrthographicMatrix((float)layout.width, (float)layout.height);
- glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
-
- // Bind texture in Texture Unit 0
- glActiveTexture(GL_TEXTURE0);
- glUniform1i(uniform_color_texture, 0);
+ const std::array ortho_matrix =
+ MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
+ glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
- DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top,
- (float)screen.GetWidth(), (float)screen.GetHeight());
+ DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
+ static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
+ static_cast<float>(screen.GetHeight()));
m_current_frame++;
}
-/// Updates the framerate
void RendererOpenGL::UpdateFramerate() {}
void RendererOpenGL::CaptureScreenshot() {
@@ -416,63 +458,6 @@ void RendererOpenGL::CaptureScreenshot() {
renderer_settings.screenshot_requested = false;
}
-static const char* GetSource(GLenum source) {
-#define RET(s) \
- case GL_DEBUG_SOURCE_##s: \
- return #s
- switch (source) {
- RET(API);
- RET(WINDOW_SYSTEM);
- RET(SHADER_COMPILER);
- RET(THIRD_PARTY);
- RET(APPLICATION);
- RET(OTHER);
- default:
- UNREACHABLE();
- return "Unknown source";
- }
-#undef RET
-}
-
-static const char* GetType(GLenum type) {
-#define RET(t) \
- case GL_DEBUG_TYPE_##t: \
- return #t
- switch (type) {
- RET(ERROR);
- RET(DEPRECATED_BEHAVIOR);
- RET(UNDEFINED_BEHAVIOR);
- RET(PORTABILITY);
- RET(PERFORMANCE);
- RET(OTHER);
- RET(MARKER);
- default:
- UNREACHABLE();
- return "Unknown type";
- }
-#undef RET
-}
-
-static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
- GLsizei length, const GLchar* message, const void* user_param) {
- const char format[] = "{} {} {}: {}";
- const char* const str_source = GetSource(source);
- const char* const str_type = GetType(type);
-
- switch (severity) {
- case GL_DEBUG_SEVERITY_HIGH:
- LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
- break;
- case GL_DEBUG_SEVERITY_MEDIUM:
- LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
- break;
- case GL_DEBUG_SEVERITY_NOTIFICATION:
- case GL_DEBUG_SEVERITY_LOW:
- LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
- break;
- }
-}
-
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
@@ -493,7 +478,6 @@ bool RendererOpenGL::Init() {
return true;
}
-/// Shutdown the renderer
void RendererOpenGL::ShutDown() {}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cf26628ca..b56328a7f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -59,21 +59,31 @@ public:
void ShutDown() override;
private:
+ /// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
+
void AddTelemetryFields();
+
void CreateRasterizer();
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
+
+ /// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
+
void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
+
+ /// Updates the framerate.
void UpdateFramerate();
void CaptureScreenshot();
- // Loads framebuffer from emulated memory into the display information structure
+ /// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
- // Fills active OpenGL texture with the given RGBA color.
+
+ /// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture
+ /// can be 1x1 but will stretch across whatever it's rendered on.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
@@ -94,14 +104,6 @@ private:
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
- // Shader uniform location indices
- GLuint uniform_modelview_matrix;
- GLuint uniform_color_texture;
-
- // Shader attribute input indices
- GLuint attrib_position;
- GLuint attrib_tex_coord;
-
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index c504a2c1a..9770dda1c 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -3,7 +3,10 @@
// Refer to the license.txt file included.
#include <string>
+#include <vector>
+
#include <fmt/format.h>
+
#include <glad/glad.h>
#include "common/assert.h"
@@ -48,34 +51,19 @@ BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{t
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
-void BindBuffersRangePushBuffer::Setup(GLuint first_) {
- first = first_;
- buffer_pointers.clear();
- offsets.clear();
- sizes.clear();
+void BindBuffersRangePushBuffer::Setup() {
+ entries.clear();
}
-void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
- buffer_pointers.push_back(buffer);
- offsets.push_back(offset);
- sizes.push_back(size);
+void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset,
+ GLsizeiptr size) {
+ entries.push_back(Entry{binding, buffer, offset, size});
}
void BindBuffersRangePushBuffer::Bind() {
- // Ensure sizes are valid.
- const std::size_t count{buffer_pointers.size()};
- DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
- if (count == 0) {
- return;
+ for (const Entry& entry : entries) {
+ glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size);
}
-
- // Dereference buffers.
- buffers.resize(count);
- std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
- [](const GLuint* pointer) { return *pointer; });
-
- glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
- sizes.data());
}
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 6c2b45546..d56153fe7 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -43,20 +43,22 @@ public:
explicit BindBuffersRangePushBuffer(GLenum target);
~BindBuffersRangePushBuffer();
- void Setup(GLuint first_);
+ void Setup();
- void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
+ void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size);
void Bind();
private:
- GLenum target{};
- GLuint first{};
- std::vector<const GLuint*> buffer_pointers;
+ struct Entry {
+ GLuint binding;
+ const GLuint* buffer;
+ GLintptr offset;
+ GLsizeiptr size;
+ };
- std::vector<GLuint> buffers;
- std::vector<GLintptr> offsets;
- std::vector<GLsizeiptr> sizes;
+ GLenum target;
+ std::vector<Entry> entries;
};
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});