about | summary | refs | log | tree | commit | diff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_global_cache.cpp 42
-rw-r--r-- src/video_core/renderer_opengl/gl_global_cache.h 16
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 116
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 36
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 5
-rw-r--r-- src/video_core/renderer_opengl/gl_sampler_cache.cpp 52
-rw-r--r-- src/video_core/renderer_opengl/gl_sampler_cache.h 25
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 162
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.h 18
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 27
10 files changed, 266 insertions, 233 deletions
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 8d9ee81f1..ea4a593af 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -14,28 +14,28 @@
namespace OpenGL {
-CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
- : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
+ max_size{max_size} {
buffer.Create();
- // Bind and unbind the buffer so it gets allocated by the driver
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
}
-void CachedGlobalRegion::Reload(u32 size_) {
- constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
+CachedGlobalRegion::~CachedGlobalRegion() = default;
+void CachedGlobalRegion::Reload(u32 size_) {
size = size_;
if (size > max_size) {
size = max_size;
- LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
+ LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
max_size);
}
+ glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
+}
- // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
- glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
+/// Copies `size` bytes from the start of the GL buffer back to `host_ptr`.
+void CachedGlobalRegion::Flush() {
+ // Log the address as zero-padded hexadecimal so it agrees with the "0x" prefix; a bare width
+ // of 16 would print a space-padded decimal number instead.
+ LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:016X}", size, cpu_addr);
+ glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
}
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
@@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr,
return search->second;
}
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
- u8* host_ptr) {
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
+ u32 size) {
GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
if (!region) {
// No reserved surface available, create a new one and reserve it
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
- region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
+ const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
+ ASSERT(cpu_addr);
+
+ region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
ReserveGlobalRegion(region);
}
region->Reload(size);
@@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
}
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
- : RasterizerCache{rasterizer} {}
+ : RasterizerCache{rasterizer} {
+ GLint max_ssbo_size_;
+ glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
+ max_ssbo_size = static_cast<u32>(max_ssbo_size_);
+}
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
const GLShader::GlobalMemoryEntry& global_region,
@@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
auto& gpu{Core::System::GetInstance().GPU()};
auto& memory_manager{gpu.MemoryManager()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+ const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
global_region.GetCbufOffset()};
const auto actual_addr{memory_manager.Read<u64>(addr)};
@@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
if (!region) {
// No global region found - create a new one
- region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
+ region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
Register(region);
}
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 5a21ab66f..196e6e278 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -19,7 +19,7 @@ namespace OpenGL {
namespace GLShader {
class GlobalMemoryEntry;
-} // namespace GLShader
+}
class RasterizerOpenGL;
class CachedGlobalRegion;
@@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
- explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
+ explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
+ ~CachedGlobalRegion();
VAddr GetCpuAddr() const override {
return cpu_addr;
@@ -45,14 +46,14 @@ public:
/// Reloads the global region from guest memory
void Reload(u32 size_);
- // TODO(Rodrigo): When global memory is written (STG), implement flushing
- void Flush() override {
- UNIMPLEMENTED();
- }
+ void Flush() override;
private:
VAddr cpu_addr{};
+ u8* host_ptr{};
u32 size{};
+ u32 max_size{};
+
OGLBuffer buffer;
};
@@ -66,10 +67,11 @@ public:
private:
GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
- GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
+ GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
void ReserveGlobalRegion(GlobalRegion region);
std::unordered_map<CacheAddr, GlobalRegion> reserve;
+ u32 max_ssbo_size{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d250d5cbb..6034dc489 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -101,12 +101,6 @@ struct FramebufferCacheKey {
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
- // Create sampler objects
- for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
- texture_samplers[i].Create();
- state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
- }
-
OpenGLState::ApplyDefaultState();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
@@ -582,9 +576,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
}
void RasterizerOpenGL::Clear() {
- const auto prev_state{state};
- SCOPE_EXIT({ prev_state.Apply(); });
-
const auto& regs = system.GPU().Maxwell3D().regs;
bool use_color{};
bool use_depth{};
@@ -656,7 +647,10 @@ void RasterizerOpenGL::Clear() {
clear_state.EmulateViewportWithScissor();
}
- clear_state.Apply();
+ clear_state.ApplyColorMask();
+ clear_state.ApplyDepth();
+ clear_state.ApplyStencilTest();
+ clear_state.ApplyViewport();
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -756,6 +750,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
return;
}
res_cache.FlushRegion(addr, size);
+ global_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -812,92 +807,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::SamplerInfo::Create() {
- sampler.Create();
- mag_filter = Tegra::Texture::TextureFilter::Linear;
- min_filter = Tegra::Texture::TextureFilter::Linear;
- wrap_u = Tegra::Texture::WrapMode::Wrap;
- wrap_v = Tegra::Texture::WrapMode::Wrap;
- wrap_p = Tegra::Texture::WrapMode::Wrap;
- use_depth_compare = false;
- depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
-
- // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
- glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
-
- // Other attributes have correct defaults
-}
-
-void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
- const GLuint sampler_id = sampler.handle;
- if (mag_filter != config.mag_filter) {
- mag_filter = config.mag_filter;
- glSamplerParameteri(
- sampler_id, GL_TEXTURE_MAG_FILTER,
- MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
- }
- if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
- min_filter = config.min_filter;
- mipmap_filter = config.mipmap_filter;
- glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
- MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
- }
-
- if (wrap_u != config.wrap_u) {
- wrap_u = config.wrap_u;
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
- }
- if (wrap_v != config.wrap_v) {
- wrap_v = config.wrap_v;
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
- }
- if (wrap_p != config.wrap_p) {
- wrap_p = config.wrap_p;
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
- }
-
- if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
- use_depth_compare = enabled;
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
- use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
- }
-
- if (depth_compare_func != config.depth_compare_func) {
- depth_compare_func = config.depth_compare_func;
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
- MaxwellToGL::DepthCompareFunc(depth_compare_func));
- }
-
- if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
- border_color = new_border_color;
- glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
- }
-
- if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
- max_anisotropic = anisotropic;
- if (GLAD_GL_ARB_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
- } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
- }
- }
-
- if (const float min = config.GetMinLod(); min_lod != min) {
- min_lod = min;
- glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
- }
- if (const float max = config.GetMaxLod(); max_lod != max) {
- max_lod = max;
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
- }
-
- if (const float bias = config.GetLodBias(); lod_bias != bias) {
- lod_bias = bias;
- glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
- }
-}
-
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
@@ -953,6 +862,9 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry{entries[bindpoint]};
const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+ if (entry.IsWritten()) {
+ region->MarkAsModified(true, global_cache);
+ }
bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
static_cast<GLsizeiptr>(region->GetSizeInBytes()));
}
@@ -970,10 +882,18 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ Tegra::Texture::FullTextureInfo texture;
+ if (entry.IsBindless()) {
+ const auto cbuf = entry.GetBindlessCBuf();
+ Tegra::Texture::TextureHandle tex_handle;
+ tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
+ texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
+ } else {
+ texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ }
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
- texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
+ state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
state.texture_units[current_bindpoint].texture =
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e4c64ae71..a0e056142 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -25,6 +25,7 @@
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
@@ -71,39 +72,7 @@ public:
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
- static constexpr std::size_t MaxGlobalMemorySize = 0x10000;
- static_assert(MaxGlobalMemorySize % sizeof(float) == 0,
- "The maximum size of a global memory must be a multiple of the size of float");
-
private:
- class SamplerInfo {
- public:
- OGLSampler sampler;
-
- /// Creates the sampler object, initializing its state so that it's in sync with the
- /// SamplerInfo struct.
- void Create();
- /// Syncs the sampler object with the config, updating any necessary state.
- void SyncWithConfig(const Tegra::Texture::TSCEntry& info);
-
- private:
- Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
- Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
- Tegra::Texture::TextureMipmapFilter mipmap_filter =
- Tegra::Texture::TextureMipmapFilter::None;
- Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
- Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
- Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
- bool use_depth_compare = false;
- Tegra::Texture::DepthCompareFunc depth_compare_func =
- Tegra::Texture::DepthCompareFunc::Always;
- GLvec4 border_color = {};
- float min_lod = 0.0f;
- float max_lod = 16.0f;
- float lod_bias = 0.0f;
- float max_anisotropic = 1.0f;
- };
-
struct FramebufferConfigState {
bool using_color_fb{};
bool using_depth_fb{};
@@ -208,6 +177,7 @@ private:
RasterizerCacheOpenGL res_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
+ SamplerCacheOpenGL sampler_cache;
Core::System& system;
@@ -223,8 +193,6 @@ private:
FramebufferConfigState current_framebuffer_config_state;
std::pair<bool, bool> current_depth_stencil_usage{};
- std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
-
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
PrimitiveAssembler primitive_assembler{buffer_cache};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index f2ffc4710..7a68b8738 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -281,10 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
- if (!params.is_tiled) {
- const u32 bpp = params.GetFormatBpp() / 8;
- params.pitch = config.width * bpp;
- }
+ params.pitch = config.pitch;
params.height = config.height;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
new file mode 100644
index 000000000..3ded5ecea
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
@@ -0,0 +1,52 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_sampler_cache.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+
+namespace OpenGL {
+
+SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
+
+SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
+
+/// Creates a new GL sampler object and programs its parameters from the fields of `tsc`.
+/// @param tsc Sampler description to apply.
+/// @return The configured sampler object.
+OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
+ OGLSampler sampler;
+ sampler.Create();
+
+ const GLuint sampler_id{sampler.handle};
+ // The magnification filter is translated with the mipmap filter fixed to None
+ glSamplerParameteri(
+ sampler_id, GL_TEXTURE_MAG_FILTER,
+ MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
+ MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
+ tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
+ glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
+ MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
+ glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
+ glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
+ glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
+ glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
+ // Use the ARB anisotropy enum when that extension is reported, otherwise the EXT one; when
+ // neither is present, only warn if the entry asks for an anisotropy other than 1
+ if (GLAD_GL_ARB_texture_filter_anisotropic) {
+ glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
+ } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
+ glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
+ } else if (tsc.GetMaxAnisotropy() != 1) {
+ LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
+ }
+
+ return sampler;
+}
+
+/// Returns the raw GL handle stored in the given sampler object.
+GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
+ return sampler.handle;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
new file mode 100644
index 000000000..defbc2d81
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.h
@@ -0,0 +1,25 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/sampler_cache.h"
+
+namespace OpenGL {
+
+/// Sampler cache of the OpenGL backend: instantiates VideoCommon::SamplerCache with GLuint and
+/// OGLSampler as its template arguments.
+class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
+public:
+ explicit SamplerCacheOpenGL();
+ ~SamplerCacheOpenGL();
+
+protected:
+ // NOTE(review): these two presumably implement hooks declared by VideoCommon::SamplerCache,
+ // which is not visible here - confirm, and mark them `override` if the base declares them virtual.
+
+ /// Creates a GL sampler object configured from `tsc`.
+ OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const;
+
+ /// Returns the GLuint handle of the given sampler object.
+ GLuint ToSamplerType(const OGLSampler& sampler) const;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 28e490b3c..cd462621d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -45,8 +45,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>;
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
-constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
- static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
class ShaderWriter {
public:
@@ -121,14 +119,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
/// Returns true if an object has to be treated as precise
bool IsPrecise(Operation operand) {
- const auto& meta = operand.GetMeta();
-
+ const auto& meta{operand.GetMeta()};
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
return arithmetic->precise;
}
- if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) {
- return half_arithmetic->precise;
- }
return false;
}
@@ -208,8 +202,10 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
- for (const auto& gmem : ir.GetGlobalMemoryBases()) {
- entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
+ for (const auto& gmem_pair : ir.GetGlobalMemory()) {
+ const auto& [base, usage] = gmem_pair;
+ entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
+ usage.is_read, usage.is_written);
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -380,12 +376,22 @@ private:
}
void DeclareGlobalMemory() {
- for (const auto& entry : ir.GetGlobalMemoryBases()) {
+ for (const auto& gmem : ir.GetGlobalMemory()) {
+ const auto& [base, usage] = gmem;
+
+ // Since we don't know how the shader will use the global memory, hint the driver to disable
+ // as many optimizations as possible
+ std::string qualifier = "coherent volatile";
+ if (usage.is_read && !usage.is_written)
+ qualifier += " readonly";
+ else if (usage.is_written && !usage.is_read)
+ qualifier += " writeonly";
+
const std::string binding =
- fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
- code.AddLine("layout (std430, binding = " + binding + ") buffer " +
- GetGlobalMemoryBlock(entry) + " {");
- code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
+ fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset);
+ code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " +
+ GetGlobalMemoryBlock(base) + " {");
+ code.AddLine(" float " + GetGlobalMemory(base) + "[];");
code.AddLine("};");
code.AddNewLine();
}
@@ -617,28 +623,7 @@ private:
}
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
- std::string value = VisitOperand(operation, operand_index);
- switch (type) {
- case Type::HalfFloat: {
- const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
- if (!half_meta) {
- value = "toHalf2(" + value + ')';
- }
-
- switch (half_meta->types.at(operand_index)) {
- case Tegra::Shader::HalfType::H0_H1:
- return "toHalf2(" + value + ')';
- case Tegra::Shader::HalfType::F32:
- return "vec2(" + value + ')';
- case Tegra::Shader::HalfType::H0_H0:
- return "vec2(toHalf2(" + value + ")[0])";
- case Tegra::Shader::HalfType::H1_H1:
- return "vec2(toHalf2(" + value + ")[1])";
- }
- }
- default:
- return CastOperand(value, type);
- }
+ return CastOperand(VisitOperand(operation, operand_index), type);
}
std::string CastOperand(const std::string& value, Type type) const {
@@ -652,9 +637,7 @@ private:
case Type::Uint:
return "ftou(" + value + ')';
case Type::HalfFloat:
- // Can't be handled as a stand-alone value
- UNREACHABLE();
- return value;
+ return "toHalf2(" + value + ')';
}
UNREACHABLE();
return value;
@@ -868,6 +851,12 @@ private:
} else if (const auto lmem = std::get_if<LmemNode>(dest)) {
target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
+ } else if (const auto gmem = std::get_if<GmemNode>(dest)) {
+ const std::string real = Visit(gmem->GetRealAddress());
+ const std::string base = Visit(gmem->GetBaseAddress());
+ const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+ target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
@@ -1067,13 +1056,40 @@ private:
return BitwiseCastResult(value, Type::HalfFloat);
}
+ /// Emits a GLSL clamp() on a half-float pair. Operand 0 is visited as HalfFloat; operands 1 and
+ /// 2 are visited as Float and each wrapped in vec2() to form the lower and upper bounds.
+ /// The clamped expression is passed through BitwiseCastResult and ApplyPrecise.
+ std::string HClamp(Operation operation) {
+ const std::string value = VisitOperand(operation, 0, Type::HalfFloat);
+ const std::string min = VisitOperand(operation, 1, Type::Float);
+ const std::string max = VisitOperand(operation, 2, Type::Float);
+ const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))";
+ return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
+ }
+
+ /// Emits GLSL that rearranges a half-float pair according to the Tegra::Shader::HalfType held
+ /// in the operation's meta, then wraps the result in fromHalf2().
+ std::string HUnpack(Operation operation) {
+ const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
+ const auto value = [&]() -> std::string {
+ switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
+ case Tegra::Shader::HalfType::H0_H1:
+ // The pair is used unchanged
+ return operand;
+ case Tegra::Shader::HalfType::F32:
+ // The pair is packed with fromHalf2() and that single value fills both components
+ return "vec2(fromHalf2(" + operand + "))";
+ case Tegra::Shader::HalfType::H0_H0:
+ // Component 0 fills both components
+ return "vec2(" + operand + "[0])";
+ case Tegra::Shader::HalfType::H1_H1:
+ // Component 1 fills both components
+ return "vec2(" + operand + "[1])";
+ }
+ // Only reached when the meta holds an enumerator not handled above
+ UNREACHABLE();
+ return "0";
+ }();
+ return "fromHalf2(" + value + ')';
+ }
+
std::string HMergeF32(Operation operation) {
return "float(toHalf2(" + Visit(operation[0]) + ")[0])";
}
std::string HMergeH0(Operation operation) {
- return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" +
- Visit(operation[1]) + ")[0]))";
+ return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" +
+ Visit(operation[0]) + ")[1]))";
}
std::string HMergeH1(Operation operation) {
@@ -1173,34 +1189,46 @@ private:
return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
}
+ /// Emits a comparison of two half-float pairs using the GLSL function named by `compare_op`,
+ /// producing a Bool2 expression.
+ /// @tparam with_nan When true, the comparison is wrapped in halfFloatNanComparison() together
+ /// with both operands; when false, the plain comparison is returned.
+ template <bool with_nan>
+ std::string GenerateHalfComparison(Operation operation, std::string compare_op) {
+ std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
+ Type::HalfFloat, Type::HalfFloat)};
+ if constexpr (!with_nan) {
+ return comparison;
+ }
+ // Both operands are visited a second time so the helper can inspect them for NaN
+ return "halfFloatNanComparison(" + comparison + ", " +
+ VisitOperand(operation, 0, Type::HalfFloat) + ", " +
+ VisitOperand(operation, 1, Type::HalfFloat) + ')';
+ }
+
+ template <bool with_nan>
std::string Logical2HLessThan(Operation operation) {
- return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "lessThan");
}
+ template <bool with_nan>
std::string Logical2HEqual(Operation operation) {
- return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "equal");
}
+ template <bool with_nan>
std::string Logical2HLessEqual(Operation operation) {
- return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
}
+ template <bool with_nan>
std::string Logical2HGreaterThan(Operation operation) {
- return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "greaterThan");
}
+ template <bool with_nan>
std::string Logical2HNotEqual(Operation operation) {
- return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "notEqual");
}
+ template <bool with_nan>
std::string Logical2HGreaterEqual(Operation operation) {
- return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat,
- Type::HalfFloat);
+ return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
}
std::string Texture(Operation operation) {
@@ -1489,6 +1517,8 @@ private:
&GLSLDecompiler::Fma<Type::HalfFloat>,
&GLSLDecompiler::Absolute<Type::HalfFloat>,
&GLSLDecompiler::HNegate,
+ &GLSLDecompiler::HClamp,
+ &GLSLDecompiler::HUnpack,
&GLSLDecompiler::HMergeF32,
&GLSLDecompiler::HMergeH0,
&GLSLDecompiler::HMergeH1,
@@ -1525,12 +1555,18 @@ private:
&GLSLDecompiler::LogicalNotEqual<Type::Uint>,
&GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
- &GLSLDecompiler::Logical2HLessThan,
- &GLSLDecompiler::Logical2HEqual,
- &GLSLDecompiler::Logical2HLessEqual,
- &GLSLDecompiler::Logical2HGreaterThan,
- &GLSLDecompiler::Logical2HNotEqual,
- &GLSLDecompiler::Logical2HGreaterEqual,
+ &GLSLDecompiler::Logical2HLessThan<false>,
+ &GLSLDecompiler::Logical2HEqual<false>,
+ &GLSLDecompiler::Logical2HLessEqual<false>,
+ &GLSLDecompiler::Logical2HGreaterThan<false>,
+ &GLSLDecompiler::Logical2HNotEqual<false>,
+ &GLSLDecompiler::Logical2HGreaterEqual<false>,
+ &GLSLDecompiler::Logical2HLessThan<true>,
+ &GLSLDecompiler::Logical2HEqual<true>,
+ &GLSLDecompiler::Logical2HLessEqual<true>,
+ &GLSLDecompiler::Logical2HGreaterThan<true>,
+ &GLSLDecompiler::Logical2HNotEqual<true>,
+ &GLSLDecompiler::Logical2HGreaterEqual<true>,
&GLSLDecompiler::Texture,
&GLSLDecompiler::TextureLod,
@@ -1621,9 +1657,7 @@ private:
std::string GetCommonDeclarations() {
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
- const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
- "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
"#define ftoi floatBitsToInt\n"
"#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n"
@@ -1633,6 +1667,12 @@ std::string GetCommonDeclarations() {
"}\n\n"
"vec2 toHalf2(float value) {\n"
" return unpackHalf2x16(ftou(value));\n"
+ "}\n\n"
+ "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n"
+ " bvec2 is_nan1 = isnan(pair1);\n"
+ " bvec2 is_nan2 = isnan(pair2);\n"
+ " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
+ "is_nan2.y);\n"
"}\n";
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 4e04ab2f8..74032d237 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -39,8 +39,9 @@ private:
class GlobalMemoryEntry {
public:
- explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
- : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+ explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written)
+ : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{
+ is_written} {}
u32 GetCbufIndex() const {
return cbuf_index;
@@ -50,14 +51,25 @@ public:
return cbuf_offset;
}
+ bool IsRead() const {
+ return is_read;
+ }
+
+ bool IsWritten() const {
+ return is_written;
+ }
+
private:
u32 cbuf_index{};
u32 cbuf_offset{};
+ bool is_read{};
+ bool is_written{};
};
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
+ std::vector<SamplerEntry> bindless_samplers;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
@@ -68,4 +80,4 @@ std::string GetCommonDeclarations();
ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
const std::string& suffix);
-} // namespace OpenGL::GLShader \ No newline at end of file
+} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 8a43eb157..53752b38d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -319,16 +319,19 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u32 type{};
u8 is_array{};
u8 is_shadow{};
+ u8 is_bindless{};
if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
- file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
+ file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
+ file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
return {};
}
- entry.entries.samplers.emplace_back(
- static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
+ entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
+ static_cast<std::size_t>(index),
+ static_cast<Tegra::Shader::TextureType>(type),
+ is_array != 0, is_shadow != 0, is_bindless != 0);
}
u32 global_memory_count{};
@@ -337,11 +340,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
+ u8 is_read{};
+ u8 is_written{};
if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
+ file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
+ file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
+ file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
return {};
}
- entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
+ entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
+ is_written != 0);
}
for (auto& clip_distance : entry.entries.clip_distances) {
@@ -388,7 +396,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
- file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
+ file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 ||
+ file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) {
return false;
}
}
@@ -397,7 +406,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
return false;
for (const auto& gmem : entries.global_memory_entries) {
if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
- file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
+ file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
+ file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
+ file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
return false;
}
}