diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.h | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 116 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 36 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_sampler_cache.cpp | 52 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_sampler_cache.h | 25 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 162 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 27 |
10 files changed, 266 insertions, 233 deletions
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 8d9ee81f1..ea4a593af 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -14,28 +14,28 @@ namespace OpenGL { -CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { +CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, + max_size{max_size} { buffer.Create(); - // Bind and unbind the buffer so it gets allocated by the driver - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); } -void CachedGlobalRegion::Reload(u32 size_) { - constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); +CachedGlobalRegion::~CachedGlobalRegion() = default; +void CachedGlobalRegion::Reload(u32 size_) { size = size_; if (size > max_size) { size = max_size; - LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, + LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, max_size); } + glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); +} - // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); +void CachedGlobalRegion::Flush() { + LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); + glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); } GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { @@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, return search->second; } -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size, - u8* host_ptr) { +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, + u32 size) { GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; if (!region) { // No reserved surface available, create a new one and reserve it auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); - region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; + ASSERT(cpu_addr); + + region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); ReserveGlobalRegion(region); } region->Reload(size); @@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { } GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} {} + : RasterizerCache{rasterizer} { + GLint max_ssbo_size_; + glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); + max_ssbo_size = static_cast<u32>(max_ssbo_size_); +} GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( const GLShader::GlobalMemoryEntry& global_region, @@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( auto& gpu{Core::System::GetInstance().GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()}; const auto actual_addr{memory_manager.Read<u64>(addr)}; @@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( if (!region) { // No global region found - create a new one - region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); + region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); Register(region); } diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 5a21ab66f..196e6e278 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -19,7 +19,7 @@ namespace OpenGL { namespace GLShader { class GlobalMemoryEntry; -} // namespace GLShader +} class RasterizerOpenGL; class CachedGlobalRegion; @@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; class CachedGlobalRegion final : public RasterizerCacheObject { public: - explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); + explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); + ~CachedGlobalRegion(); VAddr GetCpuAddr() const override { return cpu_addr; @@ -45,14 +46,14 @@ public: /// Reloads the global region from guest memory void Reload(u32 size_); - // TODO(Rodrigo): When global memory is written (STG), implement flushing - void Flush() override { - UNIMPLEMENTED(); - } + void Flush() override; private: VAddr cpu_addr{}; + u8* host_ptr{}; u32 size{}; + u32 max_size{}; + OGLBuffer buffer; }; @@ -66,10 +67,11 @@ public: private: GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr); + GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); void ReserveGlobalRegion(GlobalRegion region); std::unordered_map<CacheAddr, GlobalRegion> reserve; + u32 max_ssbo_size{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d250d5cbb..6034dc489 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -101,12 +101,6 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { - // Create sampler objects - for (std::size_t i = 0; i < texture_samplers.size(); ++i) { - texture_samplers[i].Create(); - state.texture_units[i].sampler = texture_samplers[i].sampler.handle; - } - OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); @@ -582,9 +576,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( } void RasterizerOpenGL::Clear() { - const auto prev_state{state}; - SCOPE_EXIT({ prev_state.Apply(); }); - const auto& regs = system.GPU().Maxwell3D().regs; bool use_color{}; bool use_depth{}; @@ -656,7 +647,10 @@ void RasterizerOpenGL::Clear() { clear_state.EmulateViewportWithScissor(); } - clear_state.Apply(); + clear_state.ApplyColorMask(); + clear_state.ApplyDepth(); + clear_state.ApplyStencilTest(); + clear_state.ApplyViewport(); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); @@ -756,6 +750,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { return; } res_cache.FlushRegion(addr, size); + global_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -812,92 +807,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SamplerInfo::Create() { - sampler.Create(); - mag_filter = Tegra::Texture::TextureFilter::Linear; - min_filter = Tegra::Texture::TextureFilter::Linear; - wrap_u = Tegra::Texture::WrapMode::Wrap; - wrap_v = Tegra::Texture::WrapMode::Wrap; - wrap_p = Tegra::Texture::WrapMode::Wrap; - use_depth_compare = false; - depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; - - // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR - glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); - - // Other attributes have correct defaults -} - -void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { - const GLuint sampler_id = sampler.handle; - if (mag_filter != config.mag_filter) { - mag_filter = config.mag_filter; - glSamplerParameteri( - sampler_id, GL_TEXTURE_MAG_FILTER, - MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); - } - if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) { - min_filter = config.min_filter; - mipmap_filter = config.mipmap_filter; - glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter)); - } - - if (wrap_u != config.wrap_u) { - wrap_u = config.wrap_u; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); - } - if (wrap_v != config.wrap_v) { - wrap_v = config.wrap_v; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); - } - if (wrap_p != config.wrap_p) { - wrap_p = config.wrap_p; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); - } - - if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) { - use_depth_compare = enabled; - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, - use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); - } - - if (depth_compare_func != config.depth_compare_func) { - depth_compare_func = config.depth_compare_func; - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, - MaxwellToGL::DepthCompareFunc(depth_compare_func)); - } - - if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) { - border_color = new_border_color; - glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data()); - } - - if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) { - max_anisotropic = anisotropic; - if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); - } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); - } - } - - if (const float min = config.GetMinLod(); min_lod != min) { - min_lod = min; - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod); - } - if (const float max = config.GetMaxLod(); max_lod != max) { - max_lod = max; - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod); - } - - if (const float bias = config.GetLodBias(); lod_bias != bias) { - lod_bias = bias; - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias); - } -} - void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, GLuint program_handle, BaseBindings base_bindings) { @@ -953,6 +862,9 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry{entries[bindpoint]}; const auto& region{global_cache.GetGlobalRegion(entry, stage)}; + if (entry.IsWritten()) { + region->MarkAsModified(true, global_cache); + } bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, static_cast<GLsizeiptr>(region->GetSizeInBytes())); } @@ -970,10 +882,18 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + Tegra::Texture::FullTextureInfo texture; + if (entry.IsBindless()) { + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); + texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + } else { + texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + } const u32 current_bindpoint = base_bindings.sampler + bindpoint; - texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); + state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { state.texture_units[current_bindpoint].texture = diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e4c64ae71..a0e056142 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -25,6 +25,7 @@ #include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" @@ -71,39 +72,7 @@ public: static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - static constexpr std::size_t MaxGlobalMemorySize = 0x10000; - static_assert(MaxGlobalMemorySize % sizeof(float) == 0, - "The maximum size of a global memory must be a multiple of the size of float"); - private: - class SamplerInfo { - public: - OGLSampler sampler; - - /// Creates the sampler object, initializing its state so that it's in sync with the - /// SamplerInfo struct. - void Create(); - /// Syncs the sampler object with the config, updating any necessary state. - void SyncWithConfig(const Tegra::Texture::TSCEntry& info); - - private: - Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureMipmapFilter mipmap_filter = - Tegra::Texture::TextureMipmapFilter::None; - Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; - Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; - Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; - bool use_depth_compare = false; - Tegra::Texture::DepthCompareFunc depth_compare_func = - Tegra::Texture::DepthCompareFunc::Always; - GLvec4 border_color = {}; - float min_lod = 0.0f; - float max_lod = 16.0f; - float lod_bias = 0.0f; - float max_anisotropic = 1.0f; - }; - struct FramebufferConfigState { bool using_color_fb{}; bool using_depth_fb{}; @@ -208,6 +177,7 @@ private: RasterizerCacheOpenGL res_cache; ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; + SamplerCacheOpenGL sampler_cache; Core::System& system; @@ -223,8 +193,6 @@ private: FramebufferConfigState current_framebuffer_config_state; std::pair<bool, bool> current_depth_stencil_usage{}; - std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; - static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; PrimitiveAssembler primitive_assembler{buffer_cache}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f2ffc4710..7a68b8738 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -281,10 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; - if (!params.is_tiled) { - const u32 bpp = params.GetFormatBpp() / 8; - params.pitch = config.width * bpp; - } + params.pitch = config.pitch; params.height = config.height; params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp new file mode 100644 index 000000000..3ded5ecea --- /dev/null +++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp @@ -0,0 +1,52 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_sampler_cache.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" + +namespace OpenGL { + +SamplerCacheOpenGL::SamplerCacheOpenGL() = default; + +SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; + +OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + OGLSampler sampler; + sampler.Create(); + + const GLuint sampler_id{sampler.handle}; + glSamplerParameteri( + sampler_id, GL_TEXTURE_MAG_FILTER, + MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); + glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, + MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, + tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, + MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); + glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); + glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); + glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); + if (GLAD_GL_ARB_texture_filter_anisotropic) { + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); + } else if (GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); + } else if (tsc.GetMaxAnisotropy() != 1) { + LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); + } + + return sampler; +} + +GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { + return sampler.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h new file mode 100644 index 000000000..defbc2d81 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_sampler_cache.h @@ -0,0 +1,25 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/sampler_cache.h" + +namespace OpenGL { + +class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> { +public: + explicit SamplerCacheOpenGL(); + ~SamplerCacheOpenGL(); + +protected: + OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; + + GLuint ToSamplerType(const OGLSampler& sampler) const; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 28e490b3c..cd462621d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,8 +45,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>; enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); -constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = - static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); class ShaderWriter { public: @@ -121,14 +119,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { /// Returns true if an object has to be treated as precise bool IsPrecise(Operation operand) { - const auto& meta = operand.GetMeta(); - + const auto& meta{operand.GetMeta()}; if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { return arithmetic->precise; } - if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) { - return half_arithmetic->precise; - } return false; } @@ -208,8 +202,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, + usage.is_read, usage.is_written); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -380,12 +376,22 @@ private: } void DeclareGlobalMemory() { - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& gmem : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem; + + // Since we don't know how the shader will use the shader, hint the driver to disable as + // much optimizations as possible + std::string qualifier = "coherent volatile"; + if (usage.is_read && !usage.is_written) + qualifier += " readonly"; + else if (usage.is_written && !usage.is_read) + qualifier += " writeonly"; + const std::string binding = - fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); - code.AddLine("layout (std430, binding = " + binding + ") buffer " + - GetGlobalMemoryBlock(entry) + " {"); - code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); + fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset); + code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " + + GetGlobalMemoryBlock(base) + " {"); + code.AddLine(" float " + GetGlobalMemory(base) + "[];"); code.AddLine("};"); code.AddNewLine(); } @@ -617,28 +623,7 @@ private: } std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { - std::string value = VisitOperand(operation, operand_index); - switch (type) { - case Type::HalfFloat: { - const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); - if (!half_meta) { - value = "toHalf2(" + value + ')'; - } - - switch (half_meta->types.at(operand_index)) { - case Tegra::Shader::HalfType::H0_H1: - return "toHalf2(" + value + ')'; - case Tegra::Shader::HalfType::F32: - return "vec2(" + value + ')'; - case Tegra::Shader::HalfType::H0_H0: - return "vec2(toHalf2(" + value + ")[0])"; - case Tegra::Shader::HalfType::H1_H1: - return "vec2(toHalf2(" + value + ")[1])"; - } - } - default: - return CastOperand(value, type); - } + return CastOperand(VisitOperand(operation, operand_index), type); } std::string CastOperand(const std::string& value, Type type) const { @@ -652,9 +637,7 @@ private: case Type::Uint: return "ftou(" + value + ')'; case Type::HalfFloat: - // Can't be handled as a stand-alone value - UNREACHABLE(); - return value; + return "toHalf2(" + value + ')'; } UNREACHABLE(); return value; @@ -868,6 +851,12 @@ private: } else if (const auto lmem = std::get_if<LmemNode>(dest)) { target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; + } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + const std::string real = Visit(gmem->GetRealAddress()); + const std::string base = Visit(gmem->GetBaseAddress()); + const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -1067,13 +1056,40 @@ private: return BitwiseCastResult(value, Type::HalfFloat); } + std::string HClamp(Operation operation) { + const std::string value = VisitOperand(operation, 0, Type::HalfFloat); + const std::string min = VisitOperand(operation, 1, Type::Float); + const std::string max = VisitOperand(operation, 2, Type::Float); + const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))"; + return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); + } + + std::string HUnpack(Operation operation) { + const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; + const auto value = [&]() -> std::string { + switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { + case Tegra::Shader::HalfType::H0_H1: + return operand; + case Tegra::Shader::HalfType::F32: + return "vec2(fromHalf2(" + operand + "))"; + case Tegra::Shader::HalfType::H0_H0: + return "vec2(" + operand + "[0])"; + case Tegra::Shader::HalfType::H1_H1: + return "vec2(" + operand + "[1])"; + } + UNREACHABLE(); + return "0"; + }(); + return "fromHalf2(" + value + ')'; + } + std::string HMergeF32(Operation operation) { return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; } std::string HMergeH0(Operation operation) { - return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" + - Visit(operation[1]) + ")[0]))"; + return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" + + Visit(operation[0]) + ")[1]))"; } std::string HMergeH1(Operation operation) { @@ -1173,34 +1189,46 @@ private: return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); } + template <bool with_nan> + std::string GenerateHalfComparison(Operation operation, std::string compare_op) { + std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, + Type::HalfFloat, Type::HalfFloat)}; + if constexpr (!with_nan) { + return comparison; + } + return "halfFloatNanComparison(" + comparison + ", " + + VisitOperand(operation, 0, Type::HalfFloat) + ", " + + VisitOperand(operation, 1, Type::HalfFloat) + ')'; + } + + template <bool with_nan> std::string Logical2HLessThan(Operation operation) { - return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "lessThan"); } + template <bool with_nan> std::string Logical2HEqual(Operation operation) { - return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "equal"); } + template <bool with_nan> std::string Logical2HLessEqual(Operation operation) { - return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); } + template <bool with_nan> std::string Logical2HGreaterThan(Operation operation) { - return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "greaterThan"); } + template <bool with_nan> std::string Logical2HNotEqual(Operation operation) { - return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "notEqual"); } + template <bool with_nan> std::string Logical2HGreaterEqual(Operation operation) { - return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); } std::string Texture(Operation operation) { @@ -1489,6 +1517,8 @@ private: &GLSLDecompiler::Fma<Type::HalfFloat>, &GLSLDecompiler::Absolute<Type::HalfFloat>, &GLSLDecompiler::HNegate, + &GLSLDecompiler::HClamp, + &GLSLDecompiler::HUnpack, &GLSLDecompiler::HMergeF32, &GLSLDecompiler::HMergeH0, &GLSLDecompiler::HMergeH1, @@ -1525,12 +1555,18 @@ private: &GLSLDecompiler::LogicalNotEqual<Type::Uint>, &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, + &GLSLDecompiler::Logical2HLessThan<false>, + &GLSLDecompiler::Logical2HEqual<false>, + &GLSLDecompiler::Logical2HLessEqual<false>, + &GLSLDecompiler::Logical2HGreaterThan<false>, + &GLSLDecompiler::Logical2HNotEqual<false>, + &GLSLDecompiler::Logical2HGreaterEqual<false>, + &GLSLDecompiler::Logical2HLessThan<true>, + &GLSLDecompiler::Logical2HEqual<true>, + &GLSLDecompiler::Logical2HLessEqual<true>, + &GLSLDecompiler::Logical2HGreaterThan<true>, + &GLSLDecompiler::Logical2HNotEqual<true>, + &GLSLDecompiler::Logical2HGreaterEqual<true>, &GLSLDecompiler::Texture, &GLSLDecompiler::TextureLod, @@ -1621,9 +1657,7 @@ private: std::string GetCommonDeclarations() { const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); - const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + - "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + "#define ftoi floatBitsToInt\n" "#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n" @@ -1633,6 +1667,12 @@ std::string GetCommonDeclarations() { "}\n\n" "vec2 toHalf2(float value) {\n" " return unpackHalf2x16(ftou(value));\n" + "}\n\n" + "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n" + " bvec2 is_nan1 = isnan(pair1);\n" + " bvec2 is_nan2 = isnan(pair2);\n" + " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " + "is_nan2.y);\n" "}\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 4e04ab2f8..74032d237 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -39,8 +39,9 @@ private: class GlobalMemoryEntry { public: - explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {} + explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written) + : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ + is_written} {} u32 GetCbufIndex() const { return cbuf_index; @@ -50,14 +51,25 @@ public: return cbuf_offset; } + bool IsRead() const { + return is_read; + } + + bool IsWritten() const { + return is_written; + } + private: u32 cbuf_index{}; u32 cbuf_offset{}; + bool is_read{}; + bool is_written{}; }; struct ShaderEntries { std::vector<ConstBufferEntry> const_buffers; std::vector<SamplerEntry> samplers; + std::vector<SamplerEntry> bindless_samplers; std::vector<GlobalMemoryEntry> global_memory_entries; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::size_t shader_length{}; @@ -68,4 +80,4 @@ std::string GetCommonDeclarations(); ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix); -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 8a43eb157..53752b38d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -319,16 +319,19 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u32 type{}; u8 is_array{}; u8 is_shadow{}; + u8 is_bindless{}; if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) || file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) || file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) || file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) || - file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) { + file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.samplers.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0); + entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset), + static_cast<std::size_t>(index), + static_cast<Tegra::Shader::TextureType>(type), + is_array != 0, is_shadow != 0, is_bindless != 0); } u32 global_memory_count{}; @@ -337,11 +340,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; + u8 is_read{}; + u8 is_written{}; if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) { + file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) || + file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset); + entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, + is_written != 0); } for (auto& clip_distance : entry.entries.clip_distances) { @@ -388,7 +396,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 || file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 || file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 || - file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) { + file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 || + file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) { return false; } } @@ -397,7 +406,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu return false; for (const auto& gmem : entries.global_memory_entries) { if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 || - file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) { + file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) { return false; } } |
