aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h33
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp42
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h17
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp36
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h17
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp105
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h49
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp68
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h24
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp169
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h12
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp610
-rw-r--r--src/video_core/renderer_opengl/gl_state.h52
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp6
22 files changed, 689 insertions, 666 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b3062e5ba..7989ec11b 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,30 +7,33 @@
#include "common/alignment.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace OpenGL {
+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+ std::size_t alignment, u8* host_ptr)
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+ alignment{alignment} {}
+
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
- std::size_t alignment, bool cache) {
+GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
+ bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
- ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
cache &= size >= 2048;
+ const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
if (cache) {
- auto entry = TryGet(*cpu_addr);
+ auto entry = TryGet(host_ptr);
if (entry) {
- if (entry->size >= size && entry->alignment == alignment) {
- return entry->offset;
+ if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
+ return entry->GetOffset();
}
Unregister(entry);
}
@@ -39,17 +42,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
AlignBuffer(alignment);
const GLintptr uploaded_offset = buffer_offset;
- Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+ if (!host_ptr) {
+ return uploaded_offset;
+ }
+ std::memcpy(buffer_ptr, host_ptr, size);
buffer_ptr += size;
buffer_offset += size;
if (cache) {
- auto entry = std::make_shared<CachedBufferEntry>();
- entry->offset = uploaded_offset;
- entry->size = size;
- entry->alignment = alignment;
- entry->addr = *cpu_addr;
+ auto entry = std::make_shared<CachedBufferEntry>(
+ *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
Register(entry);
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..fc33aa433 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
class RasterizerOpenGL;
-struct CachedBufferEntry final : public RasterizerCacheObject {
- VAddr GetAddr() const override {
- return addr;
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+ explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+ std::size_t alignment, u8* host_ptr);
+
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return size;
}
+ std::size_t GetSize() const {
+ return size;
+ }
+
+ GLintptr GetOffset() const {
+ return offset;
+ }
+
+ std::size_t GetAlignment() const {
+ return alignment;
+ }
+
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
- VAddr addr;
- std::size_t size;
- GLintptr offset;
- std::size_t alignment;
+private:
+ VAddr cpu_addr{};
+ std::size_t size{};
+ GLintptr offset{};
+ std::size_t alignment{};
};
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -41,7 +58,7 @@ public:
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
- GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+ GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7161d1dea..5842d6213 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,10 +4,8 @@
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -15,12 +13,13 @@
namespace OpenGL {
-CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
buffer.Create();
// Bind and unbind the buffer so it gets allocated by the driver
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
- LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
+ LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
}
void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,10 +34,10 @@ void CachedGlobalRegion::Reload(u32 size_) {
// TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
- glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
}
-GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
+GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
const auto search{reserve.find(addr)};
if (search == reserve.end()) {
return {};
@@ -46,11 +45,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
return search->second;
}
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
- GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
+ u8* host_ptr) {
+ GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
if (!region) {
// No reserved surface available, create a new one and reserve it
- region = std::make_shared<CachedGlobalRegion>(addr, size);
+ auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+ const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
+ region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
ReserveGlobalRegion(region);
}
region->Reload(size);
@@ -58,7 +60,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
}
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
- reserve.insert_or_assign(region->GetAddr(), std::move(region));
+ reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
}
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -69,22 +71,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
auto& gpu{Core::System::GetInstance().GPU()};
- const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
- const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
- cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
- ASSERT(cbuf_addr);
-
- const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
- const auto size = Memory::Read32(*cbuf_addr + 8);
- const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
- ASSERT(actual_addr);
+ auto& memory_manager{gpu.MemoryManager()};
+ const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+ const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
+ global_region.GetCbufOffset()};
+ const auto actual_addr{memory_manager.Read<u64>(addr)};
+ const auto size{memory_manager.Read<u32>(addr + 8)};
// Look up global region in the cache based on address
- GlobalRegion region = TryGet(*actual_addr);
+ const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
+ GlobalRegion region{TryGet(host_ptr)};
if (!region) {
// No global region found - create a new one
- region = GetUncachedGlobalRegion(*actual_addr, size);
+ region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
Register(region);
}
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index ba2bdc60c..5a21ab66f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
- explicit CachedGlobalRegion(VAddr addr, u32 size);
+ explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
- /// Gets the address of the shader in guest memory, required for cache management
- VAddr GetAddr() const override {
- return addr;
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
- /// Gets the size of the shader in guest memory, required for cache management
std::size_t GetSizeInBytes() const override {
return size;
}
@@ -53,9 +51,8 @@ public:
}
private:
- VAddr addr{};
+ VAddr cpu_addr{};
u32 size{};
-
OGLBuffer buffer;
};
@@ -68,11 +65,11 @@ public:
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private:
- GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
- GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
+ GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
+ GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
void ReserveGlobalRegion(GlobalRegion region);
- std::unordered_map<VAddr, GlobalRegion> reserve;
+ std::unordered_map<CacheAddr, GlobalRegion> reserve;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 77d5cedd2..c3e94d917 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -7,7 +7,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
-#include "core/memory.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
@@ -40,16 +40,12 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
return index_offset;
}
-GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
- u32 count) {
+GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
const std::size_t map_size{CalculateQuadSize(count)};
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
- ASSERT_MSG(cpu_addr, "Invalid GPU address");
-
- const u8* source{Memory::GetPointer(*cpu_addr)};
+ const u8* source{memory_manager.GetPointer(gpu_addr)};
for (u32 primitive = 0; primitive < count / 4; ++primitive) {
for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -64,4 +60,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
return index_offset;
}
-} // namespace OpenGL \ No newline at end of file
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index a8cb88eb5..4e87ce4d6 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -4,11 +4,9 @@
#pragma once
-#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/memory_manager.h"
namespace OpenGL {
@@ -24,7 +22,7 @@ public:
GLintptr MakeQuadArray(u32 first, u32 count);
- GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+ GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
private:
OGLBufferCache& buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 976f64c24..7ff1e6737 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -17,7 +17,6 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
-#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
@@ -26,7 +25,6 @@
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
-#include "video_core/video_core.h"
namespace OpenGL {
@@ -100,11 +98,9 @@ struct FramebufferCacheKey {
}
};
-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
- ScreenInfo& info)
- : res_cache{*this}, shader_cache{*this, system}, global_cache{*this},
- emu_window{window}, system{system}, screen_info{info},
- buffer_cache(*this, STREAM_BUFFER_SIZE) {
+RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
+ : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
+ screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -225,8 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
if (!vertex_array.IsEnabled())
continue;
- const Tegra::GPUVAddr start = vertex_array.StartAddress();
- const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+ const GPUVAddr start = vertex_array.StartAddress();
+ const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
const u64 size = end - start + 1;
@@ -320,7 +316,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
- ubo.SetFromRegs(gpu.state.shader_stages[stage]);
+ ubo.SetFromRegs(gpu, stage);
const GLintptr offset = buffer_cache.UploadHostMemory(
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
@@ -421,8 +417,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
if (!regs.vertex_array[index].IsEnabled())
continue;
- const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
- const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+ const GPUVAddr start = regs.vertex_array[index].StartAddress();
+ const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
size += end - start + 1;
@@ -449,7 +445,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
-void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
+void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
@@ -747,20 +743,26 @@ void RasterizerOpenGL::DrawArrays() {
void RasterizerOpenGL::FlushAll() {}
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ if (!addr || !size) {
+ return;
+ }
res_cache.FlushRegion(addr, size);
}
-void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ if (!addr || !size) {
+ return;
+ }
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegion(addr, size);
}
@@ -782,7 +784,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
+ const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
if (!surface) {
return {};
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ca3de0592..54fbf48aa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -12,15 +12,12 @@
#include <optional>
#include <tuple>
#include <utility>
-#include <vector>
#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -29,10 +26,8 @@
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace Core {
class System;
@@ -50,16 +45,15 @@ struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
- explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
- ScreenInfo& info);
+ explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
~RasterizerOpenGL() override;
void DrawArrays() override;
void Clear() override;
void FlushAll() override;
- void FlushRegion(VAddr addr, u64 size) override;
- void InvalidateRegion(VAddr addr, u64 size) override;
- void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+ void FlushRegion(CacheAddr addr, u64 size) override;
+ void InvalidateRegion(CacheAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
@@ -67,7 +61,7 @@ public:
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;
- void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
+ void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -214,7 +208,6 @@ private:
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
- Core::Frontend::EmuWindow& emu_window;
Core::System& system;
ScreenInfo& screen_info;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index bd1409660..5876145ef 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -13,7 +13,6 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
-#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/morton.h"
@@ -55,12 +54,11 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
}
}
-void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
+void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
- addr = cpu_addr ? *cpu_addr : 0;
gpu_addr = gpu_addr_;
+ host_ptr = memory_manager.GetPointer(gpu_addr_);
size_in_bytes = SizeInBytesRaw();
if (IsPixelFormatASTC(pixel_format)) {
@@ -223,7 +221,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
- u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+ u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
SurfaceParams params{};
@@ -446,7 +444,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
- gl_buffer.data() + offset_gl, params.addr + offset);
+ gl_buffer.data() + offset_gl, params.host_ptr + offset);
offset += layer_size;
offset_gl += gl_size;
}
@@ -455,7 +453,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
- gl_buffer.data(), params.addr + offset);
+ gl_buffer.data(), params.host_ptr + offset);
}
}
@@ -513,9 +511,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
"reinterpretation but the texture is tiled.");
}
const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
-
+ auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
- Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
+ memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -563,8 +561,14 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
}
CachedSurface::CachedSurface(const SurfaceParams& params)
- : params(params), gl_target(SurfaceTargetToGL(params.target)),
- cached_size_in_bytes(params.size_in_bytes) {
+ : RasterizerCacheObject{params.host_ptr}, params{params},
+ gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {
+
+ const auto optional_cpu_addr{
+ Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
+ ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
+ cpu_addr = *optional_cpu_addr;
+
texture.Create(gl_target);
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
@@ -603,19 +607,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
ApplyTextureDefaults(texture.handle, params.max_mip_level);
- OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
-
- // Clamp size to mapped GPU memory region
- // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
- // R32F render buffer. We do not yet know if this is a game bug or something else, but this
- // check is necessary to prevent flushing from overwriting unmapped memory.
-
- auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
- if (cached_size_in_bytes > max_size) {
- LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
- cached_size_in_bytes = max_size;
- }
+ OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -633,10 +625,9 @@ void CachedSurface::LoadGLBuffer() {
const u32 bpp = params.GetFormatBpp() / 8;
const u32 copy_size = params.width * bpp;
if (params.pitch == copy_size) {
- std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
- params.size_in_bytes_gl);
+ std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
} else {
- const u8* start = Memory::GetPointer(params.addr);
+ const u8* start{params.host_ptr};
u8* write_to = gl_buffer[0].data();
for (u32 h = params.height; h > 0; h--) {
std::memcpy(write_to, start, copy_size);
@@ -670,8 +661,8 @@ void CachedSurface::FlushGLBuffer() {
gl_buffer[0].resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
+ glPixelStorei(GL_PACK_ALIGNMENT, align);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(!tuple.compressed);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -680,8 +671,6 @@ void CachedSurface::FlushGLBuffer() {
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
params.height, params.depth, true, true);
- const u8* const texture_src_data = Memory::GetPointer(params.addr);
- ASSERT(texture_src_data);
if (params.is_tiled) {
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
@@ -691,9 +680,9 @@ void CachedSurface::FlushGLBuffer() {
const u32 bpp = params.GetFormatBpp() / 8;
const u32 copy_size = params.width * bpp;
if (params.pitch == copy_size) {
- std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
+ std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
} else {
- u8* start = Memory::GetPointer(params.addr);
+ u8* start{params.host_ptr};
const u8* read_to = gl_buffer[0].data();
for (u32 h = params.height; h > 0; h--) {
std::memcpy(start, read_to, copy_size);
@@ -718,8 +707,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
+ glPixelStorei(GL_UNPACK_ALIGNMENT, align);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
@@ -927,12 +916,12 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
- if (params.addr == 0 || params.height * params.width == 0) {
+ if (!params.IsValid()) {
return {};
}
// Look up surface in the cache based on address
- Surface surface{TryGet(params.addr)};
+ Surface surface{TryGet(params.host_ptr)};
if (surface) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
// Use the cached surface as-is unless it's not synced with memory
@@ -943,7 +932,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
- UnregisterSurface(surface);
+ Unregister(surface);
Register(new_surface);
if (new_surface->IsUploaded()) {
RegisterReinterpretSurface(new_surface);
@@ -951,7 +940,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
- UnregisterSurface(surface);
+ Unregister(surface);
}
}
@@ -981,14 +970,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& init_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
- VAddr address = init_params.addr;
- const std::size_t layer_size = dst_params.LayerMemorySize();
+ auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+ GPUVAddr address{init_params.gpu_addr};
+ const std::size_t layer_size{dst_params.LayerMemorySize()};
for (u32 layer = 0; layer < dst_params.depth; layer++) {
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
- const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
- const Surface& copy = TryGet(sub_address);
- if (!copy)
+ const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
+ const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
+ if (!copy) {
continue;
+ }
const auto& src_params{copy->GetSurfaceParams()};
const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1163,7 +1154,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
const auto& dst_params{dst_surface->GetSurfaceParams()};
// Flush enough memory for both the source and destination surface
- FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
+ FlushRegion(ToCacheAddr(src_params.host_ptr),
+ std::max(src_params.MemorySize(), dst_params.MemorySize()));
LoadSurface(dst_surface);
}
@@ -1215,8 +1207,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
return new_surface;
}
-Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
- return TryGet(addr);
+Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
+ return TryGet(host_ptr);
}
void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1243,9 +1235,9 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
return {};
}
-static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
- const std::size_t size = params.LayerMemorySize();
- VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
+ const std::size_t size{params.LayerMemorySize()};
+ GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
for (u32 i = 0; i < params.depth; i++) {
if (start == addr) {
return {i};
@@ -1267,7 +1259,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
src_params.height == dst_params.MipHeight(*level) &&
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
const std::optional<u32> slot =
- TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+ TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
if (slot.has_value()) {
glCopyImageSubData(render_surface->Texture().handle,
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
@@ -1283,8 +1275,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
}
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
- const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
- const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+ const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
+ const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
if (bound2 > bound1)
return true;
const auto& dst_params = blitted_surface->GetSurfaceParams();
@@ -1302,12 +1294,12 @@ static bool IsReinterpretInvalidSecond(const Surface render_surface,
bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
Surface intersect) {
if (IsReinterpretInvalid(triggering_surface, intersect)) {
- UnregisterSurface(intersect);
+ Unregister(intersect);
return false;
}
if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
- UnregisterSurface(intersect);
+ Unregister(intersect);
return false;
}
FlushObject(intersect);
@@ -1327,7 +1319,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() {
void RasterizerCacheOpenGL::SignalPostDrawCall() {
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
if (current_color_buffers[i] != nullptr) {
- Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+ Surface intersect =
+ CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
if (intersect != nullptr) {
PartialReinterpretSurface(current_color_buffers[i], intersect);
texception = true;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 9cf6f50be..db280dbb3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -5,13 +5,13 @@
#pragma once
#include <array>
-#include <map>
#include <memory>
#include <string>
-#include <unordered_set>
+#include <tuple>
#include <vector>
#include "common/alignment.h"
+#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
@@ -109,6 +109,11 @@ struct SurfaceParams {
return size;
}
+ /// Returns true if the parameters constitute a valid rasterizer surface.
+ bool IsValid() const {
+ return gpu_addr && host_ptr && height && width;
+ }
+
/// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
/// mipmaps.
std::size_t LayerMemorySize() const {
@@ -201,6 +206,13 @@ struct SurfaceParams {
return bd;
}
+ u32 RowAlign(u32 mip_level) const {
+ const u32 m_width = MipWidth(mip_level);
+ const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
+ const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
+ return (1U << l2);
+ }
+
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
@@ -210,7 +222,7 @@ struct SurfaceParams {
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
- u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+ u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
@@ -232,7 +244,7 @@ struct SurfaceParams {
}
/// Initializes parameters for caching, should be called after everything has been initialized
- void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
+ void InitCacheParameters(GPUVAddr gpu_addr);
std::string TargetName() const {
switch (target) {
@@ -296,8 +308,8 @@ struct SurfaceParams {
bool is_array;
bool srgb_conversion;
// Parameters used for caching
- VAddr addr;
- Tegra::GPUVAddr gpu_addr;
+ u8* host_ptr;
+ GPUVAddr gpu_addr;
std::size_t size_in_bytes;
std::size_t size_in_bytes_gl;
@@ -345,10 +357,10 @@ class RasterizerOpenGL;
class CachedSurface final : public RasterizerCacheObject {
public:
- CachedSurface(const SurfaceParams& params);
+ explicit CachedSurface(const SurfaceParams& params);
- VAddr GetAddr() const override {
- return params.addr;
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
@@ -432,6 +444,7 @@ private:
std::size_t memory_size;
bool reinterpreted = false;
bool must_reload = false;
+ VAddr cpu_addr{};
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -449,7 +462,7 @@ public:
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
/// Tries to find a framebuffer using on the provided CPU address
- Surface TryFindFramebufferSurface(VAddr addr) const;
+ Surface TryFindFramebufferSurface(const u8* host_ptr) const;
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
@@ -506,12 +519,12 @@ private:
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
- using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+ using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
static auto GetReinterpretInterval(const Surface& object) {
- return SurfaceInterval::right_open(object->GetAddr() + 1,
- object->GetAddr() + object->GetMemorySize() - 1);
+ return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
+ object->GetCacheAddr() + object->GetMemorySize() - 1);
}
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
@@ -523,7 +536,7 @@ private:
reinterpret_surface->MarkReinterpreted();
}
- Surface CollideOnReinterpretedSurface(VAddr addr) const {
+ Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
const SurfaceInterval interval{addr};
for (auto& pair :
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
@@ -532,13 +545,17 @@ private:
return nullptr;
}
+ void Register(const Surface& object) override {
+ RasterizerCache<Surface>::Register(object);
+ }
+
/// Unregisters an object from the cache
- void UnregisterSurface(const Surface& object) {
+ void Unregister(const Surface& object) override {
if (object->IsReinterpreted()) {
auto interval = GetReinterpretInterval(object);
reinterpreted_surfaces.erase(interval);
}
- Unregister(object);
+ RasterizerCache<Surface>::Unregister(object);
}
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4883e4f62..ab381932c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,13 +6,11 @@
#include "common/assert.h"
#include "common/hash.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/shader_ir.h"
@@ -32,19 +30,20 @@ struct UnspecializedShader {
namespace {
/// Gets the address for the specified shader stage program
-VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
- const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
- shader_config.offset);
- ASSERT_MSG(address, "Invalid GPU address");
- return *address;
+GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+ const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+ const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+ return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(VAddr addr) {
+ProgramCode GetShaderCode(const u8* host_ptr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
- Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
+ ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+ std::fill(program_code.begin(), program_code.end(), 0);
+ return program_code;
+ });
+ std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
return program_code;
}
@@ -214,12 +213,13 @@ std::set<GLenum> GetSupportedFormats() {
} // namespace
-CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
- ShaderDiskCacheOpenGL& disk_cache,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
- ProgramCode&& program_code, ProgramCode&& program_code_b)
- : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
- disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+ ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
+ : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
+ unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
+ precompiled_programs{precompiled_programs} {
const std::size_t code_size = CalculateProgramSize(program_code);
const std::size_t code_size_b =
@@ -243,12 +243,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
disk_cache.SaveRaw(raw);
}
-CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
- ShaderDiskCacheOpenGL& disk_cache,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
- GLShader::ProgramResult result)
- : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
- disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+ GLShader::ProgramResult result, u8* host_ptr)
+ : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
+ program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
+ precompiled_programs} {
code = std::move(result.first);
entries = result.second;
@@ -271,7 +272,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
}
- LabelGLObject(GL_PROGRAM, program->handle, addr);
+ LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
}
handle = program->handle;
@@ -323,7 +324,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
}
- LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
+ LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
return target_program->handle;
};
@@ -486,29 +487,32 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
return last_shaders[static_cast<u32>(program)];
}
- const VAddr program_addr{GetShaderAddress(program)};
+ auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+ const GPUVAddr program_addr{GetShaderAddress(program)};
// Look up shader in the cache based on address
- Shader shader{TryGet(program_addr)};
+ const auto& host_ptr{memory_manager.GetPointer(program_addr)};
+ Shader shader{TryGet(host_ptr)};
if (!shader) {
// No shader found - create a new one
- ProgramCode program_code = GetShaderCode(program_addr);
+ ProgramCode program_code{GetShaderCode(host_ptr)};
ProgramCode program_code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
- program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
+ program_code_b = GetShaderCode(
+ memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
}
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
-
+ const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
const auto found = precompiled_shaders.find(unique_identifier);
if (found != precompiled_shaders.end()) {
shader =
- std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
- precompiled_programs, found->second);
+ std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
+ precompiled_programs, found->second, host_ptr);
} else {
shader = std::make_shared<CachedShader>(
- program_addr, unique_identifier, program, disk_cache, precompiled_programs,
- std::move(program_code), std::move(program_code_b));
+ cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+ std::move(program_code), std::move(program_code_b), host_ptr);
}
Register(shader);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 97eed192f..0cf8e0b3d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,21 +5,20 @@
#pragma once
#include <array>
+#include <atomic>
#include <memory>
#include <set>
#include <tuple>
#include <unordered_map>
+#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
-#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace Core {
class System;
@@ -39,18 +38,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
class CachedShader final : public RasterizerCacheObject {
public:
- explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
- ShaderDiskCacheOpenGL& disk_cache,
+ explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
- ProgramCode&& program_code, ProgramCode&& program_code_b);
+ ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
- explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
- ShaderDiskCacheOpenGL& disk_cache,
+ explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
- GLShader::ProgramResult result);
+ GLShader::ProgramResult result, u8* host_ptr);
- VAddr GetAddr() const override {
- return addr;
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
@@ -91,7 +90,8 @@ private:
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
- VAddr addr{};
+ u8* host_ptr{};
+ VAddr cpu_addr{};
u64 unique_identifier{};
Maxwell::ShaderProgram program_type{};
ShaderDiskCacheOpenGL& disk_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 11d1169f0..3ea08ef7b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -21,6 +21,8 @@
namespace OpenGL::GLShader {
+namespace {
+
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
@@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
using Operation = const OperationNode&;
+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+
+struct TextureAoffi {};
+using TextureArgument = std::pair<Type, Node>;
+using TextureIR = std::variant<TextureAoffi, TextureArgument>;
+
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
-enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
-
class ShaderWriter {
public:
void AddExpression(std::string_view text) {
@@ -69,10 +75,10 @@ public:
shader_source += '\n';
}
- std::string GenerateTemporal() {
- std::string temporal = "tmp";
- temporal += std::to_string(temporal_index++);
- return temporal;
+ std::string GenerateTemporary() {
+ std::string temporary = "tmp";
+ temporary += std::to_string(temporary_index++);
+ return temporary;
}
std::string GetResult() {
@@ -87,11 +93,11 @@ private:
}
std::string shader_source;
- u32 temporal_index = 1;
+ u32 temporary_index = 1;
};
/// Generates code to use for a swizzle operation.
-static std::string GetSwizzle(u32 elem) {
+std::string GetSwizzle(u32 elem) {
ASSERT(elem <= 3);
std::string swizzle = ".";
swizzle += "xyzw"[elem];
@@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
}
/// Translate topology
-static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
+std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
case Tegra::Shader::OutputTopology::PointList:
return "points";
@@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
}
/// Returns true if an object has to be treated as precise
-static bool IsPrecise(Operation operand) {
+bool IsPrecise(Operation operand) {
const auto& meta = operand.GetMeta();
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
@@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
return false;
}
-static bool IsPrecise(Node node) {
+bool IsPrecise(Node node) {
if (const auto operation = std::get_if<OperationNode>(node)) {
return IsPrecise(*operation);
}
@@ -426,9 +432,14 @@ private:
std::string Visit(Node node) {
if (const auto operation = std::get_if<OperationNode>(node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
+ if (operation_index >= operation_decompilers.size()) {
+ UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
+ return {};
+ }
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
- UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
+ UNREACHABLE_MSG("Undefined operation: {}", operation_index);
+ return {};
}
return (this->*decompiler)(*operation);
@@ -540,7 +551,7 @@ private:
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
- const std::string final_offset = code.GenerateTemporal();
+ const std::string final_offset = code.GenerateTemporary();
code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " +
std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';');
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
@@ -587,9 +598,9 @@ private:
// There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
- const std::string temporal = code.GenerateTemporal();
- code.AddLine(precise + "float " + temporal + " = " + value + ';');
- return temporal;
+ const std::string temporary = code.GenerateTemporary();
+ code.AddLine(precise + "float " + temporary + " = " + value + ';');
+ return temporary;
}
std::string VisitOperand(Operation operation, std::size_t operand_index) {
@@ -601,9 +612,9 @@ private:
return Visit(operand);
}
- const std::string temporal = code.GenerateTemporal();
- code.AddLine("float " + temporal + " = " + Visit(operand) + ';');
- return temporal;
+ const std::string temporary = code.GenerateTemporary();
+ code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
+ return temporary;
}
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
@@ -718,8 +729,8 @@ private:
result_type));
}
- std::string GenerateTexture(Operation operation, const std::string& func,
- const std::vector<std::pair<Type, Node>>& extras) {
+ std::string GenerateTexture(Operation operation, const std::string& function_suffix,
+ const std::vector<TextureIR>& extras) {
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -729,11 +740,11 @@ private:
const bool has_array = meta->sampler.IsArray();
const bool has_shadow = meta->sampler.IsShadow();
- std::string expr = func;
- expr += '(';
- expr += GetSampler(meta->sampler);
- expr += ", ";
-
+ std::string expr = "texture" + function_suffix;
+ if (!meta->aoffi.empty()) {
+ expr += "Offset";
+ }
+ expr += '(' + GetSampler(meta->sampler) + ", ";
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
@@ -751,36 +762,74 @@ private:
}
expr += ')';
- for (const auto& extra_pair : extras) {
- const auto [type, operand] = extra_pair;
- if (operand == nullptr) {
- continue;
+ for (const auto& variant : extras) {
+ if (const auto argument = std::get_if<TextureArgument>(&variant)) {
+ expr += GenerateTextureArgument(*argument);
+ } else if (std::get_if<TextureAoffi>(&variant)) {
+ expr += GenerateTextureAoffi(meta->aoffi);
+ } else {
+ UNREACHABLE();
}
- expr += ", ";
+ }
- switch (type) {
- case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
- // Inline the string as an immediate integer in GLSL (some extra arguments are
- // required to be constant)
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
- expr += "ftoi(" + Visit(operand) + ')';
- }
- break;
- case Type::Float:
- expr += Visit(operand);
- break;
- default: {
- const auto type_int = static_cast<u32>(type);
- UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
- expr += '0';
- break;
+ return expr + ')';
+ }
+
+ std::string GenerateTextureArgument(TextureArgument argument) {
+ const auto [type, operand] = argument;
+ if (operand == nullptr) {
+ return {};
+ }
+
+ std::string expr = ", ";
+ switch (type) {
+ case Type::Int:
+ if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ // Inline the string as an immediate integer in GLSL (some extra arguments are
+ // required to be constant)
+ expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else {
+ expr += "ftoi(" + Visit(operand) + ')';
}
+ break;
+ case Type::Float:
+ expr += Visit(operand);
+ break;
+ default: {
+ const auto type_int = static_cast<u32>(type);
+ UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
+ expr += '0';
+ break;
+ }
+ }
+ return expr;
+ }
+
+ std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
+ if (aoffi.empty()) {
+ return {};
+ }
+ constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
+ std::string expr = ", ";
+ expr += coord_constructors.at(aoffi.size() - 1);
+ expr += '(';
+
+ for (std::size_t index = 0; index < aoffi.size(); ++index) {
+ const auto operand{aoffi.at(index)};
+ if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
+ // to be constant by the standard).
+ expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else {
+ expr += "ftoi(" + Visit(operand) + ')';
+ }
+ if (index + 1 < aoffi.size()) {
+ expr += ", ";
}
}
+ expr += ')';
- return expr + ')';
+ return expr;
}
std::string Assign(Operation operation) {
@@ -1159,7 +1208,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
+ std::string expr = GenerateTexture(
+ operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1170,7 +1220,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
+ std::string expr = GenerateTexture(
+ operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1182,7 +1233,8 @@ private:
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
+ return GenerateTexture(operation, "Gather",
+ {TextureArgument{type, meta->component}, TextureAoffi{}}) +
GetSwizzle(meta->element);
}
@@ -1196,11 +1248,12 @@ private:
switch (meta->element) {
case 0:
case 1:
- return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element);
+ return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
+ GetSwizzle(meta->element) + "))";
case 2:
return "0";
case 3:
- return "textureQueryLevels(" + sampler + ')';
+ return "itof(textureQueryLevels(" + sampler + "))";
}
UNREACHABLE();
return "0";
@@ -1211,8 +1264,8 @@ private:
ASSERT(meta);
if (meta->element < 2) {
- return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
- " * vec2(256))" + GetSwizzle(meta->element) + "))";
+ return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
+ GetSwizzle(meta->element) + "))";
}
return "0";
}
@@ -1565,6 +1618,8 @@ private:
ShaderWriter code;
};
+} // Anonymous namespace
+
std::string GetCommonDeclarations() {
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 72aca4938..4e04ab2f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,7 +5,6 @@
#pragma once
#include <array>
-#include <set>
#include <string>
#include <utility>
#include <vector>
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 82fc4d44b..8a43eb157 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -4,7 +4,6 @@
#include <cstring>
#include <fmt/format.h>
-#include <lz4.h>
#include "common/assert.h"
#include "common/common_paths.h"
@@ -12,6 +11,7 @@
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
+#include "common/zstd_compression.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
@@ -49,39 +49,6 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
return hash;
}
-template <typename T>
-std::vector<u8> CompressData(const T* source, std::size_t source_size) {
- if (source_size > LZ4_MAX_INPUT_SIZE) {
- // Source size exceeds LZ4 maximum input size
- return {};
- }
- const auto source_size_int = static_cast<int>(source_size);
- const int max_compressed_size = LZ4_compressBound(source_size_int);
- std::vector<u8> compressed(max_compressed_size);
- const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
- reinterpret_cast<char*>(compressed.data()),
- source_size_int, max_compressed_size);
- if (compressed_size <= 0) {
- // Compression failed
- return {};
- }
- compressed.resize(compressed_size);
- return compressed;
-}
-
-std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
- std::vector<u8> uncompressed(uncompressed_size);
- const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
- reinterpret_cast<char*>(uncompressed.data()),
- static_cast<int>(compressed.size()),
- static_cast<int>(uncompressed.size()));
- if (static_cast<int>(uncompressed_size) != size_check) {
- // Decompression failed
- return {};
- }
- return uncompressed;
-}
-
} // namespace
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
@@ -292,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- dump.binary = DecompressData(compressed_binary, binary_length);
+ dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
if (dump.binary.empty()) {
return {};
}
@@ -321,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
- const std::vector<u8> code = DecompressData(compressed_code, code_size);
+ const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
if (code.empty()) {
return {};
}
@@ -507,7 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
if (!IsUsable())
return;
- const std::vector<u8> compressed_code{CompressData(code.data(), code.size())};
+ const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
+ reinterpret_cast<const u8*>(code.data()), code.size())};
if (compressed_code.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
unique_identifier);
@@ -537,7 +505,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
- const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
+ const std::vector<u8> compressed_binary =
+ Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
+
if (compressed_binary.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
usage.unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 7d96649af..8763d9c71 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <fmt/format.h>
-#include "common/assert.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index fba8e681b..fad346b48 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,12 +4,9 @@
#pragma once
-#include <array>
-#include <string>
#include <vector>
#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 6a30c28d2..eaf3e03a0 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,15 +2,15 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "core/core.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL::GLShader {
-void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- const auto& regs = gpu.regs;
- const auto& state = gpu.state;
+using Tegra::Engines::Maxwell3D;
+
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+ const auto& regs = maxwell.regs;
+ const auto& state = maxwell.state;
// TODO(bunnei): Support more than one viewport
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
@@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
u32 func = static_cast<u32>(regs.alpha_test_func);
// Normalize the gl variants of opCompare to be the same as the normal variants
- u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never);
+ const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
if (func >= op_gl_variant_base) {
func = func - op_gl_variant_base + 1U;
}
@@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
// Assign in which stage the position has to be flipped
// (the last stage before the fragment shader).
- if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
- flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+ constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+ if (maxwell.regs.shader_config[geometry_index].enable) {
+ flip_stage = geometry_index;
} else {
flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 4970aafed..37dcfefdb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,14 +12,13 @@
namespace OpenGL::GLShader {
-using Tegra::Engines::Maxwell3D;
-
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-// Not following that rule will cause problems on some AMD drivers.
+/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+/// Not following that rule will cause problems on some AMD drivers.
struct MaxwellUniformData {
- void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
+ void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
+
alignas(16) GLvec4 viewport_flip;
struct alignas(16) {
GLuint instance_id;
@@ -63,7 +62,6 @@ public:
UpdatePipeline();
state.draw.shader_program = 0;
state.draw.program_pipeline = pipeline.handle;
- state.geometry_shaders.enabled = (gs != 0);
}
private:
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 9419326a3..52d569a1b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -10,16 +10,62 @@
namespace OpenGL {
-OpenGLState OpenGLState::cur_state;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+OpenGLState OpenGLState::cur_state;
bool OpenGLState::s_rgb_used;
+namespace {
+
+template <typename T>
+bool UpdateValue(T& current_value, const T new_value) {
+ const bool changed = current_value != new_value;
+ current_value = new_value;
+ return changed;
+}
+
+template <typename T1, typename T2>
+bool UpdateTie(T1 current_value, const T2 new_value) {
+ const bool changed = current_value != new_value;
+ current_value = new_value;
+ return changed;
+}
+
+void Enable(GLenum cap, bool enable) {
+ if (enable) {
+ glEnable(cap);
+ } else {
+ glDisable(cap);
+ }
+}
+
+void Enable(GLenum cap, GLuint index, bool enable) {
+ if (enable) {
+ glEnablei(cap, index);
+ } else {
+ glDisablei(cap, index);
+ }
+}
+
+void Enable(GLenum cap, bool& current_value, bool new_value) {
+ if (UpdateValue(current_value, new_value))
+ Enable(cap, new_value);
+}
+
+void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
+ if (UpdateValue(current_value, new_value))
+ Enable(cap, index, new_value);
+}
+
+} // namespace
+
OpenGLState::OpenGLState() {
// These all match default OpenGL values
- geometry_shaders.enabled = false;
framebuffer_srgb.enabled = false;
+
multisample_control.alpha_to_coverage = false;
multisample_control.alpha_to_one = false;
+
cull.enabled = false;
cull.mode = GL_BACK;
cull.front_face = GL_CCW;
@@ -30,14 +76,15 @@ OpenGLState::OpenGLState() {
primitive_restart.enabled = false;
primitive_restart.index = 0;
+
for (auto& item : color_mask) {
item.red_enabled = GL_TRUE;
item.green_enabled = GL_TRUE;
item.blue_enabled = GL_TRUE;
item.alpha_enabled = GL_TRUE;
}
- stencil.test_enabled = false;
- auto reset_stencil = [](auto& config) {
+
+ const auto ResetStencil = [](auto& config) {
config.test_func = GL_ALWAYS;
config.test_ref = 0;
config.test_mask = 0xFFFFFFFF;
@@ -46,8 +93,10 @@ OpenGLState::OpenGLState() {
config.action_depth_pass = GL_KEEP;
config.action_stencil_fail = GL_KEEP;
};
- reset_stencil(stencil.front);
- reset_stencil(stencil.back);
+ stencil.test_enabled = false;
+ ResetStencil(stencil.front);
+ ResetStencil(stencil.back);
+
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
@@ -61,6 +110,7 @@ OpenGLState::OpenGLState() {
item.scissor.width = 0;
item.scissor.height = 0;
}
+
for (auto& item : blend) {
item.enabled = true;
item.rgb_equation = GL_FUNC_ADD;
@@ -70,11 +120,14 @@ OpenGLState::OpenGLState() {
item.src_a_func = GL_ONE;
item.dst_a_func = GL_ZERO;
}
+
independant_blend.enabled = false;
+
blend_color.red = 0.0f;
blend_color.green = 0.0f;
blend_color.blue = 0.0f;
blend_color.alpha = 0.0f;
+
logic_op.enabled = false;
logic_op.operation = GL_COPY;
@@ -91,9 +144,12 @@ OpenGLState::OpenGLState() {
clip_distance = {};
point.size = 1;
+
fragment_color_clamp.enabled = false;
+
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
+
polygon_offset.fill_enable = false;
polygon_offset.line_enable = false;
polygon_offset.point_enable = false;
@@ -103,260 +159,255 @@ OpenGLState::OpenGLState() {
}
void OpenGLState::ApplyDefaultState() {
+ glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glDisable(GL_PRIMITIVE_RESTART);
glDisable(GL_STENCIL_TEST);
- glEnable(GL_BLEND);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_SCISSOR_TEST);
}
+void OpenGLState::ApplyFramebufferState() const {
+ if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
+ }
+ if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
+ }
+}
+
+void OpenGLState::ApplyVertexArrayState() const {
+ if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
+ glBindVertexArray(draw.vertex_array);
+ }
+}
+
+void OpenGLState::ApplyShaderProgram() const {
+ if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
+ glUseProgram(draw.shader_program);
+ }
+}
+
+void OpenGLState::ApplyProgramPipeline() const {
+ if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
+ glBindProgramPipeline(draw.program_pipeline);
+ }
+}
+
+void OpenGLState::ApplyClipDistances() const {
+ for (std::size_t i = 0; i < clip_distance.size(); ++i) {
+ Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
+ clip_distance[i]);
+ }
+}
+
+void OpenGLState::ApplyPointSize() const {
+ if (UpdateValue(cur_state.point.size, point.size)) {
+ glPointSize(point.size);
+ }
+}
+
+void OpenGLState::ApplyFragmentColorClamp() const {
+ if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
+ fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
+ }
+}
+
+void OpenGLState::ApplyMultisample() const {
+ Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
+ multisample_control.alpha_to_coverage);
+ Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
+ multisample_control.alpha_to_one);
+}
+
+void OpenGLState::ApplyDepthClamp() const {
+ if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
+ depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
+ return;
+ }
+ cur_state.depth_clamp = depth_clamp;
+
+ UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
+ "Unimplemented Depth Clamp Separation!");
+
+ Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
+}
+
void OpenGLState::ApplySRgb() const {
- if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
- if (framebuffer_srgb.enabled) {
- // Track if sRGB is used
- s_rgb_used = true;
- glEnable(GL_FRAMEBUFFER_SRGB);
- } else {
- glDisable(GL_FRAMEBUFFER_SRGB);
- }
+ if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
+ return;
+ cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
+ if (framebuffer_srgb.enabled) {
+ // Track if sRGB is used
+ s_rgb_used = true;
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ } else {
+ glDisable(GL_FRAMEBUFFER_SRGB);
}
}
void OpenGLState::ApplyCulling() const {
- if (cull.enabled != cur_state.cull.enabled) {
- if (cull.enabled) {
- glEnable(GL_CULL_FACE);
- } else {
- glDisable(GL_CULL_FACE);
- }
- }
+ Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
- if (cull.mode != cur_state.cull.mode) {
+ if (UpdateValue(cur_state.cull.mode, cull.mode)) {
glCullFace(cull.mode);
}
- if (cull.front_face != cur_state.cull.front_face) {
+ if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
glFrontFace(cull.front_face);
}
}
void OpenGLState::ApplyColorMask() const {
- if (independant_blend.enabled) {
- for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- const auto& updated = color_mask[i];
- const auto& current = cur_state.color_mask[i];
- if (updated.red_enabled != current.red_enabled ||
- updated.green_enabled != current.green_enabled ||
- updated.blue_enabled != current.blue_enabled ||
- updated.alpha_enabled != current.alpha_enabled) {
- glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
- updated.blue_enabled, updated.alpha_enabled);
- }
- }
- } else {
- const auto& updated = color_mask[0];
- const auto& current = cur_state.color_mask[0];
+ for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+ const auto& updated = color_mask[i];
+ auto& current = cur_state.color_mask[i];
if (updated.red_enabled != current.red_enabled ||
updated.green_enabled != current.green_enabled ||
updated.blue_enabled != current.blue_enabled ||
updated.alpha_enabled != current.alpha_enabled) {
- glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
- updated.alpha_enabled);
+ current = updated;
+ glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
+ updated.blue_enabled, updated.alpha_enabled);
}
}
}
void OpenGLState::ApplyDepth() const {
- if (depth.test_enabled != cur_state.depth.test_enabled) {
- if (depth.test_enabled) {
- glEnable(GL_DEPTH_TEST);
- } else {
- glDisable(GL_DEPTH_TEST);
- }
- }
+ Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
- if (depth.test_func != cur_state.depth.test_func) {
+ if (cur_state.depth.test_func != depth.test_func) {
+ cur_state.depth.test_func = depth.test_func;
glDepthFunc(depth.test_func);
}
- if (depth.write_mask != cur_state.depth.write_mask) {
+ if (cur_state.depth.write_mask != depth.write_mask) {
+ cur_state.depth.write_mask = depth.write_mask;
glDepthMask(depth.write_mask);
}
}
void OpenGLState::ApplyPrimitiveRestart() const {
- if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
- if (primitive_restart.enabled) {
- glEnable(GL_PRIMITIVE_RESTART);
- } else {
- glDisable(GL_PRIMITIVE_RESTART);
- }
- }
+ Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
- if (primitive_restart.index != cur_state.primitive_restart.index) {
+ if (cur_state.primitive_restart.index != primitive_restart.index) {
+ cur_state.primitive_restart.index = primitive_restart.index;
glPrimitiveRestartIndex(primitive_restart.index);
}
}
void OpenGLState::ApplyStencilTest() const {
- if (stencil.test_enabled != cur_state.stencil.test_enabled) {
- if (stencil.test_enabled) {
- glEnable(GL_STENCIL_TEST);
- } else {
- glDisable(GL_STENCIL_TEST);
- }
- }
-
- const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
- if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
- config.test_mask != prev_config.test_mask) {
+ Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
+
+ const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
+ if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
+ current.test_mask != config.test_mask) {
+ current.test_func = config.test_func;
+ current.test_ref = config.test_ref;
+ current.test_mask = config.test_mask;
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
- if (config.action_depth_fail != prev_config.action_depth_fail ||
- config.action_depth_pass != prev_config.action_depth_pass ||
- config.action_stencil_fail != prev_config.action_stencil_fail) {
+ if (current.action_depth_fail != config.action_depth_fail ||
+ current.action_depth_pass != config.action_depth_pass ||
+ current.action_stencil_fail != config.action_stencil_fail) {
+ current.action_depth_fail = config.action_depth_fail;
+ current.action_depth_pass = config.action_depth_pass;
+ current.action_stencil_fail = config.action_stencil_fail;
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
- if (config.write_mask != prev_config.write_mask) {
+ if (current.write_mask != config.write_mask) {
+ current.write_mask = config.write_mask;
glStencilMaskSeparate(face, config.write_mask);
}
};
ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
-// Viewport does not affects glClearBuffer so emulate viewport using scissor test
-void OpenGLState::EmulateViewportWithScissor() {
- auto& current = viewports[0];
- if (current.scissor.enabled) {
- const GLint left = std::max(current.x, current.scissor.x);
- const GLint right =
- std::max(current.x + current.width, current.scissor.x + current.scissor.width);
- const GLint bottom = std::max(current.y, current.scissor.y);
- const GLint top =
- std::max(current.y + current.height, current.scissor.y + current.scissor.height);
- current.scissor.x = std::max(left, 0);
- current.scissor.y = std::max(bottom, 0);
- current.scissor.width = std::max(right - left, 0);
- current.scissor.height = std::max(top - bottom, 0);
- } else {
- current.scissor.enabled = true;
- current.scissor.x = current.x;
- current.scissor.y = current.y;
- current.scissor.width = current.width;
- current.scissor.height = current.height;
- }
-}
void OpenGLState::ApplyViewport() const {
- if (geometry_shaders.enabled) {
- for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports);
- i++) {
- const auto& current = cur_state.viewports[i];
- const auto& updated = viewports[i];
- if (updated.x != current.x || updated.y != current.y ||
- updated.width != current.width || updated.height != current.height) {
- glViewportIndexedf(
- i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
- static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height));
- }
- if (updated.depth_range_near != current.depth_range_near ||
- updated.depth_range_far != current.depth_range_far) {
- glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
- }
-
- if (updated.scissor.enabled != current.scissor.enabled) {
- if (updated.scissor.enabled) {
- glEnablei(GL_SCISSOR_TEST, i);
- } else {
- glDisablei(GL_SCISSOR_TEST, i);
- }
- }
-
- if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
- updated.scissor.width != current.scissor.width ||
- updated.scissor.height != current.scissor.height) {
- glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
- updated.scissor.height);
- }
- }
- } else {
- const auto& current = cur_state.viewports[0];
- const auto& updated = viewports[0];
- if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
- updated.height != current.height) {
- glViewport(updated.x, updated.y, updated.width, updated.height);
- }
-
- if (updated.depth_range_near != current.depth_range_near ||
- updated.depth_range_far != current.depth_range_far) {
- glDepthRange(updated.depth_range_near, updated.depth_range_far);
+ for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
+ const auto& updated = viewports[i];
+ auto& current = cur_state.viewports[i];
+
+ if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
+ current.height != updated.height) {
+ current.x = updated.x;
+ current.y = updated.y;
+ current.width = updated.width;
+ current.height = updated.height;
+ glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
+ static_cast<GLfloat>(updated.width),
+ static_cast<GLfloat>(updated.height));
}
-
- if (updated.scissor.enabled != current.scissor.enabled) {
- if (updated.scissor.enabled) {
- glEnable(GL_SCISSOR_TEST);
- } else {
- glDisable(GL_SCISSOR_TEST);
- }
+ if (current.depth_range_near != updated.depth_range_near ||
+ current.depth_range_far != updated.depth_range_far) {
+ current.depth_range_near = updated.depth_range_near;
+ current.depth_range_far = updated.depth_range_far;
+ glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
- if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
- updated.scissor.width != current.scissor.width ||
- updated.scissor.height != current.scissor.height) {
- glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
- updated.scissor.height);
+ Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
+
+ if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
+ current.scissor.width != updated.scissor.width ||
+ current.scissor.height != updated.scissor.height) {
+ current.scissor.x = updated.scissor.x;
+ current.scissor.y = updated.scissor.y;
+ current.scissor.width = updated.scissor.width;
+ current.scissor.height = updated.scissor.height;
+ glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
+ updated.scissor.height);
}
}
}
void OpenGLState::ApplyGlobalBlending() const {
- const Blend& current = cur_state.blend[0];
const Blend& updated = blend[0];
- if (updated.enabled != current.enabled) {
- if (updated.enabled) {
- glEnable(GL_BLEND);
- } else {
- glDisable(GL_BLEND);
- }
- }
- if (!updated.enabled) {
- return;
- }
- if (updated.src_rgb_func != current.src_rgb_func ||
- updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
- updated.dst_a_func != current.dst_a_func) {
+ Blend& current = cur_state.blend[0];
+
+ Enable(GL_BLEND, current.enabled, updated.enabled);
+
+ if (current.src_rgb_func != updated.src_rgb_func ||
+ current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
+ current.dst_a_func != updated.dst_a_func) {
+ current.src_rgb_func = updated.src_rgb_func;
+ current.dst_rgb_func = updated.dst_rgb_func;
+ current.src_a_func = updated.src_a_func;
+ current.dst_a_func = updated.dst_a_func;
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
- if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
+ if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
+ current.rgb_equation = updated.rgb_equation;
+ current.a_equation = updated.a_equation;
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
}
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
const Blend& updated = blend[target];
- const Blend& current = cur_state.blend[target];
- if (updated.enabled != current.enabled || force) {
- if (updated.enabled) {
- glEnablei(GL_BLEND, static_cast<GLuint>(target));
- } else {
- glDisablei(GL_BLEND, static_cast<GLuint>(target));
- }
+ Blend& current = cur_state.blend[target];
+
+ if (current.enabled != updated.enabled || force) {
+ current.enabled = updated.enabled;
+ Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
}
- if (updated.src_rgb_func != current.src_rgb_func ||
- updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
- updated.dst_a_func != current.dst_a_func) {
+ if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
+ current.dst_a_func),
+ std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+ updated.dst_a_func))) {
glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
- if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
+ if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
+ std::tie(updated.rgb_equation, updated.a_equation))) {
glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
@@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
void OpenGLState::ApplyBlending() const {
if (independant_blend.enabled) {
- for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- ApplyTargetBlending(i,
- independant_blend.enabled != cur_state.independant_blend.enabled);
+ const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
+ for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
+ ApplyTargetBlending(target, force);
}
} else {
ApplyGlobalBlending();
}
- if (blend_color.red != cur_state.blend_color.red ||
- blend_color.green != cur_state.blend_color.green ||
- blend_color.blue != cur_state.blend_color.blue ||
- blend_color.alpha != cur_state.blend_color.alpha) {
+ cur_state.independant_blend.enabled = independant_blend.enabled;
+
+ if (UpdateTie(
+ std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
+ cur_state.blend_color.blue, cur_state.blend_color.alpha),
+ std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
}
}
void OpenGLState::ApplyLogicOp() const {
- if (logic_op.enabled != cur_state.logic_op.enabled) {
- if (logic_op.enabled) {
- glEnable(GL_COLOR_LOGIC_OP);
- } else {
- glDisable(GL_COLOR_LOGIC_OP);
- }
- }
+ Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
- if (logic_op.operation != cur_state.logic_op.operation) {
+ if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
glLogicOp(logic_op.operation);
}
}
void OpenGLState::ApplyPolygonOffset() const {
- const bool fill_enable_changed =
- polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
- const bool line_enable_changed =
- polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
- const bool point_enable_changed =
- polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
- const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
- const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
- const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
-
- if (fill_enable_changed) {
- if (polygon_offset.fill_enable) {
- glEnable(GL_POLYGON_OFFSET_FILL);
- } else {
- glDisable(GL_POLYGON_OFFSET_FILL);
- }
- }
-
- if (line_enable_changed) {
- if (polygon_offset.line_enable) {
- glEnable(GL_POLYGON_OFFSET_LINE);
- } else {
- glDisable(GL_POLYGON_OFFSET_LINE);
- }
- }
-
- if (point_enable_changed) {
- if (polygon_offset.point_enable) {
- glEnable(GL_POLYGON_OFFSET_POINT);
- } else {
- glDisable(GL_POLYGON_OFFSET_POINT);
- }
- }
-
- if (factor_changed || units_changed || clamp_changed) {
+ Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
+ polygon_offset.fill_enable);
+ Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
+ polygon_offset.line_enable);
+ Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
+ polygon_offset.point_enable);
+
+ if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
+ cur_state.polygon_offset.clamp),
+ std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
} else {
- glPolygonOffset(polygon_offset.factor, polygon_offset.units);
UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
"Unimplemented Depth polygon offset clamp.");
+ glPolygonOffset(polygon_offset.factor, polygon_offset.units);
}
}
}
@@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
+ std::array<GLuint, Maxwell::NumTextureSamplers> textures;
for (std::size_t i = 0; i < std::size(texture_units); ++i) {
const auto& texture_unit = texture_units[i];
- const auto& cur_state_texture_unit = cur_state.texture_units[i];
+ auto& cur_state_texture_unit = cur_state.texture_units[i];
textures[i] = texture_unit.texture;
-
- if (textures[i] != cur_state_texture_unit.texture) {
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+ if (cur_state_texture_unit.texture == textures[i])
+ continue;
+ cur_state_texture_unit.texture = textures[i];
+ if (!has_delta) {
+ first = i;
+ has_delta = true;
}
+ last = i;
}
-
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
textures.data() + first);
@@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
+ std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
+
for (std::size_t i = 0; i < std::size(samplers); ++i) {
+ if (cur_state.texture_units[i].sampler == texture_units[i].sampler)
+ continue;
+ cur_state.texture_units[i].sampler = texture_units[i].sampler;
samplers[i] = texture_units[i].sampler;
- if (samplers[i] != cur_state.texture_units[i].sampler) {
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+ if (!has_delta) {
+ first = i;
+ has_delta = true;
}
+ last = i;
}
if (has_delta) {
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
@@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const {
}
}
-void OpenGLState::ApplyFramebufferState() const {
- if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
- glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
- }
- if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
- }
-}
-
-void OpenGLState::ApplyVertexArrayState() const {
- if (draw.vertex_array != cur_state.draw.vertex_array) {
- glBindVertexArray(draw.vertex_array);
- }
-}
-
-void OpenGLState::ApplyDepthClamp() const {
- if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
- depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
- return;
- }
- UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
- "Unimplemented Depth Clamp Separation!");
-
- if (depth_clamp.far_plane || depth_clamp.near_plane) {
- glEnable(GL_DEPTH_CLAMP);
- } else {
- glDisable(GL_DEPTH_CLAMP);
- }
-}
-
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexArrayState();
-
- // Shader program
- if (draw.shader_program != cur_state.draw.shader_program) {
- glUseProgram(draw.shader_program);
- }
-
- // Program pipeline
- if (draw.program_pipeline != cur_state.draw.program_pipeline) {
- glBindProgramPipeline(draw.program_pipeline);
- }
- // Clip distance
- for (std::size_t i = 0; i < clip_distance.size(); ++i) {
- if (clip_distance[i] != cur_state.clip_distance[i]) {
- if (clip_distance[i]) {
- glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
- } else {
- glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
- }
- }
- }
- // Point
- if (point.size != cur_state.point.size) {
- glPointSize(point.size);
- }
- if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
- glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
- fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
- }
- if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
- if (multisample_control.alpha_to_coverage) {
- glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
- } else {
- glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
- }
- }
- if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
- if (multisample_control.alpha_to_one) {
- glEnable(GL_SAMPLE_ALPHA_TO_ONE);
- } else {
- glDisable(GL_SAMPLE_ALPHA_TO_ONE);
- }
- }
+ ApplyShaderProgram();
+ ApplyProgramPipeline();
+ ApplyClipDistances();
+ ApplyPointSize();
+ ApplyFragmentColorClamp();
+ ApplyMultisample();
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
@@ -574,7 +531,28 @@ void OpenGLState::Apply() const {
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
- cur_state = *this;
+}
+
+void OpenGLState::EmulateViewportWithScissor() {
+ auto& current = viewports[0];
+ if (current.scissor.enabled) {
+ const GLint left = std::max(current.x, current.scissor.x);
+ const GLint right =
+ std::max(current.x + current.width, current.scissor.x + current.scissor.width);
+ const GLint bottom = std::max(current.y, current.scissor.y);
+ const GLint top =
+ std::max(current.y + current.height, current.scissor.y + current.scissor.height);
+ current.scissor.x = std::max(left, 0);
+ current.scissor.y = std::max(bottom, 0);
+ current.scissor.width = std::max(right - left, 0);
+ current.scissor.height = std::max(top - bottom, 0);
+ } else {
+ current.scissor.enabled = true;
+ current.scissor.x = current.x;
+ current.scissor.y = current.y;
+ current.scissor.width = current.width;
+ current.scissor.height = current.height;
+ }
}
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 9e1eda5b1..41418a7b8 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -54,10 +54,6 @@ public:
} depth_clamp; // GL_DEPTH_CLAMP
struct {
- bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
- } geometry_shaders;
-
- struct {
bool enabled; // GL_CULL_FACE
GLenum mode; // GL_CULL_FACE_MODE
GLenum front_face; // GL_FRONT_FACE
@@ -184,34 +180,26 @@ public:
/// Returns a copy of the static cur_state member.
static OpenGLState GetCurState() {
return cur_state;
}
+
/// Returns the current value of the static s_rgb_used flag.
static bool GetsRGBUsed() {
return s_rgb_used;
}
+
/// Resets the static s_rgb_used flag to false.
static void ClearsRGBUsed() {
s_rgb_used = false;
}
+
/// Apply this state as the current OpenGL state
void Apply() const;
- /// Apply only the state affecting the framebuffer
+
void ApplyFramebufferState() const;
- /// Apply only the state affecting the vertex array
void ApplyVertexArrayState() const;
- /// Set the initial OpenGL state
- static void ApplyDefaultState();
- /// Resets any references to the given resource
- OpenGLState& UnbindTexture(GLuint handle);
- OpenGLState& ResetSampler(GLuint handle);
- OpenGLState& ResetProgram(GLuint handle);
- OpenGLState& ResetPipeline(GLuint handle);
- OpenGLState& ResetVertexArray(GLuint handle);
- OpenGLState& ResetFramebuffer(GLuint handle);
- void EmulateViewportWithScissor();
-
-private:
- static OpenGLState cur_state;
- // Workaround for sRGB problems caused by
- // QT not supporting srgb output
- static bool s_rgb_used;
+ void ApplyShaderProgram() const;
+ void ApplyProgramPipeline() const;
+ void ApplyClipDistances() const;
+ void ApplyPointSize() const;
+ void ApplyFragmentColorClamp() const;
+ void ApplyMultisample() const;
void ApplySRgb() const;
void ApplyCulling() const;
void ApplyColorMask() const;
@@ -227,6 +215,26 @@ private:
void ApplySamplers() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
+
+ /// Set the initial OpenGL state
+ static void ApplyDefaultState();
+
+ /// Resets any references to the given resource
+ OpenGLState& UnbindTexture(GLuint handle);
+ OpenGLState& ResetSampler(GLuint handle);
+ OpenGLState& ResetProgram(GLuint handle);
+ OpenGLState& ResetPipeline(GLuint handle);
+ OpenGLState& ResetVertexArray(GLuint handle);
+ OpenGLState& ResetFramebuffer(GLuint handle);
+
+ /// Viewport does not affect glClearBuffer, so emulate the viewport using the scissor test
+ void EmulateViewportWithScissor();
+
+private:
+ static OpenGLState cur_state;
+
+ // Workaround for sRGB problems caused by Qt not supporting sRGB output
+ static bool s_rgb_used;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b97576309..d69cba9c3 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,7 +5,6 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
-#include <cstring>
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
@@ -164,8 +163,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
- Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
- Memory::FlushMode::Flush);
+ rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
constexpr u32 linear_bpp = 4;
VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
@@ -267,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() {
}
// Initialize sRGB Usage
OpenGLState::ClearsRGBUsed();
- rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
+ rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,