aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h11
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp47
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h9
6 files changed, 65 insertions, 42 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index b7e9ed2e9..f4db62787 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -376,9 +376,11 @@ private:
std::string temporary = AllocTemporary();
std::string address;
std::string_view opname;
+ bool robust = false;
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
address = GlobalMemoryPointer(*gmem);
opname = "ATOM";
+ robust = true;
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
opname = "ATOMS";
@@ -386,7 +388,15 @@ private:
UNREACHABLE();
return "{0, 0, 0, 0}";
}
+ if (robust) {
+ AddLine("IF NE.x;");
+ }
AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
+ if (robust) {
+ AddLine("ELSE;");
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("ENDIF;");
+ }
return temporary;
}
@@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() {
}
void ARBDecompiler::DeclareGlobalMemory() {
- const std::size_t num_entries = ir.GetGlobalMemory().size();
+ const size_t num_entries = ir.GetGlobalMemory().size();
if (num_entries > 0) {
- const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
- AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
+ AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
}
}
@@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) {
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
std::string temporary = AllocTemporary();
- AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
+ AddLine("MOV {}, 0;", temporary);
+ AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
return temporary;
}
@@ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
}
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
+ // Read a bindless SSBO, return its address and set CC accordingly
+ // address = c[binding].xy
+ // length = c[binding].z
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
- const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
const std::string pointer = AllocLongVectorTemporary();
std::string temporary = AllocTemporary();
- const u32 local_index = binding / 2;
- AddLine("PK64.U {}, c[{}];", pointer, local_index);
+ AddLine("PK64.U {}, c[{}];", pointer, binding);
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
Visit(gmem.GetBaseAddress()));
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
- AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
+ AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
+ // Compare offset to length and set CC
+ AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
return fmt::format("{}.x", pointer);
}
@@ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) {
ResetTemporaries();
return {};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
+ AddLine("IF NE.x;");
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
+ AddLine("ENDIF;");
ResetTemporaries();
return {};
} else {
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index e7d95149f..a94e4f72e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -193,7 +193,6 @@ bool IsASTCSupported() {
Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
- const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 2bb8ec2b8..1a3d9720e 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -32,10 +32,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::MemoryManager& gpu_memory)
- : VideoCommon::QueryCacheBase<
- QueryCache, CachedQuery, CounterStream, HostCounter,
- std::vector<OGLQuery>>{static_cast<VideoCore::RasterizerInterface&>(rasterizer),
- maxwell3d, gpu_memory},
+ : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>(
+ rasterizer, maxwell3d, gpu_memory),
gl_rasterizer{rasterizer} {}
QueryCache::~QueryCache() = default;
@@ -91,6 +89,8 @@ u64 HostCounter::BlockingQuery() const {
CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
: VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
+CachedQuery::~CachedQuery() = default;
+
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
: VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index dd626b66b..82cac51ee 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -26,8 +26,8 @@ class RasterizerOpenGL;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
-class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
- HostCounter, std::vector<OGLQuery>> {
+class QueryCache final
+ : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::MemoryManager& gpu_memory);
@@ -41,6 +41,7 @@ public:
private:
RasterizerOpenGL& gl_rasterizer;
+ std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
@@ -63,10 +64,12 @@ class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
u8* host_ptr);
- CachedQuery(CachedQuery&& rhs) noexcept;
- CachedQuery(const CachedQuery&) = delete;
+ ~CachedQuery() override;
+ CachedQuery(CachedQuery&& rhs) noexcept;
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
+
+ CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bbb2eb17c..36bf92808 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
- if (num_entries == 0) {
+void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
+ if (num_ssbos == 0) {
return;
}
- if (num_entries % 2 == 1) {
- pointers[num_entries] = 0;
- }
- const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
- glProgramLocalParametersI4uivNV(target, 0, num_vectors,
- reinterpret_cast<const GLuint*>(pointers));
+ glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
+ reinterpret_cast<const GLuint*>(ssbos));
}
} // Anonymous namespace
@@ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
- static constexpr std::array PARAMETER_LUT = {
- GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+ static constexpr std::array PARAMETER_LUT{
+ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
- GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
-
+ GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
+ };
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = maxwell3d.state.shader_stages;
const auto& shader_stage = stages[stage_index];
@@ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
const auto& entries{shader->GetEntries().global_memory_entries};
- std::array<GLuint64EXT, 32> pointers;
- ASSERT(entries.size() < pointers.size());
+ std::array<BindlessSSBO, 32> ssbos;
+ ASSERT(entries.size() < ssbos.size());
const bool assembly_shaders = device.UseAssemblyShaders();
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
@@ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{gpu_memory.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+ SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
++binding;
}
if (assembly_shaders) {
- UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
+ UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
}
}
@@ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
const auto& entries{kernel->GetEntries().global_memory_entries};
- std::array<GLuint64EXT, 32> pointers;
- ASSERT(entries.size() < pointers.size());
+ std::array<BindlessSSBO, 32> ssbos;
+ ASSERT(entries.size() < ssbos.size());
u32 binding = 0;
for (const auto& entry : entries) {
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{gpu_memory.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+ SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
++binding;
}
if (device.UseAssemblyShaders()) {
- UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
+ UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
}
}
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
- GPUVAddr gpu_addr, std::size_t size,
- GLuint64EXT* pointer) {
- const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
+ GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
+ const size_t alignment{device.GetShaderStorageBufferAlignment()};
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
if (device.UseAssemblyShaders()) {
- *pointer = info.address + info.offset;
+ *ssbo = BindlessSSBO{
+ .address = static_cast<GLuint64EXT>(info.address + info.offset),
+ .length = static_cast<GLsizei>(size),
+ .padding = 0,
+ };
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
static_cast<GLsizeiptr>(size));
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f451404b2..1d0f585fa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -53,6 +53,13 @@ namespace OpenGL {
struct ScreenInfo;
struct DrawParameters;
+struct BindlessSSBO {
+ GLuint64EXT address;
+ GLsizei length;
+ GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
+
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
@@ -126,7 +133,7 @@ private:
/// Configures a global memory buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
- std::size_t size, GLuint64EXT* pointer);
+ size_t size, BindlessSSBO* ssbo);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(std::size_t stage_index, Shader* shader);