diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.h | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 47 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 9 |
6 files changed, 65 insertions, 42 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index b7e9ed2e9..f4db62787 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -376,9 +376,11 @@ private: std::string temporary = AllocTemporary(); std::string address; std::string_view opname; + bool robust = false; if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { address = GlobalMemoryPointer(*gmem); opname = "ATOM"; + robust = true; } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); opname = "ATOMS"; @@ -386,7 +388,15 @@ private: UNREACHABLE(); return "{0, 0, 0, 0}"; } + if (robust) { + AddLine("IF NE.x;"); + } AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); + if (robust) { + AddLine("ELSE;"); + AddLine("MOV.S {}, 0;", temporary); + AddLine("ENDIF;"); + } return temporary; } @@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() { } void ARBDecompiler::DeclareGlobalMemory() { - const std::size_t num_entries = ir.GetGlobalMemory().size(); + const size_t num_entries = ir.GetGlobalMemory().size(); if (num_entries > 0) { - const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); + AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); } } @@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) { if (const auto gmem = std::get_if<GmemNode>(&*node)) { std::string temporary = AllocTemporary(); - AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); + AddLine("MOV {}, 0;", temporary); + AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); return temporary; } @@ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { } std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { + // Read a bindless SSBO, return its address and set CC accordingly + // address = c[binding].xy + // length = c[binding].z const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; const std::string pointer = AllocLongVectorTemporary(); std::string temporary = AllocTemporary(); - const u32 local_index = binding / 2; - AddLine("PK64.U {}, c[{}];", pointer, local_index); + AddLine("PK64.U {}, c[{}];", pointer, binding); AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), Visit(gmem.GetBaseAddress())); AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); + AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); + // Compare offset to length and set CC + AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); return fmt::format("{}.x", pointer); } @@ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) { ResetTemporaries(); return {}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { + AddLine("IF NE.x;"); AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); + AddLine("ENDIF;"); ResetTemporaries(); return {}; } else { diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index e7d95149f..a94e4f72e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -193,7 +193,6 @@ bool IsASTCSupported() { Device::Device() : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); - const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 2bb8ec2b8..1a3d9720e 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -32,10 +32,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory) - : VideoCommon::QueryCacheBase< - QueryCache, CachedQuery, CounterStream, HostCounter, - std::vector<OGLQuery>>{static_cast<VideoCore::RasterizerInterface&>(rasterizer), - maxwell3d, gpu_memory}, + : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>( + rasterizer, maxwell3d, gpu_memory), gl_rasterizer{rasterizer} {} QueryCache::~QueryCache() = default; @@ -91,6 +89,8 @@ u64 HostCounter::BlockingQuery() const { CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} +CachedQuery::~CachedQuery() = default; + CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index dd626b66b..82cac51ee 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -26,8 +26,8 @@ class RasterizerOpenGL; using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; -class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, - HostCounter, std::vector<OGLQuery>> { +class QueryCache final + : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { public: explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory); @@ -41,6 +41,7 @@ public: private: RasterizerOpenGL& gl_rasterizer; + std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools; }; class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { @@ -63,10 +64,12 @@ class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { public: explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); - CachedQuery(CachedQuery&& rhs) noexcept; - CachedQuery(const CachedQuery&) = delete; + ~CachedQuery() override; + CachedQuery(CachedQuery&& rhs) noexcept; CachedQuery& operator=(CachedQuery&& rhs) noexcept; + + CachedQuery(const CachedQuery&) = delete; CachedQuery& operator=(const CachedQuery&) = delete; void Flush() override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bbb2eb17c..36bf92808 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } -void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { - if (num_entries == 0) { +void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { + if (num_ssbos == 0) { return; } - if (num_entries % 2 == 1) { - pointers[num_entries] = 0; - } - const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); - glProgramLocalParametersI4uivNV(target, 0, num_vectors, - reinterpret_cast<const GLuint*>(pointers)); + glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos), + reinterpret_cast<const GLuint*>(ssbos)); } } // Anonymous namespace @@ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, } void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { - static constexpr std::array PARAMETER_LUT = { - GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, + static constexpr std::array PARAMETER_LUT{ + GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, - GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; - + GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, + }; MICROPROFILE_SCOPE(OpenGL_UBO); const auto& stages = maxwell3d.state.shader_stages; const auto& shader_stage = stages[stage_index]; @@ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; const auto& entries{shader->GetEntries().global_memory_entries}; - std::array<GLuint64EXT, 32> pointers; - ASSERT(entries.size() < pointers.size()); + std::array<BindlessSSBO, 32> ssbos; + ASSERT(entries.size() < ssbos.size()); const bool assembly_shaders = device.UseAssemblyShaders(); u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; @@ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; const u32 size{gpu_memory.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); ++binding; } if (assembly_shaders) { - UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); + UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); } } @@ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; const auto& entries{kernel->GetEntries().global_memory_entries}; - std::array<GLuint64EXT, 32> pointers; - ASSERT(entries.size() < pointers.size()); + std::array<BindlessSSBO, 32> ssbos; + ASSERT(entries.size() < ssbos.size()); u32 binding = 0; for (const auto& entry : entries) { const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; const u32 size{gpu_memory.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); ++binding; } if (device.UseAssemblyShaders()) { - UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); + UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size()); } } void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, - GPUVAddr gpu_addr, std::size_t size, - GLuint64EXT* pointer) { - const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; + GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { + const size_t alignment{device.GetShaderStorageBufferAlignment()}; const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); if (device.UseAssemblyShaders()) { - *pointer = info.address + info.offset; + *ssbo = BindlessSSBO{ + .address = static_cast<GLuint64EXT>(info.address + info.offset), + .length = static_cast<GLsizei>(size), + .padding = 0, + }; } else { glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, static_cast<GLsizeiptr>(size)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f451404b2..1d0f585fa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -53,6 +53,13 @@ namespace OpenGL { struct ScreenInfo; struct DrawParameters; +struct BindlessSSBO { + GLuint64EXT address; + GLsizei length; + GLsizei padding; +}; +static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); + class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, @@ -126,7 +133,7 @@ private: /// Configures a global memory buffer. void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, - std::size_t size, GLuint64EXT* pointer); + size_t size, BindlessSSBO* ssbo); /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, Shader* shader); |
