diff options
Diffstat (limited to 'src/video_core')
21 files changed, 216 insertions, 141 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f00c71dae..d6ee82836 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -229,7 +229,7 @@ endif() create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad) +target_link_libraries(video_core PRIVATE glad xbyak) if (ENABLE_VULKAN) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 024c9e43b..004f6b261 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -457,8 +457,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. - ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, - "Units other than CROP are unimplemented"); + if (regs.query.query_get.unit != Regs::QueryUnit::Crop) { + LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented"); + } switch (regs.query.query_get.operation) { case Regs::QueryOperation::Release: @@ -534,8 +535,8 @@ void Maxwell3D::ProcessCounterReset() { rasterizer.ResetCounter(QueryType::SamplesPassed); break; default: - LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", - static_cast<int>(regs.counter_reset)); + LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", + static_cast<int>(regs.counter_reset)); break; } } @@ -592,8 +593,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { system.GPU().GetTicks()); return {}; default: - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); + LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); return 1; } } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 466a911db..e1b245288 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -166,8 +166,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} { const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_intel = vendor == "Intel"; - const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr; uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); @@ -182,7 +180,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} { has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); - has_broken_compute = is_intel_proprietary; has_fast_buffer_sub_data = is_nvidia; use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5; @@ -206,7 +203,6 @@ Device::Device(std::nullptr_t) { has_image_load_formatted = true; has_variable_aoffi = true; has_component_indexing_bug = false; - has_broken_compute = false; has_precise_bug = false; } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e915dbd86..683ed9002 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -80,10 +80,6 @@ public: return has_precise_bug; } - bool HasBrokenCompute() const { - return has_broken_compute; - } - bool HasFastBufferSubData() const { return has_fast_buffer_sub_data; } @@ -109,7 +105,6 @@ private: bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; - bool has_broken_compute{}; bool has_fast_buffer_sub_data{}; bool use_assembly_shaders{}; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index efbbf11f9..3c421dd16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -655,10 +655,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - if (device.HasBrokenCompute()) { - return; - } - buffer_cache.Acquire(); current_cbuf = 0; @@ -977,16 +973,12 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu glBindTextureUnit(binding, 0); return; } - glBindTextureUnit(binding, view->GetTexture()); - - if (view->GetSurfaceParams().IsBuffer()) { - return; + const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, + texture.tic.z_source, texture.tic.w_source); + glBindTextureUnit(binding, handle); + if (!view->GetSurfaceParams().IsBuffer()) { + glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); } - // Apply swizzle to textures that are not buffers. - view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, - texture.tic.w_source); - - glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); } void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { @@ -1015,14 +1007,11 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); return; } - if (!tic.IsBuffer()) { - view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); - } if (entry.is_written) { view->MarkAsModified(texture_cache.Tick()); } - glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE, - view->GetFormat()); + const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); + glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); } void RasterizerOpenGL::SyncViewport() { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 84ca830f4..9cb115959 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2335,7 +2335,21 @@ private: return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; } - Expression MemoryBarrierGL(Operation) { + Expression Barrier(Operation) { + if (!ir.IsDecompiled()) { + LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); + return {}; + } + code.AddLine("barrier();"); + return {}; + } + + Expression MemoryBarrierGroup(Operation) { + code.AddLine("groupMemoryBarrier();"); + return {}; + } + + Expression MemoryBarrierGlobal(Operation) { code.AddLine("memoryBarrier();"); return {}; } @@ -2581,7 +2595,9 @@ private: &GLSLDecompiler::ThreadMask<Func::Lt>, &GLSLDecompiler::ShuffleIndexed, - &GLSLDecompiler::MemoryBarrierGL, + &GLSLDecompiler::Barrier, + &GLSLDecompiler::MemoryBarrierGroup, + &GLSLDecompiler::MemoryBarrierGlobal, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 96605db84..8e754fa90 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -47,6 +47,10 @@ void ProgramManager::BindHostPipeline(GLuint pipeline) { old_state.geometry = 0; glDisable(GL_GEOMETRY_PROGRAM_NV); } + } else { + if (!is_graphics_bound) { + glUseProgram(0); + } } glBindProgramPipeline(pipeline); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 94fbd2a22..4faa8b90c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -35,7 +35,7 @@ MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", namespace { struct FormatTuple { - GLint internal_format; + GLenum internal_format; GLenum format = GL_NONE; GLenum type = GL_NONE; }; @@ -238,6 +238,12 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte return texture; } +constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, + SwizzleSource w_source) { + return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | + (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); +} + } // Anonymous namespace CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, @@ -381,7 +387,7 @@ void CachedSurface::DecorateSurfaceName() { } void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { - LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix); + LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); } View CachedSurface::CreateView(const ViewParams& view_key) { @@ -397,14 +403,13 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr } CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, - const bool is_proxy) - : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { - target = GetTextureTarget(params.target); - format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format; + bool is_proxy) + : VideoCommon::ViewBase(params), surface{surface}, + format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format}, + target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { if (!is_proxy) { - texture_view = CreateTextureView(); + main_view = CreateTextureView(); } - swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); } CachedSurfaceView::~CachedSurfaceView() = default; @@ -447,27 +452,49 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { } } -void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, +GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, SwizzleSource w_source) { - u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (new_swizzle == swizzle) - return; - swizzle = new_swizzle; - const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), - GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; - const GLuint handle = GetTexture(); - const PixelFormat format = surface.GetSurfaceParams().pixel_format; - switch (format) { + if (GetSurfaceParams().IsBuffer()) { + return GetTexture(); + } + const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); + if (current_swizzle == new_swizzle) { + return current_view; + } + current_swizzle = new_swizzle; + + const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); + OGLTextureView& view = entry->second; + if (!is_cache_miss) { + current_view = view.handle; + return view.handle; + } + view = CreateTextureView(); + current_view = view.handle; + + std::array swizzle{x_source, y_source, z_source, w_source}; + + switch (const PixelFormat format = GetSurfaceParams().pixel_format) { case PixelFormat::Z24S8: case PixelFormat::Z32FS8: case PixelFormat::S8Z24: - glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); + glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, GetComponent(format, x_source == SwizzleSource::R)); + + // Make sure we sample the first component + std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { + return value == SwizzleSource::G ? SwizzleSource::R : value; + }); + [[fallthrough]]; + default: { + const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), + GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; + glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); break; - default: - glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); - break; } + } + return view.handle; } OGLTextureView CachedSurfaceView::CreateTextureView() const { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 02d9981a1..8a2ac8603 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -83,7 +83,7 @@ public: /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment, GLenum target) const; - void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, + GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source); @@ -98,7 +98,7 @@ public: if (is_proxy) { return surface.GetTexture(); } - return texture_view.handle; + return main_view.handle; } GLenum GetFormat() const { @@ -110,23 +110,19 @@ public: } private: - u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source) const { - return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | - (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); - } - OGLTextureView CreateTextureView() const; CachedSurface& surface; - GLenum target{}; - GLenum format{}; + const GLenum format; + const GLenum target; + const bool is_proxy; + + std::unordered_map<u32, OGLTextureView> view_cache; + OGLTextureView main_view; - OGLTextureView texture_view; - u32 swizzle{}; - bool is_proxy{}; + // Use an invalid default so it always fails the comparison test + u32 current_swizzle = 0xffffffff; + GLuint current_view = 0; }; class TextureCacheOpenGL final : public TextureCacheBase { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6b489e6db..e7952924a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() { bool RendererOpenGL::Init() { if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); + if (Settings::values.renderer_debug) { + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + } glDebugMessageCallback(DebugHandler, nullptr); } diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 12be691a5..2871035f5 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -142,7 +142,7 @@ struct FormatTuple { {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 - {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 + {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8 {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F @@ -168,7 +168,7 @@ struct FormatTuple { {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 - {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB + {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index f0c491d00..750e5a0ca 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -104,6 +104,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_R16_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, VK_FORMAT_R4G4B4A4_UNORM_PACK16, VK_FORMAT_D32_SFLOAT, VK_FORMAT_D16_UNORM, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5c7b7945..65a1c6245 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { ASSERT(point_size != 0.0f); } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); + const auto& attribute = fixed_state.vertex_input.attributes[i]; + specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; + specialization.attribute_types[i] = attribute.Type(); } specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index be5b77fae..a3d992ed3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { const auto& attrib = regs.vertex_attrib_format[index]; - if (!attrib.IsValid()) { + if (attrib.IsConstant()) { vertex_input.SetAttribute(index, false, 0, 0, {}, {}); continue; } - - [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; - ASSERT(buffer.IsEnabled()); - vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), attrib.size.Value()); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b8169d832..a13e8baa7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -741,8 +741,10 @@ private: if (!IsGenericAttribute(index)) { continue; } - const u32 location = GetGenericAttributeLocation(index); + if (!IsAttributeEnabled(location)) { + continue; + } const auto type_descriptor = GetAttributeType(location); Id type; if (IsInputAttributeArray()) { @@ -986,6 +988,10 @@ private: return stage == ShaderType::TesselationControl; } + bool IsAttributeEnabled(u32 location) const { + return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; + } + u32 GetNumInputVertices() const { switch (stage) { case ShaderType::Geometry: @@ -1201,16 +1207,20 @@ private: UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); return {v_float_zero, Type::Float}; default: - if (IsGenericAttribute(attribute)) { - const u32 location = GetGenericAttributeLocation(attribute); - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; + if (!IsGenericAttribute(attribute)) { + break; } - break; + const u32 location = GetGenericAttributeLocation(attribute); + if (!IsAttributeEnabled(location)) { + // Disabled attributes (also known as constant attributes) always return zero. + return {v_float_zero, Type::Float}; + } + const auto type_descriptor = GetAttributeType(location); + const Type type = type_descriptor.type; + const Id attribute_id = input_attributes.at(attribute); + const std::vector elements = {element}; + const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); + return {OpLoad(GetTypeDefinition(type), pointer), type}; } UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); return {v_float_zero, Type::Float}; @@ -2199,8 +2209,24 @@ private: return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; } - Expression MemoryBarrierGL(Operation) { - const auto scope = spv::Scope::Device; + Expression Barrier(Operation) { + if (!ir.IsDecompiled()) { + LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); + return {}; + } + + const auto scope = spv::Scope::Workgroup; + const auto memory = spv::Scope::Workgroup; + const auto semantics = + spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; + OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)), + Constant(t_uint, static_cast<u32>(memory)), + Constant(t_uint, static_cast<u32>(semantics))); + return {}; + } + + template <spv::Scope scope> + Expression MemoryBarrier(Operation) { const auto semantics = spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | spv::MemorySemanticsMask::WorkgroupMemory | @@ -2664,7 +2690,9 @@ private: &SPIRVDecompiler::ThreadMask<4>, // Lt &SPIRVDecompiler::ShuffleIndexed, - &SPIRVDecompiler::MemoryBarrierGL, + &SPIRVDecompiler::Barrier, + &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, + &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f4c05ac3c..b7af26388 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -88,7 +88,8 @@ struct Specialization final { u32 shared_memory_size{}; // Graphics specific - std::optional<float> point_size{}; + std::optional<float> point_size; + std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; bool ndc_minus_one_to_one{}; }; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 55f43e61b..2f1d5021d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -354,26 +354,23 @@ CachedSurfaceView::~CachedSurfaceView() = default; VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, SwizzleSource w_source) { - const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (last_image_view && last_swizzle == swizzle) { + const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); + if (last_image_view && last_swizzle == new_swizzle) { return last_image_view; } - last_swizzle = swizzle; + last_swizzle = new_swizzle; - const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle); + const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); auto& image_view = entry->second; if (!is_cache_miss) { return last_image_view = *image_view; } - auto swizzle_x = MaxwellToVK::SwizzleSource(x_source); - auto swizzle_y = MaxwellToVK::SwizzleSource(y_source); - auto swizzle_z = MaxwellToVK::SwizzleSource(z_source); - auto swizzle_w = MaxwellToVK::SwizzleSource(w_source); - + std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), + MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. - std::swap(swizzle_x, swizzle_z); + std::swap(swizzle[0], swizzle[2]); } // Games can sample depth or stencil values on textures. This is decided by the swizzle value on @@ -395,11 +392,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y UNIMPLEMENTED(); } - // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity - swizzle_x = VK_COMPONENT_SWIZZLE_R; - swizzle_y = VK_COMPONENT_SWIZZLE_G; - swizzle_z = VK_COMPONENT_SWIZZLE_B; - swizzle_w = VK_COMPONENT_SWIZZLE_A; + // Make sure we sample the first component + std::transform( + swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { + return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component; + }); } VkImageViewCreateInfo ci; @@ -409,7 +406,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y ci.image = surface.GetImageHandle(); ci.viewType = image_view_type; ci.format = surface.GetImage().GetFormat(); - ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; + ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]}; ci.subresourceRange.aspectMask = aspect; ci.subresourceRange.baseMipLevel = base_level; ci.subresourceRange.levelCount = num_levels; diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 9392f065b..63adbc4a3 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -387,7 +387,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } case OpCode::Id::RED: { UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); - UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); if (!real_address || !base_address) { @@ -396,7 +395,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); + bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); break; } case OpCode::Id::ATOM: { diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 399a455c4..d00e10913 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -293,10 +293,25 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); break; } + case OpCode::Id::BAR: { + UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); + bb.push_back(Operation(OperationCode::Barrier)); + break; + } case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - bb.push_back(Operation(OperationCode::MemoryBarrierGL)); + const OperationCode type = [instr] { + switch (instr.membar.type) { + case Tegra::Shader::MembarType::CTA: + return OperationCode::MemoryBarrierGroup; + case Tegra::Shader::MembarType::GL: + return OperationCode::MemoryBarrierGlobal; + default: + UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value())); + return OperationCode::MemoryBarrierGlobal; + } + }(); + bb.push_back(Operation(type)); break; } case OpCode::Id::DEPBAR: { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index cce8aeebe..c5e5165ff 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -233,7 +233,9 @@ enum class OperationCode { ThreadLtMask, /// () -> uint ShuffleIndexed, /// (uint value, uint index) -> uint - MemoryBarrierGL, /// () -> void + Barrier, /// () -> void + MemoryBarrierGroup, /// () -> void + MemoryBarrierGlobal, /// () -> void Amount, }; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d6efc34b2..45e3ddd2c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -14,6 +14,7 @@ #include <unordered_map> #include <vector> +#include <boost/container/small_vector.hpp> #include <boost/icl/interval_map.hpp> #include <boost/range/iterator_range.hpp> @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template <typename TSurface, typename TView> class TextureCache { + using VectorSurface = boost::container::small_vector<TSurface, 1>; public: void InvalidateRegion(VAddr addr, std::size_t size) { @@ -308,18 +310,20 @@ public: dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(VAddr addr) { + TSurface TryFindFramebufferSurface(VAddr addr) const { if (!addr) { return nullptr; } const VAddr page = addr >> registry_page_bits; - std::vector<TSurface>& list = registry[page]; - for (auto& surface : list) { - if (surface->GetCpuAddr() == addr) { - return surface; - } + const auto it = registry.find(page); + if (it == registry.end()) { + return nullptr; } - return nullptr; + const auto& list = it->second; + const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { + return surface->GetCpuAddr() == addr; + }); + return found != list.end() ? *found : nullptr; } u64 Tick() { @@ -498,7 +502,7 @@ private: * @param untopological Indicates to the recycler that the texture has no way * to match the overlaps due to topological reasons. **/ - RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, + RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::IsGPULevelExtreme()) { return RecycleStrategy::Flush; @@ -538,9 +542,8 @@ private: * @param untopological Indicates to the recycler that the texture has no way to match the * overlaps due to topological reasons. **/ - std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, - const SurfaceParams& params, const GPUVAddr gpu_addr, - const bool preserve_contents, + std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); for (auto& surface : overlaps) { @@ -650,7 +653,7 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. **/ - std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, + std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { @@ -708,7 +711,7 @@ private: * @param preserve_contents Indicates that the new surface should be loaded from memory or * left blank. */ - std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, + std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const VAddr cpu_addr, @@ -810,7 +813,7 @@ private: TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - std::vector<TSurface> overlaps{current_surface}; + VectorSurface overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } @@ -991,7 +994,9 @@ private: params.target = target; params.is_tiled = false; params.srgb_conversion = false; - params.is_layered = false; + params.is_layered = + target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || + target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; params.block_width = 0; params.block_height = 0; params.block_depth = 0; @@ -1124,23 +1129,25 @@ private: } } - std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { + VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } const VAddr cpu_addr_end = cpu_addr + size; - VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - std::vector<TSurface> surfaces; - while (start <= end) { - std::vector<TSurface>& list = registry[start]; - for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { - surface->MarkAsPicked(true); - surfaces.push_back(surface); + VectorSurface surfaces; + for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { + const auto it = registry.find(start); + if (it == registry.end()) { + continue; + } + for (auto& surface : it->second) { + if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { + continue; } + surface->MarkAsPicked(true); + surfaces.push_back(surface); } - start++; } for (auto& surface : surfaces) { surface->MarkAsPicked(false); |
