diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/blit_image.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 43 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 11 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 7 |
10 files changed, 37 insertions, 92 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 81fac94bf..40f7755e8 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -56,6 +56,18 @@ AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pi av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT; return PREFERRED_CPU_FMT; } + +// List all the currently available hwcontext in ffmpeg +std::vector<AVHWDeviceType> ListSupportedContexts() { + std::vector<AVHWDeviceType> contexts{}; + AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; + do { + current_device_type = av_hwdevice_iterate_types(current_device_type); + contexts.push_back(current_device_type); + } while (current_device_type != AV_HWDEVICE_TYPE_NONE); + return contexts; +} + } // namespace void AVFrameDeleter(AVFrame* ptr) { @@ -76,17 +88,6 @@ Codec::~Codec() { av_buffer_unref(&av_gpu_decoder); } -// List all the currently available hwcontext in ffmpeg -static std::vector<AVHWDeviceType> ListSupportedContexts() { - std::vector<AVHWDeviceType> contexts{}; - AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; - do { - current_device_type = av_hwdevice_iterate_types(current_device_type); - contexts.push_back(current_device_type); - } while (current_device_type != AV_HWDEVICE_TYPE_NONE); - return contexts; -} - bool Codec::CreateGpuAvDevice() { static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; static const auto supported_contexts = ListSupportedContexts(); @@ -96,6 +97,8 @@ bool Codec::CreateGpuAvDevice() { LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); continue; } + // Avoid memory leak from not cleaning up after av_hwdevice_ctx_create + av_buffer_unref(&av_gpu_decoder); const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); if (hwdevice_res < 0) { LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", @@ -127,15 +130,19 @@ bool Codec::CreateGpuAvDevice() { av_codec->name, av_hwdevice_get_type_name(type)); break; } - if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { - av_codec_ctx->pix_fmt = config->pix_fmt; - if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) { + if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) { +#if defined(__unix__) + // Some linux decoding backends are reported to crash with this config method + // TODO(ameerj): Properly support this method + if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) { // skip zero-copy decoders, we don't currently support them LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.", av_hwdevice_get_type_name(type), config->methods); continue; } +#endif LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); + av_codec_ctx->pix_fmt = config->pix_fmt; return true; } } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 656dd7eb0..597301eeb 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -282,7 +282,7 @@ void main() { u64 Device::GetCurrentDedicatedVideoMemory() const { GLint cur_avail_mem_kb = 0; - glGetIntegerv(GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX, &cur_avail_mem_kb); + glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, &cur_avail_mem_kb); return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7e06d0069..e6f9ece8b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -15,8 +15,9 @@ #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "common/settings.h" -#include "core/memory.h" + #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -210,6 +211,7 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); + SCOPE_EXIT({ gpu.TickWork(); }); query_cache.UpdateCounters(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; @@ -265,8 +267,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { ++num_queued_commands; has_written_global_memory |= pipeline->WritesGlobalMemory(); - - gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute() { @@ -352,7 +352,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { shader_cache.OnCPUWrite(addr, size); { std::scoped_lock lock{texture_cache.mutex}; - texture_cache.CachedWriteMemory(addr, size); + texture_cache.WriteMemory(addr, size); } { std::scoped_lock lock{buffer_cache.mutex}; @@ -364,10 +364,6 @@ void RasterizerOpenGL::SyncGuestHost() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); shader_cache.SyncGuestHost(); { - std::scoped_lock lock{texture_cache.mutex}; - texture_cache.FlushCachedWrites(); - } - { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.FlushCachedWrites(); } diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index ec03cca38..abda1c490 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -367,17 +367,14 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, PipelineLayoutCreateInfo(two_textures_set_layout.address()))), full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), - nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { - if (device.IsExtShaderStencilExportSupported()) { - blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV); - } -} + nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {} BlitImageHelper::~BlitImageHelper() = default; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index dd6e0027e..fa87d37f8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -408,7 +408,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { pipeline_cache.OnCPUWrite(addr, size); { std::scoped_lock lock{texture_cache.mutex}; - texture_cache.CachedWriteMemory(addr, size); + texture_cache.WriteMemory(addr, size); } { std::scoped_lock lock{buffer_cache.mutex}; @@ -419,10 +419,6 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { void RasterizerVulkan::SyncGuestHost() { pipeline_cache.SyncGuestHost(); { - std::scoped_lock lock{texture_cache.mutex}; - texture_cache.FlushCachedWrites(); - } - { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.FlushCachedWrites(); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f2890d263..2c2ccc7c6 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1451,8 +1451,7 @@ bool Image::BlitScaleHelper(bool scale_up) { runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region, src_region, operation, BLIT_OPERATION); - } else if (!runtime->device.IsBlitDepthStencilSupported() && - aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (!blit_framebuffer) { blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent); } diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index cc7999027..dd0106432 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -39,9 +39,6 @@ enum class ImageFlagBits : u32 { Rescaled = 1 << 13, CheckingRescalable = 1 << 14, IsRescalable = 1 << 15, - - // Cached CPU - CachedCpuModified = 1 << 16, ///< Contents have been modified from the CPU }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 099b2ae1b..8fef74117 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -438,23 +438,6 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { } template <class P> -void TextureCache<P>::CachedWriteMemory(VAddr cpu_addr, size_t size) { - const VAddr new_cpu_addr = Common::AlignDown(cpu_addr, CPU_PAGE_SIZE); - const size_t new_size = Common::AlignUp(size + cpu_addr - new_cpu_addr, CPU_PAGE_SIZE); - ForEachImageInRegion(new_cpu_addr, new_size, [this](ImageId image_id, Image& image) { - if (True(image.flags & ImageFlagBits::CachedCpuModified)) { - return; - } - image.flags |= ImageFlagBits::CachedCpuModified; - cached_cpu_invalidate.insert(image_id); - - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, image_id); - } - }); -} - -template <class P> void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { std::vector<ImageId> images; ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { @@ -512,18 +495,6 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { } template <class P> -void TextureCache<P>::FlushCachedWrites() { - for (ImageId image_id : cached_cpu_invalidate) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CachedCpuModified)) { - image.flags &= ~ImageFlagBits::CachedCpuModified; - image.flags |= ImageFlagBits::CpuModified; - } - } - cached_cpu_invalidate.clear(); -} - -template <class P> void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy) { @@ -1109,8 +1080,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA Image& overlap = slot_images[overlap_id]; if (True(overlap.flags & ImageFlagBits::GpuModified)) { new_image.flags |= ImageFlagBits::GpuModified; - new_image.modification_tick = - std::max(overlap.modification_tick, new_image.modification_tick); } if (overlap.info.num_samples != new_image.info.num_samples) { LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); @@ -1589,9 +1558,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { template <class P> void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); - if (True(image.flags & ImageFlagBits::CachedCpuModified)) { - return; - } image.flags |= ImageFlagBits::Tracked; if (False(image.flags & ImageFlagBits::Sparse)) { rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); @@ -1648,9 +1614,6 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); - if (True(image.flags & ImageFlagBits::CachedCpuModified)) { - cached_cpu_invalidate.erase(image_id); - } const GPUVAddr gpu_addr = image.gpu_addr; const auto alloc_it = image_allocs_table.find(gpu_addr); if (alloc_it == image_allocs_table.end()) { @@ -1817,11 +1780,7 @@ template <class P> void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { Image& image = slot_images[image_id]; if (invalidate) { - if (True(image.flags & ImageFlagBits::CachedCpuModified)) { - cached_cpu_invalidate.erase(image_id); - } - image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified | - ImageFlagBits::CachedCpuModified); + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); if (False(image.flags & ImageFlagBits::Tracked)) { TrackImage(image, image_id); } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index ad5978a33..b1324edf3 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -8,7 +8,6 @@ #include <span> #include <type_traits> #include <unordered_map> -#include <unordered_set> #include <vector> #include <queue> @@ -51,9 +50,6 @@ class TextureCache { /// Address shift for caching images into a hash table static constexpr u64 PAGE_BITS = 20; - static constexpr u64 CPU_PAGE_BITS = 12; - static constexpr u64 CPU_PAGE_SIZE = 1ULL << CPU_PAGE_BITS; - /// Enables debugging features to the texture cache static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; /// Implement blits as copies between framebuffers @@ -140,9 +136,6 @@ public: /// Mark images in a range as modified from the CPU void WriteMemory(VAddr cpu_addr, size_t size); - /// Mark images in a range as modified from the CPU - void CachedWriteMemory(VAddr cpu_addr, size_t size); - /// Download contents of host images to guest memory in a region void DownloadMemory(VAddr cpu_addr, size_t size); @@ -152,8 +145,6 @@ public: /// Remove images in a region void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); - void FlushCachedWrites(); - /// Blit an image with the given parameters void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, @@ -375,8 +366,6 @@ private: std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; - std::unordered_set<ImageId> cached_cpu_invalidate; - VAddr virtual_invalid_space{}; bool has_deleted_images = false; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e142bee35..f3a05ada9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -621,6 +621,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR khr_push_descriptor = false; break; } + const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff; + if (nv_major_version >= 510) { + LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits"); + cant_blit_msaa = true; + } } const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; if (ext_extended_dynamic_state && is_radv) { @@ -731,7 +736,7 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags } void Device::ReportLoss() const { - LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); + LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); // Wait for the log to flush and for Nsight Aftermath to dump the results std::this_thread::sleep_for(std::chrono::seconds{15}); |
