diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_classes/vic.cpp | 31 |
| -rw-r--r-- | src/video_core/query_cache.h | 8 |
| -rw-r--r-- | src/video_core/rasterizer_accelerated.h | 2 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_master_semaphore.h | 17 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.cpp | 3 |
5 files changed, 35 insertions, 26 deletions
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index dc768b952..051616124 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -32,7 +32,7 @@ enum class VideoPixelFormat : u64_le { RGBA8 = 0x1f, BGRA8 = 0x20, RGBX8 = 0x23, - Yuv420 = 0x44, + YUV420 = 0x44, }; } // Anonymous namespace @@ -88,12 +88,10 @@ void Vic::Execute() { const u64 surface_width = config.surface_width_minus1 + 1; const u64 surface_height = config.surface_height_minus1 + 1; if (static_cast<u64>(frame->width) != surface_width || - static_cast<u64>(frame->height) > surface_height) { + static_cast<u64>(frame->height) != surface_height) { // TODO: Properly support multiple video streams with differing frame dimensions - LOG_WARNING(Debug, - "Frame dimensions {}x{} can't be safely decoded into surface dimensions {}x{}", + LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}", frame->width, frame->height, surface_width, surface_height); - return; } switch (config.pixel_format) { case VideoPixelFormat::RGBA8: @@ -101,7 +99,7 @@ void Vic::Execute() { case VideoPixelFormat::RGBX8: WriteRGBFrame(frame, config); break; - case VideoPixelFormat::Yuv420: + case VideoPixelFormat::YUV420: WriteYUVFrame(frame, config); break; default: @@ -136,21 +134,20 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { scaler_height = frame->height; converted_frame_buffer.reset(); } - // Get Converted frame - const u32 width = static_cast<u32>(frame->width); - const u32 height = static_cast<u32>(frame->height); - const std::size_t linear_size = width * height * 4; - - // Only allocate frame_buffer once per stream, as the size is not expected to change if (!converted_frame_buffer) { - converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free}; + const size_t frame_size = frame->width * frame->height * 4; + converted_frame_buffer = 
AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free}; } const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0}; u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; - sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr, converted_stride.data()); + // Use the minimum of surface/frame dimensions to avoid buffer overflow. + const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1; + const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1; + const u32 width = std::min(surface_width, static_cast<u32>(frame->width)); + const u32 height = std::min(surface_height, static_cast<u32>(frame->height)); const u32 blk_kind = static_cast<u32>(config.block_linear_kind); if (blk_kind != 0) { // swizzle pitch linear to block linear @@ -158,11 +155,12 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); luma_buffer.resize(size); Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(), - converted_frame_buffer.get(), block_height, 0, 0); + converted_frame_buf_addr, block_height, 0, 0); gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); } else { // send pitch linear frame + const size_t linear_size = width * height * 4; gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, linear_size); } @@ -173,9 +171,10 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { const std::size_t surface_width = config.surface_width_minus1 + 1; const std::size_t surface_height = config.surface_height_minus1 + 1; + const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; + // Use the minimum of surface/frame dimensions to avoid buffer overflow. 
const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width)); const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height)); - const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; const auto stride = static_cast<size_t>(frame->linesize[0]); diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 73231061a..392f82eb7 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -258,9 +258,9 @@ private: void AsyncFlushQuery(VAddr addr) { if (!uncommitted_flushes) { - uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>(); + uncommitted_flushes = std::make_shared<std::vector<VAddr>>(); } - uncommitted_flushes->insert(addr); + uncommitted_flushes->push_back(addr); } static constexpr std::uintptr_t PAGE_SIZE = 4096; @@ -276,8 +276,8 @@ private: std::array<CounterStream, VideoCore::NumQueryTypes> streams; - std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{}; - std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes; + std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{}; + std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes; }; template <class QueryCache, class HostCounter> diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index ea879bfdd..249644e50 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -42,7 +42,7 @@ private: }; static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); - std::array<CacheEntry, 0x1000000> cached_pages; + std::array<CacheEntry, 0x2000000> cached_pages; Core::Memory::Memory& cpu_memory; }; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 4f8688118..0886b7da8 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -21,12 
+21,12 @@ public: /// Returns the current logical tick. [[nodiscard]] u64 CurrentTick() const noexcept { - return current_tick.load(std::memory_order_relaxed); + return current_tick.load(std::memory_order_acquire); } /// Returns the last known GPU tick. [[nodiscard]] u64 KnownGpuTick() const noexcept { - return gpu_tick.load(std::memory_order_relaxed); + return gpu_tick.load(std::memory_order_acquire); } /// Returns the timeline semaphore handle. @@ -41,12 +41,21 @@ public: /// Advance to the logical tick and return the old one [[nodiscard]] u64 NextTick() noexcept { - return current_tick.fetch_add(1, std::memory_order::relaxed); + return current_tick.fetch_add(1, std::memory_order_release); } /// Refresh the known GPU tick void Refresh() { - gpu_tick.store(semaphore.GetCounter(), std::memory_order_relaxed); + u64 this_tick{}; + u64 counter{}; + do { + this_tick = gpu_tick.load(std::memory_order_acquire); + counter = semaphore.GetCounter(); + if (counter < this_tick) { + return; + } + } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release, + std::memory_order_relaxed)); } /// Waits for a tick to be hit on the GPU diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index c9cb32d71..259cba156 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -117,7 +117,8 @@ u64 HostCounter::BlockingQuery() const { cache.GetScheduler().Wait(tick); u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( - query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); + query.first, query.second, 1, sizeof(data), &data, sizeof(data), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); switch (query_result) { case VK_SUCCESS: |
