diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.cpp | 115 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 44 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 50 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/blit_image.cpp | 123 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/blit_image.h | 30 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_blit_screen.cpp | 37 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 8 | ||||
| -rw-r--r-- | src/video_core/shader_notify.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader_notify.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/format_lookup_table.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 29 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 1 |
17 files changed, 232 insertions, 258 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 91a30fef7..6a6325e38 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,7 @@ add_subdirectory(host_shaders) if(LIBVA_FOUND) set_source_files_properties(command_classes/codecs/codec.cpp PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) + list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) endif() add_library(video_core STATIC diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 02d309170..2a532b883 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -17,12 +17,28 @@ extern "C" { #include <libavutil/opt.h> +#ifdef LIBVA_FOUND +// for querying VAAPI driver information +#include <libavutil/hwcontext_vaapi.h> +#endif } namespace Tegra { namespace { constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; +constexpr std::array PREFERRED_GPU_DECODERS = { + AV_HWDEVICE_TYPE_CUDA, +#ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, + AV_HWDEVICE_TYPE_DXVA2, +#elif defined(__linux__) + AV_HWDEVICE_TYPE_VAAPI, + AV_HWDEVICE_TYPE_VDPAU, +#endif + // last resort for Linux Flatpak (w/ NVIDIA) + AV_HWDEVICE_TYPE_VULKAN, +}; void AVPacketDeleter(AVPacket* ptr) { av_packet_free(&ptr); @@ -61,83 +77,50 @@ Codec::~Codec() { av_buffer_unref(&av_gpu_decoder); } -#ifdef LIBVA_FOUND -// List all the currently loaded Linux modules -static std::vector<std::string> ListLinuxKernelModules() { - using FILEPtr = std::unique_ptr<FILE, decltype(&std::fclose)>; - auto module_listing = FILEPtr{fopen("/proc/modules", "rt"), std::fclose}; - std::vector<std::string> modules{}; - if (!module_listing) { - LOG_WARNING(Service_NVDRV, "Could not open /proc/modules to collect available modules"); - return modules; - } - char* buffer = nullptr; - size_t buf_len = 0; - while (getline(&buffer, &buf_len, module_listing.get()) != -1) { - // format for the module listing file (sysfs) - // <name> <module_size> <depended_by_count> <depended_by_names> <status> <load_address> - auto line = std::string(buffer); - // we are only interested in module names - auto name_pos = line.find_first_of(" "); - if (name_pos == std::string::npos) { - continue; - } - modules.push_back(line.erase(name_pos)); - } - free(buffer); - return modules; +// List all the currently available hwcontext in ffmpeg +static std::vector<AVHWDeviceType> ListSupportedContexts() { + std::vector<AVHWDeviceType> contexts{}; + AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; + do { + current_device_type = av_hwdevice_iterate_types(current_device_type); + contexts.push_back(current_device_type); + } while (current_device_type != AV_HWDEVICE_TYPE_NONE); + return contexts; } -#endif bool Codec::CreateGpuAvDevice() { -#if defined(LIBVA_FOUND) - static constexpr std::array<const char*, 3> VAAPI_DRIVERS = { - "i915", - "iHD", - "amdgpu", - }; - AVDictionary* hwdevice_options = nullptr; - const auto loaded_modules = ListLinuxKernelModules(); - av_dict_set(&hwdevice_options, "connection_type", "drm", 0); - for (const auto& driver : VAAPI_DRIVERS) { - // first check if the target driver is loaded in the kernel - bool found = std::any_of(loaded_modules.begin(), loaded_modules.end(), - [&driver](const auto& module) { return module == driver; }); - if (!found) { - LOG_DEBUG(Service_NVDRV, "Kernel driver {} is not loaded, trying the next one", driver); + static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; + static const auto supported_contexts = ListSupportedContexts(); + for (const auto& type : PREFERRED_GPU_DECODERS) { + if (std::none_of(supported_contexts.begin(), supported_contexts.end(), + [&type](const auto& context) { return context == type; })) { + LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); continue; } - av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); - const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, - nullptr, hwdevice_options, 0); - if (hwdevice_error >= 0) { - LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); - av_dict_free(&hwdevice_options); - av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; - return true; - } - LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); - } - LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); - av_dict_free(&hwdevice_options); -#endif - static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; - static constexpr std::array GPU_DECODER_TYPES{ -#ifdef linux - AV_HWDEVICE_TYPE_VDPAU, -#endif - AV_HWDEVICE_TYPE_CUDA, -#ifdef _WIN32 - AV_HWDEVICE_TYPE_D3D11VA, -#endif - }; - for (const auto& type : GPU_DECODER_TYPES) { const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); if (hwdevice_res < 0) { LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", av_hwdevice_get_type_name(type), hwdevice_res); continue; } +#ifdef LIBVA_FOUND + if (type == AV_HWDEVICE_TYPE_VAAPI) { + // we need to determine if this is an impersonated VAAPI driver + AVHWDeviceContext* hwctx = + static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data)); + AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx); + const char* vendor_name = vaQueryVendorString(vactx->display); + if (strstr(vendor_name, "VDPAU backend")) { + // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them + LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver"); + continue; + } else { + // according to some user testing, certain vaapi driver (Intel?) could be buggy + // so let's log the driver name which may help the developers/supporters + LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name); + } + } +#endif for (int i = 0;; i++) { const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); if (!config) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ab7c21a49..8788f5148 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -185,16 +185,6 @@ struct GPU::Impl { return *dma_pusher; } - /// Returns a reference to the GPU CDMA pusher. - [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() { - return *cdma_pusher; - } - - /// Returns a const reference to the GPU CDMA pusher. - [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const { - return *cdma_pusher; - } - /// Returns a reference to the underlying renderer. [[nodiscard]] VideoCore::RendererBase& Renderer() { return *renderer; @@ -338,25 +328,27 @@ struct GPU::Impl { } /// Push GPU command buffer entries to be processed - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { if (!use_nvdec) { return; } - if (!cdma_pusher) { - cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu); + if (!cdma_pushers.contains(id)) { + cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(gpu)); } // SubmitCommandBuffer would make the nvdec operations async, this is not currently working // TODO(ameerj): RE proper async nvdec operation // gpu_thread.SubmitCommandBuffer(std::move(entries)); - - cdma_pusher->ProcessEntries(std::move(entries)); + cdma_pushers[id]->ProcessEntries(std::move(entries)); } /// Frees the CDMAPusher instance to free up resources - void ClearCdmaInstance() { - cdma_pusher.reset(); + void ClearCdmaInstance(u32 id) { + const auto iter = cdma_pushers.find(id); + if (iter != cdma_pushers.end()) { + cdma_pushers.erase(iter); + } } /// Swap buffers (render frame) @@ -659,7 +651,7 @@ struct GPU::Impl { Core::System& system; std::unique_ptr<Tegra::MemoryManager> memory_manager; std::unique_ptr<Tegra::DmaPusher> dma_pusher; - std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; + std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; std::unique_ptr<VideoCore::RendererBase> renderer; VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; @@ -811,14 +803,6 @@ const Tegra::DmaPusher& GPU::DmaPusher() const { return impl->DmaPusher(); } -Tegra::CDmaPusher& GPU::CDmaPusher() { - return impl->CDmaPusher(); -} - -const Tegra::CDmaPusher& GPU::CDmaPusher() const { - return impl->CDmaPusher(); -} - VideoCore::RendererBase& GPU::Renderer() { return impl->Renderer(); } @@ -887,12 +871,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) { impl->PushGPUEntries(std::move(entries)); } -void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { - impl->PushCommandBuffer(entries); +void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { + impl->PushCommandBuffer(id, entries); } -void GPU::ClearCdmaInstance() { - impl->ClearCdmaInstance(); +void GPU::ClearCdmaInstance(u32 id) { + impl->ClearCdmaInstance(id); } void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index c89a5d693..500411176 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -242,10 +242,10 @@ public: void PushGPUEntries(Tegra::CommandList&& entries); /// Push GPU command buffer entries to be processed - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); + void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries); /// Frees the CDMAPusher instance to free up resources - void ClearCdmaInstance(); + void ClearCdmaInstance(u32 id); /// Swap buffers (render frame) void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 37d5e6a6b..dbf1df79c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -92,7 +92,7 @@ public: void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { UNIMPLEMENTED(); } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 28daacd82..f81c1b233 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -437,39 +437,29 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindTextureUnit(0, fxaa_texture.handle); } - - // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - GLuint fragment_handle; - const auto filter = Settings::values.scaling_filter.GetValue(); - switch (filter) { - case Settings::ScalingFilter::NearestNeighbor: - fragment_handle = present_bilinear_fragment.handle; - break; - case Settings::ScalingFilter::Bilinear: - fragment_handle = present_bilinear_fragment.handle; - break; - case Settings::ScalingFilter::Bicubic: - fragment_handle = present_bicubic_fragment.handle; - break; - case Settings::ScalingFilter::Gaussian: - fragment_handle = present_gaussian_fragment.handle; - break; - case Settings::ScalingFilter::ScaleForce: - fragment_handle = present_scaleforce_fragment.handle; - break; - case Settings::ScalingFilter::Fsr: - LOG_WARNING( - Render_OpenGL, - "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce"); - fragment_handle = present_scaleforce_fragment.handle; - break; - default: - fragment_handle = present_bilinear_fragment.handle; - break; - } + const auto fragment_handle = [this]() { + switch (Settings::values.scaling_filter.GetValue()) { + case Settings::ScalingFilter::NearestNeighbor: + case Settings::ScalingFilter::Bilinear: + return present_bilinear_fragment.handle; + case Settings::ScalingFilter::Bicubic: + return present_bicubic_fragment.handle; + case Settings::ScalingFilter::Gaussian: + return present_gaussian_fragment.handle; + case Settings::ScalingFilter::ScaleForce: + return present_scaleforce_fragment.handle; + case Settings::ScalingFilter::Fsr: + LOG_WARNING( + Render_OpenGL, + "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce"); + return present_scaleforce_fragment.handle; + default: + return present_bilinear_fragment.handle; + } + }(); program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 9a38b6b34..cd5995897 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,6 +4,7 @@ #include <algorithm> +#include "common/settings.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" @@ -335,6 +336,17 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } + +VkExtent2D GetConversionExtent(const ImageView& src_image_view) { + const auto& resolution = Settings::values.resolution_info; + const bool is_rescaled = src_image_view.IsRescaled(); + u32 width = src_image_view.size.width; + u32 height = src_image_view.size.height; + return VkExtent2D{ + .width = is_rescaled ? resolution.ScaleUp(width) : width, + .height = is_rescaled ? resolution.ScaleUp(height) : height, + }; +} } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, @@ -425,61 +437,52 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, } void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + const ImageView& src_image_view) { ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); + Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + const ImageView& src_image_view) { ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); + Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + const ImageView& src_image_view) { ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); + Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + const ImageView& src_image_view) { ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); + Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift) { + const ImageView& src_image_view) { ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), - convert_abgr8_to_d24s8_frag, true); - ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); + convert_abgr8_to_d24s8_frag); + Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift) { + ImageView& src_image_view) { ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_abgr8_frag, false); - ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); + convert_d24s8_to_abgr8_frag); + ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, u32 down_shift) { + const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; - const VkExtent2D extent{ - .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), - .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), - }; + const VkExtent2D extent = GetConversionExtent(src_image_view); + scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, - this](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -563,18 +566,16 @@ void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_f } void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift) { + ImageView& src_image_view) { const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkImageView src_depth_view = src_image_view.DepthView(); const VkImageView src_stencil_view = src_image_view.StencilView(); const VkSampler sampler = *nearest_sampler; - const VkExtent2D extent{ - .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), - .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), - }; + const VkExtent2D extent = GetConversionExtent(src_image_view); + scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale, - down_shift, this](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, + this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -695,11 +696,14 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip return *blit_depth_stencil_pipelines.back(); } -void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { +void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, + bool is_target_depth) { if (pipeline) { return; } - const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag); + VkShaderModule frag_shader = + is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag; + const std::array stages = MakeStages(*full_screen_vert, frag_shader); pipeline = device.GetLogical().CreateGraphicsPipeline({ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -712,8 +716,9 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = nullptr, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, + .pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO + : &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, .layout = *one_texture_pipeline_layout, .renderPass = renderpass, @@ -723,37 +728,17 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend }); } +void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + ConvertPipeline(pipeline, renderpass, false); +} + void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { - if (pipeline) { - return; - } - const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag); - pipeline = device.GetLogical().CreateGraphicsPipeline({ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast<u32>(stages.size()), - .pStages = stages.data(), - .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pTessellationState = nullptr, - .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, - .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .layout = *one_texture_pipeline_layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }); + ConvertPipeline(pipeline, renderpass, true); } void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool is_target_depth, - bool single_texture) { + vk::ShaderModule& module, bool single_texture, + bool is_target_depth) { if (pipeline) { return; } @@ -782,13 +767,13 @@ void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass ren } void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture) { - ConvertPipelineEx(pipeline, renderpass, module, false, single_texture); + vk::ShaderModule& module) { + ConvertPipelineEx(pipeline, renderpass, module, false, false); } void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture) { - ConvertPipelineEx(pipeline, renderpass, module, true, single_texture); + vk::ShaderModule& module) { + ConvertPipelineEx(pipeline, renderpass, module, true, true); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index b1a717090..1d9f61a52 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -44,50 +44,46 @@ public: const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, u32 down_shift); + const ImageView& src_image_view); void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift); + ImageView& src_image_view); [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); + void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth); + void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool is_target_depth, bool single_texture); + vk::ShaderModule& module, bool single_texture, bool is_target_depth); void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture); + vk::ShaderModule& module); void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture); + vk::ShaderModule& module); const Device& device; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 31adada56..751e4792b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -162,7 +162,7 @@ struct FormatTuple { {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT - {VK_FORMAT_UNDEFINED}, // R16G16_UINT + {VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT @@ -176,8 +176,8 @@ struct FormatTuple { {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM - {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM - {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM + {VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM + {VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 1e447e621..c71a1f44d 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -391,28 +391,23 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, .offset = {0, 0}, .extent = size, }; - const auto filter = Settings::values.scaling_filter.GetValue(); cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - switch (filter) { - case Settings::ScalingFilter::NearestNeighbor: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); - break; - case Settings::ScalingFilter::Bilinear: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); - break; - case Settings::ScalingFilter::Bicubic: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline); - break; - case Settings::ScalingFilter::Gaussian: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline); - break; - case Settings::ScalingFilter::ScaleForce: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline); - break; - default: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); - break; - } + auto graphics_pipeline = [this]() { + switch (Settings::values.scaling_filter.GetValue()) { + case Settings::ScalingFilter::NearestNeighbor: + case Settings::ScalingFilter::Bilinear: + return *bilinear_pipeline; + case Settings::ScalingFilter::Bicubic: + return *bicubic_pipeline; + case Settings::ScalingFilter::Gaussian: + return *gaussian_pipeline; + case Settings::ScalingFilter::ScaleForce: + return *scaleforce_pipeline; + default: + return *bilinear_pipeline; + } + }(); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 197cba8e3..1941170cb 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1057,37 +1057,37 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst }); } -void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, - bool rescaled) { - const u32 up_scale = rescaled ? resolution.up_scale : 1; - const u32 down_shift = rescaled ? resolution.down_shift : 0; +void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { switch (dst_view.format) { case PixelFormat::R16_UNORM: if (src_view.format == PixelFormat::D16_UNORM) { - return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertD16ToR16(dst, src_view); } break; case PixelFormat::A8B8G8R8_UNORM: if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { - return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); } break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertD32ToR32(dst, src_view); } break; case PixelFormat::D16_UNORM: if (src_view.format == PixelFormat::R16_UNORM) { - return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertR16ToD16(dst, src_view); } break; case PixelFormat::S8_UINT_D24_UNORM: - return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); + if (src_view.format == PixelFormat::A8B8G8R8_UNORM || + src_view.format == PixelFormat::B8G8R8A8_UNORM) { + return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view); + } break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { - return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertR32ToD32(dst, src_view); } break; default: @@ -1329,6 +1329,10 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm } } +bool Image::IsRescaled() const noexcept { + return True(flags & ImageFlagBits::Rescaled); +} + bool Image::ScaleUp(bool ignore) { if (True(flags & ImageFlagBits::Rescaled)) { return false; @@ -1469,7 +1473,8 @@ bool Image::BlitScaleHelper(bool scale_up) { ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image) : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, - image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} { + src_image{&image}, image_handle{image.Handle()}, + samples(ConvertSampleCount(image.info.num_samples)) { using Shader::TextureType; const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); @@ -1607,6 +1612,13 @@ VkImageView ImageView::StorageView(Shader::TextureType texture_type, return *view; } +bool ImageView::IsRescaled() const noexcept { + if (!src_image) { + return false; + } + return src_image->IsRescaled(); +} + vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { return device->GetLogical().CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 753e3e8a1..c592f2666 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -65,7 +65,7 @@ public: void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); bool CanAccelerateImageUpload(Image&) const noexcept { return false; @@ -139,6 +139,8 @@ public: return std::exchange(initialized, true); } + bool IsRescaled() const noexcept; + bool ScaleUp(bool ignore = false); bool ScaleDown(bool ignore = false); @@ -189,6 +191,8 @@ public: [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format); + [[nodiscard]] bool IsRescaled() const noexcept; + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { return *image_views[static_cast<size_t>(texture_type)]; } @@ -222,6 +226,8 @@ private: [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); const Device* device = nullptr; + const Image* src_image{}; + std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views; std::unique_ptr<StorageViews> storage_views; vk::ImageView depth_view; diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index dc6995b46..bcaf5f575 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept { const int now_complete = num_complete.load(std::memory_order::relaxed); const int now_building = num_building.load(std::memory_order::relaxed); if (now_complete == now_building) { - const auto now = std::chrono::high_resolution_clock::now(); + const auto now = std::chrono::steady_clock::now(); if (completed && num_complete == num_when_completed) { if (now - complete_time > TIME_TO_STOP_REPORTING) { report_base = now_complete; diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index ad363bfb5..4d8d52071 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -28,6 +28,6 @@ private: bool completed{}; int num_when_completed{}; - std::chrono::high_resolution_clock::time_point complete_time; + std::chrono::steady_clock::time_point complete_time; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index ddfb726fe..afa807d5d 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -139,6 +139,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::D16_UNORM; case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR): + return PixelFormat::S8_UINT_D24_UNORM; case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): return PixelFormat::S8_UINT_D24_UNORM; case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 565b99254..2e19fced2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1122,7 +1122,7 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( break; } if (can_be_depth_blit) { - const ImageBase* const dst_image = src_id ? &slot_images[src_id] : nullptr; + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; DeduceBlitImages(dst_info, src_info, dst_image, src_image); if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { continue; @@ -1135,8 +1135,16 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); } } while (has_deleted_images); - if (GetFormatType(dst_info.format) != SurfaceType::ColorTexture) { - // Make sure the images are depth and/or stencil textures. + const ImageBase& src_image = slot_images[src_id]; + const ImageBase& dst_image = slot_images[dst_id]; + const bool native_bgr = runtime.HasNativeBgr(); + if (GetFormatType(dst_info.format) != GetFormatType(dst_image.info.format) || + GetFormatType(src_info.format) != GetFormatType(src_image.info.format) || + !VideoCore::Surface::IsViewCompatible(dst_info.format, dst_image.info.format, false, + native_bgr) || + !VideoCore::Surface::IsViewCompatible(src_info.format, src_image.info.format, false, + native_bgr)) { + // Make sure the images match the expected format. do { has_deleted_images = false; src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{}); @@ -1847,9 +1855,20 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag .height = std::min(dst_view.size.height, src_view.size.height), .depth = std::min(dst_view.size.depth, src_view.size.depth), }; - UNIMPLEMENTED_IF(copy.extent != expected_size); + const Extent3D scaled_extent = [is_rescaled, expected_size]() { + if (!is_rescaled) { + return expected_size; + } + const auto& resolution = Settings::values.resolution_info; + return Extent3D{ + .width = resolution.ScaleUp(expected_size.width), + .height = resolution.ScaleUp(expected_size.height), + .depth = expected_size.depth, + }; + }(); + UNIMPLEMENTED_IF(copy.extent != scaled_extent); - runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled); + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 70c52aaac..7bf5b6578 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -130,6 +130,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_SNORM, |
