diff options
Diffstat (limited to 'src/video_core/renderer_vulkan')
17 files changed, 2399 insertions, 0 deletions
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h new file mode 100644 index 000000000..ba25b5bc7 --- /dev/null +++ b/src/video_core/renderer_vulkan/declarations.h @@ -0,0 +1,45 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vulkan/vulkan.hpp> + +namespace Vulkan { + +// vulkan.hpp unique handlers use DispatchLoaderStatic +template <typename T> +using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>; + +using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>; +using UniqueBuffer = UniqueHandle<vk::Buffer>; +using UniqueBufferView = UniqueHandle<vk::BufferView>; +using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>; +using UniqueCommandPool = UniqueHandle<vk::CommandPool>; +using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>; +using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>; +using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>; +using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>; +using UniqueDevice = UniqueHandle<vk::Device>; +using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>; +using UniqueEvent = UniqueHandle<vk::Event>; +using UniqueFence = UniqueHandle<vk::Fence>; +using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; +using UniqueImage = UniqueHandle<vk::Image>; +using UniqueImageView = UniqueHandle<vk::ImageView>; +using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; +using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; +using UniquePipeline = UniqueHandle<vk::Pipeline>; +using UniquePipelineCache = UniqueHandle<vk::PipelineCache>; +using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>; +using UniqueQueryPool = UniqueHandle<vk::QueryPool>; +using UniqueRenderPass = UniqueHandle<vk::RenderPass>; +using UniqueSampler = UniqueHandle<vk::Sampler>; +using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; +using UniqueSemaphore = UniqueHandle<vk::Semaphore>; +using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; +using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; +using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp new file mode 100644 index 000000000..34bf26ff2 --- /dev/null +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -0,0 +1,483 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/surface.h" + +namespace Vulkan::MaxwellToVK { + +namespace Sampler { + +vk::Filter Filter(Tegra::Texture::TextureFilter filter) { + switch (filter) { + case Tegra::Texture::TextureFilter::Linear: + return vk::Filter::eLinear; + case Tegra::Texture::TextureFilter::Nearest: + return vk::Filter::eNearest; + } + UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); + return {}; +} + +vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { + switch (mipmap_filter) { + case Tegra::Texture::TextureMipmapFilter::None: + // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping + // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to + // use an image view with a single mipmap level to emulate this. + return vk::SamplerMipmapMode::eLinear; + case Tegra::Texture::TextureMipmapFilter::Linear: + return vk::SamplerMipmapMode::eLinear; + case Tegra::Texture::TextureMipmapFilter::Nearest: + return vk::SamplerMipmapMode::eNearest; + } + UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); + return {}; +} + +vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { + switch (wrap_mode) { + case Tegra::Texture::WrapMode::Wrap: + return vk::SamplerAddressMode::eRepeat; + case Tegra::Texture::WrapMode::Mirror: + return vk::SamplerAddressMode::eMirroredRepeat; + case Tegra::Texture::WrapMode::ClampToEdge: + return vk::SamplerAddressMode::eClampToEdge; + case Tegra::Texture::WrapMode::Border: + return vk::SamplerAddressMode::eClampToBorder; + case Tegra::Texture::WrapMode::ClampOGL: + // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use + // eClampToBorder to get the border color of the texture, and then sample the edge to + // manually mix them. However the shader part of this is not yet implemented. + return vk::SamplerAddressMode::eClampToBorder; + case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: + return vk::SamplerAddressMode::eMirrorClampToEdge; + case Tegra::Texture::WrapMode::MirrorOnceBorder: + UNIMPLEMENTED(); + return vk::SamplerAddressMode::eMirrorClampToEdge; + } + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); + return {}; +} + +vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { + switch (depth_compare_func) { + case Tegra::Texture::DepthCompareFunc::Never: + return vk::CompareOp::eNever; + case Tegra::Texture::DepthCompareFunc::Less: + return vk::CompareOp::eLess; + case Tegra::Texture::DepthCompareFunc::LessEqual: + return vk::CompareOp::eLessOrEqual; + case Tegra::Texture::DepthCompareFunc::Equal: + return vk::CompareOp::eEqual; + case Tegra::Texture::DepthCompareFunc::NotEqual: + return vk::CompareOp::eNotEqual; + case Tegra::Texture::DepthCompareFunc::Greater: + return vk::CompareOp::eGreater; + case Tegra::Texture::DepthCompareFunc::GreaterEqual: + return vk::CompareOp::eGreaterOrEqual; + case Tegra::Texture::DepthCompareFunc::Always: + return vk::CompareOp::eAlways; + } + UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", + static_cast<u32>(depth_compare_func)); + return {}; +} + +} // namespace Sampler + +struct FormatTuple { + vk::Format format; ///< Vulkan format + ComponentType component_type; ///< Abstracted component type + bool attachable; ///< True when this format can be used as an attachment +}; + +static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ + {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI + {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U + {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U + {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI + {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1 + {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23 + {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45 + {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F + {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4 + + // Compressed sRGB formats + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB + + // Depth formats + {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F + {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16 + + // DepthStencil formats + {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8 + {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated) + {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8 +}}; + +static constexpr bool IsZetaFormat(PixelFormat pixel_format) { + return pixel_format >= PixelFormat::MaxColorFormat && + pixel_format < PixelFormat::MaxDepthStencilFormat; +} + +std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, + PixelFormat pixel_format, ComponentType component_type) { + ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); + + const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)]; + UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined, + "Unimplemented texture format with pixel format={} and component type={}", + static_cast<u32>(pixel_format), static_cast<u32>(component_type)); + ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch"); + + auto usage = vk::FormatFeatureFlagBits::eSampledImage | + vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc; + if (tuple.attachable) { + usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment + : vk::FormatFeatureFlagBits::eColorAttachment; + } + return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; +} + +vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) { + switch (stage) { + case Maxwell::ShaderStage::Vertex: + return vk::ShaderStageFlagBits::eVertex; + case Maxwell::ShaderStage::TesselationControl: + return vk::ShaderStageFlagBits::eTessellationControl; + case Maxwell::ShaderStage::TesselationEval: + return vk::ShaderStageFlagBits::eTessellationEvaluation; + case Maxwell::ShaderStage::Geometry: + return vk::ShaderStageFlagBits::eGeometry; + case Maxwell::ShaderStage::Fragment: + return vk::ShaderStageFlagBits::eFragment; + } + UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); + return {}; +} + +vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return vk::PrimitiveTopology::ePointList; + case Maxwell::PrimitiveTopology::Lines: + return vk::PrimitiveTopology::eLineList; + case Maxwell::PrimitiveTopology::LineStrip: + return vk::PrimitiveTopology::eLineStrip; + case Maxwell::PrimitiveTopology::Triangles: + return vk::PrimitiveTopology::eTriangleList; + case Maxwell::PrimitiveTopology::TriangleStrip: + return vk::PrimitiveTopology::eTriangleStrip; + } + UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); + return {}; +} + +vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { + switch (type) { + case Maxwell::VertexAttribute::Type::SignedNorm: + break; + case Maxwell::VertexAttribute::Type::UnsignedNorm: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return vk::Format::eR8G8B8A8Unorm; + default: + break; + } + break; + case Maxwell::VertexAttribute::Type::SignedInt: + break; + case Maxwell::VertexAttribute::Type::UnsignedInt: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_32: + return vk::Format::eR32Uint; + default: + break; + } + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + break; + case Maxwell::VertexAttribute::Type::Float: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return vk::Format::eR32G32B32A32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return vk::Format::eR32G32B32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32_32: + return vk::Format::eR32G32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32: + return vk::Format::eR32Sfloat; + default: + break; + } + break; + } + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type), + static_cast<u32>(size)); + return {}; +} + +vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return vk::CompareOp::eNever; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return vk::CompareOp::eLess; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return vk::CompareOp::eEqual; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return vk::CompareOp::eLessOrEqual; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return vk::CompareOp::eGreater; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return vk::CompareOp::eNotEqual; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return vk::CompareOp::eGreaterOrEqual; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return vk::CompareOp::eAlways; + } + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); + return {}; +} + +vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) { + switch (index_format) { + case Maxwell::IndexFormat::UnsignedByte: + UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format"); + return vk::IndexType::eUint16; + case Maxwell::IndexFormat::UnsignedShort: + return vk::IndexType::eUint16; + case Maxwell::IndexFormat::UnsignedInt: + return vk::IndexType::eUint32; + } + UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); + return {}; +} + +vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { + switch (stencil_op) { + case Maxwell::StencilOp::Keep: + case Maxwell::StencilOp::KeepOGL: + return vk::StencilOp::eKeep; + case Maxwell::StencilOp::Zero: + case Maxwell::StencilOp::ZeroOGL: + return vk::StencilOp::eZero; + case Maxwell::StencilOp::Replace: + case Maxwell::StencilOp::ReplaceOGL: + return vk::StencilOp::eReplace; + case Maxwell::StencilOp::Incr: + case Maxwell::StencilOp::IncrOGL: + return vk::StencilOp::eIncrementAndClamp; + case Maxwell::StencilOp::Decr: + case Maxwell::StencilOp::DecrOGL: + return vk::StencilOp::eDecrementAndClamp; + case Maxwell::StencilOp::Invert: + case Maxwell::StencilOp::InvertOGL: + return vk::StencilOp::eInvert; + case Maxwell::StencilOp::IncrWrap: + case Maxwell::StencilOp::IncrWrapOGL: + return vk::StencilOp::eIncrementAndWrap; + case Maxwell::StencilOp::DecrWrap: + case Maxwell::StencilOp::DecrWrapOGL: + return vk::StencilOp::eDecrementAndWrap; + } + UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); + return {}; +} + +vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { + switch (equation) { + case Maxwell::Blend::Equation::Add: + case Maxwell::Blend::Equation::AddGL: + return vk::BlendOp::eAdd; + case Maxwell::Blend::Equation::Subtract: + case Maxwell::Blend::Equation::SubtractGL: + return vk::BlendOp::eSubtract; + case Maxwell::Blend::Equation::ReverseSubtract: + case Maxwell::Blend::Equation::ReverseSubtractGL: + return vk::BlendOp::eReverseSubtract; + case Maxwell::Blend::Equation::Min: + case Maxwell::Blend::Equation::MinGL: + return vk::BlendOp::eMin; + case Maxwell::Blend::Equation::Max: + case Maxwell::Blend::Equation::MaxGL: + return vk::BlendOp::eMax; + } + UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); + return {}; +} + +vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { + switch (factor) { + case Maxwell::Blend::Factor::Zero: + case Maxwell::Blend::Factor::ZeroGL: + return vk::BlendFactor::eZero; + case Maxwell::Blend::Factor::One: + case Maxwell::Blend::Factor::OneGL: + return vk::BlendFactor::eOne; + case Maxwell::Blend::Factor::SourceColor: + case Maxwell::Blend::Factor::SourceColorGL: + return vk::BlendFactor::eSrcColor; + case Maxwell::Blend::Factor::OneMinusSourceColor: + case Maxwell::Blend::Factor::OneMinusSourceColorGL: + return vk::BlendFactor::eOneMinusSrcColor; + case Maxwell::Blend::Factor::SourceAlpha: + case Maxwell::Blend::Factor::SourceAlphaGL: + return vk::BlendFactor::eSrcAlpha; + case Maxwell::Blend::Factor::OneMinusSourceAlpha: + case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: + return vk::BlendFactor::eOneMinusSrcAlpha; + case Maxwell::Blend::Factor::DestAlpha: + case Maxwell::Blend::Factor::DestAlphaGL: + return vk::BlendFactor::eDstAlpha; + case Maxwell::Blend::Factor::OneMinusDestAlpha: + case Maxwell::Blend::Factor::OneMinusDestAlphaGL: + return vk::BlendFactor::eOneMinusDstAlpha; + case Maxwell::Blend::Factor::DestColor: + case Maxwell::Blend::Factor::DestColorGL: + return vk::BlendFactor::eDstColor; + case Maxwell::Blend::Factor::OneMinusDestColor: + case Maxwell::Blend::Factor::OneMinusDestColorGL: + return vk::BlendFactor::eOneMinusDstColor; + case Maxwell::Blend::Factor::SourceAlphaSaturate: + case Maxwell::Blend::Factor::SourceAlphaSaturateGL: + return vk::BlendFactor::eSrcAlphaSaturate; + case Maxwell::Blend::Factor::Source1Color: + case Maxwell::Blend::Factor::Source1ColorGL: + return vk::BlendFactor::eSrc1Color; + case Maxwell::Blend::Factor::OneMinusSource1Color: + case Maxwell::Blend::Factor::OneMinusSource1ColorGL: + return vk::BlendFactor::eOneMinusSrc1Color; + case Maxwell::Blend::Factor::Source1Alpha: + case Maxwell::Blend::Factor::Source1AlphaGL: + return vk::BlendFactor::eSrc1Alpha; + case Maxwell::Blend::Factor::OneMinusSource1Alpha: + case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: + return vk::BlendFactor::eOneMinusSrc1Alpha; + case Maxwell::Blend::Factor::ConstantColor: + case Maxwell::Blend::Factor::ConstantColorGL: + return vk::BlendFactor::eConstantColor; + case Maxwell::Blend::Factor::OneMinusConstantColor: + case Maxwell::Blend::Factor::OneMinusConstantColorGL: + return vk::BlendFactor::eOneMinusConstantColor; + case Maxwell::Blend::Factor::ConstantAlpha: + case Maxwell::Blend::Factor::ConstantAlphaGL: + return vk::BlendFactor::eConstantAlpha; + case Maxwell::Blend::Factor::OneMinusConstantAlpha: + case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: + return vk::BlendFactor::eOneMinusConstantAlpha; + } + UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); + return {}; +} + +vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { + switch (front_face) { + case Maxwell::Cull::FrontFace::ClockWise: + return vk::FrontFace::eClockwise; + case Maxwell::Cull::FrontFace::CounterClockWise: + return vk::FrontFace::eCounterClockwise; + } + UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); + return {}; +} + +vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { + switch (cull_face) { + case Maxwell::Cull::CullFace::Front: + return vk::CullModeFlagBits::eFront; + case Maxwell::Cull::CullFace::Back: + return vk::CullModeFlagBits::eBack; + case Maxwell::Cull::CullFace::FrontAndBack: + return vk::CullModeFlagBits::eFrontAndBack; + } + UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); + return {}; +} + +vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { + switch (swizzle) { + case Tegra::Texture::SwizzleSource::Zero: + return vk::ComponentSwizzle::eZero; + case Tegra::Texture::SwizzleSource::R: + return vk::ComponentSwizzle::eR; + case Tegra::Texture::SwizzleSource::G: + return vk::ComponentSwizzle::eG; + case Tegra::Texture::SwizzleSource::B: + return vk::ComponentSwizzle::eB; + case Tegra::Texture::SwizzleSource::A: + return vk::ComponentSwizzle::eA; + case Tegra::Texture::SwizzleSource::OneInt: + case Tegra::Texture::SwizzleSource::OneFloat: + return vk::ComponentSwizzle::eOne; + } + UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); + return {}; +} + +} // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h new file mode 100644 index 000000000..4cadc0721 --- /dev/null +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -0,0 +1,58 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <utility> +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/surface.h" +#include "video_core/textures/texture.h" + +namespace Vulkan::MaxwellToVK { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using PixelFormat = VideoCore::Surface::PixelFormat; +using ComponentType = VideoCore::Surface::ComponentType; + +namespace Sampler { + +vk::Filter Filter(Tegra::Texture::TextureFilter filter); + +vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); + +vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode); + +vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); + +} // namespace Sampler + +std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, + PixelFormat pixel_format, ComponentType component_type); + +vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage); + +vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology); + +vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); + +vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison); + +vk::IndexType IndexFormat(Maxwell::IndexFormat index_format); + +vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op); + +vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation); + +vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); + +vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face); + +vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face); + +vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); + +} // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp new file mode 100644 index 000000000..95eab3fec --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -0,0 +1,123 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include <memory> +#include <optional> +#include <tuple> + +#include "common/alignment.h" +#include "common/assert.h" +#include "core/memory.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, + std::size_t alignment, u8* host_ptr) + : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{ + host_ptr} {} + +VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, + VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) + : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} { + const auto usage = vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer; + const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead | + vk::AccessFlagBits::eUniformRead; + stream_buffer = + std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access, + vk::PipelineStageFlagBits::eAllCommands); + buffer_handle = stream_buffer->GetBuffer(); +} + +VKBufferCache::~VKBufferCache() = default; + +u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment, + bool cache) { + const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; + ASSERT_MSG(cpu_addr, "Invalid GPU address"); + + // Cache management is a big overhead, so only cache entries with a given size. + // TODO: Figure out which size is the best for given games. + cache &= size >= 2048; + + const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; + if (cache) { + auto entry = TryGet(host_ptr); + if (entry) { + if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { + return entry->GetOffset(); + } + Unregister(entry); + } + } + + AlignBuffer(alignment); + const u64 uploaded_offset = buffer_offset; + + if (!host_ptr) { + return uploaded_offset; + } + + std::memcpy(buffer_ptr, host_ptr, size); + buffer_ptr += size; + buffer_offset += size; + + if (cache) { + auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, + alignment, host_ptr); + Register(entry); + } + + return uploaded_offset; +} + +u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { + AlignBuffer(alignment); + std::memcpy(buffer_ptr, raw_pointer, size); + const u64 uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return uploaded_offset; +} + +std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { + AlignBuffer(alignment); + u8* const uploaded_ptr = buffer_ptr; + const u64 uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return {uploaded_ptr, uploaded_offset}; +} + +void VKBufferCache::Reserve(std::size_t max_size) { + bool invalidate; + std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); + buffer_offset = buffer_offset_base; + + if (invalidate) { + InvalidateAll(); + } +} + +VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { + return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); +} + +void VKBufferCache::AlignBuffer(std::size_t alignment) { + // Align the offset, not the mapped pointer + const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); + buffer_ptr += offset_aligned - buffer_offset; + buffer_offset = offset_aligned; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h new file mode 100644 index 000000000..8b415744b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -0,0 +1,104 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <tuple> + +#include "common/common_types.h" +#include "video_core/gpu.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Vulkan { + +class VKDevice; +class VKFence; +class VKMemoryManager; +class VKStreamBuffer; + +class CachedBufferEntry final : public RasterizerCacheObject { +public: + explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, + u8* host_ptr); + + VAddr GetCpuAddr() const override { + return cpu_addr; + } + + std::size_t GetSizeInBytes() const override { + return size; + } + + std::size_t GetSize() const { + return size; + } + + u64 GetOffset() const { + return offset; + } + + std::size_t GetAlignment() const { + return alignment; + } + + // We do not have to flush this cache as things in it are never modified by us. + void Flush() override {} + +private: + VAddr cpu_addr{}; + std::size_t size{}; + u64 offset{}; + std::size_t alignment{}; +}; + +class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { +public: + explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, + VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); + ~VKBufferCache(); + + /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been + /// allocated. + u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, + bool cache = true); + + /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. + u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); + + /// Reserves memory to be used by host's CPU. Returns mapped address and offset. + std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4); + + /// Reserves a region of memory to be used in subsequent upload/reserve operations. + void Reserve(std::size_t max_size); + + /// Ensures that the set data is sent to the device. + [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); + + /// Returns the buffer cache handle. + vk::Buffer GetBuffer() const { + return buffer_handle; + } + +private: + void AlignBuffer(std::size_t alignment); + + Tegra::MemoryManager& tegra_memory_manager; + + std::unique_ptr<VKStreamBuffer> stream_buffer; + vk::Buffer buffer_handle; + + u8* buffer_ptr = nullptr; + u64 buffer_offset = 0; + u64 buffer_offset_base = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp new file mode 100644 index 000000000..00242ecbe --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -0,0 +1,238 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <map> +#include <optional> +#include <set> +#include <vector> +#include "common/assert.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" + +namespace Vulkan { + +namespace Alternatives { + +constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { + vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; +constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { + vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; + +} // namespace Alternatives + +constexpr const vk::Format* GetFormatAlternatives(vk::Format format) { + switch (format) { + case vk::Format::eD24UnormS8Uint: + return Alternatives::Depth24UnormS8Uint.data(); + case vk::Format::eD16UnormS8Uint: + return Alternatives::Depth16UnormS8Uint.data(); + default: + return nullptr; + } +} + +constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, + FormatType format_type) { + switch (format_type) { + case FormatType::Linear: + return properties.linearTilingFeatures; + case FormatType::Optimal: + return properties.optimalTilingFeatures; + case FormatType::Buffer: + return properties.bufferFeatures; + default: + return {}; + } +} + +VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface) + : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { + SetupFamilies(dldi, surface); + SetupProperties(dldi); +} + +VKDevice::~VKDevice() = default; + +bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { + const auto queue_cis = GetDeviceQueueCreateInfos(); + vk::PhysicalDeviceFeatures device_features{}; + + const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; + const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), + 0, nullptr, static_cast<u32>(extensions.size()), + extensions.data(), &device_features); + vk::Device dummy_logical; + if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); + return false; + } + + dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); + logical = UniqueDevice( + dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); + + graphics_queue = logical->getQueue(graphics_family, 0, dld); + present_queue = logical->getQueue(present_family, 0, dld); + return true; +} + +vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, + vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const { + if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { + return wanted_format; + } + // The wanted format is not supported by hardware, search for alternatives + const vk::Format* alternatives = GetFormatAlternatives(wanted_format); + if (alternatives == nullptr) { + LOG_CRITICAL(Render_Vulkan, + "Format={} with usage={} and type={} has no defined alternatives and host " + "hardware does not support it", + static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + static_cast<u32>(format_type)); + UNREACHABLE(); + return wanted_format; + } + + std::size_t i = 0; + for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; + alternative = alternatives[++i]) { + if (!IsFormatSupported(alternative, wanted_usage, format_type)) + continue; + LOG_WARNING(Render_Vulkan, + "Emulating format={} with alternative format={} with usage={} and type={}", + static_cast<u32>(wanted_format), static_cast<u32>(alternative), + static_cast<u32>(wanted_usage), static_cast<u32>(format_type)); + return alternative; + } + + // No alternatives found, panic + LOG_CRITICAL(Render_Vulkan, + "Format={} with usage={} and type={} is not supported by the host hardware and " + "doesn't support any of the alternatives", + static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + static_cast<u32>(format_type)); + UNREACHABLE(); + return wanted_format; +} + +bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const { + const auto it = format_properties.find(wanted_format); + if (it == format_properties.end()) { + LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format)); + UNREACHABLE(); + return true; + } + const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type); + return (supported_usage & wanted_usage) == wanted_usage; +} + +bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface) { + const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; + + bool has_swapchain{}; + for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + has_swapchain |= prop.extensionName == swapchain_extension; + } + if (!has_swapchain) { + // The device doesn't support creating swapchains. + return false; + } + + bool has_graphics{}, has_present{}; + const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { + const auto& family = queue_family_properties[i]; + if (family.queueCount == 0) + continue; + + has_graphics |= + (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); + has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; + } + if (!has_graphics || !has_present) { + // The device doesn't have a graphics and present queue. + return false; + } + + // TODO(Rodrigo): Check if the device matches all requeriments. + const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); + if (props.limits.maxUniformBufferRange < 65536) { + return false; + } + + // Device is suitable. + return true; +} + +void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { + std::optional<u32> graphics_family_, present_family_; + + const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { + if (graphics_family_ && present_family_) + break; + + const auto& queue_family = queue_family_properties[i]; + if (queue_family.queueCount == 0) + continue; + + if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) + graphics_family_ = i; + if (physical.getSurfaceSupportKHR(i, surface, dldi)) + present_family_ = i; + } + ASSERT(graphics_family_ && present_family_); + + graphics_family = *graphics_family_; + present_family = *present_family_; +} + +void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { + const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); + device_type = props.deviceType; + uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); +} + +std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { + static const float QUEUE_PRIORITY = 1.f; + + std::set<u32> unique_queue_families = {graphics_family, present_family}; + std::vector<vk::DeviceQueueCreateInfo> queue_cis; + + for (u32 queue_family : unique_queue_families) + queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY}); + + return queue_cis; +} + +std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( + const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + std::map<vk::Format, vk::FormatProperties> format_properties; + + const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { + format_properties.emplace(format, physical.getFormatProperties(format, dldi)); + }; + AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); + AddFormatQuery(vk::Format::eB5G6R5UnormPack16); + AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); + AddFormatQuery(vk::Format::eR8G8B8A8Srgb); + AddFormatQuery(vk::Format::eR8Unorm); + AddFormatQuery(vk::Format::eD32Sfloat); + AddFormatQuery(vk::Format::eD16Unorm); + AddFormatQuery(vk::Format::eD16UnormS8Uint); + AddFormatQuery(vk::Format::eD24UnormS8Uint); + AddFormatQuery(vk::Format::eD32SfloatS8Uint); + AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); + AddFormatQuery(vk::Format::eBc2UnormBlock); + AddFormatQuery(vk::Format::eBc3UnormBlock); + AddFormatQuery(vk::Format::eBc4UnormBlock); + + return format_properties; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h new file mode 100644 index 000000000..e87c7a508 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -0,0 +1,116 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <map> +#include <vector> +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +/// Format usage descriptor +enum class FormatType { Linear, Optimal, Buffer }; + +/// Handles data specific to a physical device. +class VKDevice final { +public: + explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface); + ~VKDevice(); + + /// Initializes the device. Returns true on success. + bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); + + /** + * Returns a format supported by the device for the passed requeriments. + * @param wanted_format The ideal format to be returned. It may not be the returned format. + * @param wanted_usage The usage that must be fulfilled even if the format is not supported. + * @param format_type Format type usage. + * @returns A format supported by the device. + */ + vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const; + + /// Returns the dispatch loader with direct function pointers of the device + const vk::DispatchLoaderDynamic& GetDispatchLoader() const { + return dld; + } + + /// Returns the logical device + vk::Device GetLogical() const { + return logical.get(); + } + + /// Returns the physical device. + vk::PhysicalDevice GetPhysical() const { + return physical; + } + + /// Returns the main graphics queue. + vk::Queue GetGraphicsQueue() const { + return graphics_queue; + } + + /// Returns the main present queue. + vk::Queue GetPresentQueue() const { + return present_queue; + } + + /// Returns main graphics queue family index. + u32 GetGraphicsFamily() const { + return graphics_family; + } + + /// Returns main present queue family index. + u32 GetPresentFamily() const { + return present_family; + } + + /// Returns if the device is integrated with the host CPU + bool IsIntegrated() const { + return device_type == vk::PhysicalDeviceType::eIntegratedGpu; + } + + /// Returns uniform buffer alignment requeriment + u64 GetUniformBufferAlignment() const { + return uniform_buffer_alignment; + } + + /// Checks if the physical device is suitable. + static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface); + +private: + /// Sets up queue families. + void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); + + /// Sets up device properties. + void SetupProperties(const vk::DispatchLoaderDynamic& dldi); + + /// Returns a list of queue initialization descriptors. + std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; + + /// Returns true if a format is supported. + bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const; + + /// Returns the device properties for Vulkan formats. + static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( + const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); + + const vk::PhysicalDevice physical; ///< Physical device + vk::DispatchLoaderDynamic dld; ///< Device function pointers + UniqueDevice logical; ///< Logical device + vk::Queue graphics_queue; ///< Main graphics queue + vk::Queue present_queue; ///< Main present queue + u32 graphics_family{}; ///< Main graphics queue family index + u32 present_family{}; ///< Main present queue family index + vk::PhysicalDeviceType device_type; ///< Physical device type + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment + std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp new file mode 100644 index 000000000..0451babbf --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -0,0 +1,252 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <optional> +#include <tuple> +#include <vector> +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" + +namespace Vulkan { + +// TODO(Rodrigo): Fine tune this number +constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024; + +class VKMemoryAllocation final { +public: + explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, + vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type) + : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size}, + shifted_type{ShiftType(type)}, is_mappable{properties & + vk::MemoryPropertyFlagBits::eHostVisible} { + if (is_mappable) { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld)); + } + } + + ~VKMemoryAllocation() { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + if (is_mappable) + dev.unmapMemory(memory, dld); + dev.free(memory, nullptr, dld); + } + + VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { + auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size), + static_cast<u64>(alignment)); + if (!found) { + found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size), + static_cast<u64>(alignment)); + if (!found) { + // Signal out of memory, it'll try to do more allocations. + return nullptr; + } + } + u8* address = is_mappable ? base_address + *found : nullptr; + auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found, + *found + commit_size); + commits.push_back(commit.get()); + + // Last commit's address is highly probable to be free. + free_iterator = *found + commit_size; + + return commit; + } + + void Free(const VKMemoryCommitImpl* commit) { + ASSERT(commit); + const auto it = + std::find_if(commits.begin(), commits.end(), + [&](const auto& stored_commit) { return stored_commit == commit; }); + if (it == commits.end()) { + LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!"); + UNREACHABLE(); + return; + } + commits.erase(it); + } + + /// Returns whether this allocation is compatible with the arguments. + bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const { + return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) && + (type_mask & shifted_type) != 0; + } + +private: + static constexpr u32 ShiftType(u32 type) { + return 1U << type; + } + + /// A memory allocator, it may return a free region between "start" and "end" with the solicited + /// requeriments. + std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { + u64 iterator = start; + while (iterator + size < end) { + const u64 try_left = Common::AlignUp(iterator, alignment); + const u64 try_right = try_left + size; + + bool overlap = false; + for (const auto& commit : commits) { + const auto [commit_left, commit_right] = commit->interval; + if (try_left < commit_right && commit_left < try_right) { + // There's an overlap, continue the search where the overlapping commit ends. + iterator = commit_right; + overlap = true; + break; + } + } + if (!overlap) { + // A free address has been found. + return try_left; + } + } + // No free regions where found, return an empty optional. + return std::nullopt; + } + + const VKDevice& device; ///< Vulkan device. + const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. + const vk::MemoryPropertyFlags properties; ///< Vulkan properties. + const u64 alloc_size; ///< Size of this allocation. + const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. + const bool is_mappable; ///< Whether the allocation is mappable. + + /// Base address of the mapped pointer. + u8* base_address{}; + + /// Hints where the next free region is likely going to be. + u64 free_iterator{}; + + /// Stores all commits done from this allocation. + std::vector<const VKMemoryCommitImpl*> commits; +}; + +VKMemoryManager::VKMemoryManager(const VKDevice& device) + : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())}, + is_memory_unified{GetMemoryUnified(props)} {} + +VKMemoryManager::~VKMemoryManager() = default; + +VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) { + ASSERT(reqs.size < ALLOC_CHUNK_SIZE); + + // When a host visible commit is asked, search for host visible and coherent, otherwise search + // for a fast device local type. + const vk::MemoryPropertyFlags wanted_properties = + host_visible + ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent + : vk::MemoryPropertyFlagBits::eDeviceLocal; + + const auto TryCommit = [&]() -> VKMemoryCommit { + for (auto& alloc : allocs) { + if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits)) + continue; + + if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) { + return commit; + } + } + return {}; + }; + + if (auto commit = TryCommit(); commit) { + return commit; + } + + // Commit has failed, allocate more memory. + if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) { + // TODO(Rodrigo): Try to use host memory. + LOG_CRITICAL(Render_Vulkan, "Ran out of memory!"); + UNREACHABLE(); + } + + // Commit again, this time it won't fail since there's a fresh allocation above. If it does, + // there's a bug. + auto commit = TryCommit(); + ASSERT(commit); + return commit; +} + +VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld); + auto commit = Commit(requeriments, host_visible); + dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); + return commit; +} + +VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const auto requeriments = dev.getImageMemoryRequirements(image, dld); + auto commit = Commit(requeriments, host_visible); + dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); + return commit; +} + +bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, + u64 size) { + const u32 type = [&]() { + for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { + const auto flags = props.memoryTypes[type_index].propertyFlags; + if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { + // The type matches in type and in the wanted properties. + return type_index; + } + } + LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!"); + UNREACHABLE(); + return 0u; + }(); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + + // Try to allocate found type. + const vk::MemoryAllocateInfo memory_ai(size, type); + vk::DeviceMemory memory; + if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); + res != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); + return false; + } + allocs.push_back( + std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); + return true; +} + +/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) { + for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) { + if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { + // Memory is considered unified when heaps are device local only. + return false; + } + } + return true; +} + +VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, + u8* data, u64 begin, u64 end) + : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} + +VKMemoryCommitImpl::~VKMemoryCommitImpl() { + allocation->Free(this); +} + +u8* VKMemoryCommitImpl::GetData() const { + ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit."); + return data; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h new file mode 100644 index 000000000..073597b35 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -0,0 +1,87 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <utility> +#include <vector> +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +class VKDevice; +class VKMemoryAllocation; +class VKMemoryCommitImpl; + +using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; + +class VKMemoryManager final { +public: + explicit VKMemoryManager(const VKDevice& device); + ~VKMemoryManager(); + + /** + * Commits a memory with the specified requeriments. + * @param reqs Requeriments returned from a Vulkan call. + * @param host_visible Signals the allocator that it *must* use host visible and coherent + * memory. When passing false, it will try to allocate device local memory. + * @returns A memory commit. + */ + VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); + + /// Commits memory required by the buffer and binds it. + VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible); + + /// Commits memory required by the image and binds it. + VKMemoryCommit Commit(vk::Image image, bool host_visible); + + /// Returns true if the memory allocations are done always in host visible and coherent memory. + bool IsMemoryUnified() const { + return is_memory_unified; + } + +private: + /// Allocates a chunk of memory. + bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); + + /// Returns true if the device uses an unified memory model. + static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props); + + const VKDevice& device; ///< Device handler. + const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties. + const bool is_memory_unified; ///< True if memory model is unified. + std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations. +}; + +class VKMemoryCommitImpl final { + friend VKMemoryAllocation; + +public: + explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, + u64 begin, u64 end); + ~VKMemoryCommitImpl(); + + /// Returns the writeable memory map. The commit has to be mappable. + u8* GetData() const; + + /// Returns the Vulkan memory handler. + vk::DeviceMemory GetMemory() const { + return memory; + } + + /// Returns the start position of the commit relative to the allocation. + vk::DeviceSize GetOffset() const { + return static_cast<vk::DeviceSize>(interval.first); + } + +private: + std::pair<u64, u64> interval{}; ///< Interval where the commit exists. + vk::DeviceMemory memory; ///< Vulkan device memory handler. + VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. + u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included. +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp new file mode 100644 index 000000000..a1e117443 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -0,0 +1,285 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <optional> +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace Vulkan { + +// TODO(Rodrigo): Fine tune these numbers. +constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; +constexpr std::size_t FENCES_GROW_STEP = 0x40; + +class CommandBufferPool final : public VKFencedPool { +public: + CommandBufferPool(const VKDevice& device) + : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} + + void Allocate(std::size_t begin, std::size_t end) { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const u32 graphics_family = device.GetGraphicsFamily(); + + auto pool = std::make_unique<Pool>(); + + // Command buffers are going to be commited, recorded, executed every single usage cycle. + // They are also going to be reseted when commited. + const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient | + vk::CommandPoolCreateFlagBits::eResetCommandBuffer; + const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family); + pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld); + + const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle, + vk::CommandBufferLevel::ePrimary, + static_cast<u32>(COMMAND_BUFFER_POOL_SIZE)); + pool->cmdbufs = + dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld); + + pools.push_back(std::move(pool)); + } + + vk::CommandBuffer Commit(VKFence& fence) { + const std::size_t index = CommitResource(fence); + const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; + const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; + return *pools[pool_index]->cmdbufs[sub_index]; + } + +private: + struct Pool { + UniqueCommandPool handle; + std::vector<UniqueCommandBuffer> cmdbufs; + }; + + const VKDevice& device; + + std::vector<std::unique_ptr<Pool>> pools; +}; + +VKResource::VKResource() = default; + +VKResource::~VKResource() = default; + +VKFence::VKFence(const VKDevice& device, UniqueFence handle) + : device{device}, handle{std::move(handle)} {} + +VKFence::~VKFence() = default; + +void VKFence::Wait() { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld); +} + +void VKFence::Release() { + is_owned = false; +} + +void VKFence::Commit() { + is_owned = true; + is_used = true; +} + +bool VKFence::Tick(bool gpu_wait, bool owner_wait) { + if (!is_used) { + // If a fence is not used it's always free. + return true; + } + if (is_owned && !owner_wait) { + // The fence is still being owned (Release has not been called) and ownership wait has + // not been asked. + return false; + } + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + if (gpu_wait) { + // Wait for the fence if it has been requested. + dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld); + } else { + if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) { + // Vulkan fence is not ready, not much it can do here + return false; + } + } + + // Broadcast resources their free state. + for (auto* resource : protected_resources) { + resource->OnFenceRemoval(this); + } + protected_resources.clear(); + + // Prepare fence for reusage. + dev.resetFences({*handle}, dld); + is_used = false; + return true; +} + +void VKFence::Protect(VKResource* resource) { + protected_resources.push_back(resource); +} + +void VKFence::Unprotect(VKResource* resource) { + const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource); + ASSERT(it != protected_resources.end()); + + resource->OnFenceRemoval(this); + protected_resources.erase(it); +} + +VKFenceWatch::VKFenceWatch() = default; + +VKFenceWatch::~VKFenceWatch() { + if (fence) { + fence->Unprotect(this); + } +} + +void VKFenceWatch::Wait() { + if (fence == nullptr) { + return; + } + fence->Wait(); + fence->Unprotect(this); +} + +void VKFenceWatch::Watch(VKFence& new_fence) { + Wait(); + fence = &new_fence; + fence->Protect(this); +} + +bool VKFenceWatch::TryWatch(VKFence& new_fence) { + if (fence) { + return false; + } + fence = &new_fence; + fence->Protect(this); + return true; +} + +void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) { + ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence"); + fence = nullptr; +} + +VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {} + +VKFencedPool::~VKFencedPool() = default; + +std::size_t VKFencedPool::CommitResource(VKFence& fence) { + const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> { + for (std::size_t iterator = begin; iterator < end; ++iterator) { + if (watches[iterator]->TryWatch(fence)) { + // The resource is now being watched, a free resource was successfully found. + return iterator; + } + } + return {}; + }; + // Try to find a free resource from the hinted position to the end. + auto found = Search(free_iterator, watches.size()); + if (!found) { + // Search from beginning to the hinted position. + found = Search(0, free_iterator); + if (!found) { + // Both searches failed, the pool is full; handle it. + const std::size_t free_resource = ManageOverflow(); + + // Watch will wait for the resource to be free. + watches[free_resource]->Watch(fence); + found = free_resource; + } + } + // Free iterator is hinted to the resource after the one that's been commited. + free_iterator = (*found + 1) % watches.size(); + return *found; +} + +std::size_t VKFencedPool::ManageOverflow() { + const std::size_t old_capacity = watches.size(); + Grow(); + + // The last entry is guaranted to be free, since it's the first element of the freshly + // allocated resources. + return old_capacity; +} + +void VKFencedPool::Grow() { + const std::size_t old_capacity = watches.size(); + watches.resize(old_capacity + grow_step); + std::generate(watches.begin() + old_capacity, watches.end(), + []() { return std::make_unique<VKFenceWatch>(); }); + Allocate(old_capacity, old_capacity + grow_step); +} + +VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} { + GrowFences(FENCES_GROW_STEP); + command_buffer_pool = std::make_unique<CommandBufferPool>(device); +} + +VKResourceManager::~VKResourceManager() = default; + +VKFence& VKResourceManager::CommitFence() { + const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* { + const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); }; + const auto hinted = fences.begin() + fences_iterator; + + auto it = std::find_if(hinted, fences.end(), Tick); + if (it == fences.end()) { + it = std::find_if(fences.begin(), hinted, Tick); + if (it == hinted) { + return nullptr; + } + } + fences_iterator = std::distance(fences.begin(), it) + 1; + if (fences_iterator >= fences.size()) + fences_iterator = 0; + + auto& fence = *it; + fence->Commit(); + return fence.get(); + }; + + VKFence* found_fence = StepFences(false, false); + if (!found_fence) { + // Try again, this time waiting. + found_fence = StepFences(true, false); + + if (!found_fence) { + // Allocate new fences and try again. + LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(), + fences.size() + FENCES_GROW_STEP); + + GrowFences(FENCES_GROW_STEP); + found_fence = StepFences(true, false); + ASSERT(found_fence != nullptr); + } + } + return *found_fence; +} + +vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { + return command_buffer_pool->Commit(fence); +} + +void VKResourceManager::GrowFences(std::size_t new_fences_count) { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const vk::FenceCreateInfo fence_ci; + + const std::size_t previous_size = fences.size(); + fences.resize(previous_size + new_fences_count); + + std::generate(fences.begin() + previous_size, fences.end(), [&]() { + return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld)); + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h new file mode 100644 index 000000000..5bfe4cead --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h @@ -0,0 +1,180 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <memory> +#include <vector> +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +class VKDevice; +class VKFence; +class VKResourceManager; + +class CommandBufferPool; + +/// Interface for a Vulkan resource +class VKResource { +public: + explicit VKResource(); + virtual ~VKResource(); + + /** + * Signals the object that an owning fence has been signaled. + * @param signaling_fence Fence that signals its usage end. + */ + virtual void OnFenceRemoval(VKFence* signaling_fence) = 0; +}; + +/** + * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access. + * They must be commited from the resource manager. Their usage flow is: commit the fence from the + * resource manager, protect resources with it and use them, send the fence to an execution queue + * and Wait for it if needed and then call Release. Used resources will automatically be signaled + * when they are free to be reused. + * @brief Protects resources for concurrent usage and signals its release. + */ +class VKFence { + friend class VKResourceManager; + +public: + explicit VKFence(const VKDevice& device, UniqueFence handle); + ~VKFence(); + + /** + * Waits for the fence to be signaled. + * @warning You must have ownership of the fence and it has to be previously sent to a queue to + * call this function. + */ + void Wait(); + + /** + * Releases ownership of the fence. Pass after it has been sent to an execution queue. + * Unmanaged usage of the fence after the call will result in undefined behavior because it may + * be being used for something else. + */ + void Release(); + + /// Protects a resource with this fence. + void Protect(VKResource* resource); + + /// Removes protection for a resource. + void Unprotect(VKResource* resource); + + /// Retreives the fence. + operator vk::Fence() const { + return *handle; + } + +private: + /// Take ownership of the fence. + void Commit(); + + /** + * Updates the fence status. + * @warning Waiting for the owner might soft lock the execution. + * @param gpu_wait Wait for the fence to be signaled by the driver. + * @param owner_wait Wait for the owner to signal its freedom. + * @returns True if the fence is free. Waiting for gpu and owner will always return true. + */ + bool Tick(bool gpu_wait, bool owner_wait); + + const VKDevice& device; ///< Device handler + UniqueFence handle; ///< Vulkan fence + std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence + bool is_owned = false; ///< The fence has been commited but not released yet. + bool is_used = false; ///< The fence has been commited but it has not been checked to be free. +}; + +/** + * A fence watch is used to keep track of the usage of a fence and protect a resource or set of + * resources without having to inherit VKResource from their handlers. + */ +class VKFenceWatch final : public VKResource { +public: + explicit VKFenceWatch(); + ~VKFenceWatch(); + + /// Waits for the fence to be released. + void Wait(); + + /** + * Waits for a previous fence and watches a new one. + * @param new_fence New fence to wait to. + */ + void Watch(VKFence& new_fence); + + /** + * Checks if it's currently being watched and starts watching it if it's available. + * @returns True if a watch has started, false if it's being watched. + */ + bool TryWatch(VKFence& new_fence); + + void OnFenceRemoval(VKFence* signaling_fence) override; + +private: + VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free. +}; + +/** + * Handles a pool of resources protected by fences. Manages resource overflow allocating more + * resources. + */ +class VKFencedPool { +public: + explicit VKFencedPool(std::size_t grow_step); + virtual ~VKFencedPool(); + +protected: + /** + * Commits a free resource and protects it with a fence. It may allocate new resources. + * @param fence Fence that protects the commited resource. + * @returns Index of the resource commited. + */ + std::size_t CommitResource(VKFence& fence); + + /// Called when a chunk of resources have to be allocated. + virtual void Allocate(std::size_t begin, std::size_t end) = 0; + +private: + /// Manages pool overflow allocating new resources. + std::size_t ManageOverflow(); + + /// Allocates a new page of resources. + void Grow(); + + std::size_t grow_step = 0; ///< Number of new resources created after an overflow + std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found + std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources +}; + +/** + * The resource manager handles all resources that can be protected with a fence avoiding + * driver-side or GPU-side concurrent usage. Usage is documented in VKFence. + */ +class VKResourceManager final { +public: + explicit VKResourceManager(const VKDevice& device); + ~VKResourceManager(); + + /// Commits a fence. It has to be sent to a queue and released. + VKFence& CommitFence(); + + /// Commits an unused command buffer and protects it with a fence. + vk::CommandBuffer CommitCommandBuffer(VKFence& fence); + +private: + /// Allocates new fences. + void GrowFences(std::size_t new_fences_count); + + const VKDevice& device; ///< Device handler. + std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found. + std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences. + std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers. +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp new file mode 100644 index 000000000..ed3178f09 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -0,0 +1,81 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include <optional> +#include <unordered_map> + +#include "common/assert.h" +#include "common/cityhash.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_sampler_cache.h" +#include "video_core/textures/texture.h" + +namespace Vulkan { + +static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) { + // TODO(Rodrigo): Manage integer border colors + if (color == std::array<float, 4>{0, 0, 0, 0}) { + return vk::BorderColor::eFloatTransparentBlack; + } else if (color == std::array<float, 4>{0, 0, 0, 1}) { + return vk::BorderColor::eFloatOpaqueBlack; + } else if (color == std::array<float, 4>{1, 1, 1, 1}) { + return vk::BorderColor::eFloatOpaqueWhite; + } else { + return {}; + } +} + +std::size_t SamplerCacheKey::Hash() const { + static_assert(sizeof(raw) % sizeof(u64) == 0); + return static_cast<std::size_t>( + Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); +} + +bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { + return raw == rhs.raw; +} + +VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} + +VKSamplerCache::~VKSamplerCache() = default; + +vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) { + const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); + auto& sampler = entry->second; + if (is_cache_miss) { + sampler = CreateSampler(tsc); + } + return *sampler; +} + +UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) { + const float max_anisotropy = tsc.GetMaxAnisotropy(); + const bool has_anisotropy = max_anisotropy > 1.0f; + + const auto border_color = tsc.GetBorderColor(); + const auto vk_border_color = TryConvertBorderColor(border_color); + UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}", + border_color[0], border_color[1], border_color[2], border_color[3]); + + constexpr bool unnormalized_coords = false; + + const vk::SamplerCreateInfo sampler_ci( + {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), + MaxwellToVK::Sampler::Filter(tsc.min_filter), + MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v), + MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy, + max_anisotropy, tsc.depth_compare_enabled, + MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), + tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), + unnormalized_coords); + + const auto& dld = device.GetDispatchLoader(); + const auto dev = device.GetLogical(); + return dev.createSamplerUnique(sampler_ci, nullptr, dld); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h new file mode 100644 index 000000000..c6394dc87 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h @@ -0,0 +1,56 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <unordered_map> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/textures/texture.h" + +namespace Vulkan { + +class VKDevice; + +struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { + std::size_t Hash() const; + + bool operator==(const SamplerCacheKey& rhs) const; + + bool operator!=(const SamplerCacheKey& rhs) const { + return !operator==(rhs); + } +}; + +} // namespace Vulkan + +namespace std { + +template <> +struct hash<Vulkan::SamplerCacheKey> { + std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace Vulkan { + +class VKSamplerCache { +public: + explicit VKSamplerCache(const VKDevice& device); + ~VKSamplerCache(); + + vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc); + +private: + UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc); + + const VKDevice& device; + std::unordered_map<SamplerCacheKey, UniqueSampler> cache; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp new file mode 100644 index 000000000..f1fea1871 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -0,0 +1,60 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) + : device{device}, resource_manager{resource_manager} { + next_fence = &resource_manager.CommitFence(); + AllocateNewContext(); +} + +VKScheduler::~VKScheduler() = default; + +VKExecutionContext VKScheduler::GetExecutionContext() const { + return VKExecutionContext(current_fence, current_cmdbuf); +} + +VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) { + SubmitExecution(semaphore); + current_fence->Release(); + AllocateNewContext(); + return GetExecutionContext(); +} + +VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) { + SubmitExecution(semaphore); + current_fence->Wait(); + current_fence->Release(); + AllocateNewContext(); + return GetExecutionContext(); +} + +void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { + const auto& dld = device.GetDispatchLoader(); + current_cmdbuf.end(dld); + + const auto queue = device.GetGraphicsQueue(); + const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, ¤t_cmdbuf, semaphore ? 1u : 0u, + &semaphore); + queue.submit({submit_info}, *current_fence, dld); +} + +void VKScheduler::AllocateNewContext() { + current_fence = next_fence; + current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); + next_fence = &resource_manager.CommitFence(); + + const auto& dld = device.GetDispatchLoader(); + current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h new file mode 100644 index 000000000..cfaf5376f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -0,0 +1,69 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +class VKDevice; +class VKExecutionContext; +class VKFence; +class VKResourceManager; + +/// The scheduler abstracts command buffer and fence management with an interface that's able to do +/// OpenGL-like operations on Vulkan command buffers. +class VKScheduler { +public: + explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); + ~VKScheduler(); + + /// Gets the current execution context. + [[nodiscard]] VKExecutionContext GetExecutionContext() const; + + /// Sends the current execution context to the GPU. It invalidates the current execution context + /// and returns a new one. + VKExecutionContext Flush(vk::Semaphore semaphore = nullptr); + + /// Sends the current execution context to the GPU and waits for it to complete. It invalidates + /// the current execution context and returns a new one. + VKExecutionContext Finish(vk::Semaphore semaphore = nullptr); + +private: + void SubmitExecution(vk::Semaphore semaphore); + + void AllocateNewContext(); + + const VKDevice& device; + VKResourceManager& resource_manager; + vk::CommandBuffer current_cmdbuf; + VKFence* current_fence = nullptr; + VKFence* next_fence = nullptr; +}; + +class VKExecutionContext { + friend class VKScheduler; + +public: + VKExecutionContext() = default; + + VKFence& GetFence() const { + return *fence; + } + + vk::CommandBuffer GetCommandBuffer() const { + return cmdbuf; + } + +private: + explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf) + : fence{fence}, cmdbuf{cmdbuf} {} + + VKFence* fence{}; + vk::CommandBuffer cmdbuf; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp new file mode 100644 index 000000000..58ffa42f2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -0,0 +1,90 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <memory> +#include <optional> +#include <vector> + +#include "common/assert.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; +constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; + +VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, + vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) + : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ + pipeline_stage} { + CreateBuffers(memory_manager, usage); + ReserveWatches(WATCHES_INITIAL_RESERVE); +} + +VKStreamBuffer::~VKStreamBuffer() = default; + +std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { + ASSERT(size <= buffer_size); + mapped_size = size; + + if (offset + size > buffer_size) { + // The buffer would overflow, save the amount of used buffers, signal an invalidation and + // reset the state. + invalidation_mark = used_watches; + used_watches = 0; + offset = 0; + } + + return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; +} + +VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { + ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); + + if (invalidation_mark) { + // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. + exctx = scheduler.Flush(); + std::for_each(watches.begin(), watches.begin() + *invalidation_mark, + [&](auto& resource) { resource->Wait(); }); + invalidation_mark = std::nullopt; + } + + if (used_watches + 1 >= watches.size()) { + // Ensure that there are enough watches. + ReserveWatches(WATCHES_RESERVE_CHUNK); + } + // Add a watch for this allocation. + watches[used_watches++]->Watch(exctx.GetFence()); + + offset += size; + + return exctx; +} + +void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { + const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, + nullptr); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); + commit = memory_manager.Commit(*buffer, true); + mapped_pointer = commit->GetData(); +} + +void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { + const std::size_t previous_size = watches.size(); + watches.resize(previous_size + grow_size); + std::generate(watches.begin() + previous_size, watches.end(), + []() { return std::make_unique<VKFenceWatch>(); }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h new file mode 100644 index 000000000..69d036ccd --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -0,0 +1,72 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <optional> +#include <tuple> +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" + +namespace Vulkan { + +class VKDevice; +class VKFence; +class VKFenceWatch; +class VKResourceManager; +class VKScheduler; + +class VKStreamBuffer { +public: + explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, + vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); + ~VKStreamBuffer(); + + /** + * Reserves a region of memory from the stream buffer. + * @param size Size to reserve. + * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer + * offset and a boolean that's true when buffer has been invalidated. + */ + std::tuple<u8*, u64, bool> Reserve(u64 size); + + /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. + [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); + + vk::Buffer GetBuffer() const { + return *buffer; + } + +private: + /// Creates Vulkan buffer handles committing the required the required memory. + void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); + + /// Increases the amount of watches available. + void ReserveWatches(std::size_t grow_size); + + const VKDevice& device; ///< Vulkan device manager. + VKScheduler& scheduler; ///< Command scheduler. + const u64 buffer_size; ///< Total size of the stream buffer. + const vk::AccessFlags access; ///< Access usage of this stream buffer. + const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. + + UniqueBuffer buffer; ///< Mapped buffer. + VKMemoryCommit commit; ///< Memory commit. + u8* mapped_pointer{}; ///< Pointer to the host visible commit + + u64 offset{}; ///< Buffer iterator. + u64 mapped_size{}; ///< Size reserved for the current copy. + + std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches + std::size_t used_watches{}; ///< Count of watches, reset on invalidation. + std::optional<std::size_t> + invalidation_mark{}; ///< Number of watches used in the current invalidation. +}; + +} // namespace Vulkan |
