From b347543e8341ae323ea232d47df2c144fe21c739 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 18:27:29 -0400 Subject: Reduce amount of size calculations. --- src/common/common_funcs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/common') diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 8b0d34da6..00a5698f3 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -4,6 +4,7 @@ #pragma once +#include <algorithm> #include <string> #if !defined(ARCHITECTURE_x86_64) @@ -60,4 +61,14 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } +template <class ForwardIt, class T, class Compare = std::less<>> +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + } // namespace Common -- cgit v1.2.3 From 345e73f2feb0701e3c3099d002a1c21fb524eae4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 04:17:48 -0300 Subject: video_core: Use un-shifted block sizes to avoid integer divisions Instead of storing all block width, height and depths in their shifted form: block_width = 1U << block_shift; Store them like they are provided by the emulated hardware (their block_shift form). This way we can avoid doing the costly Common::AlignUp operation to align texture sizes and replace CPU integer divisions with bitwise logic (defined in Common::AlignBits). 
--- src/common/alignment.h | 5 +++ src/video_core/engines/fermi_2d.h | 9 ++-- src/video_core/engines/maxwell_dma.h | 4 +- src/video_core/texture_cache/surface_base.cpp | 3 +- src/video_core/texture_cache/surface_params.cpp | 39 +++++++++--------- src/video_core/texture_cache/surface_params.h | 7 ++-- src/video_core/texture_cache/texture_cache.h | 3 ++ src/video_core/textures/decoders.cpp | 55 ++++++++++++++++--------- src/video_core/textures/decoders.h | 4 +- src/video_core/textures/texture.h | 9 ++-- 10 files changed, 78 insertions(+), 60 deletions(-) (limited to 'src/common') diff --git a/src/common/alignment.h b/src/common/alignment.h index d94a2291f..3379a6967 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -19,6 +19,11 @@ constexpr T AlignDown(T value, std::size_t size) { return static_cast<T>(value - value % size); } +template <typename T> +constexpr T AlignBits(T value, T align) { + return (value + ((1 << align) - 1)) >> align << align; +} + template <typename T> constexpr bool Is4KBAligned(T value) { static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 45f59a4d9..3d28afa91 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -63,18 +63,15 @@ public: } u32 BlockWidth() const { - // The block width is stored in log2 format. - return 1 << block_width; + return block_width; } u32 BlockHeight() const { - // The block height is stored in log2 format. - return 1 << block_height; + return block_height; } u32 BlockDepth() const { - // The block depth is stored in log2 format. 
- return 1 << block_depth; + return block_depth; } }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index e5942f671..522fa97dc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -59,11 +59,11 @@ public: }; u32 BlockHeight() const { - return 1 << block_height; + return block_height; } u32 BlockDepth() const { - return 1 << block_depth; + return block_depth; } }; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 36ca72b4a..510d1aef5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -22,7 +22,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ params.GetHostSizeInBytes()} { - std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; @@ -75,7 +74,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, return; } if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", + ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index b537b26e2..3a47f404d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -96,9 +96,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( SurfaceParams params; 
params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = false; - params.block_width = 1 << std::min(block_width, 5U); - params.block_height = 1 << std::min(block_height, 5U); - params.block_depth = 1 << std::min(block_depth, 5U); + params.block_width = std::min(block_width, 5U); + params.block_height = std::min(block_height, 5U); + params.block_depth = std::min(block_depth, 5U); params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromDepthFormat(format); params.component_type = ComponentTypeFromDepthFormat(format); @@ -120,9 +120,9 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = 1 << config.memory_layout.block_width; - params.block_height = 1 << config.memory_layout.block_height; - params.block_depth = 1 << config.memory_layout.block_depth; + params.block_width = config.memory_layout.block_width; + params.block_height = config.memory_layout.block_height; + params.block_depth = config.memory_layout.block_depth; params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); @@ -149,9 +149,9 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.is_tiled = !config.linear; params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, - params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, - params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, + params.block_width = params.is_tiled ? 
std::min(config.BlockWidth(), 5U) : 0, + params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, + params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); @@ -190,9 +190,9 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - u32 block_height = 16; - while (block_height > 1 && blocks_in_y <= block_height * 4) { - block_height >>= 1; + u32 block_height = 4; + while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { + --block_height; } return block_height; } @@ -202,17 +202,17 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { return this->block_depth; } if (is_layered) { - return 1; + return 0; } const u32 depth{GetMipDepth(level)}; - u32 block_depth = 32; - while (block_depth > 1 && depth * 2 <= block_depth) { - block_depth >>= 1; + u32 block_depth = 5; + while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { + --block_depth; } - if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { - return 16; + if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { + return 4; } return block_depth; @@ -252,7 +252,8 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } if (is_tiled && is_layered) { - return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + return Common::AlignBits(size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); } return size; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index e0ec1be0e..7c48782c7 100644 --- 
a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -54,12 +54,12 @@ public: constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 host_size_in_bytes = 0; - for (std::size_t level = 0; level < num_levels; level++) { + for (u32 level = 0; level < num_levels; ++level) { const std::size_t width = Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); const std::size_t height = Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); - const std::size_t depth = is_layered ? depth : GetMipDepth(level); + const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); host_size_in_bytes += width * height * depth * rgb8_bpp; } } else { @@ -96,7 +96,8 @@ public: // Helper used for out of class size calculations static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { - return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + return Common::AlignBits(out_size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); } /// Returns the offset in bytes in guest memory of a given mipmap level. 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1c2b63dae..f35d0c88f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -81,6 +81,9 @@ public: if (!gpu_addr) { return {}; } + if (gpu_addr == 0x1b7ec0000) { + // __debugbreak(); + } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..f45fd175a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable { std::array, N> values{}; }; -constexpr u32 gob_size_x = 64; -constexpr u32 gob_size_y = 8; -constexpr u32 gob_size_z = 1; -constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; +constexpr u32 gob_size_x_shift = 6; +constexpr u32 gob_size_y_shift = 3; +constexpr u32 gob_size_z_shift = 0; +constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; + +constexpr u32 gob_size_x = 1U << gob_size_x_shift; +constexpr u32 gob_size_y = 1U << gob_size_y_shift; +constexpr u32 gob_size_z = 1U << gob_size_z_shift; +constexpr u32 gob_size = 1U << gob_size_shift; + constexpr u32 fast_swizzle_align = 16; constexpr auto legacy_swizzle_table = SwizzleTable(); @@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { + const u32 block_height_size{1U << block_height}; + const u32 block_depth_size{1U << block_depth}; if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { SwizzledData(swizzled_data, unswizzled_data, 
unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, - width_spacing); + bytes_per_pixel, out_bytes_per_pixel, block_height_size, + block_depth_size, width_spacing); } else { SwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, - width_spacing); + bytes_per_pixel, out_bytes_per_pixel, block_height_size, + block_depth_size, width_spacing); } } @@ -249,16 +257,18 @@ std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { + const u32 block_height_size{1U << block_height}; const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / gob_size_x}; for (u32 line = 0; line < subrect_height; ++line) { const u32 gob_address_y = - (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; + (line / (gob_size_y * block_height_size)) * gob_size * block_height_size * + image_width_in_gobs + + ((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[line % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 gob_address = - gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; + gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size; const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; u8* dest_addr = swizzled_data + swizzled_offset; @@ -271,14 +281,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, 
u32 swizzled_width, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 offset_x, u32 offset_y) { + const u32 block_height_size{1U << block_height}; for (u32 line = 0; line < subrect_height; ++line) { const u32 y2 = line + offset_y; - const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + - ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; + const u32 gob_address_y = + (y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size + + ((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[y2 % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 x2 = (x + offset_x) * bytes_per_pixel; - const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; + const u32 gob_address = + gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size; const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; u8* source_addr = swizzled_data + swizzled_offset; @@ -291,16 +304,18 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, const u32 block_height, const std::size_t copy_size, const u8* source_data, u8* swizzle_data) { + const u32 block_height_size{1U << block_height}; const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; std::size_t count = 0; for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { const std::size_t gob_address_y = - (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; + (y / (gob_size_y * block_height_size)) * gob_size * block_height_size * + image_width_in_gobs + + ((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = 
legacy_swizzle_table[y % gob_size_y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = - gob_address_y + (x / gob_size_x) * gob_size * block_height; + gob_address_y + (x / gob_size_x) * gob_size * block_height_size; const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; const u8* source_line = source_data + count; u8* dest_addr = swizzle_data + swizzled_offset; @@ -356,9 +371,9 @@ std::vector DecodeTexture(const std::vector& texture_data, TextureFormat std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { - const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); - const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); - const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); + const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); + const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); + const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); return aligned_width * aligned_height * aligned_depth; } else { return width * height * depth * bytes_per_pixel; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e072d8401..eaec9b5a5 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -12,8 +12,8 @@ namespace Tegra::Texture { // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents // an small rect of (64/bytes_per_pixel)X8. -inline std::size_t GetGOBSize() { - return 512; +inline std::size_t GetGOBSizeShift() { + return 9; } /// Unswizzles a swizzled texture without changing its format. 
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 219bfd559..f22b4e7c7 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -219,20 +219,17 @@ struct TICEntry { u32 BlockWidth() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_width; + return block_width; } u32 BlockHeight() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_height; + return block_height; } u32 BlockDepth() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_depth; + return block_depth; } bool IsTiled() const { -- cgit v1.2.3 From 94f2be5473182789ec3f6388b43fcd708a505500 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:12:35 -0400 Subject: texture_cache: Optimize GetMipBlockHeight and GetMipBlockDepth --- src/common/bit_util.h | 44 +++++++++++++++++++++++++ src/video_core/texture_cache/surface_params.cpp | 19 ++++------- 2 files changed, 50 insertions(+), 13 deletions(-) (limited to 'src/common') diff --git a/src/common/bit_util.h b/src/common/bit_util.h index d032df413..6f7d5a947 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -97,4 +97,48 @@ inline u32 CountTrailingZeroes64(u64 value) { } #endif +#ifdef _MSC_VER + +inline u32 MostSignificantBit32(const u32 value) { + unsigned long result; + _BitScanReverse(&result, value); + return static_cast<u32>(result); +} + +inline u32 MostSignificantBit64(const u64 value) { + unsigned long result; + _BitScanReverse64(&result, value); + return static_cast<u32>(result); +} + +#else + +inline u32 MostSignificantBit32(const u32 value) { + return 31U - static_cast<u32>(__builtin_clz(value)); +} + +inline u32 MostSignificantBit64(const u64 value) { + return 63U - static_cast<u32>(__builtin_clzll(value)); +} + +#endif + +inline u32 Log2Floor32(const u32 value) { + return MostSignificantBit32(value); +} + +inline u32 Log2Ceil32(const u32 
value) { + const u32 log2_f = Log2Floor32(value); + return log2_f + ((value ^ (1U << log2_f)) != 0U); +} + +inline u32 Log2Floor64(const u64 value) { + return MostSignificantBit64(value); +} + +inline u32 Log2Ceil64(const u64 value) { + const u64 log2_f = static_cast<u64>(Log2Floor64(value)); + return static_cast<u32>(log2_f + ((value ^ (1ULL << log2_f)) != 0ULL)); +} + } // namespace Common diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 3a47f404d..e7e671d8c 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -5,6 +5,7 @@ #include #include "common/alignment.h" +#include "common/bit_util.h" #include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" @@ -190,11 +191,8 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - u32 block_height = 4; - while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { - --block_height; - } - return block_height; + const u32 block_height = Common::Log2Ceil32(blocks_in_y); + return std::clamp(block_height, 3U, 8U) - 3U; } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { @@ -206,15 +204,10 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { } const u32 depth{GetMipDepth(level)}; - u32 block_depth = 5; - while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { - --block_depth; - } - - if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { - return 4; + const u32 block_depth = Common::Log2Ceil32(depth); + if (block_depth > 4) { + return 5 - (GetMipBlockHeight(level) >= 2); } - return block_depth; } -- cgit v1.2.3 From 06c4ce86458310870abec90ada68ac393256b9b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Apr 2019 02:07:18 -0300 Subject: shader: Decode 
SUST and implement backing image functionality --- CMakeModules/GenerateSCMRev.cmake | 1 + src/common/CMakeLists.txt | 1 + src/video_core/CMakeLists.txt | 1 + src/video_core/engines/shader_bytecode.h | 66 +++++++++++++++- .../renderer_opengl/gl_shader_decompiler.cpp | 70 +++++++++++++++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 7 ++ src/video_core/shader/decode.cpp | 1 + src/video_core/shader/decode/image.cpp | 89 ++++++++++++++++++++++ src/video_core/shader/node.h | 42 +++++++++- src/video_core/shader/shader_ir.h | 9 +++ 10 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 src/video_core/shader/decode/image.cpp (limited to 'src/common') diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 31edeb63d..dd65cfe42 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -70,6 +70,7 @@ set(HASH_FILES "${VIDEO_CORE}/shader/decode/half_set.cpp" "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/hfma2.cpp" + "${VIDEO_CORE}/shader/decode/image.cpp" "${VIDEO_CORE}/shader/decode/integer_set.cpp" "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/memory.cpp" diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 198b3fe07..8ae05137b 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/half_set.cpp" "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/hfma2.cpp" + "${VIDEO_CORE}/shader/decode/image.cpp" "${VIDEO_CORE}/shader/decode/integer_set.cpp" "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/memory.cpp" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9d43f03d2..6839abe71 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -90,6 +90,7 @@ add_library(video_core STATIC 
shader/decode/conversion.cpp shader/decode/memory.cpp shader/decode/texture.cpp + shader/decode/image.cpp shader/decode/float_set_predicate.cpp shader/decode/integer_set_predicate.cpp shader/decode/half_set_predicate.cpp diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 5b32e1249..54a1a04f9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -126,6 +126,15 @@ union Sampler { u64 value{}; }; +union Image { + Image() = default; + + constexpr explicit Image(u64 value) : value{value} {} + + BitField<36, 13, u64> index; + u64 value; +}; + } // namespace Tegra::Shader namespace std { @@ -344,6 +353,26 @@ enum class TextureMiscMode : u64 { PTP, }; +enum class SurfaceDataMode : u64 { + P = 0, + D_BA = 1, +}; + +enum class OutOfBoundsStore : u64 { + Ignore = 0, + Clamp = 1, + Trap = 2, +}; + +enum class ImageType : u64 { + Texture1D = 0, + TextureBuffer = 1, + Texture1DArray = 2, + Texture2D = 3, + Texture2DArray = 4, + Texture3D = 5, +}; + enum class IsberdMode : u64 { None = 0, Patch = 1, @@ -398,7 +427,7 @@ enum class LmemLoadCacheManagement : u64 { CV = 3, }; -enum class LmemStoreCacheManagement : u64 { +enum class StoreCacheManagement : u64 { Default = 0, CG = 1, CS = 2, @@ -811,7 +840,7 @@ union Instruction { } ld_l; union { - BitField<44, 2, LmemStoreCacheManagement> cache_management; + BitField<44, 2, StoreCacheManagement> cache_management; } st_l; union { @@ -1294,6 +1323,35 @@ union Instruction { } } tlds; + union { + BitField<24, 2, StoreCacheManagement> cache_management; + BitField<33, 3, ImageType> image_type; + BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; + BitField<51, 1, u64> is_immediate; + BitField<52, 1, SurfaceDataMode> mode; + + BitField<20, 3, StoreType> store_data_layout; + BitField<20, 4, u64> component_mask_selector; + + bool IsComponentEnabled(std::size_t component) const { + ASSERT(mode == SurfaceDataMode::P); + constexpr u8 R = 
0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + constexpr std::array mask = { + 0, (R), (G), (R | G), (B), (R | B), + (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), + (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; + return std::bitset<4>{mask.at(component_mask_selector)}.test(component); + } + + StoreType GetStoreDataLayout() const { + ASSERT(mode == SurfaceDataMode::D_BA); + return store_data_layout; + } + } sust; + union { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; @@ -1385,6 +1443,7 @@ union Instruction { Attribute attribute; Sampler sampler; + Image image; u64 value; }; @@ -1428,6 +1487,7 @@ public: TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations TMML_B, // Texture Mip Map Level TMML, // Texture Mip Map Level + SUST, // Surface Store EXIT, IPA, OUT_R, // Emit vertex/primitive @@ -1558,6 +1618,7 @@ public: Synch, Memory, Texture, + Image, FloatSet, FloatSetPredicate, IntegerSet, @@ -1703,6 +1764,7 @@ private: INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), + INST("11101011001-----", Id::SUST, Type::Image, "SUST"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ece386cdc..2ae2f1db2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -180,6 +180,7 @@ public: DeclareGlobalMemory(); DeclareSamplers(); DeclarePhysicalAttributeReader(); + DeclareImages(); code.AddLine("void execute_{}() {{", suffix); ++code.scope; @@ -531,6 +532,36 @@ private: code.AddNewLine(); } + void DeclareImages() 
{ + const auto& images{ir.GetImages()}; + for (const auto& image : images) { + const std::string image_type = [&]() { + switch (image.GetType()) { + case Tegra::Shader::ImageType::Texture1D: + return "image1D"; + case Tegra::Shader::ImageType::TextureBuffer: + return "bufferImage"; + case Tegra::Shader::ImageType::Texture1DArray: + return "image1DArray"; + case Tegra::Shader::ImageType::Texture2D: + return "image2D"; + case Tegra::Shader::ImageType::Texture2DArray: + return "image2DArray"; + case Tegra::Shader::ImageType::Texture3D: + return "image3D"; + default: + UNREACHABLE(); + return "image1D"; + } + }(); + code.AddLine("layout (binding = IMAGE_BINDING_" + std::to_string(image.GetIndex()) + + ") coherent volatile writeonly uniform " + image_type + ' ' + + GetImage(image) + ';'); + } + if (!images.empty()) + code.AddNewLine(); + } + void VisitBlock(const NodeBlock& bb) { for (const auto& node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { @@ -1478,6 +1509,39 @@ private: return tmp; } + std::string ImageStore(Operation operation) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const auto meta{std::get(operation.GetMeta())}; + + std::string expr = "imageStore("; + expr += GetImage(meta.image); + expr += ", "; + + const std::size_t coords_count{operation.GetOperandsCount()}; + expr += constructors.at(coords_count - 1); + for (std::size_t i = 0; i < coords_count; ++i) { + expr += VisitOperand(operation, i, Type::Int); + if (i + 1 < coords_count) { + expr += ", "; + } + } + expr += "), "; + + const std::size_t values_count{meta.values.size()}; + UNIMPLEMENTED_IF(values_count != 4); + expr += "vec4("; + for (std::size_t i = 0; i < values_count; ++i) { + expr += Visit(meta.values.at(i)); + if (i + 1 < values_count) { + expr += ", "; + } + } + expr += "));"; + + code.AddLine(expr); + return {}; + } + std::string Branch(Operation operation) { const auto target = std::get_if(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ 
-1718,6 +1782,8 @@ private: &GLSLDecompiler::TextureQueryLod, &GLSLDecompiler::TexelFetch, + &GLSLDecompiler::ImageStore, + &GLSLDecompiler::Branch, &GLSLDecompiler::PushFlowStack, &GLSLDecompiler::PopFlowStack, @@ -1786,6 +1852,10 @@ private: return GetDeclarationWithSuffix(static_cast(sampler.GetIndex()), "sampler"); } + std::string GetImage(const Image& image) const { + return GetDeclarationWithSuffix(static_cast(image.GetIndex()), "image"); + } + void EmitIfdefIsBuffer(const Sampler& sampler) { code.AddLine(fmt::format("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex())); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 33ad9764a..97ce214b1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -935,6 +935,11 @@ private: return {}; } + Id ImageStore(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id Branch(Operation operation) { const auto target = std::get_if(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -1326,6 +1331,8 @@ private: &SPIRVDecompiler::TextureQueryLod, &SPIRVDecompiler::TexelFetch, + &SPIRVDecompiler::ImageStore, + &SPIRVDecompiler::Branch, &SPIRVDecompiler::PushFlowStack, &SPIRVDecompiler::PopFlowStack, diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index a0554c97e..2c9ff28f2 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -169,6 +169,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, + {OpCode::Type::Image, &ShaderIR::DecodeImage}, {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, {OpCode::Type::HalfSetPredicate, 
&ShaderIR::DecodeHalfSetPredicate}, diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp new file mode 100644 index 000000000..66fdf5714 --- /dev/null +++ b/src/video_core/shader/decode/image.cpp @@ -0,0 +1,89 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +namespace { +std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { + switch (image_type) { + case Tegra::Shader::ImageType::Texture1D: + case Tegra::Shader::ImageType::TextureBuffer: + return 1; + case Tegra::Shader::ImageType::Texture1DArray: + case Tegra::Shader::ImageType::Texture2D: + return 2; + case Tegra::Shader::ImageType::Texture2DArray: + case Tegra::Shader::ImageType::Texture3D: + return 3; + } + UNREACHABLE(); + return 1; +} +} // Anonymous namespace + +u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + switch (opcode->get().GetId()) { + case OpCode::Id::SUST: { + UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); + UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); + UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); + UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store + + std::vector values; + constexpr std::size_t hardcoded_size{4}; + for (std::size_t i = 0; i < hardcoded_size; ++i) { + values.push_back(GetRegister(instr.gpr0.Value() + i)); + } + + std::vector coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + 
for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + ASSERT(instr.sust.is_immediate); + const auto& image{GetImage(instr.image, instr.sust.image_type)}; + MetaImage meta{image, values}; + const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; + bb.push_back(store); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); + } + + return pc; +} + +const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { + const auto offset{static_cast(image.index.Value())}; + + // If this image has already been used, return the existing mapping. + const auto itr{std::find_if(used_images.begin(), used_images.end(), + [=](const Image& entry) { return entry.GetOffset() == offset; })}; + if (itr != used_images.end()) { + ASSERT(itr->GetType() == type); + return *itr; + } + + // Otherwise create a new mapping for this image. + const std::size_t next_index{used_images.size()}; + const Image entry{offset, next_index, type}; + return *used_images.emplace(entry).first; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 3cfb911bb..8b8d83ae7 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -146,6 +146,8 @@ enum class OperationCode { TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 + ImageStore, /// (MetaImage, float[N] coords) -> void + Branch, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void PopFlowStack, /// () -> void @@ -263,6 +265,39 @@ private: bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. 
}; +class Image { +public: + explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) + : offset{offset}, index{index}, type{type}, is_bindless{false} {} + + std::size_t GetOffset() const { + return offset; + } + + std::size_t GetIndex() const { + return index; + } + + Tegra::Shader::ImageType GetType() const { + return type; + } + + bool IsBindless() const { + return is_bindless; + } + + bool operator<(const Image& rhs) const { + return std::tie(offset, index, type, is_bindless) < + std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); + } + +private: + std::size_t offset{}; + std::size_t index{}; + Tegra::Shader::ImageType type{}; + bool is_bindless{}; +}; + struct GlobalMemoryBase { u32 cbuf_index{}; u32 cbuf_offset{}; @@ -289,8 +324,13 @@ struct MetaTexture { u32 element{}; }; +struct MetaImage { + const Image& image; + std::vector values; +}; + /// Parameters that modify an operation but are not part of any particular operand -using Meta = std::variant; +using Meta = std::variant; /// Holds any kind of operation that can be done in the IR class OperationNode final { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1b84c0672..c7f264371 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -104,6 +104,10 @@ public: return used_samplers; } + const std::set& GetImages() const { + return used_images; + } + const std::array& GetClipDistances() const { return used_clip_distances; @@ -154,6 +158,7 @@ private: u32 DecodeConversion(NodeBlock& bb, u32 pc); u32 DecodeMemory(NodeBlock& bb, u32 pc); u32 DecodeTexture(NodeBlock& bb, u32 pc); + u32 DecodeImage(NodeBlock& bb, u32 pc); u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); @@ -254,6 +259,9 @@ private: Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + /// Accesses an image. 
+ const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -329,6 +337,7 @@ private: std::set used_output_attributes; std::map used_cbufs; std::set used_samplers; + std::set used_images; std::array used_clip_distances{}; std::map used_global_memory; bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes -- cgit v1.2.3 From de982deb25f685dd8fa67680fb8dd6c627f70859 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 01:47:09 -0300 Subject: common/alignment: Address feedback --- src/common/alignment.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/common') diff --git a/src/common/alignment.h b/src/common/alignment.h index 3379a6967..617b14d9b 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -20,8 +20,9 @@ constexpr T AlignDown(T value, std::size_t size) { } template -constexpr T AlignBits(T value, T align) { - return (value + ((1 << align) - 1)) >> align << align; +constexpr T AlignBits(T value, std::size_t align) { + static_assert(std::is_unsigned_v, "T must be an unsigned value."); + return static_cast((value + ((1ULL << align) - 1)) >> align << align); } template -- cgit v1.2.3 From 3b9d89839dc62e9e63a3cbe9636cf85276babdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 Jul 2019 21:10:59 -0400 Subject: texture_cache: Address Feedback --- src/common/CMakeLists.txt | 1 + src/common/binary_find.h | 21 +++++++++++++++++++++ src/common/common_funcs.h | 10 ---------- src/video_core/renderer_opengl/gl_shader_cache.cpp | 6 ++++-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 +++--- src/video_core/renderer_opengl/gl_texture_cache.h | 9 ++++----- src/video_core/texture_cache/surface_base.h | 4 ++-- 7 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 src/common/binary_find.h (limited to 'src/common') diff --git 
a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 8ae05137b..2554add28 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -75,6 +75,7 @@ add_library(common STATIC assert.h detached_tasks.cpp detached_tasks.h + binary_find.h bit_field.h bit_util.h cityhash.cpp diff --git a/src/common/binary_find.h b/src/common/binary_find.h new file mode 100644 index 000000000..5cc523bf9 --- /dev/null +++ b/src/common/binary_find.h @@ -0,0 +1,21 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Common { + +template > +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + +} // namespace Common diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 00a5698f3..04ecac959 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -61,14 +61,4 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } -template > -ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { - // Note: BOTH type T and the type after ForwardIt is dereferenced - // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. - // This is stricter than lower_bound requirement (see above) - - first = std::lower_bound(first, last, value, comp); - return first != last && !comp(value, *first) ? 
first : last; -} - } // namespace Common diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 718703091..1bd182d98 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -103,14 +103,16 @@ constexpr std::tuple GetPrimitiveDescription(GLen /// Calculates the size of a program stream std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { constexpr std::size_t start_offset = 10; - constexpr u64 key = 0xE2400FFFFF07000FULL; + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; std::size_t offset = start_offset; std::size_t size = start_offset * sizeof(u64); while (offset < program.size()) { const u64 instruction = program[offset]; if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == key) { + if ((instruction & mask) == self_jumping_branch) { // End on Maxwell's "nop" instruction break; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 780526b66..08ae1a429 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -267,7 +267,7 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { } } -void CachedSurface::UploadTexture(std::vector& staging_buffer) { +void CachedSurface::UploadTexture(const std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.emulated_levels; ++level) { @@ -275,7 +275,7 @@ void CachedSurface::UploadTexture(std::vector& staging_buffer) { } } -void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buffer) { +void 
CachedSurface::UploadTextureMipmap(u32 level, const std::vector& staging_buffer) { glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); @@ -284,7 +284,7 @@ void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buff const std::size_t mip_offset = compression_type == SurfaceCompression::Converted ? params.GetConvertedMipmapOffset(level) : params.GetHostMipmapLevelOffset(level); - u8* buffer{staging_buffer.data() + mip_offset}; + const u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; switch (params.target) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index e7cc66fbb..ff6ab6988 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -39,7 +39,7 @@ public: explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); - void UploadTexture(std::vector& staging_buffer) override; + void UploadTexture(const std::vector& staging_buffer) override; void DownloadTexture(std::vector& staging_buffer) override; GLenum GetTarget() const { @@ -57,7 +57,7 @@ protected: View CreateViewInner(const ViewParams& view_key, bool is_proxy); private: - void UploadTextureMipmap(u32 level, std::vector& staging_buffer); + void UploadTextureMipmap(u32 level, const std::vector& staging_buffer); GLenum internal_format{}; GLenum format{}; @@ -72,14 +72,13 @@ private: class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, - const bool is_proxy); + explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void 
Attach(GLenum attachment, GLenum target) const; - GLuint GetTexture() { + GLuint GetTexture() const { if (is_proxy) { return surface.GetTexture(); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index eaed6545d..8ba386a8a 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -9,7 +9,7 @@ #include #include "common/assert.h" -#include "common/common_funcs.h" +#include "common/binary_find.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" @@ -191,7 +191,7 @@ private: template class SurfaceBase : public SurfaceBaseImpl { public: - virtual void UploadTexture(std::vector& staging_buffer) = 0; + virtual void UploadTexture(const std::vector& staging_buffer) = 0; virtual void DownloadTexture(std::vector& staging_buffer) = 0; -- cgit v1.2.3