diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/rasterizer_cache.h | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_base.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 40 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 2 | ||||
| -rw-r--r-- | src/video_core/surface.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/surface.h | 148 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 36 |
9 files changed, 151 insertions, 126 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index d1777b25b..6de07ea56 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -121,10 +121,8 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } - u32 old = regs.reg_array[method]; - regs.reg_array[method] = value; - - if (value != old) { + if (regs.reg_array[method] != value) { + regs.reg_array[method] = value; if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { dirty_flags.vertex_attrib_format = true; diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 6d41321fa..bcf0c15a4 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -5,6 +5,7 @@ #pragma once #include <set> +#include <unordered_map> #include <boost/icl/interval_map.hpp> #include <boost/range/iterator_range_core.hpp> @@ -88,29 +89,25 @@ public: /// Invalidates everything in the cache void InvalidateAll() { - while (object_cache.begin() != object_cache.end()) { - Unregister(*object_cache.begin()->second.begin()); + while (interval_cache.begin() != interval_cache.end()) { + Unregister(*interval_cache.begin()->second.begin()); } } protected: /// Tries to get an object from the cache with the specified address T TryGet(VAddr addr) const { - const ObjectInterval interval{addr}; - for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) { - for (auto& cached_object : pair.second) { - if (cached_object->GetAddr() == addr) { - return cached_object; - } - } - } + const auto iter = map_cache.find(addr); + if (iter != map_cache.end()) + return iter->second; return nullptr; } /// Register an object into the cache void Register(const T& object) { object->SetIsRegistered(true); - object_cache.add({GetInterval(object), ObjectSet{object}}); + interval_cache.add({GetInterval(object), ObjectSet{object}}); + map_cache.insert({object->GetAddr(), object}); rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); } @@ -118,13 +115,13 @@ protected: void Unregister(const T& object) { object->SetIsRegistered(false); rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1); - // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit if (Settings::values.use_accurate_gpu_emulation) { FlushObject(object); } - object_cache.subtract({GetInterval(object), ObjectSet{object}}); + interval_cache.subtract({GetInterval(object), ObjectSet{object}}); + map_cache.erase(object->GetAddr()); } /// Returns a ticks counter used for tracking when cached objects were last modified @@ -141,7 +138,7 @@ private: std::vector<T> objects; const ObjectInterval interval{addr, addr + size}; - for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) { + for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) { for (auto& cached_object : pair.second) { if (!cached_object) { continue; @@ -167,15 +164,17 @@ private: } using ObjectSet = std::set<T>; - using ObjectCache = boost::icl::interval_map<VAddr, ObjectSet>; - using ObjectInterval = typename ObjectCache::interval_type; + using ObjectCache = std::unordered_map<VAddr, T>; + using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; + using ObjectInterval = typename IntervalCache::interval_type; static auto GetInterval(const T& object) { return ObjectInterval::right_open(object->GetAddr(), object->GetAddr() + object->GetSizeInBytes()); } - ObjectCache object_cache; ///< Cache of objects - u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing + ObjectCache map_cache; + IntervalCache interval_cache; ///< Cache of objects + u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing VideoCore::RasterizerInterface& rasterizer; }; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 0df3725c2..1482cdb40 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -5,7 +5,6 @@ #include "core/frontend/emu_window.h" #include "core/settings.h" #include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" namespace VideoCore { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 894f4f294..b44ecfa1c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -314,6 +314,8 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB // Depth formats {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F @@ -456,6 +458,8 @@ static constexpr GLConversionArray morton_to_gl_fns = { MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_5X5>, MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_10X8>, + MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z24S8>, @@ -526,6 +530,8 @@ static constexpr GLConversionArray gl_to_morton_fns = { nullptr, nullptr, nullptr, + nullptr, + nullptr, MortonCopy<false, PixelFormat::Z32F>, MortonCopy<false, PixelFormat::Z16>, MortonCopy<false, PixelFormat::Z24S8>, @@ -544,8 +550,8 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params if (params.is_layered) { u64 offset = params.GetMipmapLevelOffset(mip_level); u64 offset_gl = 0; - u64 layer_size = params.LayerMemorySize(); - u64 gl_size = params.LayerSizeGL(mip_level); + const u64 layer_size = params.LayerMemorySize(); + const u64 gl_size = params.LayerSizeGL(mip_level); for (u32 i = 0; i < params.depth; i++) { functions[static_cast<std::size_t>(params.pixel_format)]( params.MipWidth(mip_level), params.MipBlockHeight(mip_level), @@ -555,7 +561,7 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params offset_gl += gl_size; } } else { - u64 offset = params.GetMipmapLevelOffset(mip_level); + const u64 offset = params.GetMipmapLevelOffset(mip_level); functions[static_cast<std::size_t>(params.pixel_format)]( params.MipWidth(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), @@ -709,18 +715,18 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64)); static void CopySurface(const Surface& src_surface, const Surface& dst_surface, - GLuint copy_pbo_handle, GLenum src_attachment = 0, - GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { + const GLuint copy_pbo_handle, const GLenum src_attachment = 0, + const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) { MICROPROFILE_SCOPE(OpenGL_CopySurface); ASSERT_MSG(dst_attachment == 0, "Unimplemented"); const auto& src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; - auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); - auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); + const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); + const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); - std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes); + const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes); glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); @@ -744,13 +750,10 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface, LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " "reinterpretation but the texture is tiled."); } - std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; - std::vector<u8> data(remaining_size); - std::memcpy(data.data(), Memory::GetPointer(dst_params.addr + src_params.size_in_bytes), - data.size()); + const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, - data.data()); + Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); } glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -932,7 +935,9 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma case PixelFormat::ASTC_2D_8X8_SRGB: case PixelFormat::ASTC_2D_8X5_SRGB: case PixelFormat::ASTC_2D_5X4_SRGB: - case PixelFormat::ASTC_2D_5X5_SRGB: { + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_SRGB: { // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. u32 block_width{}; u32 block_height{}; @@ -967,7 +972,11 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm case PixelFormat::ASTC_2D_4X4: case PixelFormat::ASTC_2D_8X8: case PixelFormat::ASTC_2D_4X4_SRGB: - case PixelFormat::ASTC_2D_8X8_SRGB: { + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_5X5: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_SRGB: { LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", static_cast<u32>(pixel_format)); UNREACHABLE(); @@ -1336,6 +1345,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, break; case SurfaceTarget::TextureCubemap: case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: AccurateCopySurface(old_surface, new_surface); break; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 2635f2b0c..98622a058 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -266,7 +266,8 @@ void OpenGLState::ApplyViewport() const { const auto& updated = viewports[0]; if (updated.x != current.x || updated.y != current.y || updated.width != current.width || updated.height != current.height) { - glViewport(updated.x, updated.y, updated.width, updated.height); + glViewport(static_cast<GLint>(updated.x), static_cast<GLint>(updated.y), + static_cast<GLsizei>(updated.width), static_cast<GLsizei>(updated.height)); } if (updated.depth_range_near != current.depth_range_near || updated.depth_range_far != current.depth_range_far) { @@ -313,7 +314,7 @@ void OpenGLState::ApplyGlobalBlending() const { } } -void OpenGLState::ApplyTargetBlending(int target, bool force) const { +void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { const Blend& updated = blend[target]; const Blend& current = cur_state.blend[target]; const bool blend_changed = updated.enabled != current.enabled || force; diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index eacca0b9c..e5d1baae6 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -208,7 +208,7 @@ private: void ApplyPrimitiveRestart() const; void ApplyStencilTest() const; void ApplyViewport() const; - void ApplyTargetBlending(int target, bool force) const; + void ApplyTargetBlending(std::size_t target, bool force) const; void ApplyGlobalBlending() const; void ApplyBlending() const; void ApplyLogicOp() const; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 051ad3964..9582dd2ca 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -306,6 +306,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return is_srgb ? PixelFormat::ASTC_2D_8X8_SRGB : PixelFormat::ASTC_2D_8X8; case Tegra::Texture::TextureFormat::ASTC_2D_8X5: return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5; + case Tegra::Texture::TextureFormat::ASTC_2D_10X8: + return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8; case Tegra::Texture::TextureFormat::R16_G16: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -453,6 +455,8 @@ bool IsPixelFormatASTC(PixelFormat format) { case PixelFormat::ASTC_2D_5X5_SRGB: case PixelFormat::ASTC_2D_8X8_SRGB: case PixelFormat::ASTC_2D_8X5_SRGB: + case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_SRGB: return true; default: return false; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index dfdb8d122..0dd3eb2e4 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -74,19 +74,21 @@ enum class PixelFormat { ASTC_2D_5X4_SRGB = 56, ASTC_2D_5X5 = 57, ASTC_2D_5X5_SRGB = 58, + ASTC_2D_10X8 = 59, + ASTC_2D_10X8_SRGB = 60, MaxColorFormat, // Depth formats - Z32F = 59, - Z16 = 60, + Z32F = 61, + Z16 = 62, MaxDepthFormat, // DepthStencil formats - Z24S8 = 61, - S8Z24 = 62, - Z32FS8 = 63, + Z24S8 = 63, + S8Z24 = 64, + Z32FS8 = 65, MaxDepthStencilFormat, @@ -193,6 +195,8 @@ static constexpr u32 GetCompressionFactor(PixelFormat format) { 4, // ASTC_2D_5X4_SRGB 4, // ASTC_2D_5X5 4, // ASTC_2D_5X5_SRGB + 4, // ASTC_2D_10X8 + 4, // ASTC_2D_10X8_SRGB 1, // Z32F 1, // Z16 1, // Z24S8 @@ -208,70 +212,72 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // G8R8U - 1, // G8R8S - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // R32UI - 8, // ASTC_2D_8X8 - 8, // ASTC_2D_8X5 - 5, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 8, // ASTC_2D_8X8_SRGB - 8, // ASTC_2D_8X5_SRGB - 5, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 - 5, // ASTC_2D_5X5_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // G8R8U + 1, // G8R8S + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI + 8, // ASTC_2D_8X8 + 8, // ASTC_2D_8X5 + 5, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 4, // ASTC_2D_4X4_SRGB + 8, // ASTC_2D_8X8_SRGB + 8, // ASTC_2D_8X5_SRGB + 5, // ASTC_2D_5X4_SRGB + 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_SRGB + 10, // ASTC_2D_10X8 + 10, // ASTC_2D_10X8_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 }}; ASSERT(static_cast<std::size_t>(format) < block_width_table.size()); return block_width_table[static_cast<std::size_t>(format)]; @@ -341,6 +347,8 @@ static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { 4, // ASTC_2D_5X4_SRGB 5, // ASTC_2D_5X5 5, // ASTC_2D_5X5_SRGB + 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_SRGB 1, // Z32F 1, // Z16 1, // Z24S8 @@ -416,6 +424,8 @@ static constexpr u32 GetFormatBpp(PixelFormat format) { 128, // ASTC_2D_5X4_SRGB 128, // ASTC_2D_5X5 128, // ASTC_2D_5X5_SRGB + 128, // ASTC_2D_10X8 + 128, // ASTC_2D_10X8_SRGB 32, // Z32F 16, // Z16 32, // Z24S8 diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 3066abf61..19f30b1b5 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -45,7 +45,7 @@ constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>(); * Instead of going gob by gob, we map the coordinates inside a block and manage from * those. Block_Width is assumed to be 1. */ -void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, +void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, const u32 y_end, const u32 z_end, const u32 tile_offset, const u32 xy_block_size, const u32 layer_z, const u32 stride_x, @@ -81,7 +81,7 @@ void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unsw * Instead of going gob by gob, we map the coordinates inside a block and manage from * those. Block_Width is assumed to be 1. */ -void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, +void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, const u32 y_end, const u32 z_end, const u32 tile_offset, const u32 xy_block_size, const u32 layer_z, const u32 stride_x, @@ -90,10 +90,10 @@ void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizz u32 z_address = tile_offset; const u32 x_startb = x_start * bytes_per_pixel; const u32 x_endb = x_end * bytes_per_pixel; - const u32 copy_size = 16; - const u32 gob_size_x = 64; - const u32 gob_size_y = 8; - const u32 gob_size_z = 1; + constexpr u32 copy_size = 16; + constexpr u32 gob_size_x = 64; + constexpr u32 gob_size_y = 8; + constexpr u32 gob_size_z = 1; const u32 gob_size = gob_size_x * gob_size_y * gob_size_z; for (u32 z = z_start; z < z_end; z++) { u32 y_address = z_address; @@ -126,23 +126,23 @@ void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizz * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces */ template <bool fast> -void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, const u32 width, - const u32 height, const u32 depth, const u32 bytes_per_pixel, +void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, + const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) { auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; const u32 stride_x = width * out_bytes_per_pixel; const u32 layer_z = height * stride_x; - const u32 gob_x_bytes = 64; + constexpr u32 gob_x_bytes = 64; const u32 gob_elements_x = gob_x_bytes / bytes_per_pixel; - const u32 gob_elements_y = 8; - const u32 gob_elements_z = 1; + constexpr u32 gob_elements_y = 8; + constexpr u32 gob_elements_z = 1; const u32 block_x_elements = gob_elements_x; const u32 block_y_elements = gob_elements_y * block_height; const u32 block_z_elements = gob_elements_z * block_depth; const u32 blocks_on_x = div_ceil(width, block_x_elements); const u32 blocks_on_y = div_ceil(height, block_y_elements); const u32 blocks_on_z = div_ceil(depth, block_z_elements); - const u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z; + constexpr u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z; const u32 xy_block_size = gob_size * block_height; const u32 block_size = xy_block_size * block_depth; u32 tile_offset = 0; @@ -171,7 +171,7 @@ void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, } void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, - u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, + u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, bool unswizzle, u32 block_height, u32 block_depth) { if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, @@ -202,6 +202,8 @@ u32 BytesPerPixel(TextureFormat format) { case TextureFormat::ASTC_2D_5X4: case TextureFormat::ASTC_2D_8X8: case TextureFormat::ASTC_2D_8X5: + case TextureFormat::ASTC_2D_10X8: + case TextureFormat::ASTC_2D_5X5: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::BF10GF11RF11: @@ -294,6 +296,8 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::BC6H_SF16: case TextureFormat::ASTC_2D_4X4: case TextureFormat::ASTC_2D_8X8: + case TextureFormat::ASTC_2D_5X5: + case TextureFormat::ASTC_2D_10X8: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A1B5G5R5: @@ -321,9 +325,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { - const u32 gobs_in_x = 64; - const u32 gobs_in_y = 8; - const u32 gobs_in_z = 1; + constexpr u32 gobs_in_x = 64; + constexpr u32 gobs_in_y = 8; + constexpr u32 gobs_in_z = 1; const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gobs_in_x); const u32 aligned_height = Common::AlignUp(height, gobs_in_y * block_height); const u32 aligned_depth = Common::AlignUp(depth, gobs_in_z * block_depth); |
