From 15c0c213b1efb63f1d6f4900409fca8c8984e973 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:07:53 -0400 Subject: astc.h: Move data to cpp implementation Moves leftover values that are no longer used by the gpu decoder back to the cpp implementation. --- src/video_core/textures/astc.cpp | 86 +++++++++++++++++++++++++++++----------- src/video_core/textures/astc.h | 41 ------------------- 2 files changed, 63 insertions(+), 64 deletions(-) (limited to 'src/video_core/textures') diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 3ab500760..26c19d75b 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -521,35 +521,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, std::span outBuf, u32 blockWidth, - u32 blockHeight) { - // Don't actually care about the void extent, just read the bits... - for (s32 i = 0; i < 4; ++i) { - strm.ReadBits<13>(); +// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] +// is the same as [(num_bits - 1):0] and repeats all the way down. +template +static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { + if (num_bits == 0 || to_bit == 0) { + return 0; } - - // Decode the RGBA components and renormalize them to the range [0, 255] - u16 r = static_cast(strm.ReadBits<16>()); - u16 g = static_cast(strm.ReadBits<16>()); - u16 b = static_cast(strm.ReadBits<16>()); - u16 a = static_cast(strm.ReadBits<16>()); - - u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast(b) & 0xFF00) << 8 | - (static_cast(a) & 0xFF00) << 16; - - for (u32 j = 0; j < blockHeight; j++) { - for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = rgba; + const IntType v = val & static_cast((1 << num_bits) - 1); + IntType res = v; + u32 reslen = num_bits; + while (reslen < to_bit) { + u32 comp = 0; + if (num_bits > to_bit - reslen) { + u32 newshift = to_bit - reslen; + comp = num_bits - newshift; + num_bits = newshift; } + res = static_cast(res << num_bits); + res = static_cast(res | (v >> comp)); + reslen += num_bits; } + return res; } -static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { - for (u32 j = 0; j < blockHeight; j++) { - for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = 0xFFFF00FF; - } +static constexpr std::size_t NumReplicateEntries(u32 num_bits) { + return std::size_t(1) << num_bits; +} + +template +static constexpr auto MakeReplicateTable() { + std::array table{}; + for (IntType value = 0; value < static_cast(std::size(table)); ++value) { + table[value] = Replicate(value, num_bits, to_bit); } + return table; } static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable(); @@ -572,6 +578,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable(); static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable(); static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable(); +static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable(); +static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); +static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback /// to the runtime implementation static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { @@ -1316,6 +1325,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, #undef READ_INT_VALUES } +static void FillVoidExtentLDR(InputBitStream& strm, std::span outBuf, u32 blockWidth, + u32 blockHeight) { + // Don't actually care about the void extent, just read the bits... + for (s32 i = 0; i < 4; ++i) { + strm.ReadBits<13>(); + } + + // Decode the RGBA components and renormalize them to the range [0, 255] + u16 r = static_cast(strm.ReadBits<16>()); + u16 g = static_cast(strm.ReadBits<16>()); + u16 b = static_cast(strm.ReadBits<16>()); + u16 a = static_cast(strm.ReadBits<16>()); + + u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast(b) & 0xFF00) << 8 | + (static_cast(a) & 0xFF00) << 16; + + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = rgba; + } + } +} + +static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = 0xFFFF00FF; + } + } +} + static void DecompressBlock(std::span inBuf, const u32 blockWidth, const u32 blockHeight, std::span outBuf) { InputBitStream strm(inBuf); diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 0229ae122..9e148afc4 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -79,47 +79,6 @@ constexpr std::array MakeEncodedValues() { constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); -// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] -// is the same as [(num_bits - 1):0] and repeats all the way down. -template -constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { - if (num_bits == 0 || to_bit == 0) { - return 0; - } - const IntType v = val & static_cast((1 << num_bits) - 1); - IntType res = v; - u32 reslen = num_bits; - while (reslen < to_bit) { - u32 comp = 0; - if (num_bits > to_bit - reslen) { - u32 newshift = to_bit - reslen; - comp = num_bits - newshift; - num_bits = newshift; - } - res = static_cast(res << num_bits); - res = static_cast(res | (v >> comp)); - reslen += num_bits; - } - return res; -} - -constexpr std::size_t NumReplicateEntries(u32 num_bits) { - return std::size_t(1) << num_bits; -} - -template -constexpr auto MakeReplicateTable() { - std::array table{}; - for (IntType value = 0; value < static_cast(std::size(table)); ++value) { - table[value] = Replicate(value, num_bits, to_bit); - } - return table; -} - -constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable(); -constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); -constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); - void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span output); -- cgit v1.2.3 From 5665d055476fa793192523c3cb6fe06369d58674 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:48:41 -0400 Subject: astc_decoder: Optimize the use EncodingData This buffer was a list of EncodingData structures sorted by their bit length, with some duplication from the cpu decoder implementation. We can take advantage of its sorted property to optimize its usage in the shader. Thanks to wwylele for the optimization idea. --- src/video_core/host_shaders/astc_decoder.comp | 50 ++++++++-------- src/video_core/renderer_opengl/util_shaders.cpp | 13 ++-- src/video_core/renderer_opengl/util_shaders.h | 1 - src/video_core/renderer_vulkan/vk_compute_pass.cpp | 42 +++---------- src/video_core/textures/astc.cpp | 70 ++++++++++++++++++++++ src/video_core/textures/astc.h | 70 ---------------------- 6 files changed, 108 insertions(+), 138 deletions(-) (limited to 'src/video_core/textures') diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index c37f15bfd..7f4efa31a 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -10,18 +10,16 @@ #define END_PUSH_CONSTANTS }; #define UNIFORM(n) #define BINDING_INPUT_BUFFER 0 -#define BINDING_ENC_BUFFER 1 -#define BINDING_SWIZZLE_BUFFER 2 -#define BINDING_OUTPUT_IMAGE 3 +#define BINDING_SWIZZLE_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv #define BEGIN_PUSH_CONSTANTS #define END_PUSH_CONSTANTS #define UNIFORM(n) layout(location = n) uniform -#define BINDING_SWIZZLE_BUFFER 0 -#define BINDING_INPUT_BUFFER 1 -#define BINDING_ENC_BUFFER 2 +#define BINDING_INPUT_BUFFER 0 +#define BINDING_SWIZZLE_BUFFER 1 #define BINDING_OUTPUT_IMAGE 0 #endif @@ -64,11 +62,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint astc_data[]; }; -// ASTC Encodings data -layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { - EncodingData encoding_values[]; -}; - layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; const uint GOB_SIZE_X = 64; @@ -94,6 +87,19 @@ const int JUST_BITS = 0; const int QUINT = 1; const int TRIT = 2; +// ASTC Encodings data, sorted in ascending order based on their BitLength value +// (see GetBitLength() function) +EncodingData encoding_values[22] = EncodingData[]( + EncodingData(JUST_BITS, 0, 0, 0), EncodingData(JUST_BITS, 1, 0, 0), EncodingData(TRIT, 0, 0, 0), + EncodingData(JUST_BITS, 2, 0, 0), EncodingData(QUINT, 0, 0, 0), EncodingData(TRIT, 1, 0, 0), + EncodingData(JUST_BITS, 3, 0, 0), EncodingData(QUINT, 1, 0, 0), EncodingData(TRIT, 2, 0, 0), + EncodingData(JUST_BITS, 4, 0, 0), EncodingData(QUINT, 2, 0, 0), EncodingData(TRIT, 3, 0, 0), + EncodingData(JUST_BITS, 5, 0, 0), EncodingData(QUINT, 3, 0, 0), EncodingData(TRIT, 4, 0, 0), + EncodingData(JUST_BITS, 6, 0, 0), EncodingData(QUINT, 4, 0, 0), EncodingData(TRIT, 5, 0, 0), + EncodingData(JUST_BITS, 7, 0, 0), EncodingData(QUINT, 5, 0, 0), EncodingData(TRIT, 6, 0, 0), + EncodingData(JUST_BITS, 8, 0, 0) +); + // The following constants are expanded variants of the Replicate() // function calls corresponding to the following arguments: // value: index into the generated table @@ -596,22 +602,16 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) { for (uint i = 0; i < num_partitions; i++) { num_values += ((modes[i] >> 2) + 1) << 1; } - int range = 256; - while (--range > 0) { - EncodingData val = encoding_values[range]; + // Find the largest encoding that's within color_data_bits + // TODO(ameerj): profile with binary search + int range = 0; + while (++range < encoding_values.length()) { uint bit_length = GetBitLength(num_values, range); - if (bit_length <= color_data_bits) { - while (--range > 0) { - EncodingData newval = encoding_values[range]; - if (newval.encoding != val.encoding && newval.num_bits != val.num_bits) { - break; - } - } - ++range; + if (bit_length > color_data_bits) { break; } } - DecodeIntegerSequence(range, num_values); + DecodeIntegerSequence(range - 1, num_values); uint out_index = 0; for (int itr = 0; itr < result_index; ++itr) { if (out_index >= num_values) { @@ -1110,10 +1110,10 @@ TexelWeightParams DecodeBlockInfo(uint block_index) { } weight_index -= 2; if ((mode_layout != 9) && ((mode & 0x200) != 0)) { - const int max_weights[6] = int[6](9, 11, 15, 19, 23, 31); + const int max_weights[6] = int[6](7, 8, 9, 10, 11, 12); params.max_weight = max_weights[weight_index]; } else { - const int max_weights[6] = int[6](1, 2, 3, 4, 5, 7); + const int max_weights[6] = int[6](1, 2, 3, 4, 5, 6); params.max_weight = max_weights[weight_index]; } return params; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 37a4d1d9d..a2b264700 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -60,19 +60,15 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); - astc_buffer.Create(); glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); - glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES, - 0); } UtilShaders::~UtilShaders() = default; void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, std::span swizzles) { - static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; - static constexpr GLuint BINDING_INPUT_BUFFER = 1; - static constexpr GLuint BINDING_ENC_BUFFER = 2; + static constexpr GLuint BINDING_INPUT_BUFFER = 0; + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; const Extent2D tile_size{ @@ -81,7 +77,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, }; program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); @@ -103,11 +98,11 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glUniform1ui(6, params.block_height); glUniform1ui(7, params.block_height_mask); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, - GL_WRITE_ONLY, GL_RGBA8); // ASTC texture data glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, GL_RGBA8); glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 53d65f368..ef881e35f 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -62,7 +62,6 @@ private: ProgramManager& program_manager; OGLBuffer swizzle_table_buffer; - OGLBuffer astc_buffer; OGLProgram astc_decoder_program; OGLProgram block_linear_unswizzle_2d_program; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 561cf5e11..328813a57 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -30,16 +30,13 @@ namespace Vulkan { using Tegra::Texture::SWIZZLE_TABLE; -using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES; -using namespace Tegra::Texture::ASTC; namespace { constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; -constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; -constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; -constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; -constexpr size_t ASTC_NUM_BINDINGS = 4; +constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1; +constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2; +constexpr size_t ASTC_NUM_BINDINGS = 3; template inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ @@ -75,7 +72,7 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ .score = 2, }; -constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ +constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ { .binding = ASTC_BINDING_INPUT_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -83,13 +80,6 @@ constexpr std::array ASTC_DESCRIPTOR_SET_BINDIN .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }, - { - .binding = ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, { .binding = ASTC_BINDING_SWIZZLE_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -108,12 +98,12 @@ constexpr std::array ASTC_DESCRIPTOR_SET_BINDIN constexpr DescriptorBankInfo ASTC_BANK_INFO{ .uniform_buffers = 0, - .storage_buffers = 3, + .storage_buffers = 2, .texture_buffers = 0, .image_buffers = 0, .textures = 0, .images = 1, - .score = 4, + .score = 3, }; constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ @@ -135,14 +125,6 @@ constexpr std::array .offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry), }, - { - .dstBinding = ASTC_BINDING_ENC_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, { .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, .dstArrayElement = 0, @@ -355,7 +337,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, ASTCDecoderPass::~ASTCDecoderPass() = default; void ASTCDecoderPass::MakeDataBuffer() { - constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE); + constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE); data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -369,11 +351,7 @@ void ASTCDecoderPass::MakeDataBuffer() { data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); - std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES, - sizeof(ASTC_ENCODINGS_VALUES)); - // Tack on the swizzle table at the end of the buffer - std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE, - sizeof(SWIZZLE_TABLE)); + std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE)); scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { @@ -443,9 +421,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); - update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES)); - update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), - sizeof(SWIZZLE_TABLE)); + update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); const void* const descriptor_data{update_descriptor_queue.UpdateData()}; diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 26c19d75b..25161df1f 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -151,6 +151,76 @@ private: const IntType& m_Bits; }; +enum class IntegerEncoding { JustBits, Quint, Trit }; + +struct IntegerEncodedValue { + constexpr IntegerEncodedValue() = default; + + constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) + : encoding{encoding_}, num_bits{num_bits_} {} + + constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { + return encoding == other.encoding && num_bits == other.num_bits; + } + + // Returns the number of bits required to encode num_vals values. + u32 GetBitLength(u32 num_vals) const { + u32 total_bits = num_bits * num_vals; + if (encoding == IntegerEncoding::Trit) { + total_bits += (num_vals * 8 + 4) / 5; + } else if (encoding == IntegerEncoding::Quint) { + total_bits += (num_vals * 7 + 2) / 3; + } + return total_bits; + } + + IntegerEncoding encoding{}; + u32 num_bits = 0; + u32 bit_value = 0; + union { + u32 quint_value = 0; + u32 trit_value; + }; +}; + +// Returns a new instance of this struct that corresponds to the +// can take no more than mav_value values +static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { + while (mav_value > 0) { + u32 check = mav_value + 1; + + // Is mav_value a power of two? + if (!(check & (check - 1))) { + return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); + } + + // Is mav_value of the type 3*2^n - 1? + if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); + } + + // Is mav_value of the type 5*2^n - 1? + if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); + } + + // Apparently it can't be represented with a bounded integer sequence... + // just iterate. + mav_value--; + } + return IntegerEncodedValue(IntegerEncoding::JustBits, 0); +} + +static constexpr std::array MakeEncodedValues() { + std::array encodings{}; + for (std::size_t i = 0; i < encodings.size(); ++i) { + encodings[i] = CreateEncoding(static_cast(i)); + } + return encodings; +} + +static constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); + namespace Tegra::Texture::ASTC { using IntegerEncodedVector = boost::container::static_vector< IntegerEncodedValue, 256, diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 9e148afc4..14d2beec0 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -9,76 +9,6 @@ namespace Tegra::Texture::ASTC { -enum class IntegerEncoding { JustBits, Quint, Trit }; - -struct IntegerEncodedValue { - constexpr IntegerEncodedValue() = default; - - constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) - : encoding{encoding_}, num_bits{num_bits_} {} - - constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { - return encoding == other.encoding && num_bits == other.num_bits; - } - - // Returns the number of bits required to encode num_vals values. - u32 GetBitLength(u32 num_vals) const { - u32 total_bits = num_bits * num_vals; - if (encoding == IntegerEncoding::Trit) { - total_bits += (num_vals * 8 + 4) / 5; - } else if (encoding == IntegerEncoding::Quint) { - total_bits += (num_vals * 7 + 2) / 3; - } - return total_bits; - } - - IntegerEncoding encoding{}; - u32 num_bits = 0; - u32 bit_value = 0; - union { - u32 quint_value = 0; - u32 trit_value; - }; -}; - -// Returns a new instance of this struct that corresponds to the -// can take no more than mav_value values -constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { - while (mav_value > 0) { - u32 check = mav_value + 1; - - // Is mav_value a power of two? - if (!(check & (check - 1))) { - return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); - } - - // Is mav_value of the type 3*2^n - 1? - if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { - return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); - } - - // Is mav_value of the type 5*2^n - 1? - if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { - return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); - } - - // Apparently it can't be represented with a bounded integer sequence... - // just iterate. - mav_value--; - } - return IntegerEncodedValue(IntegerEncoding::JustBits, 0); -} - -constexpr std::array MakeEncodedValues() { - std::array encodings{}; - for (std::size_t i = 0; i < encodings.size(); ++i) { - encodings[i] = CreateEncoding(static_cast(i)); - } - return encodings; -} - -constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); - void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span output); -- cgit v1.2.3