diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9_types.h | 85 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 2 | ||||
| -rw-r--r-- | src/video_core/memory_manager.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 173 | ||||
| -rw-r--r-- | src/video_core/textures/texture.h | 2 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 10 |
9 files changed, 168 insertions, 124 deletions
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 70030066a..d7e749485 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); + ASSERT(!current_frame_info.segment_enabled); uncomp_writer.WriteBit(false); // Segmentation enabled (TODO). const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 87eafdb03..3b1ed4b3a 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -22,7 +22,7 @@ struct Vp9FrameDimensions { }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); -enum FrameFlags : u32 { +enum class FrameFlags : u32 { IsKeyFrame = 1 << 0, LastFrameIsKeyFrame = 1 << 1, FrameSizeChanged = 1 << 2, @@ -30,6 +30,7 @@ enum FrameFlags : u32 { LastShowFrame = 1 << 4, IntraOnly = 1 << 5, }; +DECLARE_ENUM_FLAG_OPERATORS(FrameFlags) enum class TxSize { Tx4x4 = 0, // 4x4 transform @@ -92,44 +93,34 @@ struct Vp9EntropyProbs { static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame; - bool intra_only; - bool last_frame_was_key; - bool frame_size_changed; - bool error_resilient_mode; - bool last_frame_shown; - bool show_frame; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; std::array<s8, 4> ref_frame_sign_bias; s32 base_q_index; s32 y_dc_delta_q; s32 uv_dc_delta_q; s32 uv_ac_delta_q; - bool lossless; s32 transform_mode; - bool allow_high_precision_mv; s32 interp_filter; s32 reference_mode; - s8 comp_fixed_ref; - std::array<s8, 2> comp_var_ref; s32 log2_tile_cols; s32 log2_tile_rows; - bool segment_enabled; - bool segment_map_update; - bool segment_map_temporal_update; - s32 segment_abs_delta; - std::array<u32, 8> segment_feature_enable; - std::array<std::array<s16, 4>, 8> segment_feature_data; - bool mode_ref_delta_enabled; - bool use_prev_in_find_mv_refs; std::array<s8, 4> ref_deltas; std::array<s8, 2> mode_deltas; Vp9EntropyProbs entropy; Vp9FrameDimensions frame_size; u8 first_level; u8 sharpness_level; - u32 bitstream_size; - std::array<u64, 4> frame_offsets; - std::array<bool, 4> refresh_frame; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + bool lossless; + bool allow_high_precision_mv; + bool segment_enabled; + bool mode_ref_delta_enabled; }; struct Vp9FrameContainer { @@ -145,7 +136,7 @@ struct PictureInfo { Vp9FrameDimensions golden_frame_size; ///< 0x50 Vp9FrameDimensions alt_frame_size; ///< 0x58 Vp9FrameDimensions current_frame_size; ///< 0x60 - u32 vp9_flags; ///< 0x68 + FrameFlags vp9_flags; ///< 0x68 std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C u8 first_level; ///< 0x70 u8 sharpness_level; ///< 0x71 @@ -158,60 +149,43 @@ struct PictureInfo { u8 allow_high_precision_mv; ///< 0x78 u8 interp_filter; ///< 0x79 u8 reference_mode; ///< 0x7A - s8 comp_fixed_ref; ///< 0x7B - std::array<s8, 2> comp_var_ref; ///< 0x7C + INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B u8 log2_tile_cols; ///< 0x7E u8 log2_tile_rows; ///< 0x7F Segmentation segmentation; ///< 0x80 LoopFilter loop_filter; ///< 0xE4 - INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB - u32 surface_params; ///< 0xF0 - INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 + INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB [[nodiscard]] Vp9PictureInfo Convert() const { return { - .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, - .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, - .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, - .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, - .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, - .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, - .show_frame = true, + .bitstream_size = bitstream_size, + .frame_offsets{}, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, .uv_dc_delta_q = uv_dc_delta_q, .uv_ac_delta_q = uv_ac_delta_q, - .lossless = lossless != 0, .transform_mode = tx_mode, - .allow_high_precision_mv = allow_high_precision_mv != 0, .interp_filter = interp_filter, .reference_mode = reference_mode, - .comp_fixed_ref = comp_fixed_ref, - .comp_var_ref = comp_var_ref, .log2_tile_cols = log2_tile_cols, .log2_tile_rows = log2_tile_rows, - .segment_enabled = segmentation.enabled != 0, - .segment_map_update = segmentation.update_map != 0, - .segment_map_temporal_update = segmentation.temporal_update != 0, - .segment_abs_delta = segmentation.abs_delta, - .segment_feature_enable = segmentation.feature_mask, - .segment_feature_data = segmentation.feature_data, - .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, - .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) && - !(vp9_flags == (FrameFlags::FrameSizeChanged)) && - !(vp9_flags == (FrameFlags::IntraOnly)) && - (vp9_flags == (FrameFlags::LastShowFrame)) && - !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, - .bitstream_size = bitstream_size, - .frame_offsets{}, - .refresh_frame{}, + .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame), + .intra_only = True(vp9_flags & FrameFlags::IntraOnly), + .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame), + .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode), + .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame), + .show_frame = true, + .lossless = lossless != 0, + .allow_high_precision_mv = allow_high_precision_mv != 0, + .segment_enabled = segmentation.enabled != 0, + .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, }; } }; @@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48); ASSERT_POSITION(first_level, 0x70); ASSERT_POSITION(segmentation, 0x80); ASSERT_POSITION(loop_filter, 0xE4); -ASSERT_POSITION(surface_params, 0xF0); #undef ASSERT_POSITION #define ASSERT_POSITION(field_name, position) \ diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 7f50ac2f8..d03d480b4 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -6,7 +6,7 @@ #include <array> #include <bitset> -#include <xbyak.h> +#include <xbyak/xbyak.h> #include "common/bit_field.h" #include "common/common_types.h" #include "common/x64/xbyak_abi.h" diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 882eff880..c60ed6453 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -463,6 +463,7 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( ++page_index; page_offset = 0; remaining_size -= num_bytes; + old_page_addr = page_addr; } split(); return result; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a37ca1fdf..f316c4f92 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -281,7 +281,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, - .support_int8 = true, + .support_int8 = device.IsInt8Supported(), .support_int16 = device.IsShaderInt16Supported(), .support_int64 = device.IsShaderInt64Supported(), .support_vertex_instance_id = false, diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index d2c4a7fcf..24e943e4c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -76,56 +76,31 @@ template <bool TO_LINEAR> void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { switch (bytes_per_pixel) { - case 1: - return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height, +#define BPP_CASE(x) \ + case x: \ + return SwizzleImpl<TO_LINEAR, x>(output, input, width, height, depth, block_height, \ block_depth, stride_alignment); - case 2: - return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 3: - return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 4: - return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 6: - return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 8: - return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 12: - return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 16: - return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); + BPP_CASE(1) + BPP_CASE(2) + BPP_CASE(3) + BPP_CASE(4) + BPP_CASE(6) + BPP_CASE(8) + BPP_CASE(12) + BPP_CASE(16) +#undef BPP_CASE default: UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); } } -} // Anonymous namespace - -void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, - u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, - u32 stride_alignment) { - Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, - stride_alignment); -} - -void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, - u32 height, u32 depth, u32 block_height, u32 block_depth, - u32 stride_alignment) { - Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, - stride_alignment); -} +template <u32 BYTES_PER_PIXEL> void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, - u32 block_height_bit, u32 offset_x, u32 offset_y) { + u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit, + u32 offset_x, u32 offset_y) { const u32 block_height = 1U << block_height_bit; const u32 image_width_in_gobs = - (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X; + (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X; for (u32 line = 0; line < subrect_height; ++line) { const u32 dst_y = line + offset_y; const u32 gob_address_y = @@ -135,20 +110,21 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 for (u32 x = 0; x < subrect_width; ++x) { const u32 dst_x = x + offset_x; const u32 gob_address = - gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height; - const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X]; - const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel; + gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height; + const u32 swizzled_offset = gob_address + table[(dst_x * BYTES_PER_PIXEL) % GOB_SIZE_X]; + const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL; const u8* const source_line = unswizzled_data + unswizzled_offset; u8* const dest_addr = swizzled_data + swizzled_offset; - std::memcpy(dest_addr, source_line, bytes_per_pixel); + std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL); } } } -void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, - u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { - const u32 stride = width * bytes_per_pixel; +template <u32 BYTES_PER_PIXEL> +void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height, + u32 origin_x, u32 origin_y, u8* output, const u8* input) { + const u32 stride = width * BYTES_PER_PIXEL; const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); @@ -163,24 +139,25 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, const u32 src_offset_y = (block_y >> block_height) * block_size + ((block_y & block_height_mask) << GOB_SIZE_SHIFT); for (u32 column = 0; column < line_length_in; ++column) { - const u32 src_x = (column + origin_x) * bytes_per_pixel; + const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL; const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X]; - const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel; + const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL; - std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel); + std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL); } } } +template <u32 BYTES_PER_PIXEL> void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, - u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, - u32 origin_y, u8* output, const u8* input) { + u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output, + const u8* input) { UNIMPLEMENTED_IF(origin_x > 0); UNIMPLEMENTED_IF(origin_y > 0); - const u32 stride = width * bytes_per_pixel; + const u32 stride = width * BYTES_PER_PIXEL; const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); @@ -195,11 +172,93 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt for (u32 x = 0; x < line_length_in; ++x) { const u32 dst_offset = ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; - const u32 src_offset = x * bytes_per_pixel + line * pitch; - std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel); + const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch; + std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL); } } } +} // Anonymous namespace + +void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); +} + +void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); +} + +void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, + u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, + u32 block_height_bit, u32 offset_x, u32 offset_y) { + switch (bytes_per_pixel) { +#define BPP_CASE(x) \ + case x: \ + return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \ + swizzled_data, unswizzled_data, block_height_bit, offset_x, \ + offset_y); + BPP_CASE(1) + BPP_CASE(2) + BPP_CASE(3) + BPP_CASE(4) + BPP_CASE(6) + BPP_CASE(8) + BPP_CASE(12) + BPP_CASE(16) +#undef BPP_CASE + default: + UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); + } +} + +void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, + u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { + switch (bytes_per_pixel) { +#define BPP_CASE(x) \ + case x: \ + return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \ + origin_x, origin_y, output, input); + BPP_CASE(1) + BPP_CASE(2) + BPP_CASE(3) + BPP_CASE(4) + BPP_CASE(6) + BPP_CASE(8) + BPP_CASE(12) + BPP_CASE(16) +#undef BPP_CASE + default: + UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); + } +} + +void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, + u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, + u32 origin_y, u8* output, const u8* input) { + switch (bytes_per_pixel) { +#define BPP_CASE(x) \ + case x: \ + return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \ + block_height, block_depth, origin_x, origin_y, output, \ + input); + BPP_CASE(1) + BPP_CASE(2) + BPP_CASE(3) + BPP_CASE(4) + BPP_CASE(6) + BPP_CASE(8) + BPP_CASE(12) + BPP_CASE(16) +#undef BPP_CASE + default: + UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); + } +} void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, @@ -220,7 +279,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 u8* dest_addr = swizzle_data + swizzled_offset; count++; - std::memcpy(dest_addr, source_line, 1); + *dest_addr = *source_line; } } } diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 1a9399455..7994cb859 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -159,7 +159,7 @@ static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); return {raw, raw}; } else { const Tegra::Texture::TextureHandle handle{raw}; - return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id}; + return {handle.tic_id, handle.tsc_id}; } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8e56a89e1..86ca4be54 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -368,18 +368,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, demote); - VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; - if (is_float16_supported) { - float16_int8 = { + if (is_int8_supported || is_float16_supported) { + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR, .pNext = nullptr, - .shaderFloat16 = true, - .shaderInt8 = false, + .shaderFloat16 = is_float16_supported, + .shaderInt8 = is_int8_supported, }; SetNext(next, float16_int8); - } else { + } + if (!is_float16_supported) { LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); } + if (!is_int8_supported) { + LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively"); + } if (!nv_viewport_swizzle) { LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); @@ -909,6 +912,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { physical.GetFeatures2KHR(features); is_float16_supported = float16_int8_features.shaderFloat16; + is_int8_supported = float16_int8_features.shaderInt8; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } if (has_ext_subgroup_size_control) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index c19f40746..234d74129 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -139,11 +139,16 @@ public: return is_optimal_astc_supported; } - /// Returns true if the device supports float16 natively + /// Returns true if the device supports float16 natively. bool IsFloat16Supported() const { return is_float16_supported; } + /// Returns true if the device supports int8 natively. + bool IsInt8Supported() const { + return is_int8_supported; + } + /// Returns true if the device warp size can potentially be bigger than guest's warp size. bool IsWarpSizePotentiallyBiggerThanGuest() const { return is_warp_potentially_bigger; @@ -367,7 +372,8 @@ private: u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 max_push_descriptors{}; ///< Maximum number of push descriptors bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool is_float16_supported{}; ///< Support for float16 arithmetic. + bool is_int8_supported{}; ///< Support for int8 arithmetic. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. bool is_depth_bounds_supported{}; ///< Support for depth bounds. |
