aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/command_classes/codecs/vp9.cpp1
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h85
-rw-r--r--src/video_core/macro/macro_jit_x64.h2
-rw-r--r--src/video_core/memory_manager.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp2
-rw-r--r--src/video_core/textures/decoders.cpp173
-rw-r--r--src/video_core/textures/texture.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp16
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h10
9 files changed, 168 insertions, 124 deletions
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 70030066a..d7e749485 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
+ ASSERT(!current_frame_info.segment_enabled);
uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 87eafdb03..3b1ed4b3a 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -22,7 +22,7 @@ struct Vp9FrameDimensions {
};
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
-enum FrameFlags : u32 {
+enum class FrameFlags : u32 {
IsKeyFrame = 1 << 0,
LastFrameIsKeyFrame = 1 << 1,
FrameSizeChanged = 1 << 2,
@@ -30,6 +30,7 @@ enum FrameFlags : u32 {
LastShowFrame = 1 << 4,
IntraOnly = 1 << 5,
};
+DECLARE_ENUM_FLAG_OPERATORS(FrameFlags)
enum class TxSize {
Tx4x4 = 0, // 4x4 transform
@@ -92,44 +93,34 @@ struct Vp9EntropyProbs {
static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
struct Vp9PictureInfo {
- bool is_key_frame;
- bool intra_only;
- bool last_frame_was_key;
- bool frame_size_changed;
- bool error_resilient_mode;
- bool last_frame_shown;
- bool show_frame;
+ u32 bitstream_size;
+ std::array<u64, 4> frame_offsets;
std::array<s8, 4> ref_frame_sign_bias;
s32 base_q_index;
s32 y_dc_delta_q;
s32 uv_dc_delta_q;
s32 uv_ac_delta_q;
- bool lossless;
s32 transform_mode;
- bool allow_high_precision_mv;
s32 interp_filter;
s32 reference_mode;
- s8 comp_fixed_ref;
- std::array<s8, 2> comp_var_ref;
s32 log2_tile_cols;
s32 log2_tile_rows;
- bool segment_enabled;
- bool segment_map_update;
- bool segment_map_temporal_update;
- s32 segment_abs_delta;
- std::array<u32, 8> segment_feature_enable;
- std::array<std::array<s16, 4>, 8> segment_feature_data;
- bool mode_ref_delta_enabled;
- bool use_prev_in_find_mv_refs;
std::array<s8, 4> ref_deltas;
std::array<s8, 2> mode_deltas;
Vp9EntropyProbs entropy;
Vp9FrameDimensions frame_size;
u8 first_level;
u8 sharpness_level;
- u32 bitstream_size;
- std::array<u64, 4> frame_offsets;
- std::array<bool, 4> refresh_frame;
+ bool is_key_frame;
+ bool intra_only;
+ bool last_frame_was_key;
+ bool error_resilient_mode;
+ bool last_frame_shown;
+ bool show_frame;
+ bool lossless;
+ bool allow_high_precision_mv;
+ bool segment_enabled;
+ bool mode_ref_delta_enabled;
};
struct Vp9FrameContainer {
@@ -145,7 +136,7 @@ struct PictureInfo {
Vp9FrameDimensions golden_frame_size; ///< 0x50
Vp9FrameDimensions alt_frame_size; ///< 0x58
Vp9FrameDimensions current_frame_size; ///< 0x60
- u32 vp9_flags; ///< 0x68
+ FrameFlags vp9_flags; ///< 0x68
std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
u8 first_level; ///< 0x70
u8 sharpness_level; ///< 0x71
@@ -158,60 +149,43 @@ struct PictureInfo {
u8 allow_high_precision_mv; ///< 0x78
u8 interp_filter; ///< 0x79
u8 reference_mode; ///< 0x7A
- s8 comp_fixed_ref; ///< 0x7B
- std::array<s8, 2> comp_var_ref; ///< 0x7C
+ INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B
u8 log2_tile_cols; ///< 0x7E
u8 log2_tile_rows; ///< 0x7F
Segmentation segmentation; ///< 0x80
LoopFilter loop_filter; ///< 0xE4
- INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB
- u32 surface_params; ///< 0xF0
- INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4
+ INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB
[[nodiscard]] Vp9PictureInfo Convert() const {
return {
- .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
- .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
- .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
- .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
- .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
- .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
- .show_frame = true,
+ .bitstream_size = bitstream_size,
+ .frame_offsets{},
.ref_frame_sign_bias = ref_frame_sign_bias,
.base_q_index = base_q_index,
.y_dc_delta_q = y_dc_delta_q,
.uv_dc_delta_q = uv_dc_delta_q,
.uv_ac_delta_q = uv_ac_delta_q,
- .lossless = lossless != 0,
.transform_mode = tx_mode,
- .allow_high_precision_mv = allow_high_precision_mv != 0,
.interp_filter = interp_filter,
.reference_mode = reference_mode,
- .comp_fixed_ref = comp_fixed_ref,
- .comp_var_ref = comp_var_ref,
.log2_tile_cols = log2_tile_cols,
.log2_tile_rows = log2_tile_rows,
- .segment_enabled = segmentation.enabled != 0,
- .segment_map_update = segmentation.update_map != 0,
- .segment_map_temporal_update = segmentation.temporal_update != 0,
- .segment_abs_delta = segmentation.abs_delta,
- .segment_feature_enable = segmentation.feature_mask,
- .segment_feature_data = segmentation.feature_data,
- .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
- .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
- !(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
- !(vp9_flags == (FrameFlags::IntraOnly)) &&
- (vp9_flags == (FrameFlags::LastShowFrame)) &&
- !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
.ref_deltas = loop_filter.ref_deltas,
.mode_deltas = loop_filter.mode_deltas,
.entropy{},
.frame_size = current_frame_size,
.first_level = first_level,
.sharpness_level = sharpness_level,
- .bitstream_size = bitstream_size,
- .frame_offsets{},
- .refresh_frame{},
+ .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame),
+ .intra_only = True(vp9_flags & FrameFlags::IntraOnly),
+ .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame),
+ .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode),
+ .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame),
+ .show_frame = true,
+ .lossless = lossless != 0,
+ .allow_high_precision_mv = allow_high_precision_mv != 0,
+ .segment_enabled = segmentation.enabled != 0,
+ .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
};
}
};
@@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48);
ASSERT_POSITION(first_level, 0x70);
ASSERT_POSITION(segmentation, 0x80);
ASSERT_POSITION(loop_filter, 0xE4);
-ASSERT_POSITION(surface_params, 0xF0);
#undef ASSERT_POSITION
#define ASSERT_POSITION(field_name, position) \
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 7f50ac2f8..d03d480b4 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -6,7 +6,7 @@
#include <array>
#include <bitset>
-#include <xbyak.h>
+#include <xbyak/xbyak.h>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/x64/xbyak_abi.h"
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 882eff880..c60ed6453 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -463,6 +463,7 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
++page_index;
page_offset = 0;
remaining_size -= num_bytes;
+ old_page_addr = page_addr;
}
split();
return result;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a37ca1fdf..f316c4f92 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -281,7 +281,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
.supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
.unified_descriptor_binding = true,
.support_descriptor_aliasing = true,
- .support_int8 = true,
+ .support_int8 = device.IsInt8Supported(),
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.support_vertex_instance_id = false,
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index d2c4a7fcf..24e943e4c 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -76,56 +76,31 @@ template <bool TO_LINEAR>
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
switch (bytes_per_pixel) {
- case 1:
- return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height,
+#define BPP_CASE(x) \
+ case x: \
+ return SwizzleImpl<TO_LINEAR, x>(output, input, width, height, depth, block_height, \
block_depth, stride_alignment);
- case 2:
- return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height,
- block_depth, stride_alignment);
- case 3:
- return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height,
- block_depth, stride_alignment);
- case 4:
- return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height,
- block_depth, stride_alignment);
- case 6:
- return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height,
- block_depth, stride_alignment);
- case 8:
- return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height,
- block_depth, stride_alignment);
- case 12:
- return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height,
- block_depth, stride_alignment);
- case 16:
- return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height,
- block_depth, stride_alignment);
+ BPP_CASE(1)
+ BPP_CASE(2)
+ BPP_CASE(3)
+ BPP_CASE(4)
+ BPP_CASE(6)
+ BPP_CASE(8)
+ BPP_CASE(12)
+ BPP_CASE(16)
+#undef BPP_CASE
default:
UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
-} // Anonymous namespace
-
-void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
- u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
- u32 stride_alignment) {
- Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
- stride_alignment);
-}
-
-void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
- u32 height, u32 depth, u32 block_height, u32 block_depth,
- u32 stride_alignment) {
- Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
- stride_alignment);
-}
+template <u32 BYTES_PER_PIXEL>
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
- u32 block_height_bit, u32 offset_x, u32 offset_y) {
+ u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit,
+ u32 offset_x, u32 offset_y) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs =
- (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
+ (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
for (u32 line = 0; line < subrect_height; ++line) {
const u32 dst_y = line + offset_y;
const u32 gob_address_y =
@@ -135,20 +110,21 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
for (u32 x = 0; x < subrect_width; ++x) {
const u32 dst_x = x + offset_x;
const u32 gob_address =
- gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height;
- const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X];
- const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel;
+ gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height;
+ const u32 swizzled_offset = gob_address + table[(dst_x * BYTES_PER_PIXEL) % GOB_SIZE_X];
+ const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL;
const u8* const source_line = unswizzled_data + unswizzled_offset;
u8* const dest_addr = swizzled_data + swizzled_offset;
- std::memcpy(dest_addr, source_line, bytes_per_pixel);
+ std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL);
}
}
}
-void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
- u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
- const u32 stride = width * bytes_per_pixel;
+template <u32 BYTES_PER_PIXEL>
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height,
+ u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+ const u32 stride = width * BYTES_PER_PIXEL;
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
@@ -163,24 +139,25 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
const u32 src_offset_y = (block_y >> block_height) * block_size +
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
for (u32 column = 0; column < line_length_in; ++column) {
- const u32 src_x = (column + origin_x) * bytes_per_pixel;
+ const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL;
const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
- const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
+ const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL;
- std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
+ std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL);
}
}
}
+template <u32 BYTES_PER_PIXEL>
void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
- u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
- u32 origin_y, u8* output, const u8* input) {
+ u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output,
+ const u8* input) {
UNIMPLEMENTED_IF(origin_x > 0);
UNIMPLEMENTED_IF(origin_y > 0);
- const u32 stride = width * bytes_per_pixel;
+ const u32 stride = width * BYTES_PER_PIXEL;
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
@@ -195,11 +172,93 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
for (u32 x = 0; x < line_length_in; ++x) {
const u32 dst_offset =
((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X];
- const u32 src_offset = x * bytes_per_pixel + line * pitch;
- std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel);
+ const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch;
+ std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL);
}
}
}
+} // Anonymous namespace
+
+void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment) {
+ Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
+ stride_alignment);
+}
+
+void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment) {
+ Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
+ stride_alignment);
+}
+
+void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
+ u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
+ u32 block_height_bit, u32 offset_x, u32 offset_y) {
+ switch (bytes_per_pixel) {
+#define BPP_CASE(x) \
+ case x: \
+ return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \
+ swizzled_data, unswizzled_data, block_height_bit, offset_x, \
+ offset_y);
+ BPP_CASE(1)
+ BPP_CASE(2)
+ BPP_CASE(3)
+ BPP_CASE(4)
+ BPP_CASE(6)
+ BPP_CASE(8)
+ BPP_CASE(12)
+ BPP_CASE(16)
+#undef BPP_CASE
+ default:
+ UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
+ }
+}
+
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+ u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+ switch (bytes_per_pixel) {
+#define BPP_CASE(x) \
+ case x: \
+ return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \
+ origin_x, origin_y, output, input);
+ BPP_CASE(1)
+ BPP_CASE(2)
+ BPP_CASE(3)
+ BPP_CASE(4)
+ BPP_CASE(6)
+ BPP_CASE(8)
+ BPP_CASE(12)
+ BPP_CASE(16)
+#undef BPP_CASE
+ default:
+ UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
+ }
+}
+
+void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
+ u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
+ u32 origin_y, u8* output, const u8* input) {
+ switch (bytes_per_pixel) {
+#define BPP_CASE(x) \
+ case x: \
+ return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \
+ block_height, block_depth, origin_x, origin_y, output, \
+ input);
+ BPP_CASE(1)
+ BPP_CASE(2)
+ BPP_CASE(3)
+ BPP_CASE(4)
+ BPP_CASE(6)
+ BPP_CASE(8)
+ BPP_CASE(12)
+ BPP_CASE(16)
+#undef BPP_CASE
+ default:
+ UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
+ }
+}
void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
@@ -220,7 +279,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
u8* dest_addr = swizzle_data + swizzled_offset;
count++;
- std::memcpy(dest_addr, source_line, 1);
+ *dest_addr = *source_line;
}
}
}
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 1a9399455..7994cb859 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -159,7 +159,7 @@ static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
return {raw, raw};
} else {
const Tegra::Texture::TextureHandle handle{raw};
- return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id};
+ return {handle.tic_id, handle.tsc_id};
}
}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 8e56a89e1..86ca4be54 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -368,18 +368,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
};
SetNext(next, demote);
- VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
- if (is_float16_supported) {
- float16_int8 = {
+ if (is_int8_supported || is_float16_supported) {
+ VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,
.pNext = nullptr,
- .shaderFloat16 = true,
- .shaderInt8 = false,
+ .shaderFloat16 = is_float16_supported,
+ .shaderInt8 = is_int8_supported,
};
SetNext(next, float16_int8);
- } else {
+ }
+ if (!is_float16_supported) {
LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
}
+ if (!is_int8_supported) {
+ LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively");
+ }
if (!nv_viewport_swizzle) {
LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
@@ -909,6 +912,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
physical.GetFeatures2KHR(features);
is_float16_supported = float16_int8_features.shaderFloat16;
+ is_int8_supported = float16_int8_features.shaderInt8;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (has_ext_subgroup_size_control) {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index c19f40746..234d74129 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -139,11 +139,16 @@ public:
return is_optimal_astc_supported;
}
- /// Returns true if the device supports float16 natively
+ /// Returns true if the device supports float16 natively.
bool IsFloat16Supported() const {
return is_float16_supported;
}
+ /// Returns true if the device supports int8 natively.
+ bool IsInt8Supported() const {
+ return is_int8_supported;
+ }
+
/// Returns true if the device warp size can potentially be bigger than guest's warp size.
bool IsWarpSizePotentiallyBiggerThanGuest() const {
return is_warp_potentially_bigger;
@@ -367,7 +372,8 @@ private:
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 max_push_descriptors{}; ///< Maximum number of push descriptors
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool is_float16_supported{}; ///< Support for float16 arithmetic.
+ bool is_int8_supported{}; ///< Support for int8 arithmetic.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool is_depth_bounds_supported{}; ///< Support for depth bounds.