diff options
Diffstat (limited to 'src/video_core')
27 files changed, 336 insertions, 335 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c911c6ec4..45d8eaf23 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -180,3 +180,9 @@ target_link_libraries(video_core PRIVATE glad) if (ENABLE_VULKAN) target_link_libraries(video_core PRIVATE sirit) endif() + +if (MSVC) + target_compile_options(video_core PRIVATE /we4267) +else() + target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion) +endif() diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 63b3a8205..4408b5001 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -12,6 +12,10 @@ #include <utility> #include <vector> +#include <boost/icl/interval_map.hpp> +#include <boost/icl/interval_set.hpp> +#include <boost/range/iterator_range.hpp> + #include "common/alignment.h" #include "common/common_types.h" #include "core/core.h" diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2bed6cb38..42ce49a4d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -261,7 +261,8 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 executing_macro = 0; // Lookup the macro offset - const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); + const u32 entry = + ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); // Execute the current macro. macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 8f6bc76eb..9fafed4a2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -616,6 +616,14 @@ union Instruction { } shfl; union { + BitField<44, 1, u64> ftz; + BitField<39, 2, u64> tab5cb8_2; + BitField<38, 1, u64> ndv; + BitField<47, 1, u64> cc; + BitField<28, 8, u64> swizzle; + } fswzadd; + + union { BitField<8, 8, Register> gpr; BitField<20, 24, s64> offset; } gmem; @@ -1478,7 +1486,8 @@ union Instruction { u32 value = static_cast<u32>(target); // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; + return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) + + 1; } } bra; @@ -1492,7 +1501,8 @@ union Instruction { u32 value = static_cast<u32>(target); // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; + return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) + + 1; } } brx; @@ -1590,6 +1600,7 @@ public: DEPBAR, VOTE, SHFL, + FSWZADD, BFE_C, BFE_R, BFE_IMM, @@ -1851,11 +1862,11 @@ private: const std::size_t bit_position = opcode_bitsize - i - 1; switch (bitstring[i]) { case '0': - mask |= 1 << bit_position; + mask |= static_cast<u16>(1U << bit_position); break; case '1': - expect |= 1 << bit_position; - mask |= 1 << bit_position; + expect |= static_cast<u16>(1U << bit_position); + mask |= static_cast<u16>(1U << bit_position); break; default: // Ignore @@ -1888,6 +1899,7 @@ private: INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), + INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c65b24c69..b30d5be74 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -62,6 +62,7 @@ Device::Device() { max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; + has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); has_variable_aoffi = TestVariableAoffi(); @@ -79,6 +80,7 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_warp_intrinsics = true; + has_shader_ballot = true; has_vertex_viewport_layer = true; has_image_load_formatted = true; has_variable_aoffi = true; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index bf35bd0b6..6c86fe207 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -34,6 +34,10 @@ public: return has_warp_intrinsics; } + bool HasShaderBallot() const { + return has_shader_ballot; + } + bool HasVertexViewportLayer() const { return has_vertex_viewport_layer; } @@ -68,6 +72,7 @@ private: u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_warp_intrinsics{}; + bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; bool has_variable_aoffi{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0e9338ab6..05f8e511b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -374,7 +374,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); fbkey.colors[index] = std::move(color_surface); } - fbkey.colors_count = regs.rt_control.count; + fbkey.colors_count = static_cast<u16>(regs.rt_control.count); if (depth_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f1b89165d..04a239a39 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -275,16 +275,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy std::string source = fmt::format(R"(// {} #version 430 core #extension GL_ARB_separate_shader_objects : enable -#extension GL_ARB_shader_viewport_layer_array : enable -#extension GL_EXT_shader_image_load_formatted : enable -#extension GL_NV_gpu_shader5 : enable -#extension GL_NV_shader_thread_group : enable -#extension GL_NV_shader_thread_shuffle : enable )", GetShaderId(unique_identifier, program_type)); if (is_compute) { source += "#extension GL_ARB_compute_variable_group_size : require\n"; } + if (device.HasShaderBallot()) { + source += "#extension GL_ARB_shader_ballot : require\n"; + } + if (device.HasVertexViewportLayer()) { + source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; + } + if (device.HasImageLoadFormatted()) { + source += "#extension GL_EXT_shader_image_load_formatted : require\n"; + } + if (device.HasWarpIntrinsics()) { + source += "#extension GL_NV_gpu_shader5 : require\n" + "#extension GL_NV_shader_thread_group : require\n" + "#extension GL_NV_shader_thread_shuffle : require\n"; + } source += '\n'; if (!is_compute) { @@ -394,7 +403,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, params.disk_cache.SaveRaw(ShaderDiskCacheRaw( params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); - ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type))); + ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)), + params.system.GPU().Maxwell3D()); const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); // TODO(Rodrigo): Handle VertexA shaders // std::optional<ShaderIR> ir_b; @@ -410,7 +420,8 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog params.disk_cache.SaveRaw( ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); - ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute); + ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, + params.system.GPU().KeplerCompute()); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); return std::shared_ptr<CachedShader>(new CachedShader( params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ce857d8dc..4f2b49170 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1379,6 +1379,26 @@ private: return GenerateUnary(operation, "float", Type::Float, type); } + Expression FSwizzleAdd(Operation operation) { + const std::string op_a = VisitOperand(operation, 0).AsFloat(); + const std::string op_b = VisitOperand(operation, 1).AsFloat(); + + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {fmt::format("{} + {}", op_a, op_b), Type::Float}; + } + + const std::string instr_mask = VisitOperand(operation, 2).AsUint(); + const std::string mask = code.GenerateTemporary(); + code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, + instr_mask); + + const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); + const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); + return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), + Type::Float}; + } + Expression ICastFloat(Operation operation) { return GenerateUnary(operation, "int", Type::Int, Type::Float); } @@ -1670,7 +1690,7 @@ private: const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; return {GenerateTexture(operation, "Gather", - {TextureArgument{type, meta->component}, TextureAoffi{}}) + + {TextureAoffi{}, TextureArgument{type, meta->component}}) + GetSwizzle(meta->element), Type::Float}; } @@ -1936,34 +1956,24 @@ private: return Vote(operation, "allThreadsEqualNV"); } - template <const std::string_view& func> - Expression Shuffle(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsFloat(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); - // On a "single-thread" device we are either on the same thread or out of bounds. Both - // cases return the passed value. - return {value, Type::Float}; + Expression ThreadId(Operation operation) { + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {"0U", Type::Uint}; } - - const std::string index = VisitOperand(operation, 1).AsUint(); - const std::string width = VisitOperand(operation, 2).AsUint(); - return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; + return {"gl_SubGroupInvocationARB", Type::Uint}; } - template <const std::string_view& func> - Expression InRangeShuffle(Operation operation) { - const std::string index = VisitOperand(operation, 0).AsUint(); - const std::string width = VisitOperand(operation, 1).AsUint(); - if (!device.HasWarpIntrinsics()) { - // On a "single-thread" device we are only in bounds when the requested index is 0. - return {fmt::format("({} == 0U)", index), Type::Bool}; + Expression ShuffleIndexed(Operation operation) { + std::string value = VisitOperand(operation, 0).AsFloat(); + + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {std::move(value), Type::Float}; } - const std::string in_range = code.GenerateTemporary(); - code.AddLine("bool {};", in_range); - code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); - return {in_range, Type::Bool}; + const std::string index = VisitOperand(operation, 1).AsUint(); + return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; } struct Func final { @@ -1975,11 +1985,6 @@ private: static constexpr std::string_view Or = "Or"; static constexpr std::string_view Xor = "Xor"; static constexpr std::string_view Exchange = "Exchange"; - - static constexpr std::string_view ShuffleIndexed = "shuffleNV"; - static constexpr std::string_view ShuffleUp = "shuffleUpNV"; - static constexpr std::string_view ShuffleDown = "shuffleDownNV"; - static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; }; static constexpr std::array operation_decompilers = { @@ -2010,6 +2015,7 @@ private: &GLSLDecompiler::FTrunc, &GLSLDecompiler::FCastInteger<Type::Int>, &GLSLDecompiler::FCastInteger<Type::Uint>, + &GLSLDecompiler::FSwizzleAdd, &GLSLDecompiler::Add<Type::Int>, &GLSLDecompiler::Mul<Type::Int>, @@ -2145,15 +2151,8 @@ private: &GLSLDecompiler::VoteAny, &GLSLDecompiler::VoteEqual, - &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, - &GLSLDecompiler::Shuffle<Func::ShuffleUp>, - &GLSLDecompiler::Shuffle<Func::ShuffleDown>, - &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, - - &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, + &GLSLDecompiler::ThreadId, + &GLSLDecompiler::ShuffleIndexed, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); @@ -2486,6 +2485,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { bvec2 is_nan2 = isnan(pair2); return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); } + +const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); +const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); )"; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 4bbd17b12..7646cbb0e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -323,10 +323,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, // (e.g. handheld mode) on a 1920x1080 framebuffer. f32 scale_u = 1.f, scale_v = 1.f; if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / screen_info.texture.width; + scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / + static_cast<f32>(screen_info.texture.width); } if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / screen_info.texture.height; + scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / + static_cast<f32>(screen_info.texture.height); } std::array<ScreenRectVertex, 4> vertices = {{ diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 42cf068b6..2850d5b59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -783,6 +783,11 @@ private: return {}; } + Id FSwizzleAdd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HNegate(Operation operation) { UNIMPLEMENTED(); return {}; @@ -1195,42 +1200,12 @@ private: return {}; } - Id ShuffleIndexed(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleUp(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleDown(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleButterfly(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleIndexed(Operation) { + Id ThreadId(Operation) { UNIMPLEMENTED(); return {}; } - Id InRangeShuffleUp(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleDown(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleButterfly(Operation) { + Id ShuffleIndexed(Operation) { UNIMPLEMENTED(); return {}; } @@ -1393,6 +1368,7 @@ private: &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, + &SPIRVDecompiler::FSwizzleAdd, &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, @@ -1528,15 +1504,8 @@ private: &SPIRVDecompiler::VoteAny, &SPIRVDecompiler::VoteEqual, + &SPIRVDecompiler::ThreadId, &SPIRVDecompiler::ShuffleIndexed, - &SPIRVDecompiler::ShuffleUp, - &SPIRVDecompiler::ShuffleDown, - &SPIRVDecompiler::ShuffleButterfly, - - &SPIRVDecompiler::InRangeShuffleIndexed, - &SPIRVDecompiler::InRangeShuffleUp, - &SPIRVDecompiler::InRangeShuffleDown, - &SPIRVDecompiler::InRangeShuffleButterfly, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index d47c63d9f..b427ac873 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -16,7 +16,9 @@ #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { + namespace { + using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -68,15 +70,15 @@ struct CFGRebuildState { const ProgramCode& program_code; ConstBufferLocker& locker; u32 start{}; - std::vector<BlockInfo> block_info{}; - std::list<u32> inspect_queries{}; - std::list<Query> queries{}; - std::unordered_map<u32, u32> registered{}; - std::set<u32> labels{}; - std::map<u32, u32> ssy_labels{}; - std::map<u32, u32> pbk_labels{}; - std::unordered_map<u32, BlockStack> stacks{}; - ASTManager* manager; + std::vector<BlockInfo> block_info; + std::list<u32> inspect_queries; + std::list<Query> queries; + std::unordered_map<u32, u32> registered; + std::set<u32> labels; + std::map<u32, u32> ssy_labels; + std::map<u32, u32> pbk_labels; + std::unordered_map<u32, BlockStack> stacks; + ASTManager* manager{}; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -109,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { } Pred GetPredicate(u32 index, bool negated) { - return static_cast<Pred>(index + (negated ? 8 : 0)); + return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); } /** @@ -136,15 +138,13 @@ struct BranchIndirectInfo { s32 relative_position{}; }; -std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, - u32 start_address, u32 current_position) { - const u32 shader_start = state.start; - u32 pos = current_position; - BranchIndirectInfo result{}; - u64 track_register = 0; +struct BufferInfo { + u32 index; + u32 offset; +}; - // Step 0 Get BRX Info - const Instruction instr = {state.program_code[pos]}; +std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { + const Instruction instr = state.program_code[pos]; const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() != OpCode::Id::BRX) { return std::nullopt; @@ -152,86 +152,94 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& if (instr.brx.constant_buffer != 0) { return std::nullopt; } - track_register = instr.gpr8.Value(); - result.relative_position = instr.brx.GetBranchExtend(); - pos--; - bool found_track = false; + --pos; + return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); +} - // Step 1 Track LDC - while (pos >= shader_start) { - if (IsSchedInstruction(pos, shader_start)) { - pos--; +template <typename Result, typename TestCallable, typename PackCallable> +// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> +// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> +std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, + PackCallable pack) { + for (; pos >= state.start; --pos) { + if (IsSchedInstruction(pos, state.start)) { continue; } - const Instruction instr = {state.program_code[pos]}; + const Instruction instr = state.program_code[pos]; const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() == OpCode::Id::LD_C) { - if (instr.gpr0.Value() == track_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { - result.buffer = instr.cbuf36.index.Value(); - result.offset = static_cast<u32>(instr.cbuf36.GetOffset()); - track_register = instr.gpr8.Value(); - pos--; - found_track = true; - break; - } + if (!opcode) { + continue; + } + if (test(instr, opcode->get())) { + --pos; + return std::make_optional(pack(instr, opcode->get())); } - pos--; } + return std::nullopt; +} - if (!found_track) { - return std::nullopt; - } - found_track = false; +std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, + u64 brx_tracked_register) { + return TrackInstruction<std::pair<BufferInfo, u64>>( + state, pos, + [brx_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::LD_C && + instr.gpr0.Value() == brx_tracked_register && + instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; + }, + [](auto instr, const auto& opcode) { + const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), + static_cast<u32>(instr.cbuf36.GetOffset())}; + return std::make_pair(info, instr.gpr8.Value()); + }); +} - // Step 2 Track SHL - while (pos >= shader_start) { - if (IsSchedInstruction(pos, shader_start)) { - pos--; - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { - if (instr.gpr0.Value() == track_register) { - track_register = instr.gpr8.Value(); - pos--; - found_track = true; - break; - } - } - pos--; +std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, + u64 ldc_tracked_register) { + return TrackInstruction<u64>(state, pos, + [ldc_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::SHL_IMM && + instr.gpr0.Value() == ldc_tracked_register; + }, + [](auto instr, const auto&) { return instr.gpr8.Value(); }); +} + +std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, + u64 shl_tracked_register) { + return TrackInstruction<u32>(state, pos, + [shl_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::IMNMX_IMM && + instr.gpr0.Value() == shl_tracked_register; + }, + [](auto instr, const auto&) { + return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); + }); +} + +std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { + const auto brx_info = GetBRXInfo(state, pos); + if (!brx_info) { + return std::nullopt; } + const auto [relative_position, brx_tracked_register] = *brx_info; - if (!found_track) { + const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); + if (!ldc_info) { return std::nullopt; } - found_track = false; + const auto [buffer_info, ldc_tracked_register] = *ldc_info; - // Step 3 Track IMNMX - while (pos >= shader_start) { - if (IsSchedInstruction(pos, shader_start)) { - pos--; - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { - if (instr.gpr0.Value() == track_register) { - track_register = instr.gpr8.Value(); - result.entries = instr.alu.GetSignedImm20_20() + 1; - pos--; - found_track = true; - break; - } - } - pos--; + const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); + if (!shl_tracked_register) { + return std::nullopt; } - if (!found_track) { + const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); + if (!entries) { return std::nullopt; } - return result; + + return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; } std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { @@ -420,30 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) break; } case OpCode::Id::BRX: { - auto tmp = TrackBranchIndirectInfo(state, address, offset); - if (tmp) { - auto result = *tmp; - std::vector<CaseBranch> branches{}; - s32 pc_target = offset + result.relative_position; - for (u32 i = 0; i < result.entries; i++) { - auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4); - if (!k) { - return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = *k; - u32 target = static_cast<u32>((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<MultiBranch>( - static_cast<u32>(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } else { + const auto tmp = TrackBranchIndirectInfo(state, offset); + if (!tmp) { LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); + return {ParseResult::AbnormalFlow, parse_info}; } - return {ParseResult::AbnormalFlow, parse_info}; + + const auto result = *tmp; + const s32 pc_target = offset + result.relative_position; + std::vector<CaseBranch> branches; + for (u32 i = 0; i < result.entries; i++) { + auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + if (!key) { + return {ParseResult::AbnormalFlow, parse_info}; + } + u32 value = *key; + u32 target = static_cast<u32>((value >> 3) + pc_target); + insert_label(state, target); + branches.emplace_back(value, target); + } + parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<MultiBranch>( + static_cast<u32>(instr.gpr8.Value()), std::move(branches)); + + return {ParseResult::ControlCaught, parse_info}; } default: break; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 21fb9cb83..22c3e5120 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -154,10 +154,10 @@ void ShaderIR::Decode() { LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); [[fallthrough]]; case CompileDepth::BruteForce: { + const auto shader_end = static_cast<u32>(program_code.size()); coverage_begin = main_offset; - const std::size_t shader_end = program_code.size(); coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; label++) { + for (u32 label = main_offset; label < shader_end; ++label) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); } break; diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 1473c282a..fcedd2af6 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. if (instr.fmul.tab5cb8_2 != 0) { - LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); + LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); } if (instr.fmul.tab5c68_0 != 1) { - LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); + LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); @@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::RRO_C: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { + LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); + // Currently RRO is only implemented as a register move. op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); SetRegister(bb, instr.gpr0, op_b); - LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; } default: diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index b06cbe441..ee7d9a29d 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { if (opcode->get().GetId() == OpCode::Id::HADD2_C || opcode->get().GetId() == OpCode::Id::HADD2_R) { - if (instr.alu_half.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 6466fc011..d179b9873 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half_imm.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index ca2f39e8d..5973588d6 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); if (instr.ffma.tab5980_0 != 1) { - LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); } if (instr.ffma.tab5980_1 != 0) { - LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); } const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 48ca7a4af..848e46874 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (instr.hset2.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.hset2.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index fec8f2dbe..310655619 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -19,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - LOG_DEBUG(HW_GPU, "ftz={}", static_cast<u32>(instr.hsetp2.ftz)); + if (instr.hsetp2.ftz != 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index ca690b58b..bb926a132 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - const TextureType texture_type{instr.tex.texture_type}; const bool is_array = instr.tex.array != 0; const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); @@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - const TextureType texture_type{instr.tex_b.texture_type}; const bool is_array = instr.tex_b.array != 0; const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); @@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.texs.GetTextureProcessMode(); - if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); - } - const Node4 components = GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); @@ -107,10 +95,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), "PTP is not implemented"); - if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); - } - const auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) : instr.tld4.UsesMiscMode(TextureMiscMode::DC); @@ -125,9 +109,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { case OpCode::Id::TLD4S: { UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); - } const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); const Node op_a = GetRegister(instr.gpr8); @@ -164,10 +145,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { is_bindless = true; [[fallthrough]]; case OpCode::Id::TXQ: { - if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); - } - // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. @@ -205,10 +182,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); - if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); - } - auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; const auto& sampler = @@ -254,10 +227,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - if (instr.tld.nodep_flag) { - LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete"); - } - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); break; } @@ -269,10 +238,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { "AOFFI is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); - } - const Node4 components = GetTldsCode(instr, texture_type, is_array); if (instr.tlds.fp32_flag) { diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; using Tegra::Shader::VoteOperation; namespace { + OperationCode GetOperationCode(VoteOperation vote_op) { switch (vote_op) { case VoteOperation::All: @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { return OperationCode::VoteAll; } } + } // Anonymous namespace u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { @@ -46,50 +48,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::SHFL: { - Node width = [this, instr] { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - - // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has - // been done reversing Nvidia's math. It won't work on all cases due to SHFL having - // different parameters that don't properly map to GLSL's interface, but it should work - // for cases emitted by Nvidia's compiler. - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation( - OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), - Immediate(8)); - } else { - return Operation(OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, Immediate(0x201F), - Operation(OperationCode::INegate, std::move(mask))), - Immediate(8)); - } - }(); + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); + + Node thread_id = Operation(OperationCode::ThreadId); + Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); + Node seg_mask = BitfieldExtract(mask, 8, 16); - const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { + Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); + Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); + Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, + Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); + + Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { switch (instr.shfl.operation) { case ShuffleOperation::Idx: - return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; - case ShuffleOperation::Up: - return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + return Operation(OperationCode::IBitwiseOr, + Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), + min_thread_id); case ShuffleOperation::Down: - return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + return Operation(OperationCode::IAdd, thread_id, index); + case ShuffleOperation::Up: + return Operation(OperationCode::IAdd, thread_id, + Operation(OperationCode::INegate, index)); case ShuffleOperation::Bfly: - return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + return Operation(OperationCode::IBitwiseXor, thread_id, index); } - UNREACHABLE_MSG("Invalid SHFL operation: {}", - static_cast<u64>(instr.shfl.operation.Value())); - return {}; + UNREACHABLE(); + return Immediate(0U); }(); - // Setting the predicate before the register is intentional to avoid overwriting. - Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { + if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); + } else { + return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); + } + }(); + + SetPredicate(bb, instr.shfl.pred48, in_bounds); SetRegister( bb, instr.gpr0, - Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); + break; + } + case OpCode::Id::FSWZADD: { + UNIMPLEMENTED_IF(instr.fswzadd.ndv); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); break; } default: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4300d9ff4..54217e6a4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -47,6 +47,7 @@ enum class OperationCode { FTrunc, /// (MetaArithmetic, float a) -> float FCastInteger, /// (MetaArithmetic, int a) -> float FCastUInteger, /// (MetaArithmetic, uint a) -> float + FSwizzleAdd, /// (float a, float b, uint mask) -> float IAdd, /// (MetaArithmetic, int a, int b) -> int IMul, /// (MetaArithmetic, int a, int b) -> int @@ -181,15 +182,8 @@ enum class OperationCode { VoteAny, /// (bool) -> bool VoteEqual, /// (bool) -> bool - ShuffleIndexed, /// (uint value, uint index, uint width) -> uint - ShuffleUp, /// (uint value, uint index, uint width) -> uint - ShuffleDown, /// (uint value, uint index, uint width) -> uint - ShuffleButterfly, /// (uint value, uint index, uint width) -> uint - - InRangeShuffleIndexed, /// (uint index, uint width) -> bool - InRangeShuffleUp, /// (uint index, uint width) -> bool - InRangeShuffleDown, /// (uint index, uint width) -> bool - InRangeShuffleButterfly, /// (uint index, uint width) -> bool + ThreadId, /// () -> uint + ShuffleIndexed, /// (uint value, uint index) -> uint Amount, }; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 26c8fde22..76a849818 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -49,7 +49,7 @@ public: } u32 GetSize() const { - return max_offset + sizeof(float); + return max_offset + static_cast<u32>(sizeof(float)); } u32 GetMaxOffset() const { @@ -165,8 +165,8 @@ public: return program_manager.GetVariables(); } - u32 ConvertAddressToNvidiaSpace(const u32 address) const { - return (address - main_offset) * sizeof(Tegra::Shader::Instruction); + u32 ConvertAddressToNvidiaSpace(u32 address) const { + return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); } /// Returns a condition code evaluated from internal flags diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 621136b6e..4b6846113 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -249,6 +249,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::RGBA16U; case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::RGBA16F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RGBA16UI; default: break; } diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 58b608a36..33bd31865 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -92,11 +92,11 @@ private: const unsigned int mask = 1 << m_NextBit++; // clear the bit - *m_CurByte &= ~mask; + *m_CurByte &= static_cast<unsigned char>(~mask); // Write the bit, if necessary if (b) - *m_CurByte |= mask; + *m_CurByte |= static_cast<unsigned char>(mask); // Next byte? if (m_NextBit >= 8) { @@ -137,7 +137,7 @@ public: } uint64_t mask = (1 << (end - start + 1)) - 1; - return (m_Bits >> start) & mask; + return (m_Bits >> start) & static_cast<IntType>(mask); } private: @@ -656,7 +656,7 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { return 0; if (toBit == 0) return 0; - IntType v = val & ((1 << numBits) - 1); + IntType v = val & static_cast<IntType>((1 << numBits) - 1); IntType res = v; uint32_t reslen = numBits; while (reslen < toBit) { @@ -666,8 +666,8 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { comp = numBits - newshift; numBits = newshift; } - res <<= numBits; - res |= v >> comp; + res = static_cast<IntType>(res << numBits); + res = static_cast<IntType>(res | (v >> comp)); reslen += numBits; } return res; @@ -714,7 +714,7 @@ public: // Do nothing return val; } else if (oldDepth == 0 && newDepth != 0) { - return (1 << newDepth) - 1; + return static_cast<ChannelType>((1 << newDepth) - 1); } else if (newDepth > oldDepth) { return Replicate(val, oldDepth, newDepth); } else { @@ -722,10 +722,11 @@ public: if (newDepth == 0) { return 0xFF; } else { - uint8_t bitsWasted = oldDepth - newDepth; + uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); uint16_t v = static_cast<uint16_t>(val); - v = (v + (1 << (bitsWasted - 1))) >> bitsWasted; - v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1); + v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); + v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), + static_cast<uint16_t>((1 << newDepth) - 1)); return static_cast<uint8_t>(v); } } @@ -1191,18 +1192,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); - seed1 *= seed1; - seed2 *= seed2; - seed3 *= seed3; - seed4 *= seed4; - seed5 *= seed5; - seed6 *= seed6; - seed7 *= seed7; - seed8 *= seed8; - seed9 *= seed9; - seed10 *= seed10; - seed11 *= seed11; - seed12 *= seed12; + seed1 = static_cast<uint8_t>(seed1 * seed1); + seed2 = static_cast<uint8_t>(seed2 * seed2); + seed3 = static_cast<uint8_t>(seed3 * seed3); + seed4 = static_cast<uint8_t>(seed4 * seed4); + seed5 = static_cast<uint8_t>(seed5 * seed5); + seed6 = static_cast<uint8_t>(seed6 * seed6); + seed7 = static_cast<uint8_t>(seed7 * seed7); + seed8 = static_cast<uint8_t>(seed8 * seed8); + seed9 = static_cast<uint8_t>(seed9 * seed9); + seed10 = static_cast<uint8_t>(seed10 * seed10); + seed11 = static_cast<uint8_t>(seed11 * seed11); + seed12 = static_cast<uint8_t>(seed12 * seed12); int32_t sh1, sh2, sh3; if (seed & 1) { @@ -1214,18 +1215,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, } sh3 = (seed & 0x10) ? sh1 : sh2; - seed1 >>= sh1; - seed2 >>= sh2; - seed3 >>= sh1; - seed4 >>= sh2; - seed5 >>= sh1; - seed6 >>= sh2; - seed7 >>= sh1; - seed8 >>= sh2; - seed9 >>= sh3; - seed10 >>= sh3; - seed11 >>= sh3; - seed12 >>= sh3; + seed1 = static_cast<uint8_t>(seed1 >> sh1); + seed2 = static_cast<uint8_t>(seed2 >> sh2); + seed3 = static_cast<uint8_t>(seed3 >> sh1); + seed4 = static_cast<uint8_t>(seed4 >> sh2); + seed5 = static_cast<uint8_t>(seed5 >> sh1); + seed6 = static_cast<uint8_t>(seed6 >> sh2); + seed7 = static_cast<uint8_t>(seed7 >> sh1); + seed8 = static_cast<uint8_t>(seed8 >> sh2); + seed9 = static_cast<uint8_t>(seed9 >> sh3); + seed10 = static_cast<uint8_t>(seed10 >> sh3); + seed11 = static_cast<uint8_t>(seed11 >> sh3); + seed12 = static_cast<uint8_t>(seed12 >> sh3); int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); @@ -1558,7 +1559,9 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, // Make sure that higher non-texel bits are set to zero const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; - texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1; + texelWeightData[clearByteStart - 1] = + texelWeightData[clearByteStart - 1] & + static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); std::vector<IntegerEncodedValue> texelWeightValues; diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 27c8ce975..8e82c6748 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -342,13 +342,14 @@ struct TSCEntry { float GetLodBias() const { // Sign extend the 13-bit value. constexpr u32 mask = 1U << (13 - 1); - return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f; + return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; } std::array<float, 4> GetBorderColor() const { if (srgb_conversion) { - return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f, - srgb_border_color_b / 255.0f, border_color[3]}; + return {static_cast<float>(srgb_border_color_r) / 255.0f, + static_cast<float>(srgb_border_color_g) / 255.0f, + static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]}; } return border_color; } diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 60cda0ca3..8e947394c 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -28,7 +28,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { u16 GetResolutionScaleFactor(const RendererBase& renderer) { return static_cast<u16>( - Settings::values.resolution_factor + Settings::values.resolution_factor != 0 ? Settings::values.resolution_factor : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); } |
