diff options
Diffstat (limited to 'src/video_core/macro')
| -rw-r--r-- | src/video_core/macro/macro.cpp | 60 | ||||
| -rw-r--r-- | src/video_core/macro/macro.h | 22 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.cpp | 109 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.h | 44 | ||||
| -rw-r--r-- | src/video_core/macro/macro_interpreter.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 142 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 10 |
7 files changed, 291 insertions, 100 deletions
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 89077a2d8..cd21a2112 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,32 +2,78 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <optional> +#include <boost/container_hash/hash.hpp> #include "common/assert.h" #include "common/logging/log.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" +#include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" #include "video_core/macro/macro_jit_x64.h" namespace Tegra { +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) + : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} + +MacroEngine::~MacroEngine() = default; + void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { +void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, + const std::vector<u32>& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { - compiled_macro->second->Execute(parameters, method); + const auto& cache_info = compiled_macro->second; + if (cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } } else { // Macro not compiled, check if it's uploaded and if so, compile it - auto macro_code = uploaded_macro_code.find(method); + std::optional<u32> mid_method; + const auto macro_code = uploaded_macro_code.find(method); if (macro_code == uploaded_macro_code.end()) { - UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); - return; + for (const auto& [method_base, code] : uploaded_macro_code) { + if (method >= method_base && (method - method_base) < code.size()) { + mid_method = method_base; + break; + } + } + if (!mid_method.has_value()) { + UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); + return; + } + } + auto& cache_info = macro_cache[method]; + + if (!mid_method.has_value()) { + cache_info.lle_program = Compile(macro_code->second); + cache_info.hash = boost::hash_value(macro_code->second); + } else { + const auto& macro_cached = uploaded_macro_code[mid_method.value()]; + const auto rebased_method = method - mid_method.value(); + auto& code = uploaded_macro_code[method]; + code.resize(macro_cached.size() - rebased_method); + std::memcpy(code.data(), macro_cached.data() + rebased_method, + code.size() * sizeof(u32)); + cache_info.hash = boost::hash_value(code); + cache_info.lle_program = Compile(code); + } + + auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); + if (hle_program.has_value()) { + cache_info.has_hle_program = true; + cache_info.hle_program = std::move(hle_program.value()); + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); } - macro_cache[method] = Compile(macro_code->second); - macro_cache[method]->Execute(parameters, method); } } diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index b76ed891f..31ee3440a 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -11,9 +11,11 @@ #include "common/common_types.h" namespace Tegra { + namespace Engines { class Maxwell3D; } + namespace Macro { constexpr std::size_t NUM_MACRO_REGISTERS = 8; enum class Operation : u32 { @@ -94,33 +96,45 @@ union MethodAddress { } // namespace Macro +class HLEMacro; + class CachedMacro { public: virtual ~CachedMacro() = default; /** * Executes the macro code with the specified input parameters. - * @param code The macro byte code to execute + * * @param parameters The parameters of the macro + * @param method The method to execute */ virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0; }; class MacroEngine { public: - virtual ~MacroEngine() = default; + explicit MacroEngine(Engines::Maxwell3D& maxwell3d); + virtual ~MacroEngine(); // Store the uploaded macro code to compile them when they're called. void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(u32 method, const std::vector<u32>& parameters); + void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); protected: virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; private: - std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache; + struct CacheInfo { + std::unique_ptr<CachedMacro> lle_program{}; + std::unique_ptr<CachedMacro> hle_program{}; + u64 hash{}; + bool has_hle_program{}; + }; + + std::unordered_map<u32, CacheInfo> macro_cache; std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; + std::unique_ptr<HLEMacro> hle_macros; }; std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp new file mode 100644 index 000000000..df00b57df --- /dev/null +++ b/src/video_core/macro/macro_hle.cpp @@ -0,0 +1,109 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_hle.h" +#include "video_core/rasterizer_interface.h" + +namespace Tegra { + +namespace { +// HLE'd functions +void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { + const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff)); + maxwell3d.regs.vb_base_instance = parameters[5]; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.regs.vb_element_base = parameters[3]; + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.index_array.first = parameters[4]; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.Rasterizer().Draw(true, true); + } + maxwell3d.regs.index_array.count = 0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { + const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + + maxwell3d.regs.vertex_buffer.first = parameters[3]; + maxwell3d.regs.vertex_buffer.count = parameters[1]; + maxwell3d.regs.vb_base_instance = parameters[4]; + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); + maxwell3d.mme_draw.instance_count = count; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.Rasterizer().Draw(false, true); + } + maxwell3d.regs.vertex_buffer.count = 0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.index_array.first = parameters[3]; + maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base? + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.vb_element_base = element_base; + maxwell3d.regs.vb_base_instance = base_instance; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, element_base); + maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); + if (maxwell3d.ShouldExecute()) { + maxwell3d.Rasterizer().Draw(true, true); + } + maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? + maxwell3d.regs.index_array.count = 0; + maxwell3d.regs.vb_element_base = 0x0; + maxwell3d.regs.vb_base_instance = 0x0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, 0x0); + maxwell3d.CallMethodFromMME(0x8e5, 0x0); + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} +} // Anonymous namespace + +constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ + {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, + {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, + {0x0217920100488FF7, &HLE_0217920100488FF7}, +}}; + +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +HLEMacro::~HLEMacro() = default; + +std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { + const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), + [hash](const auto& pair) { return pair.first == hash; }); + if (it == hle_funcs.end()) { + return std::nullopt; + } + return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); +} + +HLEMacroImpl::~HLEMacroImpl() = default; + +HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) + : maxwell3d(maxwell3d), func(func) {} + +void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { + func(maxwell3d, parameters); +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h new file mode 100644 index 000000000..37af875a0 --- /dev/null +++ b/src/video_core/macro/macro_hle.h @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <optional> +#include <vector> +#include "common/common_types.h" +#include "video_core/macro/macro.h" + +namespace Tegra { + +namespace Engines { +class Maxwell3D; +} + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); + +class HLEMacro { +public: + explicit HLEMacro(Engines::Maxwell3D& maxwell3d); + ~HLEMacro(); + + std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; + +private: + Engines::Maxwell3D& maxwell3d; +}; + +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); + ~HLEMacroImpl(); + + void Execute(const std::vector<u32>& parameters, u32 method) override; + +private: + Engines::Maxwell3D& maxwell3d; + HLEFunction func; +}; + +} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 5edff27aa..bd01fd1f2 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -11,7 +11,8 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); @@ -33,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho this->parameters = std::make_unique<u32[]>(num_parameters); } std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); - this->num_parameters = num_parameters; // Execute the code until we hit an exit condition. bool keep_executing = true; diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 11c1cc3be..954b87515 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -14,27 +14,22 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { -static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; -static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; -static const Xbyak::Reg64 STATE = Xbyak::util::r11; -static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; -static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; -static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; -static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; -static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; -static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; +constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; +constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; +constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; +constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ - PARAMETERS, - REGISTERS, STATE, - NEXT_PARAMETER, RESULT, + PARAMETERS, METHOD_ADDRESS, BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { return std::make_unique<MacroJITx64Impl>(maxwell3d, code); @@ -53,32 +48,32 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { JITState state{}; state.maxwell3d = &maxwell3d; state.registers = {}; - state.parameters = parameters.data(); - program(&state); + program(&state, parameters.data()); } void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool is_a_zero = opcode.src_a == 0; const bool is_b_zero = opcode.src_b == 0; const bool valid_operation = !is_a_zero && !is_b_zero; - const bool is_move_operation = !is_a_zero && is_b_zero; + [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; + const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || + opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; - Xbyak::Reg64 src_a; + Xbyak::Reg32 src_a; Xbyak::Reg32 src_b; - if (!optimizer.zero_reg_skip) { - src_a = Compile_GetRegister(opcode.src_a, RESULT_64); - src_b = Compile_GetRegister(opcode.src_b, ebx); + if (!optimizer.zero_reg_skip || no_zero_reg_skip) { + src_a = Compile_GetRegister(opcode.src_a, RESULT); + src_b = Compile_GetRegister(opcode.src_b, eax); } else { if (!is_a_zero) { - src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_a = Compile_GetRegister(opcode.src_a, RESULT); } if (!is_b_zero) { - src_b = Compile_GetRegister(opcode.src_b, ebx); + src_b = Compile_GetRegister(opcode.src_b, eax); } } - Xbyak::Label skip_carry{}; bool has_emitted = false; @@ -190,7 +185,8 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { if (next_opcode.has_value()) { const auto next = *next_opcode; - if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && + opcode.dst == next.dst) { return; } } @@ -244,10 +240,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, eax); - auto src = Compile_GetRegister(opcode.src_b, RESULT); + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); - shr(src, al); + shr(src, dst.cvt8()); if (opcode.bf_size != 0 && opcode.bf_size != 31) { and_(src, opcode.GetBitfieldMask()); } else if (opcode.bf_size == 0) { @@ -263,8 +259,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, eax); - auto src = Compile_GetRegister(opcode.src_b, RESULT); + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); if (opcode.bf_src_bit != 0) { shr(src, opcode.bf_src_bit); @@ -273,16 +269,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { if (opcode.bf_size != 31) { and_(src, opcode.GetBitfieldMask()); } - shl(src, al); - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} + shl(src, dst.cvt8()); -static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) { - return maxwell3d->GetRegisterValue(method); -} - -static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethodFromMME(method_address.address, value); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { @@ -302,22 +291,34 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { sub(result, opcode.immediate * -1); } } - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); - mov(Common::X64::ABI_PARAM1, qword[STATE]); - mov(Common::X64::ABI_PARAM2, RESULT); - Common::X64::CallFarFunction(*this, &Read); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); - mov(RESULT, Common::X64::ABI_RETURN.cvt32()); + + // Equivalent to Engines::Maxwell3D::GetRegisterValue: + if (optimizer.enable_asserts) { + Xbyak::Label pass_range_check; + cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS)); + jb(pass_range_check); + int3(); + L(pass_range_check); + } + mov(rax, qword[STATE]); + mov(RESULT, + dword[rax + offsetof(Engines::Maxwell3D, regs) + + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } +static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethodFromMME(method_address.address, value); +} + void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); mov(Common::X64::ABI_PARAM3, value); Common::X64::CallFarFunction(*this, &Send); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Xbyak::Label dont_process{}; // Get increment @@ -329,7 +330,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { and_(METHOD_ADDRESS, 0xfff); shr(ecx, 12); and_(ecx, 0x3f); - lea(eax, ptr[rcx + METHOD_ADDRESS_64]); + lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); sal(ecx, 12); or_(eax, ecx); @@ -418,22 +419,17 @@ void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { void MacroJITx64Impl::Compile() { MICROPROFILE_SCOPE(MacroJitCompile); - bool keep_executing = true; labels.fill(Xbyak::Label()); - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); // JIT state mov(STATE, Common::X64::ABI_PARAM1); - mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + - static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]); - mov(REGISTERS, Common::X64::ABI_PARAM1); - add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers))); + mov(PARAMETERS, Common::X64::ABI_PARAM2); xor_(RESULT, RESULT); xor_(METHOD_ADDRESS, METHOD_ADDRESS); - xor_(NEXT_PARAMETER, NEXT_PARAMETER); xor_(BRANCH_HOLDER, BRANCH_HOLDER); - mov(dword[REGISTERS + 4], Compile_FetchParameter()); + mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); // Track get register for zero registers and mark it as no-op optimizer.zero_reg_skip = true; @@ -446,6 +442,9 @@ void MacroJITx64Impl::Compile() { // one if our register isn't "dirty" optimizer.optimize_for_method_move = true; + // Enable run-time assertions in JITted code + optimizer.enable_asserts = false; + // Check to see if we can skip emitting certain instructions Optimizer_ScanFlags(); @@ -463,7 +462,7 @@ void MacroJITx64Impl::Compile() { L(end_of_code); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); ret(); ready(); program = getCode<ProgramType>(); @@ -537,8 +536,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() { } Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { - mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); - inc(NEXT_PARAMETER); + mov(eax, dword[PARAMETERS]); + add(PARAMETERS, sizeof(u32)); return eax; } @@ -547,41 +546,22 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { // Register 0 is always zero xor_(dst, dst); } else { - mov(dst, dword[REGISTERS + index * sizeof(u32)]); - } - - return dst; -} - -Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { - if (index == 0) { - // Register 0 is always zero - xor_(dst, dst); - } else { - mov(dst, dword[REGISTERS + index * sizeof(u32)]); + mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); } return dst; } -void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { - Xbyak::Label zero{}, end{}; - xor_(ecx, ecx); - shr(dst, 32); - setne(cl); - mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); -} - void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { - auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { + const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero // register. if (reg == 0) { return; } - mov(dword[REGISTERS + reg * sizeof(u32)], result); + mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); }; - auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; switch (operation) { case Macro::ResultOperation::IgnoreAndFetch: diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 21ee157cf..a180e7428 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -55,8 +55,6 @@ private: Xbyak::Reg32 Compile_FetchParameter(); Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst); - void Compile_WriteCarry(Xbyak::Reg64 dst); void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); void Compile_Send(Xbyak::Reg32 value); @@ -67,11 +65,10 @@ private: struct JITState { Engines::Maxwell3D* maxwell3d{}; std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; - const u32* parameters{}; u32 carry_flag{}; }; static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*); + using ProgramType = void (*)(JITState*, const u32*); struct OptimizerState { bool can_skip_carry{}; @@ -79,14 +76,15 @@ private: bool zero_reg_skip{}; bool skip_dummy_addimmediate{}; bool optimize_for_method_move{}; + bool enable_asserts{}; }; OptimizerState optimizer{}; std::optional<Macro::Opcode> next_opcode{}; ProgramType program{nullptr}; - std::array<Xbyak::Label, MAX_CODE_SIZE> labels{}; - std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip{}; + std::array<Xbyak::Label, MAX_CODE_SIZE> labels; + std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; Xbyak::Label end_of_code{}; bool is_delay_slot{}; |
