From b032ebdfee1928c4458eaf15faa0cff299371e65 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Fri, 29 May 2020 14:53:27 +1000 Subject: Implement macro JIT --- src/video_core/macro/macro_jit_x64.cpp | 633 +++++++++++++++++++++++++++++++++ 1 file changed, 633 insertions(+) create mode 100644 src/video_core/macro/macro_jit_x64.cpp (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp new file mode 100644 index 000000000..1b657236a --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -0,0 +1,633 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/x64/xbyak_util.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_interpreter.h" +#include "video_core/macro/macro_jit_x64.h" + +MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47)); +MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); + +namespace Tegra { +using JitFunction = void (MacroJITx64Impl::*)(Macro::Opcode opcode); +const std::array InstructionTable{ + &MacroJITx64Impl::Compile_ALU, + &MacroJITx64Impl::Compile_AddImmediate, + &MacroJITx64Impl::Compile_ExtractInsert, + &MacroJITx64Impl::Compile_ExtractShiftLeftImmediate, + &MacroJITx64Impl::Compile_ExtractShiftLeftRegister, + &MacroJITx64Impl::Compile_Read, + nullptr, + &MacroJITx64Impl::Compile_Branch, +}; + +static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; +static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; +static const Xbyak::Reg64 STATE = Xbyak::util::r11; +static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; +static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; +static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; +static const Xbyak::Reg32 
METHOD_ADDRESS = Xbyak::util::r14d; +static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; +static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; + +static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ + PARAMETERS, + REGISTERS, + STATE, + NEXT_PARAMETER, + RESULT, + METHOD_ADDRESS, + BRANCH_HOLDER, +}); + +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} + +std::unique_ptr MacroJITx64::Compile(const std::vector& code) { + return std::make_unique(maxwell3d, code); +} + +MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector& code) + : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) { + Compile(); +} + +MacroJITx64Impl::~MacroJITx64Impl() = default; + +void MacroJITx64Impl::Execute(std::vector& parameters, u32 method) { + MICROPROFILE_SCOPE(MacroJitExecute); + ASSERT_OR_EXECUTE(program != nullptr, { return; }); + JITState state{}; + state.maxwell3d = &maxwell3d; + state.registers = {}; + state.parameters = parameters.data(); + program(&state); +} + +void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { + const bool is_a_zero = opcode.src_a == 0; + const bool is_b_zero = opcode.src_b == 0; + const bool valid_operation = !is_a_zero && !is_b_zero; + const bool is_move_operation = !is_a_zero && is_b_zero; + const bool has_zero_register = is_a_zero || is_b_zero; + + Xbyak::Reg64 src_a; + Xbyak::Reg32 src_b; + + if (!optimizer.zero_reg_skip) { + src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_b = Compile_GetRegister(opcode.src_b, ebx); + } else { + if (!is_a_zero) { + src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + } + if (!is_b_zero) { + src_b = Compile_GetRegister(opcode.src_b, ebx); + } + } + Xbyak::Label skip_carry{}; + + bool has_emitted = false; + + switch (opcode.alu_operation) { + case Macro::ALUOperation::Add: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + add(src_a, src_b); + } + } else { + add(src_a, 
src_b); + } + + if (!optimizer.can_skip_carry) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::AddWithCarry: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + adc(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Subtract: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + sub(src_a, src_b); + has_emitted = true; + } + } else { + sub(src_a, src_b); + has_emitted = true; + } + if (!optimizer.can_skip_carry && has_emitted) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::SubtractWithBorrow: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + sbb(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Xor: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + xor_(src_a, src_b); + } + } else { + xor_(src_a, src_b); + } + break; + case Macro::ALUOperation::Or: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + or_(src_a, src_b); + } + } else { + or_(src_a, src_b); + } + break; + case Macro::ALUOperation::And: + if (optimizer.zero_reg_skip) { + if (!has_zero_register) { + and_(src_a, src_b); + } + } else { + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::AndNot: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + not_(src_b); + and_(src_a, src_b); + } + } else { + not_(src_b); + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::Nand: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + and_(src_a, src_b); + not_(src_a); + } + } else { + and_(src_a, src_b); + not_(src_a); + } + break; + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", + static_cast(opcode.alu_operation.Value())); + break; + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { + if (optimizer.skip_dummy_addimmediate) { + // Games tend to use 
this as an exit instruction placeholder. It's to encode an instruction + // without doing anything. In our case we can just not emit anything. + if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { + return; + } + } + // Check for redundant moves + if (optimizer.optimize_for_method_move && + opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next_opcode.has_value()) { + const auto next = *next_opcode; + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + return; + } + } + } + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, RESULT); + auto src = Compile_GetRegister(opcode.src_b, eax); + + if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) { + shr(src, opcode.bf_src_bit); + } else if (opcode.bf_src_bit == 31) { + xor_(src, src); + } + // Don't bother masking the whole register since we're using a 32 bit register + if (opcode.bf_size != 31 && opcode.bf_size != 0) { + and_(src, opcode.GetBitfieldMask()); + } else if (opcode.bf_size == 0) { + xor_(src, src); + } + if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) { + shl(src, opcode.bf_dst_bit); + } else if (opcode.bf_dst_bit == 31) { + xor_(src, src); + } + + const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + if (mask != 0xffffffff) { + and_(dst, mask); + } + or_(dst, src); + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void 
MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, eax); + auto src = Compile_GetRegister(opcode.src_b, RESULT); + + shr(src, al); + if (opcode.bf_size != 0 && opcode.bf_size != 31) { + and_(src, opcode.GetBitfieldMask()); + } else if (opcode.bf_size == 0) { + xor_(src, src); + } + + if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) { + shl(src, opcode.bf_dst_bit); + } else if (opcode.bf_dst_bit == 31) { + xor_(src, src); + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, eax); + auto src = Compile_GetRegister(opcode.src_b, RESULT); + + if (opcode.bf_src_bit != 0) { + shr(src, opcode.bf_src_bit); + } + + if (opcode.bf_size != 31) { + and_(src, opcode.GetBitfieldMask()); + } + shl(src, al); + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) { + return maxwell3d->GetRegisterValue(method); +} + +static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethodFromMME(method_address.address, value); +} + +void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, qword[STATE]); + mov(Common::X64::ABI_PARAM2, RESULT); + Common::X64::CallFarFunction(*this, &Read); + 
Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + mov(RESULT, Common::X64::ABI_RETURN.cvt32()); + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { + Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, qword[STATE]); + mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); + mov(Common::X64::ABI_PARAM3, value); + Common::X64::CallFarFunction(*this, &Send); + Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + + Xbyak::Label dont_process{}; + // Get increment + test(METHOD_ADDRESS, 0x3f000); + // If zero, method address doesn't update + je(dont_process); + + mov(ecx, METHOD_ADDRESS); + and_(METHOD_ADDRESS, 0xfff); + shr(ecx, 12); + and_(ecx, 0x3f); + lea(eax, ptr[rcx + METHOD_ADDRESS_64]); + sal(ecx, 12); + or_(eax, ecx); + + mov(METHOD_ADDRESS, eax); + + L(dont_process); +} + +void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + const s32 jump_address = + static_cast(pc) + static_cast(opcode.GetBranchTarget() / sizeof(s32)); + + Xbyak::Label end; + auto value = Compile_GetRegister(opcode.src_a, eax); + test(value, value); + if (optimizer.has_delayed_pc) { + switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + jne(end, T_NEAR); + break; + case Macro::BranchCondition::NotZero: + je(end, T_NEAR); + break; + } + + if (opcode.branch_annul) { + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + } else { + Xbyak::Label handle_post_exit{}; + Xbyak::Label skip{}; + jmp(skip, T_NEAR); + if (opcode.is_exit) { + L(handle_post_exit); + // Execute 1 instruction + mov(BRANCH_HOLDER, end_of_code); + // Jump to next instruction to skip delay slot check + jmp(labels[jump_address], T_NEAR); + } else { + L(handle_post_exit); + 
xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + } + L(skip); + mov(BRANCH_HOLDER, handle_post_exit); + jmp(delay_skip[pc], T_NEAR); + } + } else { + switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + je(labels[jump_address], T_NEAR); + break; + case Macro::BranchCondition::NotZero: + jne(labels[jump_address], T_NEAR); + break; + } + } + + L(end); +} + +void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { + optimizer.can_skip_carry = true; + optimizer.has_delayed_pc = false; + for (auto raw_op : code) { + Macro::Opcode op{}; + op.raw = raw_op; + + if (op.operation == Macro::Operation::ALU) { + // Scan for any ALU operations which actually use the carry flag, if they don't exist in + // our current code we can skip emitting the carry flag handling operations + if (op.alu_operation == Macro::ALUOperation::AddWithCarry || + op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { + optimizer.can_skip_carry = false; + } + } + + if (op.operation == Macro::Operation::Branch) { + if (!op.branch_annul) { + optimizer.has_delayed_pc = true; + } + } + } +} + +void MacroJITx64Impl::Compile() { + MICROPROFILE_SCOPE(MacroJitCompile); + bool keep_executing = true; + labels.fill(Xbyak::Label()); + + Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + // JIT state + mov(STATE, Common::X64::ABI_PARAM1); + mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + + static_cast(offsetof(JITState, parameters))]); + mov(REGISTERS, Common::X64::ABI_PARAM1); + add(REGISTERS, static_cast(offsetof(JITState, registers))); + xor_(RESULT, RESULT); + xor_(METHOD_ADDRESS, METHOD_ADDRESS); + xor_(NEXT_PARAMETER, NEXT_PARAMETER); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + + mov(dword[REGISTERS + 4], Compile_FetchParameter()); + + // Track get register for zero registers and mark it as no-op + optimizer.zero_reg_skip = true; + + // AddImmediate tends to be used as a NOP instruction, if we detect this we can + // 
completely skip the entire code path and no emit anything + optimizer.skip_dummy_addimmediate = true; + + // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting + // one if our register isn't "dirty" + optimizer.optimize_for_method_move = true; + + // Check to see if we can skip emitting certain instructions + Optimizer_ScanFlags(); + + const u32 op_count = static_cast(code.size()); + for (u32 i = 0; i < op_count; i++) { + if (i < op_count - 1) { + pc = i + 1; + next_opcode = GetOpCode(); + } else { + next_opcode = {}; + } + pc = i; + Compile_NextInstruction(); + } + + L(end_of_code); + + Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + ret(); + ready(); + program = getCode(); +} + +bool MacroJITx64Impl::Compile_NextInstruction() { + const auto opcode = GetOpCode(); + if (labels[pc].getAddress()) { + return false; + } + + L(labels[pc]); + + const std::size_t op = static_cast(opcode.operation.Value()); + + if (InstructionTable[op] == nullptr) { + UNIMPLEMENTED_MSG("Unimplemented opcode {}", op); + } else { + ((*this).*InstructionTable[op])(opcode); + } + + if (optimizer.has_delayed_pc) { + if (opcode.is_exit) { + mov(rax, end_of_code); + test(BRANCH_HOLDER, BRANCH_HOLDER); + cmove(BRANCH_HOLDER, rax); + // Jump to next instruction to skip delay slot check + je(labels[pc + 1], T_NEAR); + } else { + // TODO(ogniK): Optimize delay slot branching + Xbyak::Label no_delay_slot{}; + test(BRANCH_HOLDER, BRANCH_HOLDER); + je(no_delay_slot, T_NEAR); + mov(rax, BRANCH_HOLDER); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(rax); + L(no_delay_slot); + } + L(delay_skip[pc]); + if (opcode.is_exit) { + return false; + } + } else { + test(BRANCH_HOLDER, BRANCH_HOLDER); + jne(end_of_code, T_NEAR); + if (opcode.is_exit) { + inc(BRANCH_HOLDER); + return false; + } + } + return true; +} + +Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { + mov(eax, dword[PARAMETERS + NEXT_PARAMETER * 
sizeof(u32)]); + inc(NEXT_PARAMETER); + return eax; +} + +Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { + if (index == 0) { + // Register 0 is always zero + xor_(dst, dst); + } else { + mov(dst, dword[REGISTERS + index * sizeof(u32)]); + } + + return dst; +} + +Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { + if (index == 0) { + // Register 0 is always zero + xor_(dst, dst); + } else { + mov(dst, dword[REGISTERS + index * sizeof(u32)]); + } + + return dst; +} + +void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { + Xbyak::Label zero{}, end{}; + xor_(ecx, ecx); + shr(dst, 32); + setne(cl); + mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); +} + +void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { + auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { + // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero + // register. + if (reg == 0) { + return; + } + mov(dword[REGISTERS + reg * sizeof(u32)], result); + }; + auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; + + switch (operation) { + case Macro::ResultOperation::IgnoreAndFetch: + SetRegister(reg, Compile_FetchParameter()); + break; + case Macro::ResultOperation::Move: + SetRegister(reg, RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethod: + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, Compile_FetchParameter()); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, RESULT); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. 
+ SetRegister(reg, Compile_FetchParameter()); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + Compile_Send(Compile_FetchParameter()); + break; + case Macro::ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + shr(RESULT, 12); + and_(RESULT, 0b111111); + Compile_Send(RESULT); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast(operation)); + } +} + +Macro::Opcode MacroJITx64Impl::GetOpCode() const { + ASSERT(pc < code.size()); + return {code[pc]}; +} + +std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { + return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; +} + +} // namespace Tegra -- cgit v1.2.3 From 8118ea160b194fbcc600c44bff3be556b249c780 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Sat, 30 May 2020 12:23:58 +1000 Subject: Favor switch case over jump table Easier to read and will emit a jump table automatically. 
--- src/video_core/macro/macro_jit_x64.cpp | 43 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1b657236a..48501e582 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -14,18 +14,6 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { -using JitFunction = void (MacroJITx64Impl::*)(Macro::Opcode opcode); -const std::array InstructionTable{ - &MacroJITx64Impl::Compile_ALU, - &MacroJITx64Impl::Compile_AddImmediate, - &MacroJITx64Impl::Compile_ExtractInsert, - &MacroJITx64Impl::Compile_ExtractShiftLeftImmediate, - &MacroJITx64Impl::Compile_ExtractShiftLeftRegister, - &MacroJITx64Impl::Compile_Read, - nullptr, - &MacroJITx64Impl::Compile_Branch, -}; - static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; static const Xbyak::Reg64 STATE = Xbyak::util::r11; @@ -489,12 +477,31 @@ bool MacroJITx64Impl::Compile_NextInstruction() { L(labels[pc]); - const std::size_t op = static_cast(opcode.operation.Value()); - - if (InstructionTable[op] == nullptr) { - UNIMPLEMENTED_MSG("Unimplemented opcode {}", op); - } else { - ((*this).*InstructionTable[op])(opcode); + switch (opcode.operation) { + case Macro::Operation::ALU: + Compile_ALU(opcode); + break; + case Macro::Operation::AddImmediate: + Compile_AddImmediate(opcode); + break; + case Macro::Operation::ExtractInsert: + Compile_ExtractInsert(opcode); + break; + case Macro::Operation::ExtractShiftLeftImmediate: + Compile_ExtractShiftLeftImmediate(opcode); + break; + case Macro::Operation::ExtractShiftLeftRegister: + Compile_ExtractShiftLeftRegister(opcode); + break; + case Macro::Operation::Read: + 
Compile_Read(opcode); + break; + case Macro::Operation::Branch: + Compile_Branch(opcode); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); + break; } if (optimizer.has_delayed_pc) { -- cgit v1.2.3 From 411f5527d41ba5c4f09b914b4fb4df0c6493f744 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Wed, 3 Jun 2020 16:33:38 +1000 Subject: Mark parameters as const --- src/video_core/macro/macro_jit_x64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 48501e582..11c1cc3be 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -47,7 +47,7 @@ MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vecto MacroJITx64Impl::~MacroJITx64Impl() = default; -void MacroJITx64Impl::Execute(std::vector& parameters, u32 method) { +void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { MICROPROFILE_SCOPE(MacroJitExecute); ASSERT_OR_EXECUTE(program != nullptr, { return; }); JITState state{}; -- cgit v1.2.3 From d563017dfe63aaa26e7c08369995838f8b9fdafb Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 18:59:54 +0100 Subject: xbyak_abi: Remove *GPS variants of stack manipulation functions --- src/video_core/macro/macro_jit_x64.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 11c1cc3be..2d82c8cff 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -302,22 +302,22 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { sub(result, opcode.immediate * -1); } } - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + 
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, RESULT); Common::X64::CallFarFunction(*this, &Read); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(RESULT, Common::X64::ABI_RETURN.cvt32()); Compile_ProcessResult(opcode.result_operation, opcode.dst); } void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); mov(Common::X64::ABI_PARAM3, value); Common::X64::CallFarFunction(*this, &Send); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Xbyak::Label dont_process{}; // Get increment @@ -421,7 +421,7 @@ void MacroJITx64Impl::Compile() { bool keep_executing = true; labels.fill(Xbyak::Label()); - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); // JIT state mov(STATE, Common::X64::ABI_PARAM1); mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + @@ -463,7 +463,7 @@ void MacroJITx64Impl::Compile() { L(end_of_code); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); ret(); ready(); program = getCode(); -- cgit v1.2.3 From a6a43a5ae047404ca0b03aa647ed5b17400ca7b6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:28:30 +0100 Subject: macro_jit_x64: Remove RESULT_64 
This Reg64 codepath has the exact same behaviour as the Reg32 one. --- src/video_core/macro/macro_jit_x64.cpp | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 11c1cc3be..9a9d50866 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -19,7 +19,6 @@ static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; -static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; @@ -64,15 +63,15 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; - Xbyak::Reg64 src_a; + Xbyak::Reg32 src_a; Xbyak::Reg32 src_b; if (!optimizer.zero_reg_skip) { - src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_a = Compile_GetRegister(opcode.src_a, RESULT); src_b = Compile_GetRegister(opcode.src_b, ebx); } else { if (!is_a_zero) { - src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_a = Compile_GetRegister(opcode.src_a, RESULT); } if (!is_b_zero) { src_b = Compile_GetRegister(opcode.src_b, ebx); @@ -553,17 +552,6 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { return dst; } -Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { - if (index == 0) { - // Register 0 is always zero - xor_(dst, dst); - } else { - mov(dst, dword[REGISTERS + index * sizeof(u32)]); - } - - return dst; -} - void 
Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { Xbyak::Label zero{}, end{}; xor_(ecx, ecx); -- cgit v1.2.3 From 389549b80d7cd7054ec622f4038ff599386e1c04 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:51:33 +0100 Subject: macro_jit_x64: Remove METHOD_ADDRESS_64 Unnecessary variable. --- src/video_core/macro/macro_jit_x64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 9a9d50866..1dcf9957c 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -20,7 +20,6 @@ static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; -static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ @@ -328,7 +327,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { and_(METHOD_ADDRESS, 0xfff); shr(ecx, 12); and_(ecx, 0x3f); - lea(eax, ptr[rcx + METHOD_ADDRESS_64]); + lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); sal(ecx, 12); or_(eax, ecx); -- cgit v1.2.3 From 35db6e1c68f18f401bcae8bd8e8937648c7c67c6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:55:02 +0100 Subject: macro_jit_x64: Remove JITState::parameters This can be passed in as an argument instead. 
--- src/video_core/macro/macro_jit_x64.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1dcf9957c..f1d123f51 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -51,8 +51,7 @@ void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { JITState state{}; state.maxwell3d = &maxwell3d; state.registers = {}; - state.parameters = parameters.data(); - program(&state); + program(&state, parameters.data()); } void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { @@ -422,8 +421,7 @@ void MacroJITx64Impl::Compile() { Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); // JIT state mov(STATE, Common::X64::ABI_PARAM1); - mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + - static_cast(offsetof(JITState, parameters))]); + mov(PARAMETERS, Common::X64::ABI_PARAM2); mov(REGISTERS, Common::X64::ABI_PARAM1); add(REGISTERS, static_cast(offsetof(JITState, registers))); xor_(RESULT, RESULT); -- cgit v1.2.3 From 79aa7b3aceeecadfb5b15bc25431db7768434f23 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:00:59 +0100 Subject: macro_jit_x64: Remove REGISTERS Unnecessary since this is just an offset from STATE. 
--- src/video_core/macro/macro_jit_x64.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index f1d123f51..da3b86d3d 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -15,7 +15,6 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255 namespace Tegra { static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; -static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; @@ -24,7 +23,6 @@ static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ PARAMETERS, - REGISTERS, STATE, NEXT_PARAMETER, RESULT, @@ -422,14 +420,12 @@ void MacroJITx64Impl::Compile() { // JIT state mov(STATE, Common::X64::ABI_PARAM1); mov(PARAMETERS, Common::X64::ABI_PARAM2); - mov(REGISTERS, Common::X64::ABI_PARAM1); - add(REGISTERS, static_cast(offsetof(JITState, registers))); xor_(RESULT, RESULT); xor_(METHOD_ADDRESS, METHOD_ADDRESS); xor_(NEXT_PARAMETER, NEXT_PARAMETER); xor_(BRANCH_HOLDER, BRANCH_HOLDER); - mov(dword[REGISTERS + 4], Compile_FetchParameter()); + mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); // Track get register for zero registers and mark it as no-op optimizer.zero_reg_skip = true; @@ -543,7 +539,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { // Register 0 is always zero xor_(dst, dst); } else { - mov(dst, dword[REGISTERS + index * sizeof(u32)]); + mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); } return dst; @@ -564,7 +560,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation 
operation, u3 if (reg == 0) { return; } - mov(dword[REGISTERS + reg * sizeof(u32)], result); + mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); }; auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; -- cgit v1.2.3 From c09a9e5cc7f53280218cdfbfd7d7ff056f1c2ff5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:12:53 +0100 Subject: macro_jit_x64: Select better registers All registers are now callee-save registers. RBX and RBP selected for STATE and RESULT because these are most commonly accessed; this is to avoid the REX prefix. RBP not used for STATE because there are some SIB restrictions, RBX emits smaller code. --- src/video_core/macro/macro_jit_x64.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index da3b86d3d..1e7b05ac9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -14,18 +14,18 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { -static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; -static const Xbyak::Reg64 STATE = Xbyak::util::r11; -static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; -static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; +static const Xbyak::Reg64 STATE = Xbyak::util::rbx; +static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; +static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ - PARAMETERS, STATE, - NEXT_PARAMETER, RESULT, + PARAMETERS, + 
NEXT_PARAMETER, METHOD_ADDRESS, BRANCH_HOLDER, }); @@ -64,13 +64,13 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { if (!optimizer.zero_reg_skip) { src_a = Compile_GetRegister(opcode.src_a, RESULT); - src_b = Compile_GetRegister(opcode.src_b, ebx); + src_b = Compile_GetRegister(opcode.src_b, eax); } else { if (!is_a_zero) { src_a = Compile_GetRegister(opcode.src_a, RESULT); } if (!is_b_zero) { - src_b = Compile_GetRegister(opcode.src_b, ebx); + src_b = Compile_GetRegister(opcode.src_b, eax); } } Xbyak::Label skip_carry{}; -- cgit v1.2.3 From 1799f4e7743557c8e41c15201c42431f8d6d6dde Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:14:10 +0100 Subject: macro_jit_x64: Remove unused function Compile_WriteCarry --- src/video_core/macro/macro_jit_x64.cpp | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1e7b05ac9..b703daad9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -545,14 +545,6 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { return dst; } -void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { - Xbyak::Label zero{}, end{}; - xor_(ecx, ecx); - shr(dst, 32); - setne(cl); - mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); -} - void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero -- cgit v1.2.3 From cf0aad7d6a22024362c7adf04b605108141453f6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:16:47 +0100 Subject: macro_jit_x64: Remove NEXT_PARAMETER Not required, as PARAMETERS can just be incremented directly. 
--- src/video_core/macro/macro_jit_x64.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index b703daad9..2eb98173d 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -17,7 +17,6 @@ namespace Tegra { static const Xbyak::Reg64 STATE = Xbyak::util::rbx; static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; -static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; @@ -25,7 +24,6 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, RESULT, PARAMETERS, - NEXT_PARAMETER, METHOD_ADDRESS, BRANCH_HOLDER, }); @@ -422,7 +420,6 @@ void MacroJITx64Impl::Compile() { mov(PARAMETERS, Common::X64::ABI_PARAM2); xor_(RESULT, RESULT); xor_(METHOD_ADDRESS, METHOD_ADDRESS); - xor_(NEXT_PARAMETER, NEXT_PARAMETER); xor_(BRANCH_HOLDER, BRANCH_HOLDER); mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); @@ -529,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() { } Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { - mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); - inc(NEXT_PARAMETER); + mov(eax, dword[PARAMETERS]); + add(PARAMETERS, sizeof(u32)); return eax; } -- cgit v1.2.3 From a6ddd7c382e0362d5e86c1622c85c78c59c5aa3b Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 22:01:00 +0100 Subject: macro_jit_x64: Should not skip zero registers for certain ALU ops The code generated for these ALU ops assumes src_a and src_b are always valid.
--- src/video_core/macro/macro_jit_x64.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 2eb98173d..08279b9bc 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -56,11 +56,13 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool valid_operation = !is_a_zero && !is_b_zero; const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; + const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || + opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; Xbyak::Reg32 src_a; Xbyak::Reg32 src_b; - if (!optimizer.zero_reg_skip) { + if (!optimizer.zero_reg_skip || no_zero_reg_skip) { src_a = Compile_GetRegister(opcode.src_a, RESULT); src_b = Compile_GetRegister(opcode.src_b, eax); } else { -- cgit v1.2.3 From c409722435bdb1f2eae4d192c89278e3b07fd2ed Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 22:01:25 +0100 Subject: macro_jit_x64: Optimization implicitly assumes same destination --- src/video_core/macro/macro_jit_x64.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 08279b9bc..30a7e1fe9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -185,7 +185,8 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { if (next_opcode.has_value()) { const auto next = *next_opcode; - if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && + opcode.dst == next.dst) { return; 
} } -- cgit v1.2.3 From 44f10d9b9f4ac6fb718718a85a5916721e7944e4 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:03:32 +0100 Subject: macro_jit_x64: Inline Engines::Maxwell3D::GetRegisterValue --- src/video_core/macro/macro_jit_x64.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index d4a97ec7b..0b2918388 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -295,12 +295,20 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { sub(result, opcode.immediate * -1); } } - Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(Common::X64::ABI_PARAM1, qword[STATE]); - mov(Common::X64::ABI_PARAM2, RESULT); - Common::X64::CallFarFunction(*this, &Read); - Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(RESULT, Common::X64::ABI_RETURN.cvt32()); + + // Equivalent to Engines::Maxwell3D::GetRegisterValue: + if (optimizer.enable_asserts) { + Xbyak::Label pass_range_check; + cmp(RESULT, static_cast(Engines::Maxwell3D::Regs::NUM_REGS)); + jb(pass_range_check); + int3(); + L(pass_range_check); + } + mov(rax, qword[STATE]); + mov(RESULT, + dword[rax + offsetof(Engines::Maxwell3D, regs) + + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } @@ -435,6 +443,9 @@ void MacroJITx64Impl::Compile() { // one if our register isn't "dirty" optimizer.optimize_for_method_move = true; + // Enable run-time assertions in JITted code + optimizer.enable_asserts = false; + // Check to see if we can skip emitting certain instructions Optimizer_ScanFlags(); -- cgit v1.2.3 From 977ceb405627adfec8be6240521d1db8842b8fc2 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 19 Jun 2020 
11:39:41 +0100 Subject: macro_jit_x64: Remove unused function Read --- src/video_core/macro/macro_jit_x64.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 0b2918388..80a00ba77 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -270,14 +270,6 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } -static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) { - return maxwell3d->GetRegisterValue(method); -} - -static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethodFromMME(method_address.address, value); -} - void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { if (optimizer.zero_reg_skip && opcode.src_a == 0) { if (opcode.immediate == 0) { @@ -312,6 +304,10 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } +static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethodFromMME(method_address.address, value); +} + void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); -- cgit v1.2.3 From 811bff009eca0d0fa2ddb1455fc73fdaec4474da Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 21:57:41 -0400 Subject: macro_jit_x64: Eliminate variable shadowing in Compile_ProcessResult() We can reduce the capture scope so that it's not possible for both "reg" variables to clash with one another. While we're at it, we can prevent unnecessary copies.
--- src/video_core/macro/macro_jit_x64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index bee34a7c0..9eface47e 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -546,7 +546,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { } void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { - auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { + const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero // register. if (reg == 0) { @@ -554,7 +554,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 } mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); }; - auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; switch (operation) { case Macro::ResultOperation::IgnoreAndFetch: -- cgit v1.2.3 From 8ea749c1ca50b3584df8c8d7019933fe80e31d9f Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 22:10:43 -0400 Subject: macro_jit_x64: Remove unused variable Removes a completely unused label and marks another variable as unused, given it seems like it has potential uses in the future. 
--- src/video_core/macro/macro_jit_x64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index bee34a7c0..3515ea43b 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -54,7 +54,7 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool is_a_zero = opcode.src_a == 0; const bool is_b_zero = opcode.src_b == 0; const bool valid_operation = !is_a_zero && !is_b_zero; - const bool is_move_operation = !is_a_zero && is_b_zero; + [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; @@ -73,7 +73,6 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { src_b = Compile_GetRegister(opcode.src_b, eax); } } - Xbyak::Label skip_carry{}; bool has_emitted = false; -- cgit v1.2.3 From 140f953b6a70fa2eaf3f2711993913f6f0ca7a75 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 22:33:01 -0400 Subject: macro_jit_x64: Correct readability of Compile_ExtractShiftLeftRegister() Previously dst wasn't being used. 
--- src/video_core/macro/macro_jit_x64.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index bee34a7c0..1ecf1d27f 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -259,8 +259,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, eax); - auto src = Compile_GetRegister(opcode.src_b, RESULT); + const auto dst = Compile_GetRegister(opcode.src_a, eax); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); if (opcode.bf_src_bit != 0) { shr(src, opcode.bf_src_bit); @@ -269,7 +269,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { if (opcode.bf_size != 31) { and_(src, opcode.GetBitfieldMask()); } - shl(src, al); + shl(src, dst.cvt8()); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } -- cgit v1.2.3 From 5a4e89b9018eec802ec445b5c7df7d270d35b4c1 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 19 Jun 2020 22:56:34 -0400 Subject: macro_jit_x64: Correct readability of Compile_ExtractShiftLeftImmediate() Previously dst wasn't being used. 
--- src/video_core/macro/macro_jit_x64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1ecf1d27f..202fbbc21 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -240,10 +240,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, eax); - auto src = Compile_GetRegister(opcode.src_b, RESULT); + const auto dst = Compile_GetRegister(opcode.src_a, eax); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); - shr(src, al); + shr(src, dst.cvt8()); if (opcode.bf_size != 0 && opcode.bf_size != 31) { and_(src, opcode.GetBitfieldMask()); } else if (opcode.bf_size == 0) { -- cgit v1.2.3 From c12eb814b41b5b354df2548d5d48b9ae529ad4b8 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 20 Jun 2020 22:23:58 +0100 Subject: macro_jit_x64: Use ecx for shift register shl/shr only accept cl as their second argument --- src/video_core/macro/macro_jit_x64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 4eef342ec..389b58989 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -239,7 +239,7 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { - const auto dst = Compile_GetRegister(opcode.src_a, eax); + const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto src = Compile_GetRegister(opcode.src_b, RESULT); shr(src, dst.cvt8()); @@ -258,7 +258,7 @@ void 
MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { } void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { - const auto dst = Compile_GetRegister(opcode.src_a, eax); + const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto src = Compile_GetRegister(opcode.src_b, RESULT); if (opcode.bf_src_bit != 0) { -- cgit v1.2.3 From 6ce5f3120be6a65a798d3abc6fda0fe6171d0296 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Fri, 5 Jun 2020 01:42:19 +1000 Subject: Macro HLE support --- src/video_core/macro/macro_jit_x64.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/macro/macro_jit_x64.cpp') diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 30abb66e5..07292702f 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -28,7 +28,8 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr MacroJITx64::Compile(const std::vector& code) { return std::make_unique(maxwell3d, code); -- cgit v1.2.3