aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/macro
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/macro')
-rw-r--r--src/video_core/macro/macro.cpp32
-rw-r--r--src/video_core/macro/macro.h3
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp24
3 files changed, 56 insertions, 3 deletions
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index a033d03be..e7279efcd 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,11 +2,15 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
+#include <fstream>
#include <optional>
+#include <span>
#include <boost/container_hash/hash.hpp>
#include "common/assert.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/settings.h"
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
@@ -15,6 +19,23 @@
namespace Tegra {
+static void Dump(u64 hash, std::span<const u32> code) { // Writes the raw words of `code` to <DumpDir>/macros/<hash>.macro; failures are logged and the dump is skipped
+ const auto base_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
+ const auto macro_dir{base_dir / "macros"};
+ if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) { // Give up if either directory level cannot be created
+ LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories");
+ return;
+ }
+ const auto name{macro_dir / fmt::format("{:016x}.macro", hash)}; // File name is the hash as 16 zero-padded lowercase hex digits
+ std::fstream macro_file(name, std::ios::out | std::ios::binary); // Binary output so the words land on disk without text translation
+ if (!macro_file) {
+ LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}",
+ Common::FS::PathToUTF8String(name));
+ return;
+ }
+ macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes()); // NOTE(review): the stream state after write() is not checked, so a failed or short write goes unreported
+}
+
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
@@ -24,6 +45,11 @@ void MacroEngine::AddCode(u32 method, u32 data) {
uploaded_macro_code[method].push_back(data);
}
+void MacroEngine::ClearCode(u32 method) { // Forgets everything stored for `method`: both the cache entry and the uploaded words
+ macro_cache.erase(method); // Remove the cached entry that Execute() looks up by method
+ uploaded_macro_code.erase(method); // Remove the words accumulated for this method by AddCode()
+}
+
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
auto compiled_macro = macro_cache.find(method);
if (compiled_macro != macro_cache.end()) {
@@ -54,6 +80,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
if (!mid_method.has_value()) {
cache_info.lle_program = Compile(macro_code->second);
cache_info.hash = boost::hash_value(macro_code->second);
+ if (Settings::values.dump_macros) {
+ Dump(cache_info.hash, macro_code->second);
+ }
} else {
const auto& macro_cached = uploaded_macro_code[mid_method.value()];
const auto rebased_method = method - mid_method.value();
@@ -63,6 +92,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
code.size() * sizeof(u32));
cache_info.hash = boost::hash_value(code);
cache_info.lle_program = Compile(code);
+ if (Settings::values.dump_macros) {
+ Dump(cache_info.hash, code);
+ }
}
if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index 7e12c16dc..07d97ba39 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -117,6 +117,9 @@ public:
// Store the uploaded macro code to compile them when they're called.
void AddCode(u32 method, u32 data);
+ // Clear the code associated with a method.
+ void ClearCode(u32 method);
+
// Compiles the macro if its not in the cache, and executes the compiled macro
void Execute(u32 method, const std::vector<u32>& parameters);
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index dc2b490d4..dc5376501 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -23,7 +23,8 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255
namespace Tegra {
namespace {
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
-constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d;
+constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11; // Loaded from the third program argument, i.e. parameters.data() + parameters.size() (one past the last parameter)
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
@@ -31,6 +32,7 @@ constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
STATE,
RESULT,
+ MAX_PARAMETER,
PARAMETERS,
METHOD_ADDRESS,
BRANCH_HOLDER,
@@ -80,7 +82,7 @@ private:
u32 carry_flag{};
};
static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
- using ProgramType = void (*)(JITState*, const u32*);
+ using ProgramType = void (*)(JITState*, const u32*, const u32*);
struct OptimizerState {
bool can_skip_carry{};
@@ -112,7 +114,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
JITState state{};
state.maxwell3d = &maxwell3d;
state.registers = {};
- program(&state, parameters.data());
+ program(&state, parameters.data(), parameters.data() + parameters.size());
}
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
@@ -488,6 +490,7 @@ void MacroJITx64Impl::Compile() {
// JIT state
mov(STATE, Common::X64::ABI_PARAM1);
mov(PARAMETERS, Common::X64::ABI_PARAM2);
+ mov(MAX_PARAMETER, Common::X64::ABI_PARAM3);
xor_(RESULT, RESULT);
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
@@ -598,7 +601,22 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
return true;
}
+static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) { // Called from the emitted parameter-fetch code when the read address is not below max_parameter; only logs
+ LOG_CRITICAL(HW_GPU,
+ "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)",
+ parameter, max_parameter - sizeof(u32)); // max_parameter is the end pointer, so step back one u32 to report the last slot; NOTE(review): not a real slot if the parameter list is empty
+}
+
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
+ Xbyak::Label parameter_ok{};
+ cmp(PARAMETERS, MAX_PARAMETER);
+ jb(parameter_ok, T_NEAR);
+ Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ mov(Common::X64::ABI_PARAM1, PARAMETERS);
+ mov(Common::X64::ABI_PARAM2, MAX_PARAMETER);
+ Common::X64::CallFarFunction(*this, &WarnInvalidParameter);
+ Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ L(parameter_ok);
mov(eax, dword[PARAMETERS]);
add(PARAMETERS, sizeof(u32));
return eax;