aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h40
-rw-r--r--src/video_core/gpu.cpp14
-rw-r--r--src/video_core/gpu.h12
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp104
7 files changed, 173 insertions, 22 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 625ecdfcd..65e0c469f 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -213,6 +213,18 @@ enum class XmadMode : u64 {
CBcc = 4,
};
+enum class IAdd3Mode : u64 { // Shift mode of IADD3; the decompiler honours it for IADD3_R only
+ None = 0, // emitted as a + b + c
+ RightShift = 1, // emitted as ((a + b) >> 16) + c
+ LeftShift = 2, // emitted as ((a + b) << 16) + c
+};
+
+enum class IAdd3Height : u64 { // Half-word selector for an IADD3 operand (height_a/b/c fields)
+ None = 0, // operand expression is used unmodified
+ LowerHalfWord = 1, // emitted as ((x) & 0xFFFF)
+ UpperHalfWord = 2, // emitted as ((x) >> 16)
+};
+
enum class FlowCondition : u64 {
Always = 0xF,
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
@@ -230,6 +242,8 @@ enum class TextureType : u64 {
TextureCube = 3,
};
+enum class IpaMode : u64 { Pass = 0, None = 1, Constant = 2, Sc = 3 }; // IPA mode field values
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -313,6 +327,10 @@ union Instruction {
} alu;
union {
+ BitField<54, 3, IpaMode> mode;
+ } ipa;
+
+ union {
BitField<48, 1, u64> negate_b;
} fmul;
@@ -327,6 +345,10 @@ union Instruction {
} alu_integer;
union {
+ BitField<40, 1, u64> invert;
+ } popc;
+
+ union {
BitField<39, 3, u64> pred;
BitField<42, 1, u64> neg_pred;
} sel;
@@ -339,6 +361,16 @@ union Instruction {
} imnmx;
union {
+ BitField<31, 2, IAdd3Height> height_c;
+ BitField<33, 2, IAdd3Height> height_b;
+ BitField<35, 2, IAdd3Height> height_a;
+ BitField<37, 2, IAdd3Mode> mode;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> neg_b;
+ BitField<51, 1, u64> neg_a;
+ } iadd3;
+
+ union {
BitField<54, 1, u64> saturate;
BitField<56, 1, u64> negate_a;
} iadd32i;
@@ -636,13 +668,16 @@ public:
IADD_C,
IADD_R,
IADD_IMM,
- IADD3_C,
+ IADD3_C, // Add 3 Integers
IADD3_R,
IADD3_IMM,
IADD32I,
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
+ POPC_C,
+ POPC_R,
+ POPC_IMM,
SEL_C,
SEL_R,
SEL_IMM,
@@ -864,6 +899,9 @@ private:
INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
+ INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"),
+ INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"),
+ INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"),
INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 9758adcfd..e6d8e65c6 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -22,7 +22,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
}
GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
- memory_manager = std::make_unique<MemoryManager>();
+ memory_manager = std::make_unique<Tegra::MemoryManager>();
maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
@@ -31,14 +31,22 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
GPU::~GPU() = default;
-const Engines::Maxwell3D& GPU::Maxwell3D() const {
+Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
-Engines::Maxwell3D& GPU::Maxwell3D() {
+const Engines::Maxwell3D& GPU::Maxwell3D() const {
return *maxwell_3d;
}
+MemoryManager& GPU::MemoryManager() { // Mutable access to the memory manager held by this GPU
+ return *memory_manager;
+}
+
+const MemoryManager& GPU::MemoryManager() const { // Read-only access to the same memory manager
+ return *memory_manager;
+}
+
u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
ASSERT(format != RenderTargetFormat::NONE);
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2697e1c27..2c3dbd97b 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -117,18 +117,24 @@ public:
/// Processes a command list stored at the specified address in GPU memory.
void ProcessCommandList(GPUVAddr address, u32 size);
+ /// Returns a reference to the Maxwell3D GPU engine.
+ Engines::Maxwell3D& Maxwell3D();
+
/// Returns a const reference to the Maxwell3D GPU engine.
const Engines::Maxwell3D& Maxwell3D() const;
- /// Returns a reference to the Maxwell3D GPU engine.
- Engines::Maxwell3D& Maxwell3D();
+ /// Returns a reference to the GPU memory manager.
+ Tegra::MemoryManager& MemoryManager();
- std::unique_ptr<MemoryManager> memory_manager;
+ /// Returns a const reference to the GPU memory manager.
+ const Tegra::MemoryManager& MemoryManager() const;
private:
/// Writes a single register in the engine bound to the specified subchannel
void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
+ std::unique_ptr<Tegra::MemoryManager> memory_manager;
+
/// Mapping of command subchannels to their bound engine ids.
std::unordered_map<u32, EngineID> bound_engines;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9951d8178..f014183b8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -211,7 +211,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
buffer_ptr += sizeof(ubo);
buffer_offset += sizeof(ubo);
- const Tegra::GPUVAddr addr{gpu.regs.code_address.CodeAddress() + shader_config.offset};
Shader shader{shader_cache.GetStageProgram(program)};
switch (program) {
@@ -425,8 +424,8 @@ std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_pt
std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment);
GLintptr uploaded_offset = buffer_offset;
- const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
- const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)};
+ auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
+ const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
buffer_ptr += size;
@@ -459,7 +458,6 @@ void RasterizerOpenGL::DrawArrays() {
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
- const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
state.draw.vertex_buffer = stream_buffer.GetHandle();
state.Apply();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 65305000c..d87f90a62 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -168,8 +168,8 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
}
VAddr SurfaceParams::GetCpuAddr() const {
- const auto& gpu = Core::System::GetInstance().GPU();
- return *gpu.memory_manager->GpuToCpuAddress(addr);
+ auto& gpu = Core::System::GetInstance().GPU();
+ return *gpu.MemoryManager().GpuToCpuAddress(addr);
}
static bool IsPixelFormatASTC(PixelFormat format) {
@@ -220,14 +220,14 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
Tegra::GPUVAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
- const auto& gpu = Core::System::GetInstance().GPU();
+ auto& gpu = Core::System::GetInstance().GPU();
if (morton_to_gl) {
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const std::vector<u8> data =
- Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size,
+ Tegra::Texture::UnswizzleTexture(*gpu.MemoryManager().GpuToCpuAddress(addr), tile_size,
bytes_per_pixel, stride, height, block_height);
const size_t size_to_copy{std::min(gl_buffer.size(), data.size())};
gl_buffer.assign(data.begin(), data.begin() + size_to_copy);
@@ -237,7 +237,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(
stride, height, bytes_per_pixel, gl_bytes_per_pixel,
- Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(),
+ Memory::GetPointer(*gpu.MemoryManager().GpuToCpuAddress(addr)), gl_buffer.data(),
morton_to_gl);
}
}
@@ -754,9 +754,9 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
return {};
}
- const auto& gpu = Core::System::GetInstance().GPU();
+ auto& gpu = Core::System::GetInstance().GPU();
// Don't try to create any entries in the cache if the address of the texture is invalid.
- if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none)
+ if (gpu.MemoryManager().GpuToCpuAddress(params.addr) == boost::none)
return {};
// Look up surface in the cache based on address
@@ -848,7 +848,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
"reinterpretation but the texture is tiled.");
}
size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
- auto address = Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress(
+ auto address = Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(
new_params.addr + params.SizeInBytes());
std::vector<u8> data(remaining_size);
Memory::ReadBlock(*address, data.data(), data.size());
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3c3d1d35e..326a901ba 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -14,9 +14,8 @@ namespace OpenGL {
/// Gets the address for the specified shader stage program
static Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-
- GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 94e318966..842bfa0b7 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1287,6 +1287,67 @@ private:
instr.alu.saturate_d);
break;
}
+ case OpCode::Id::IADD3_C:
+ case OpCode::Id::IADD3_R:
+ case OpCode::Id::IADD3_IMM: { // Three-operand integer add: gpr0 = a + b + c (with modifiers)
+ std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); // third operand comes from gpr39
+
+ auto apply_height = [](auto height, auto& oprand) { // rewrites the operand expression in place
+ switch (height) {
+ case Tegra::Shader::IAdd3Height::None:
+ break;
+ case Tegra::Shader::IAdd3Height::LowerHalfWord:
+ oprand = "((" + oprand + ") & 0xFFFF)";
+ break;
+ case Tegra::Shader::IAdd3Height::UpperHalfWord:
+ oprand = "((" + oprand + ") >> 16)"; // NOTE(review): sign-extends if the expression is a signed int - confirm intended
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled IADD3 height: {}",
+ static_cast<u32>(height.Value()));
+ UNREACHABLE();
+ }
+ };
+
+ if (opcode->GetId() == OpCode::Id::IADD3_R) { // half-word selection is applied for the register form only
+ apply_height(instr.iadd3.height_a, op_a);
+ apply_height(instr.iadd3.height_b, op_b);
+ apply_height(instr.iadd3.height_c, op_c);
+ }
+
+ if (instr.iadd3.neg_a) // each neg_* flag wraps its operand in a unary minus, after any half-word selection
+ op_a = "-(" + op_a + ')';
+
+ if (instr.iadd3.neg_b)
+ op_b = "-(" + op_b + ')';
+
+ if (instr.iadd3.neg_c)
+ op_c = "-(" + op_c + ')';
+
+ std::string result;
+ if (opcode->GetId() == OpCode::Id::IADD3_R) { // the shift mode is likewise honoured for the register form only
+ switch (instr.iadd3.mode) {
+ case Tegra::Shader::IAdd3Mode::RightShift:
+ // TODO(tech4me): According to
+ // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
+ // The addition between op_a and op_b should be done in uint33, more
+ // investigation required
+ result = "(((" + op_a + " + " + op_b + ") >> 16) + " + op_c + ')';
+ break;
+ case Tegra::Shader::IAdd3Mode::LeftShift:
+ result = "(((" + op_a + " + " + op_b + ") << 16) + " + op_c + ')';
+ break;
+ default: // IAdd3Mode::None and any unlisted value: plain three-way sum
+ result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
+ break;
+ }
+ } else {
+ result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
+ }
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1); // destination is gpr0
+ break;
+ }
case OpCode::Id::ISCADD_C:
case OpCode::Id::ISCADD_R:
case OpCode::Id::ISCADD_IMM: {
@@ -1302,6 +1363,15 @@ private:
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
break;
}
+ case OpCode::Id::POPC_C:
+ case OpCode::Id::POPC_R:
+ case OpCode::Id::POPC_IMM: { // Population count of operand B, written to gpr0
+ if (instr.popc.invert) { // invert flag: count the bits of the bitwise complement instead
+ op_b = "~(" + op_b + ')';
+ }
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1); // emits a bitCount() call in the generated shader
+ break;
+ }
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
@@ -2039,7 +2109,39 @@ private:
}
case OpCode::Id::IPA: {
const auto& attribute = instr.attribute.fmt28;
- regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index);
+ const auto& reg = instr.gpr0; // destination register
+ switch (instr.ipa.mode) { // dispatch on the IPA mode field
+ case Tegra::Shader::IpaMode::Pass:
+ if (stage == Maxwell3D::Regs::ShaderStage::Fragment && // fragment-stage position reads are taken from gl_FragCoord
+ attribute.index == Attribute::Index::Position) {
+ switch (attribute.element) { // element 0..2 select x/y/z
+ case 0:
+ shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.x;");
+ break;
+ case 1:
+ shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.y;");
+ break;
+ case 2:
+ shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.z;");
+ break;
+ case 3:
+ shader.AddLine(regs.GetRegisterAsFloat(reg) + " = 1.0;"); // element 3 is written as the constant 1.0, not gl_FragCoord.w
+ break;
+ }
+ } else {
+ regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index); // any other Pass read is a plain input attribute load
+ }
+ break;
+ case Tegra::Shader::IpaMode::None:
+ regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
+ break;
+ default: // Constant, Sc and any other value are not implemented
+ LOG_CRITICAL(HW_GPU, "Unhandled IPA mode: {}",
+ static_cast<u32>(instr.ipa.mode.Value()));
+ UNREACHABLE();
+ regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index); // fallback: same load as the None mode
+ }
+
break;
}
case OpCode::Id::SSY: {