diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 40 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 104 |
7 files changed, 173 insertions, 22 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 625ecdfcd..65e0c469f 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -213,6 +213,18 @@ enum class XmadMode : u64 { CBcc = 4, }; +enum class IAdd3Mode : u64 { + None = 0, + RightShift = 1, + LeftShift = 2, +}; + +enum class IAdd3Height : u64 { + None = 0, + LowerHalfWord = 1, + UpperHalfWord = 2, +}; + enum class FlowCondition : u64 { Always = 0xF, Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? @@ -230,6 +242,8 @@ enum class TextureType : u64 { TextureCube = 3, }; +enum class IpaMode : u64 { Pass = 0, None = 1, Constant = 2, Sc = 3 }; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -313,6 +327,10 @@ union Instruction { } alu; union { + BitField<54, 3, IpaMode> mode; + } ipa; + + union { BitField<48, 1, u64> negate_b; } fmul; @@ -327,6 +345,10 @@ union Instruction { } alu_integer; union { + BitField<40, 1, u64> invert; + } popc; + + union { BitField<39, 3, u64> pred; BitField<42, 1, u64> neg_pred; } sel; @@ -339,6 +361,16 @@ union Instruction { } imnmx; union { + BitField<31, 2, IAdd3Height> height_c; + BitField<33, 2, IAdd3Height> height_b; + BitField<35, 2, IAdd3Height> height_a; + BitField<37, 2, IAdd3Mode> mode; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> neg_b; + BitField<51, 1, u64> neg_a; + } iadd3; + + union { BitField<54, 1, u64> saturate; BitField<56, 1, u64> negate_a; } iadd32i; @@ -636,13 +668,16 @@ public: IADD_C, IADD_R, IADD_IMM, - IADD3_C, + IADD3_C, // Add 3 Integers IADD3_R, IADD3_IMM, IADD32I, ISCADD_C, // Scale and Add ISCADD_R, ISCADD_IMM, + POPC_C, + POPC_R, + POPC_IMM, SEL_C, SEL_R, SEL_IMM, @@ -864,6 +899,9 @@ private: INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), + INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), + INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), + INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 9758adcfd..e6d8e65c6 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -22,7 +22,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { } GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { - memory_manager = std::make_unique<MemoryManager>(); + memory_manager = std::make_unique<Tegra::MemoryManager>(); maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager); maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); @@ -31,14 +31,22 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { GPU::~GPU() = default; -const Engines::Maxwell3D& GPU::Maxwell3D() const { +Engines::Maxwell3D& GPU::Maxwell3D() { return *maxwell_3d; } -Engines::Maxwell3D& GPU::Maxwell3D() { +const Engines::Maxwell3D& GPU::Maxwell3D() const { return *maxwell_3d; } +MemoryManager& GPU::MemoryManager() { + return *memory_manager; +} + +const MemoryManager& GPU::MemoryManager() const { + return *memory_manager; +} + u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2697e1c27..2c3dbd97b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -117,18 +117,24 @@ public: /// Processes a command list stored at the specified address in GPU memory. void ProcessCommandList(GPUVAddr address, u32 size); + /// Returns a reference to the Maxwell3D GPU engine. + Engines::Maxwell3D& Maxwell3D(); + /// Returns a const reference to the Maxwell3D GPU engine. const Engines::Maxwell3D& Maxwell3D() const; - /// Returns a reference to the Maxwell3D GPU engine. - Engines::Maxwell3D& Maxwell3D(); + /// Returns a reference to the GPU memory manager. + Tegra::MemoryManager& MemoryManager(); - std::unique_ptr<MemoryManager> memory_manager; + /// Returns a const reference to the GPU memory manager. + const Tegra::MemoryManager& MemoryManager() const; private: /// Writes a single register in the engine bound to the specified subchannel void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params); + std::unique_ptr<Tegra::MemoryManager> memory_manager; + /// Mapping of command subchannels to their bound engine ids. std::unordered_map<u32, EngineID> bound_engines; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9951d8178..f014183b8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -211,7 +211,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_ptr += sizeof(ubo); buffer_offset += sizeof(ubo); - const Tegra::GPUVAddr addr{gpu.regs.code_address.CodeAddress() + shader_config.offset}; Shader shader{shader_cache.GetStageProgram(program)}; switch (program) { @@ -425,8 +424,8 @@ std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_pt std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment); GLintptr uploaded_offset = buffer_offset; - const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; - const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)}; + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; Memory::ReadBlock(*cpu_addr, buffer_ptr, size); buffer_ptr += size; @@ -459,7 +458,6 @@ void RasterizerOpenGL::DrawArrays() { // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; - const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; state.draw.vertex_buffer = stream_buffer.GetHandle(); state.Apply(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 65305000c..d87f90a62 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -168,8 +168,8 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType } VAddr SurfaceParams::GetCpuAddr() const { - const auto& gpu = Core::System::GetInstance().GPU(); - return *gpu.memory_manager->GpuToCpuAddress(addr); + auto& gpu = Core::System::GetInstance().GPU(); + return *gpu.MemoryManager().GpuToCpuAddress(addr); } static bool IsPixelFormatASTC(PixelFormat format) { @@ -220,14 +220,14 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - const auto& gpu = Core::System::GetInstance().GPU(); + auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual // pixel values. const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; const std::vector<u8> data = - Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size, + Tegra::Texture::UnswizzleTexture(*gpu.MemoryManager().GpuToCpuAddress(addr), tile_size, bytes_per_pixel, stride, height, block_height); const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; gl_buffer.assign(data.begin(), data.begin() + size_to_copy); @@ -237,7 +237,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(), + Memory::GetPointer(*gpu.MemoryManager().GpuToCpuAddress(addr)), gl_buffer.data(), morton_to_gl); } } @@ -754,9 +754,9 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres return {}; } - const auto& gpu = Core::System::GetInstance().GPU(); + auto& gpu = Core::System::GetInstance().GPU(); // Don't try to create any entries in the cache if the address of the texture is invalid. - if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none) + if (gpu.MemoryManager().GpuToCpuAddress(params.addr) == boost::none) return {}; // Look up surface in the cache based on address @@ -848,7 +848,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, "reinterpretation but the texture is tiled."); } size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); - auto address = Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress( + auto address = Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress( new_params.addr + params.SizeInBytes()); std::vector<u8> data(remaining_size); Memory::ReadBlock(*address, data.data(), data.size()); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3c3d1d35e..326a901ba 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -14,9 +14,8 @@ namespace OpenGL { /// Gets the address for the specified shader stage program static Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); - - GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; + return gpu.regs.code_address.CodeAddress() + shader_config.offset; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 94e318966..842bfa0b7 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1287,6 +1287,67 @@ private: instr.alu.saturate_d); break; } + case OpCode::Id::IADD3_C: + case OpCode::Id::IADD3_R: + case OpCode::Id::IADD3_IMM: { + std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); + + auto apply_height = [](auto height, auto& oprand) { + switch (height) { + case Tegra::Shader::IAdd3Height::None: + break; + case Tegra::Shader::IAdd3Height::LowerHalfWord: + oprand = "((" + oprand + ") & 0xFFFF)"; + break; + case Tegra::Shader::IAdd3Height::UpperHalfWord: + oprand = "((" + oprand + ") >> 16)"; + break; + default: + LOG_CRITICAL(HW_GPU, "Unhandled IADD3 height: {}", + static_cast<u32>(height.Value())); + UNREACHABLE(); + } + }; + + if (opcode->GetId() == OpCode::Id::IADD3_R) { + apply_height(instr.iadd3.height_a, op_a); + apply_height(instr.iadd3.height_b, op_b); + apply_height(instr.iadd3.height_c, op_c); + } + + if (instr.iadd3.neg_a) + op_a = "-(" + op_a + ')'; + + if (instr.iadd3.neg_b) + op_b = "-(" + op_b + ')'; + + if (instr.iadd3.neg_c) + op_c = "-(" + op_c + ')'; + + std::string result; + if (opcode->GetId() == OpCode::Id::IADD3_R) { + switch (instr.iadd3.mode) { + case Tegra::Shader::IAdd3Mode::RightShift: + // TODO(tech4me): According to + // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 + // The addition between op_a and op_b should be done in uint33, more + // investigation required + result = "(((" + op_a + " + " + op_b + ") >> 16) + " + op_c + ')'; + break; + case Tegra::Shader::IAdd3Mode::LeftShift: + result = "(((" + op_a + " + " + op_b + ") << 16) + " + op_c + ')'; + break; + default: + result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; + break; + } + } else { + result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; + } + + regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1); + break; + } case OpCode::Id::ISCADD_C: case OpCode::Id::ISCADD_R: case OpCode::Id::ISCADD_IMM: { @@ -1302,6 +1363,15 @@ private: "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1); break; } + case OpCode::Id::POPC_C: + case OpCode::Id::POPC_R: + case OpCode::Id::POPC_IMM: { + if (instr.popc.invert) { + op_b = "~(" + op_b + ')'; + } + regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1); + break; + } case OpCode::Id::SEL_C: case OpCode::Id::SEL_R: case OpCode::Id::SEL_IMM: { @@ -2039,7 +2109,39 @@ private: } case OpCode::Id::IPA: { const auto& attribute = instr.attribute.fmt28; - regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index); + const auto& reg = instr.gpr0; + switch (instr.ipa.mode) { + case Tegra::Shader::IpaMode::Pass: + if (stage == Maxwell3D::Regs::ShaderStage::Fragment && + attribute.index == Attribute::Index::Position) { + switch (attribute.element) { + case 0: + shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.x;"); + break; + case 1: + shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.y;"); + break; + case 2: + shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.z;"); + break; + case 3: + shader.AddLine(regs.GetRegisterAsFloat(reg) + " = 1.0;"); + break; + } + } else { + regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index); + } + break; + case Tegra::Shader::IpaMode::None: + regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index); + break; + default: + LOG_CRITICAL(HW_GPU, "Unhandled IPA mode: {}", + static_cast<u32>(instr.ipa.mode.Value())); + UNREACHABLE(); + regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index); + } + break; } case OpCode::Id::SSY: { |
