diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 6 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 22 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 2 | ||||
| -rw-r--r-- | src/video_core/memory_manager.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/memory_manager.h | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 127 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/decode/bfi.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/decode/shift.cpp | 113 |
12 files changed, 187 insertions, 149 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ab9bbf2e7..7b1912a66 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -704,8 +704,8 @@ public: INSERT_UNION_PADDING_WORDS(0x15); s32 stencil_back_func_ref; - u32 stencil_back_func_mask; u32 stencil_back_mask; + u32 stencil_back_func_mask; INSERT_UNION_PADDING_WORDS(0xC); @@ -1462,8 +1462,8 @@ ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); ASSERT_REG_POSITION(patch_vertices, 0x373); ASSERT_REG_POSITION(scissor_test, 0x380); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); -ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6); -ASSERT_REG_POSITION(stencil_back_mask, 0x3D7); +ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); +ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); ASSERT_REG_POSITION(depth_bounds, 0x3E7); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index cbb201114..402869fde 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -624,6 +624,19 @@ enum class ShuffleOperation : u64 { Bfly = 3, // shuffleXorNV }; +enum class ShfType : u64 { + Bits32 = 0, + U64 = 2, + S64 = 3, +}; + +enum class ShfXmode : u64 { + None = 0, + HI = 1, + X = 2, + XHI = 3, +}; + union Instruction { constexpr Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -776,6 +789,13 @@ union Instruction { } shr; union { + BitField<37, 2, ShfType> type; + BitField<48, 2, ShfXmode> xmode; + BitField<50, 1, u64> wrap; + BitField<20, 6, u64> immediate; + } shf; + + union { BitField<39, 5, u64> shift_amount; BitField<48, 1, u64> negate_b; BitField<49, 1, u64> negate_a; @@ -1708,6 +1728,7 @@ public: BFE_C, BFE_R, BFE_IMM, + BFI_RC, BFI_IMM_R, BRA, BRX, @@ -2135,6 +2156,7 @@ private: INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), + INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b9c5c41a2..062ca83b8 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -23,7 +23,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); + memory_manager = std::make_unique<Tegra::MemoryManager>(system); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 08dc96bb3..882e2d9c7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -86,7 +86,7 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - using CommandQueue = Common::SPSCQueue<CommandDataContainer>; + using CommandQueue = Common::MPSCQueue<CommandDataContainer>; CommandQueue queue; u64 last_fence{}; std::atomic<u64> signaled_fence{}; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 11848fbce..f1d50be3e 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -9,13 +9,12 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" -#include "video_core/rasterizer_interface.h" namespace Tegra { -MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : rasterizer{rasterizer}, system{system} { +MemoryManager::MemoryManager(Core::System& system) : system{system} { std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); std::fill(page_table.attributes.begin(), page_table.attributes.end(), Common::PageType::Unmapped); @@ -84,7 +83,8 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const auto cpu_addr = GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); - rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); + system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); + UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() ->VMManager() @@ -242,7 +242,7 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); + system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); std::memcpy(dest_buffer, src_ptr, copy_amount); break; } @@ -292,7 +292,7 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { u8* dest_ptr{page_table.pointers[page_index] + page_offset}; - rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); + system.GPU().InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); std::memcpy(dest_ptr, src_buffer, copy_amount); break; } @@ -340,7 +340,7 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std:: switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); + system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); WriteBlock(dest_addr, src_ptr, copy_amount); break; } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index aea010087..393447eb4 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -10,10 +10,6 @@ #include "common/common_types.h" #include "common/page_table.h" -namespace VideoCore { -class RasterizerInterface; -} - namespace Core { class System; } @@ -51,7 +47,7 @@ struct VirtualMemoryArea { class MemoryManager final { public: - explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit MemoryManager(Core::System& system); ~MemoryManager(); GPUVAddr AllocateSpace(u64 size, u64 align); @@ -176,7 +172,6 @@ private: Common::PageTable page_table{page_bits}; VMAMap vma_map; - VideoCore::RasterizerInterface& rasterizer; Core::System& system; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0537a2abe..b0eb14c8b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -248,9 +248,6 @@ void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { } GLintptr RasterizerOpenGL::SetupIndexBuffer() { - if (accelerate_draw != AccelDraw::Indexed) { - return 0; - } MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); @@ -546,7 +543,8 @@ void RasterizerOpenGL::Clear() { } } -void RasterizerOpenGL::DrawPrelude() { +void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { + MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); SyncRasterizeEnable(state); @@ -567,9 +565,6 @@ void RasterizerOpenGL::DrawPrelude() { buffer_cache.Acquire(); - // Draw the vertex batch - const bool is_indexed = accelerate_draw == AccelDraw::Indexed; - std::size_t buffer_size = CalculateVertexArraysSize(); // Add space for index buffer @@ -596,7 +591,11 @@ void RasterizerOpenGL::DrawPrelude() { // Upload vertex and index data. SetupVertexBuffer(vao); SetupVertexInstances(vao); - index_buffer_offset = SetupIndexBuffer(); + + GLintptr index_buffer_offset; + if (is_indexed) { + index_buffer_offset = SetupIndexBuffer(); + } // Prepare packed bindings. bind_ubo_pushbuffer.Setup(); @@ -630,6 +629,7 @@ void RasterizerOpenGL::DrawPrelude() { // As all cached buffers are invalidated, we need to recheck their state. gpu.dirty.ResetVertexArrays(); } + gpu.dirty.memory_general = false; shader_program_manager->ApplyTo(state); state.Apply(); @@ -637,106 +637,33 @@ void RasterizerOpenGL::DrawPrelude() { if (texture_cache.TextureBarrier()) { glTextureBarrier(); } -} - -struct DrawParams { - bool is_indexed{}; - bool is_instanced{}; - GLenum primitive_mode{}; - GLint count{}; - GLint base_vertex{}; - - // Indexed settings - GLenum index_format{}; - GLintptr index_buffer_offset{}; - - // Instanced setting - GLint num_instances{}; - GLint base_instance{}; - - void DispatchDraw() { - if (is_indexed) { - const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset); - if (is_instanced) { - glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, - index_buffer_ptr, num_instances, - base_vertex, base_instance); - } else { - glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr, - base_vertex); - } - } else { - if (is_instanced) { - glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances, - base_instance); - } else { - glDrawArrays(primitive_mode, base_vertex, count); - } - } - } -}; - -bool RasterizerOpenGL::DrawBatch(bool is_indexed) { - accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; - MICROPROFILE_SCOPE(OpenGL_Drawing); - - DrawPrelude(); - - auto& maxwell3d = system.GPU().Maxwell3D(); - const auto& regs = maxwell3d.regs; - const auto current_instance = maxwell3d.state.current_instance; - DrawParams draw_call{}; - draw_call.is_indexed = is_indexed; - draw_call.num_instances = static_cast<GLint>(1); - draw_call.base_instance = static_cast<GLint>(current_instance); - draw_call.is_instanced = current_instance > 0; - draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); - if (draw_call.is_indexed) { - draw_call.count = static_cast<GLint>(regs.index_array.count); - draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base); - draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - draw_call.index_buffer_offset = index_buffer_offset; + const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); + const GLsizei num_instances = + static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); + if (is_indexed) { + const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); + const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); + const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); + glDrawElementsInstancedBaseVertexBaseInstance( + primitive_mode, num_vertices, index_format, + reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex, + base_instance); } else { - draw_call.count = static_cast<GLint>(regs.vertex_buffer.count); - draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first); + const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); + const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); + glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances, + base_instance); } - draw_call.DispatchDraw(); +} - maxwell3d.dirty.memory_general = false; - accelerate_draw = AccelDraw::Disabled; +bool RasterizerOpenGL::DrawBatch(bool is_indexed) { + Draw(is_indexed, false); return true; } bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { - accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; - - MICROPROFILE_SCOPE(OpenGL_Drawing); - - DrawPrelude(); - - auto& maxwell3d = system.GPU().Maxwell3D(); - const auto& regs = maxwell3d.regs; - const auto& draw_setup = maxwell3d.mme_draw; - DrawParams draw_call{}; - draw_call.is_indexed = is_indexed; - draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count); - draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance); - draw_call.is_instanced = draw_setup.instance_count > 1; - draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); - if (draw_call.is_indexed) { - draw_call.count = static_cast<GLint>(regs.index_array.count); - draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base); - draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - draw_call.index_buffer_offset = index_buffer_offset; - } else { - draw_call.count = static_cast<GLint>(regs.vertex_buffer.count); - draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first); - } - draw_call.DispatchDraw(); - - maxwell3d.dirty.memory_general = false; - accelerate_draw = AccelDraw::Disabled; + Draw(is_indexed, true); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6a27cf497..0501f3828 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -103,7 +103,7 @@ private: std::size_t size); /// Syncs all the state, shaders, render targets and textures setting before a draw call. - void DrawPrelude(); + void Draw(bool is_indexed, bool is_instanced); /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, const Shader& shader); @@ -220,12 +220,7 @@ private: GLintptr SetupIndexBuffer(); - GLintptr index_buffer_offset; - void SetupShaders(GLenum primitive_mode); - - enum class AccelDraw { Disabled, Arrays, Indexed }; - AccelDraw accelerate_draw = AccelDraw::Disabled; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ea4f35663..7ed505628 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -47,8 +47,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } case Maxwell::VertexAttribute::Type::SignedInt: @@ -72,8 +71,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } case Maxwell::VertexAttribute::Type::Float: @@ -89,13 +87,19 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return GL_FLOAT; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + return {}; + } + case Maxwell::VertexAttribute::Type::UnsignedScaled: + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8_8: + return GL_UNSIGNED_BYTE; + default: + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); return {}; } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d2c6b1189..aada38702 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -571,7 +571,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); } if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { - texceptions.set(rt); + texceptions[rt] = true; } } @@ -579,7 +579,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { zeta_attachment = texture_cache.GetDepthBufferSurface(true); } if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { - texceptions.set(ZETA_TEXCEPTION_INDEX); + texceptions[ZETA_TEXCEPTION_INDEX] = true; } texture_cache.GuardRenderTargets(false); @@ -1122,11 +1122,12 @@ RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) { const auto& rendertarget = regs.rt[rt]; - if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) + if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) { continue; + } renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{ static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format), - texceptions.test(rt)}); + texceptions[rt]}); } renderpass_params.has_zeta = regs.zeta_enable; diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 8be1119df..f992bbe2a 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -17,10 +17,13 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> { + const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { + case OpCode::Id::BFI_RC: + return {GetRegister(instr.gpr39), + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; case OpCode::Id::BFI_IMM_R: - return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())}; + return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: UNREACHABLE(); return {Immediate(0), Immediate(0)}; diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index d419e9c45..3b391d3e6 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -10,8 +10,80 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::ShfType; +using Tegra::Shader::ShfXmode; + +namespace { + +Node IsFull(Node shift) { + return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); +} + +Node Shift(OperationCode opcode, Node value, Node shift) { + Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32)); + Node shifted = Operation(opcode, move(value), shift); + return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); +} + +Node ClampShift(Node shift, s32 size = 32) { + shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); + return Operation(OperationCode::IMin, move(shift), Immediate(size)); +} + +Node WrapShift(Node shift, s32 size = 32) { + return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); +} + +Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { + // These values are used when the shift value is less than 32 + Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); + Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); + Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); + + if (type == ShfType::Bits32) { + // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits + return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); + } + + // And these when it's larger than or 32 + const bool is_signed = type == ShfType::S64; + const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); + Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); + Node greater = Shift(opcode, high, move(reduced)); + + Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); + Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); + + Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); + return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); +} + +Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { + // These values are used when the shift value is less than 32 + Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); + Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); + Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); + + if (type == ShfType::Bits32) { + // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits + return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); + } + + // And these when it's larger than or 32 + Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); + Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); + + Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); + Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); + + Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); + return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); +} + +} // Anonymous namespace u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -28,29 +100,48 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { } }(); - switch (opcode->get().GetId()) { + switch (const auto opid = opcode->get().GetId(); opid) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - if (instr.shr.wrap) { - op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f)); - } else { - op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0)); - op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31)); - } + op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - std::move(op_a), std::move(op_b)); + move(op_a), move(op_b)); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, move(value)); break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: case OpCode::Id::SHL_IMM: { - const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); + Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetRegister(bb, instr.gpr0, move(value)); + break; + } + case OpCode::Id::SHF_RIGHT_R: + case OpCode::Id::SHF_RIGHT_IMM: + case OpCode::Id::SHF_LEFT_R: + case OpCode::Id::SHF_LEFT_IMM: { + UNIMPLEMENTED_IF(instr.generates_cc); + UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", + static_cast<int>(instr.shf.xmode.Value())); + + if (instr.is_b_imm) { + op_b = Immediate(static_cast<u32>(instr.shf.immediate)); + } + const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; + Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); + + Node negated_shift = Operation(OperationCode::INegate, shift); + Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); + + const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; + Node value = (is_right ? ShiftRight : ShiftLeft)( + move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); + + SetRegister(bb, instr.gpr0, move(value)); break; } default: |
