Diffstat (limited to 'src/video_core')
-rw-r--r--   src/video_core/engines/shader_bytecode.h                 |  30
-rw-r--r--   src/video_core/renderer_opengl/gl_shader_decompiler.cpp  |   6
-rw-r--r--   src/video_core/renderer_opengl/gl_texture_cache.cpp      |  40
-rw-r--r--   src/video_core/renderer_vulkan/vk_pipeline_cache.cpp     |   3
-rw-r--r--   src/video_core/renderer_vulkan/vk_shader_decompiler.cpp  |  95
-rw-r--r--   src/video_core/renderer_vulkan/vk_shader_decompiler.h    |   3
-rw-r--r--   src/video_core/shader/decode/memory.cpp                  | 107
-rw-r--r--   src/video_core/shader/decode/texture.cpp                 |  17
-rw-r--r--   src/video_core/shader/node.h                             |   2
-rw-r--r--   src/video_core/texture_cache/surface_base.cpp            |   2
10 files changed, 203 insertions, 102 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 6f98bd827..f443ec0fe 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
     Exch = 8,
 };

+enum class GlobalAtomicOp : u64 {
+    Add = 0,
+    Min = 1,
+    Max = 2,
+    Inc = 3,
+    Dec = 4,
+    And = 5,
+    Or = 6,
+    Xor = 7,
+    Exch = 8,
+    SafeAdd = 10,
+};
+
+enum class GlobalAtomicType : u64 {
+    U32 = 0,
+    S32 = 1,
+    U64 = 2,
+    F32_FTZ_RN = 3,
+    F16x2_FTZ_RN = 4,
+    S64 = 5,
+};
+
 enum class UniformType : u64 {
     UnsignedByte = 0,
     SignedByte = 1,
@@ -958,6 +980,12 @@ union Instruction {
     } stg;

     union {
+        BitField<52, 4, GlobalAtomicOp> operation;
+        BitField<49, 3, GlobalAtomicType> type;
+        BitField<28, 20, s64> offset;
+    } atom;
+
+    union {
         BitField<52, 4, AtomicOp> operation;
         BitField<28, 2, AtomicType> type;
         BitField<30, 22, s64> offset;
@@ -1690,6 +1718,7 @@ public:
        ST_S,
        ST,    // Store in generic memory
        STG,   // Store in global memory
+       ATOM,  // Atomic operation on global memory
        ATOMS, // Atomic operation on shared memory
        AL2P,  // Transforms attribute memory into physical memory
        TEX,
@@ -1994,6 +2023,7 @@ private:
        INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
        INST("101-------------", Id::ST, Type::Memory, "ST"),
        INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+       INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
        INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
        INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
        INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
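The new `atom` union above packs the ATOM opcode's operation selector into bits 52-55, its data type into bits 49-51, and a signed 20-bit immediate offset into bits 28-47. A minimal standalone sketch of that layout, using plain shifts and masks instead of the project's `BitField` template (the helper names here are hypothetical):

```cpp
#include <cstdint>

struct AtomFields {
    uint64_t operation; // bits 52-55, maps to GlobalAtomicOp
    uint64_t type;      // bits 49-51, maps to GlobalAtomicType
    int64_t offset;     // bits 28-47, signed 20-bit immediate
};

// Decode the ATOM-specific fields from a raw 64-bit instruction word.
AtomFields DecodeAtom(uint64_t insn) {
    const auto bits = [insn](unsigned pos, unsigned len) {
        return (insn >> pos) & ((uint64_t{1} << len) - 1);
    };
    AtomFields fields{};
    fields.operation = bits(52, 4);
    fields.type = bits(49, 3);
    int64_t offset = static_cast<int64_t>(bits(28, 20));
    if (offset & (int64_t{1} << 19)) {
        offset -= int64_t{1} << 20; // sign-extend the 20-bit field
    }
    fields.offset = offset;
    return fields;
}
```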
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2996aaf08..a1ac3d7a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1019,7 +1019,6 @@ private:
            }
            return {{"gl_ViewportIndex", Type::Int}};
        case 3:
-            UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
            return {{"gl_PointSize", Type::Float}};
        }
        return {};
@@ -1858,10 +1857,7 @@ private:
     template <const std::string_view& opname, Type type>
     Expression Atomic(Operation operation) {
-        ASSERT(stage == ShaderType::Compute);
-        auto& smem = std::get<SmemNode>(*operation[0]);
-
-        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+        return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
                             Visit(operation[1]).As(type)),
                 type};
     }

diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index e95eb069e..d4b81cd87 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -176,6 +176,19 @@ GLint GetSwizzleSource(SwizzleSource source) {
     return GL_NONE;
 }

+GLenum GetComponent(PixelFormat format, bool is_first) {
+    switch (format) {
+    case PixelFormat::Z24S8:
+    case PixelFormat::Z32FS8:
+        return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+    case PixelFormat::S8Z24:
+        return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+    default:
+        UNREACHABLE();
+        return GL_DEPTH_COMPONENT;
+    }
+}
+
 void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
     if (params.IsBuffer()) {
         return;
@@ -184,7 +197,7 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
     glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
     glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
     glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1);
+    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1));
     if (params.num_levels == 1) {
         glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
     }
@@ -416,11 +429,21 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou
     if (new_swizzle == swizzle)
         return;
     swizzle = new_swizzle;
-    const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
-                                             GetSwizzleSource(z_source),
-                                             GetSwizzleSource(w_source)};
+    const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
+                                   GetSwizzleSource(z_source), GetSwizzleSource(w_source)};
     const GLuint handle = GetTexture();
-    glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+    const PixelFormat format = surface.GetSurfaceParams().pixel_format;
+    switch (format) {
+    case PixelFormat::Z24S8:
+    case PixelFormat::Z32FS8:
+    case PixelFormat::S8Z24:
+        glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
+                            GetComponent(format, x_source == SwizzleSource::R));
+        break;
+    default:
+        glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+        break;
+    }
 }

 OGLTextureView CachedSurfaceView::CreateTextureView() const {
@@ -529,8 +552,11 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
     const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
     const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;

-    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
-                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
+    glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
+                      static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
+                      static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
+                      static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
+                      buffers,
                       is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
 }
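The ApplySwizzle change above stops routing depth/stencil selection through GL_TEXTURE_SWIZZLE_RGBA for packed formats and instead tells the driver which aspect of the texture a sampler should read. A minimal sketch of the underlying GL call, assuming a GL 4.5 context and a glad loader (both assumptions, not part of the patch):

```cpp
#include <glad/glad.h>

// Choose whether shaders sampling a packed depth-stencil texture see the depth
// or the stencil component (GL 4.3 / ARB_stencil_texturing; the default is depth).
void SelectDepthStencilAspect(GLuint texture, bool sample_depth) {
    glTextureParameteri(texture, GL_DEPTH_STENCIL_TEXTURE_MODE,
                        sample_depth ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX);
}
```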
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 48e23d4cd..7ddf7d3ee 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -325,9 +325,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
     specialization.tessellation.primitive = fixed_state.tessellation.primitive;
     specialization.tessellation.spacing = fixed_state.tessellation.spacing;
     specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
-    for (const auto& rt : key.renderpass_params.color_attachments) {
-        specialization.enabled_rendertargets.set(rt.index);
-    }

     SPIRVProgram program;
     std::vector<vk::DescriptorSetLayoutBinding> bindings;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index dd6d2ef03..1ab22251e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -542,11 +542,10 @@ private:
             return;
         }

-        for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) {
-            if (!specialization.enabled_rendertargets[rt]) {
+        for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) {
+            if (!IsRenderTargetEnabled(rt)) {
                 continue;
             }
-
             const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
             Name(id, fmt::format("frag_color{}", rt));
             Decorate(id, spv::Decoration::Location, rt);
@@ -852,6 +851,15 @@ private:
         return binding;
     }

+    bool IsRenderTargetEnabled(u32 rt) const {
+        for (u32 component = 0; component < 4; ++component) {
+            if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     bool IsInputAttributeArray() const {
         return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval ||
                stage == ShaderType::Geometry;
     }
@@ -1115,15 +1123,7 @@
         }

         if (const auto gmem = std::get_if<GmemNode>(&*node)) {
-            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
-            const Id real = AsUint(Visit(gmem->GetRealAddress()));
-            const Id base = AsUint(Visit(gmem->GetBaseAddress()));
-
-            Id offset = OpISub(t_uint, real, base);
-            offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
-            return {OpLoad(t_float,
-                           OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
-                    Type::Float};
+            return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
         }

         if (const auto lmem = std::get_if<LmemNode>(&*node)) {
@@ -1134,10 +1134,7 @@
         }

         if (const auto smem = std::get_if<SmemNode>(&*node)) {
-            Id address = AsUint(Visit(smem->GetAddress()));
-            address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
-            const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
-            return {OpLoad(t_uint, pointer), Type::Uint};
+            return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
         }

         if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -1331,20 +1328,10 @@
             target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};

         } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
-            ASSERT(stage == ShaderType::Compute);
-            Id address = AsUint(Visit(smem->GetAddress()));
-            address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
-            target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
+            target = {GetSharedMemoryPointer(*smem), Type::Uint};

         } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
-            const Id real = AsUint(Visit(gmem->GetRealAddress()));
-            const Id base = AsUint(Visit(gmem->GetBaseAddress()));
-            const Id diff = OpISub(t_uint, real, base);
-            const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
-
-            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
-            target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
-                      Type::Float};
+            target = {GetGlobalMemoryPointer(*gmem), Type::Uint};

         } else {
             UNIMPLEMENTED();
@@ -1796,11 +1783,16 @@
         return {};
     }

-    Expression UAtomicAdd(Operation operation) {
-        const auto& smem = std::get<SmemNode>(*operation[0]);
-        Id address = AsUint(Visit(smem.GetAddress()));
-        address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
-        const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
+    Expression AtomicAdd(Operation operation) {
+        Id pointer;
+        if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
+            pointer = GetSharedMemoryPointer(*smem);
+        } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
+            pointer = GetGlobalMemoryPointer(*gmem);
+        } else {
+            UNREACHABLE();
+            return {Constant(t_uint, 0), Type::Uint};
+        }

         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
         const Id semantics = Constant(t_uint, 0U);
@@ -1889,19 +1881,14 @@
         // rendertargets/components are skipped in the register assignment.
         u32 current_reg = 0;
         for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
-            if (!specialization.enabled_rendertargets[rt]) {
-                // Skip rendertargets that are not enabled
-                continue;
-            }
             // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
             for (u32 component = 0; component < 4; ++component) {
-                const Id pointer = AccessElement(t_out_float, frag_colors.at(rt), component);
-                if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
-                    OpStore(pointer, SafeGetRegister(current_reg));
-                    ++current_reg;
-                } else {
-                    OpStore(pointer, component == 3 ? v_float_one : v_float_zero);
+                if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
+                    continue;
                 }
+                const Id pointer = AccessElement(t_out_float, frag_colors[rt], component);
+                OpStore(pointer, SafeGetRegister(current_reg));
+                ++current_reg;
             }
         }
         if (header.ps.omap.depth) {
@@ -2240,6 +2227,22 @@
         return {};
     }

+    Id GetGlobalMemoryPointer(const GmemNode& gmem) {
+        const Id real = AsUint(Visit(gmem.GetRealAddress()));
+        const Id base = AsUint(Visit(gmem.GetBaseAddress()));
+        const Id diff = OpISub(t_uint, real, base);
+        const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
+        const Id buffer = global_buffers.at(gmem.GetDescriptor());
+        return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
+    }
+
+    Id GetSharedMemoryPointer(const SmemNode& smem) {
+        ASSERT(stage == ShaderType::Compute);
+        Id address = AsUint(Visit(smem.GetAddress()));
+        address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
+        return OpAccessChain(t_smem_uint, shared_memory, address);
+    }
+
     static constexpr std::array operation_decompilers = {
         &SPIRVDecompiler::Assign,

@@ -2386,7 +2389,7 @@
         &SPIRVDecompiler::AtomicImageXor,
         &SPIRVDecompiler::AtomicImageExchange,

-        &SPIRVDecompiler::UAtomicAdd,
+        &SPIRVDecompiler::AtomicAdd,

         &SPIRVDecompiler::Branch,
         &SPIRVDecompiler::BranchIndirect,
@@ -2482,9 +2485,9 @@
     Id t_smem_uint{};

-    const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
+    const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
     const Id t_gmem_array =
-        Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray");
+        Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
     const Id t_gmem_struct = MemberDecorate(
         Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
     const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 10794be1c..f5dc14d9e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -102,9 +102,6 @@ struct Specialization final {
         Maxwell::TessellationSpacing spacing{};
         bool clockwise{};
     } tessellation;
-
-    // Fragment specific
-    std::bitset<8> enabled_rendertargets;
 };
 // Old gcc versions don't consider this trivially copyable.
 // static_assert(std::is_trivially_copyable_v<Specialization>);
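Both helpers introduced above reduce a byte address to an index into a runtime array of u32 words before emitting the OpAccessChain. A standalone sketch of that arithmetic with plain integers instead of SPIR-V ids (function names are hypothetical):

```cpp
#include <cstdint>

// Global memory: the SSBO element type is u32, so the index is the byte
// distance from the buffer's base address divided by four.
uint32_t GlobalWordIndex(uint32_t real_address, uint32_t base_address) {
    return (real_address - base_address) >> 2;
}

// Shared memory: addresses are already relative to the workgroup block.
uint32_t SharedWordIndex(uint32_t byte_address) {
    return byte_address >> 2;
}
```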
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7591a715f..b5fbc4d58 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -19,9 +19,12 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
+using Tegra::Shader::GlobalAtomicOp;
+using Tegra::Shader::GlobalAtomicType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
+using Tegra::Shader::StoreType;

 namespace {

@@ -61,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
     }
 }

+Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
+    Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
+    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
+    return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
+                     Immediate(size));
+}
+
+Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
+    Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
+    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
+    return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
+                     std::move(offset), Immediate(size));
+}
+
+Node Sign16Extend(Node value) {
+    Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
+    Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
+    Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
+    return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
+}
+
 } // Anonymous namespace

 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -136,26 +160,31 @@
         LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
         [[fallthrough]];
     case OpCode::Id::LD_S: {
-        const auto GetMemory = [&](s32 offset) {
+        const auto GetAddress = [&](s32 offset) {
             ASSERT(offset % 4 == 0);
             const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
-            const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
-                                           immediate_offset);
-            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address)
-                                                             : GetLocalMemory(address);
+            return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
+        };
+        const auto GetMemory = [&](s32 offset) {
+            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
+                                                             : GetLocalMemory(GetAddress(offset));
         };

         switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bits32:
-        case Tegra::Shader::StoreType::Bits64:
-        case Tegra::Shader::StoreType::Bits128: {
-            const u32 count = [&]() {
+        case StoreType::Signed16:
+            SetRegister(bb, instr.gpr0,
+                        Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
+            break;
+        case StoreType::Bits32:
+        case StoreType::Bits64:
+        case StoreType::Bits128: {
+            const u32 count = [&] {
                 switch (instr.ldst_sl.type.Value()) {
-                case Tegra::Shader::StoreType::Bits32:
+                case StoreType::Bits32:
                     return 1;
-                case Tegra::Shader::StoreType::Bits64:
+                case StoreType::Bits64:
                     return 2;
-                case Tegra::Shader::StoreType::Bits128:
+                case StoreType::Bits128:
                     return 4;
                 default:
                     UNREACHABLE();
@@ -212,12 +241,7 @@
            // To handle unaligned loads get the bytes used to dereference global memory and extract
            // those bytes from the loaded u32.
            if (IsUnaligned(type)) {
-                Node mask = Immediate(GetUnalignedMask(type));
-                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
-                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
-
-                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
-                                 std::move(offset), Immediate(size));
+                gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
            }

            SetTemporary(bb, i, gmem);
@@ -269,21 +293,28 @@
            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
        };

-        const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L
-                                    ? &ShaderIR::SetLocalMemory
-                                    : &ShaderIR::SetSharedMemory;
+        const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
+        const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
+        const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;

         switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bits128:
+        case StoreType::Bits128:
             (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
             (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
             [[fallthrough]];
-        case Tegra::Shader::StoreType::Bits64:
+        case StoreType::Bits64:
             (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
             [[fallthrough]];
-        case Tegra::Shader::StoreType::Bits32:
+        case StoreType::Bits32:
             (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
             break;
+        case StoreType::Signed16: {
+            Node address = GetAddress(0);
+            Node memory = (this->*get_memory)(address);
+            (this->*set_memory)(
+                bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
+            break;
+        }
         default:
             UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
                               static_cast<u32>(instr.ldst_sl.type.Value()));
@@ -323,18 +354,32 @@
            Node value = GetRegister(instr.gpr0.Value() + i);

            if (IsUnaligned(type)) {
-                Node mask = Immediate(GetUnalignedMask(type));
-                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
-                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
-
-                value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
-                                  Immediate(size));
+                const u32 mask = GetUnalignedMask(type);
+                value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
            }

            bb.push_back(Operation(OperationCode::Assign, gmem, value));
        }
        break;
    }
+    case OpCode::Id::ATOM: {
+        UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
+                             static_cast<int>(instr.atom.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
+                             static_cast<int>(instr.atom.type.Value()));
+
+        const auto [real_address, base_address, descriptor] =
+            TrackGlobalMemory(bb, instr, true, true);
+        if (!real_address || !base_address) {
+            // Tracking failed, skip atomic.
+            break;
+        }
+
+        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+        Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
+        SetRegister(bb, instr.gpr0, std::move(value));
+        break;
+    }
    case OpCode::Id::ATOMS: {
        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
                             static_cast<int>(instr.atoms.operation.Value()));
@@ -348,7 +393,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        Node memory = GetSharedMemory(std::move(address));
        Node data = GetRegister(instr.gpr20);

-        Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
+        Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
        SetRegister(bb, instr.gpr0, std::move(value));
        break;
    }
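The new ExtractUnaligned, InsertUnaligned and Sign16Extend helpers build shader IR nodes, but the arithmetic they encode is easy to follow in scalar form. A sketch of the equivalent host-side computation (assuming size is 8 or 16 and mask selects the byte offset, as at the call sites above):

```cpp
#include <cstdint>

// Pull `size` bits out of a loaded 32-bit word, starting at the byte the
// unaligned address points to (address & mask gives the byte, *8 gives bits).
uint32_t ExtractUnaligned(uint32_t word, uint32_t address, uint32_t mask, uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8;
    return (word >> bit_offset) & ((1u << size) - 1u);
}

// Write `value` back into the same bit range of the existing word.
uint32_t InsertUnaligned(uint32_t word, uint32_t value, uint32_t address, uint32_t mask,
                         uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8;
    const uint32_t field_mask = ((1u << size) - 1u) << bit_offset;
    return (word & ~field_mask) | ((value << bit_offset) & field_mask);
}

// Sign-extend a 16-bit value held in the low half of a 32-bit register.
uint32_t Sign16Extend(uint32_t value) {
    return (value & 0x8000u) ? (value | 0xFFFF0000u) : value;
}
```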
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index cd984f763..0b567e39d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -161,16 +161,16 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     case OpCode::Id::TXD: {
         UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
-        UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented");

+        const bool is_array = instr.txd.is_array != 0;
         u64 base_reg = instr.gpr8.Value();
         const auto derivate_reg = instr.gpr20.Value();
         const auto texture_type = instr.txd.texture_type.Value();
         const auto coord_count = GetCoordCount(texture_type);

-        const Sampler* sampler = is_bindless
-                                     ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
-                                     : GetSampler(instr.sampler, {{texture_type, false, false}});
+        const Sampler* sampler =
+            is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}})
+                        : GetSampler(instr.sampler, {{texture_type, is_array, false}});
         Node4 values;
         if (sampler == nullptr) {
             for (u32 element = 0; element < values.size(); ++element) {
@@ -179,6 +179,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             WriteTexInstructionFloat(bb, instr, values);
             break;
         }
+
         if (is_bindless) {
             base_reg++;
         }
@@ -192,8 +193,14 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             derivates.push_back(GetRegister(derivate_reg + derivate + 1));
         }

+        Node array_node = {};
+        if (is_array) {
+            const Node info_reg = GetRegister(base_reg + coord_count);
+            array_node = BitfieldExtract(info_reg, 0, 16);
+        }
+
         for (u32 element = 0; element < values.size(); ++element) {
-            MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element};
+            MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element};
             values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
         }

diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 075c7d07c..9af1f0228 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,7 +162,7 @@ enum class OperationCode {
     AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
     AtomicImageExchange, /// (MetaImage, int[N] coords) -> void

-    UAtomicAdd, /// (smem, uint) -> uint
+    AtomicAdd, /// (memory, {u}int) -> {u}int

     Branch,         /// (uint branch_target) -> void
     BranchIndirect, /// (uint branch_target) -> void
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 829268b4c..84469b7ba 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -135,7 +135,7 @@ std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& i
         for (u32 level = 0; level < mipmaps; level++) {
             const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
             const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
-            result.emplace_back(width, height, layer, level);
+            result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
         }
     }
     return result;
