diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 10 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 37 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 2 |
10 files changed, 94 insertions, 14 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 16f95b77d..ee79260fc 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1018,7 +1018,14 @@ public: } } instanced_arrays; - INSERT_UNION_PADDING_WORDS(0x6); + INSERT_UNION_PADDING_WORDS(0x4); + + union { + BitField<0, 1, u32> enable; + BitField<4, 8, u32> unk4; + } vp_point_size; + + INSERT_UNION_PADDING_WORDS(1); Cull cull; @@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); +ASSERT_REG_POSITION(vp_point_size, 0x644); ASSERT_REG_POSITION(cull, 0x646); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 57b57c647..6f98bd827 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 { Trunc = 11, }; +enum class AtomicOp : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, +}; + enum class UniformType : u64 { UnsignedByte = 0, SignedByte = 1, @@ -236,6 +248,13 @@ enum class StoreType : u64 { Bits128 = 6, }; +enum class AtomicType : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + S64 = 3, +}; + enum class IMinMaxExchange : u64 { None = 0, XLo = 1, @@ -939,6 +958,16 @@ union Instruction { } stg; union { + BitField<52, 4, AtomicOp> operation; + BitField<28, 2, AtomicType> type; + BitField<30, 22, s64> offset; + + s32 GetImmediateOffset() const { + return static_cast<s32>(offset << 2); + } + } atoms; + + union { BitField<32, 1, PhysicalAttributeDirection> direction; BitField<47, 3, AttributeSize> size; BitField<20, 11, u64> address; @@ -1659,9 +1688,10 @@ public: ST_A, ST_L, ST_S, - ST, // Store in generic memory - STG, // Store in global memory - AL2P, // Transforms attribute memory into physical memory + ST, // Store in generic memory + STG, // Store in global memory + ATOMS, // Atomic operation on shared memory + AL2P, // Transforms attribute memory into physical memory TEX, TEX_B, // Texture Load Bindless TXQ, // Texture Query @@ -1964,6 +1994,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 672051102..c428f06e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() { const auto& regs = system.GPU().Maxwell3D().regs; // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). + state.point.program_control = regs.vp_point_size.enable != 0; state.point.size = std::max(1.0f, regs.point_size); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e9ceca768..2996aaf08 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1856,6 +1856,16 @@ private: Type::Uint}; } + template <const std::string_view& opname, Type type> + Expression Atomic(Operation operation) { + ASSERT(stage == ShaderType::Compute); + auto& smem = std::get<SmemNode>(*operation[0]); + + return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), + Visit(operation[1]).As(type)), + type}; + } + Expression Branch(Operation operation) { const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -2194,6 +2204,8 @@ private: &GLSLDecompiler::AtomicImage<Func::Xor>, &GLSLDecompiler::AtomicImage<Func::Exchange>, + &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, + &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index df2e2395a..cc185e9e1 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() { } void OpenGLState::ApplyPointSize() { + Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); if (UpdateValue(cur_state.point.size, point.size)) { glPointSize(point.size); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fb180f302..678e5cd89 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -131,7 +131,8 @@ public: std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; struct { - float size = 1.0f; // GL_POINT_SIZE + bool program_control = false; // GL_PROGRAM_POINT_SIZE + GLfloat size = 1.0f; // GL_POINT_SIZE } point; struct { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b790b0ef4..e95eb069e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -44,7 +44,7 @@ struct FormatTuple { constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U - {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U @@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U - {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S + {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI - {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 @@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + u8* const mip_data = staging_buffer.data() + mip_offset; + const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); if (is_compressed) { - glGetCompressedTextureImage(texture.handle, level, - static_cast<GLsizei>(params.GetHostMipmapSize(level)), - staging_buffer.data() + mip_offset); + glGetCompressedTextureImage(texture.handle, level, size, mip_data); } else { - glGetTextureImage(texture.handle, level, format, type, - static_cast<GLsizei>(params.GetHostMipmapSize(level)), - staging_buffer.data() + mip_offset); + glGetTextureImage(texture.handle, level, format, type, size, mip_data); } } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8fe852ce8..0cf97cafa 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1796,6 +1796,11 @@ private: return {}; } + Expression UAtomicAdd(Operation) { + UNIMPLEMENTED(); + return {}; + } + Expression Branch(Operation operation) { const auto& target = std::get<ImmediateNode>(*operation[0]); OpStore(jmp_to, Constant(t_uint, target.GetValue())); @@ -2373,6 +2378,8 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, + &SPIRVDecompiler::UAtomicAdd, + &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, &SPIRVDecompiler::PushFlowStack, diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 8cc84e935..7591a715f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -16,6 +16,8 @@ namespace VideoCommon::Shader { +using Tegra::Shader::AtomicOp; +using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::ATOMS: { + UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", + static_cast<int>(instr.atoms.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", + static_cast<int>(instr.atoms.type.Value())); + + const s32 offset = instr.atoms.GetImmediateOffset(); + Node address = GetRegister(instr.gpr8); + address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); + + Node memory = GetSharedMemory(std::move(address)); + Node data = GetRegister(instr.gpr20); + + Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::AL2P: { // Ignore al2p.direction since we don't care about it. diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4e155542a..075c7d07c 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,6 +162,8 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void + UAtomicAdd, /// (smem, uint) -> uint + Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void |
