aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/maxwell_3d.h10
-rw-r--r--src/video_core/engines/shader_bytecode.h37
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_state.h3
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp16
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp7
-rw-r--r--src/video_core/shader/decode/memory.cpp19
-rw-r--r--src/video_core/shader/node.h2
10 files changed, 94 insertions, 14 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 16f95b77d..ee79260fc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1018,7 +1018,14 @@ public:
}
} instanced_arrays;
- INSERT_UNION_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x4);
+
+ union {
+ BitField<0, 1, u32> enable;
+ BitField<4, 8, u32> unk4;
+ } vp_point_size;
+
+ INSERT_UNION_PADDING_WORDS(1);
Cull cull;
@@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
+ASSERT_REG_POSITION(vp_point_size, 0x644);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 57b57c647..6f98bd827 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
Trunc = 11,
};
+enum class AtomicOp : u64 {
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ Exch = 8,
+};
+
enum class UniformType : u64 {
UnsignedByte = 0,
SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
Bits128 = 6,
};
+enum class AtomicType : u64 {
+ U32 = 0,
+ S32 = 1,
+ U64 = 2,
+ S64 = 3,
+};
+
enum class IMinMaxExchange : u64 {
None = 0,
XLo = 1,
@@ -939,6 +958,16 @@ union Instruction {
} stg;
union {
+ BitField<52, 4, AtomicOp> operation;
+ BitField<28, 2, AtomicType> type;
+ BitField<30, 22, s64> offset;
+
+ s32 GetImmediateOffset() const {
+ return static_cast<s32>(offset << 2);
+ }
+ } atoms;
+
+ union {
BitField<32, 1, PhysicalAttributeDirection> direction;
BitField<47, 3, AttributeSize> size;
BitField<20, 11, u64> address;
@@ -1659,9 +1688,10 @@ public:
ST_A,
ST_L,
ST_S,
- ST, // Store in generic memory
- STG, // Store in global memory
- AL2P, // Transforms attribute memory into physical memory
+ ST, // Store in generic memory
+ STG, // Store in global memory
+ ATOMS, // Atomic operation on shared memory
+ AL2P, // Transforms attribute memory into physical memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
@@ -1964,6 +1994,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+ INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 672051102..c428f06e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {
const auto& regs = system.GPU().Maxwell3D().regs;
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
+ state.point.program_control = regs.vp_point_size.enable != 0;
state.point.size = std::max(1.0f, regs.point_size);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e9ceca768..2996aaf08 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1856,6 +1856,16 @@ private:
Type::Uint};
}
+ template <const std::string_view& opname, Type type>
+ Expression Atomic(Operation operation) {
+ ASSERT(stage == ShaderType::Compute);
+ auto& smem = std::get<SmemNode>(*operation[0]);
+
+ return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+ Visit(operation[1]).As(type)),
+ type};
+ }
+
Expression Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -2194,6 +2204,8 @@ private:
&GLSLDecompiler::AtomicImage<Func::Xor>,
&GLSLDecompiler::AtomicImage<Func::Exchange>,
+ &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
&GLSLDecompiler::PushFlowStack,
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index df2e2395a..cc185e9e1 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {
}
void OpenGLState::ApplyPointSize() {
+ Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index fb180f302..678e5cd89 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -131,7 +131,8 @@ public:
std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
struct {
- float size = 1.0f; // GL_POINT_SIZE
+ bool program_control = false; // GL_PROGRAM_POINT_SIZE
+ GLfloat size = 1.0f; // GL_POINT_SIZE
} point;
struct {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b790b0ef4..e95eb069e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -44,7 +44,7 @@ struct FormatTuple {
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
- {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
- {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S
+ {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
- {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
+ u8* const mip_data = staging_buffer.data() + mip_offset;
+ const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
if (is_compressed) {
- glGetCompressedTextureImage(texture.handle, level,
- static_cast<GLsizei>(params.GetHostMipmapSize(level)),
- staging_buffer.data() + mip_offset);
+ glGetCompressedTextureImage(texture.handle, level, size, mip_data);
} else {
- glGetTextureImage(texture.handle, level, format, type,
- static_cast<GLsizei>(params.GetHostMipmapSize(level)),
- staging_buffer.data() + mip_offset);
+ glGetTextureImage(texture.handle, level, format, type, size, mip_data);
}
}
}
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 8fe852ce8..0cf97cafa 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1796,6 +1796,11 @@ private:
return {};
}
+ Expression UAtomicAdd(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Expression Branch(Operation operation) {
const auto& target = std::get<ImmediateNode>(*operation[0]);
OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -2373,6 +2378,8 @@ private:
&SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange,
+ &SPIRVDecompiler::UAtomicAdd,
+
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,
&SPIRVDecompiler::PushFlowStack,
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 8cc84e935..7591a715f 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -16,6 +16,8 @@
namespace VideoCommon::Shader {
+using Tegra::Shader::AtomicOp;
+using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::ATOMS: {
+ UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
+ static_cast<int>(instr.atoms.operation.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
+ static_cast<int>(instr.atoms.type.Value()));
+
+ const s32 offset = instr.atoms.GetImmediateOffset();
+ Node address = GetRegister(instr.gpr8);
+ address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
+
+ Node memory = GetSharedMemory(std::move(address));
+ Node data = GetRegister(instr.gpr20);
+
+ Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it.
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4e155542a..075c7d07c 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,6 +162,8 @@ enum class OperationCode {
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
+ UAtomicAdd, /// (smem, uint) -> uint
+
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void