aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 1
-rw-r--r-- src/video_core/engines/shader_bytecode.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 135
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 40
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 29
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 3
-rw-r--r-- src/video_core/textures/decoders.cpp 7
7 files changed, 137 insertions, 79 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index bca014a4a..78ba29fc1 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -155,7 +155,6 @@ void Maxwell3D::ProcessQueryGet() {
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
"Units other than CROP are unimplemented");
- u32 value = Memory::Read32(*address);
u64 result = 0;
// TODO(Subv): Support the other query variables
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 6cd08d28b..af7756266 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -79,6 +79,7 @@ union Attribute {
constexpr explicit Attribute(u64 value) : value(value) {}
enum class Index : u64 {
+ PointSize = 6,
Position = 7,
Attribute_0 = 8,
Attribute_31 = 39,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 9c8925383..591ec7998 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -78,6 +78,29 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
}
}
+/// Adds up the result of Tegra::Texture::CalculateSize for the base level and for every further
+/// mip level below max_mip_level.
+/// @param layer_only When true, the base depth is taken as 1 instead of `depth`.
+/// @return The accumulated size; for tiled surfaces it is aligned up to
+///         512 * block_height * block_depth bytes.
+std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
+    const u32 compression_factor{GetCompressionFactor(pixel_format)};
+    const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
+
+    // Extents and block dimensions of the level currently being measured (base level first).
+    u32 level_width = std::max(1U, width / compression_factor);
+    u32 level_height = std::max(1U, height / compression_factor);
+    u32 level_depth = layer_only ? 1U : depth;
+    u32 level_block_height = block_height;
+    u32 level_block_depth = block_depth;
+
+    const auto current_level_size = [&]() -> std::size_t {
+        return Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, level_width, level_height,
+                                             level_depth, level_block_height, level_block_depth);
+    };
+
+    std::size_t total = current_level_size();
+    // Each additional level halves every extent and block dimension, never going below 1.
+    for (u32 levels_left = max_mip_level; levels_left > 1; --levels_left) {
+        level_width = std::max(1U, level_width / 2);
+        level_height = std::max(1U, level_height / 2);
+        level_depth = std::max(1U, level_depth / 2);
+        level_block_height = std::max(1U, level_block_height / 2);
+        level_block_depth = std::max(1U, level_block_depth / 2);
+        total += current_level_size();
+    }
+
+    if (!is_tiled) {
+        return total;
+    }
+    const std::size_t block_size_bytes = 512 * block_height * block_depth; // 512 is GOB size
+    return Common::AlignUp(total, block_size_bytes);
+}
+
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
SurfaceParams params{};
@@ -124,6 +147,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
break;
}
+ params.is_layered = SurfaceTargetIsLayered(params.target);
params.max_mip_level = config.tic.max_mip_level + 1;
params.rt = {};
@@ -150,6 +174,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.max_mip_level = 0;
+ params.is_layered = false;
// Render target specific parameters, not used for caching
params.rt.index = static_cast<u32>(index);
@@ -182,6 +207,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.max_mip_level = 0;
+ params.is_layered = false;
params.rt = {};
params.InitCacheParameters(zeta_address);
@@ -361,10 +387,11 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d
}
}
-static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
- SurfaceParams::MaxPixelFormat>
- morton_to_gl_fns = {
- // clang-format off
+using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
+ SurfaceParams::MaxPixelFormat>;
+
+static constexpr GLConversionArray morton_to_gl_fns = {
+ // clang-format off
MortonCopy<true, PixelFormat::ABGR8U>,
MortonCopy<true, PixelFormat::ABGR8S>,
MortonCopy<true, PixelFormat::ABGR8UI>,
@@ -418,13 +445,11 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
MortonCopy<true, PixelFormat::Z24S8>,
MortonCopy<true, PixelFormat::S8Z24>,
MortonCopy<true, PixelFormat::Z32FS8>,
- // clang-format on
+ // clang-format on
};
-static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
- SurfaceParams::MaxPixelFormat>
- gl_to_morton_fns = {
- // clang-format off
+static constexpr GLConversionArray gl_to_morton_fns = {
+ // clang-format off
MortonCopy<false, PixelFormat::ABGR8U>,
MortonCopy<false, PixelFormat::ABGR8S>,
MortonCopy<false, PixelFormat::ABGR8UI>,
@@ -479,9 +504,35 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32FS8>,
- // clang-format on
+ // clang-format on
};
+/// Runs the conversion routine selected by params.pixel_format from `functions` between
+/// `gl_buffer` and the memory at params.addr.
+/// Layered surfaces are converted one layer at a time: each call gets a depth of 1, the GL side
+/// advances by LayerSizeGL() and the address side by LayerMemorySize(). Any other surface is
+/// converted in a single call covering the whole buffer.
+void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
+                 std::vector<u8>& gl_buffer) {
+    const auto convert = functions[static_cast<std::size_t>(params.pixel_format)];
+
+    // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
+    const u32 depth =
+        params.target == SurfaceParams::SurfaceTarget::Texture2D ? 1U : params.depth;
+
+    if (!params.is_layered) {
+        convert(params.width, params.block_height, params.height, params.block_depth, depth,
+                gl_buffer.data(), gl_buffer.size(), params.addr);
+        return;
+    }
+
+    const u64 memory_layer_size = params.LayerMemorySize();
+    const u64 gl_layer_size = params.LayerSizeGL();
+    u64 memory_offset = 0;
+    u64 gl_offset = 0;
+    for (u32 layer = 0; layer < depth; ++layer) {
+        convert(params.width, params.block_height, params.height, params.block_depth, 1,
+                gl_buffer.data() + gl_offset, gl_layer_size, params.addr + memory_offset);
+        memory_offset += memory_layer_size;
+        gl_offset += gl_layer_size;
+    }
+}
+
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
@@ -881,21 +932,10 @@ void CachedSurface::LoadGLBuffer() {
gl_buffer.resize(params.size_in_bytes_gl);
if (params.is_tiled) {
- u32 depth = params.depth;
- u32 block_depth = params.block_depth;
-
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
- if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
- // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
- depth = 1U;
- block_depth = 1U;
- }
-
- morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
- gl_buffer.size(), params.addr);
+ SwizzleFunc(morton_to_gl_fns, params, gl_buffer);
} else {
const auto texture_src_data{Memory::GetPointer(params.addr)};
const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
@@ -929,19 +969,10 @@ void CachedSurface::FlushGLBuffer() {
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
- u32 depth = params.depth;
- u32 block_depth = params.block_depth;
-
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
- if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
- // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
- depth = 1U;
- }
- gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
- gl_buffer.size(), GetAddr());
+ SwizzleFunc(gl_to_morton_fns, params, gl_buffer);
} else {
std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes());
}
@@ -1179,7 +1210,7 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
- FlushRegion(src_params.addr, dst_params.size_in_bytes);
+ FlushRegion(src_params.addr, dst_params.MemorySize());
LoadSurface(dst_surface);
}
@@ -1221,44 +1252,10 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
CopySurface(old_surface, new_surface, copy_pbo.handle);
}
break;
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
case SurfaceParams::SurfaceTarget::Texture3D:
AccurateCopySurface(old_surface, new_surface);
break;
- case SurfaceParams::SurfaceTarget::TextureCubemap: {
- if (old_params.rt.array_mode != 1) {
- // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this
- // yet (array rendering used as a cubemap texture).
- LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode);
- UNREACHABLE();
- return new_surface;
- }
-
- // This seems to be used for render-to-cubemap texture
- ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected");
- ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected");
- ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented");
-
- // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels.
- // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild.
- const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)};
-
- for (std::size_t index = 0; index < new_params.depth; ++index) {
- Surface face_surface{TryGetReservedSurface(old_params)};
- ASSERT_MSG(face_surface, "Unexpected");
-
- if (is_blit) {
- BlitSurface(face_surface, new_surface, read_framebuffer.handle,
- draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index,
- new_params.rt.index, index);
- } else {
- CopySurface(face_surface, new_surface, copy_pbo.handle,
- face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index);
- }
-
- old_params.addr += byte_stride;
- }
- break;
- }
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(new_params.target));
@@ -1266,7 +1263,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
}
return new_surface;
-}
+}
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
return TryGet(addr);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0dd0d90a3..50a7ab47d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -168,6 +168,23 @@ struct SurfaceParams {
}
}
+    /// Reports whether `target` is one of the layered surface targets.
+    /// Texture1DArray, Texture2DArray and TextureCubemap yield true; Texture1D, Texture2D and
+    /// Texture3D yield false. Any other value is logged, hits UNREACHABLE() and yields false.
+    static bool SurfaceTargetIsLayered(SurfaceTarget target) {
+        switch (target) {
+        case SurfaceTarget::Texture1DArray:
+        case SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::TextureCubemap:
+            return true;
+        case SurfaceTarget::Texture1D:
+        case SurfaceTarget::Texture2D:
+        case SurfaceTarget::Texture3D:
+            return false;
+        default:
+            break;
+        }
+        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+        UNREACHABLE();
+        return false;
+    }
+
/**
* Gets the compression factor for the specified PixelFormat. This applies to just the
* "compressed width" and "compressed height", not the overall compression factor of a
@@ -742,6 +759,25 @@ struct SurfaceParams {
return size_in_bytes_gl / 6;
}
+    /// Size of the memory the texture occupies in VRAM, mipmaps included. For layered surfaces
+    /// this is the size of one layer multiplied by `depth`.
+    std::size_t MemorySize() const {
+        const std::size_t inner_size = InnerMemorySize(is_layered);
+        return is_layered ? inner_size * depth : inner_size;
+    }
+
+    /// Size of the memory a single layer of the texture occupies in VRAM, mipmaps included.
+    std::size_t LayerMemorySize() const {
+        constexpr bool layer_only = true;
+        return InnerMemorySize(layer_only);
+    }
+
+    /// Size of one layer of this surface in OpenGL: SizeInBytesRaw(true) split evenly over
+    /// `depth` layers.
+    /// NOTE(review): divides by `depth`; presumably callers guarantee depth >= 1 - confirm.
+    std::size_t LayerSizeGL() const {
+        const auto raw_size = SizeInBytesRaw(true);
+        return raw_size / depth;
+    }
+
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
@@ -782,6 +818,7 @@ struct SurfaceParams {
u32 unaligned_height;
SurfaceTarget target;
u32 max_mip_level;
+ bool is_layered;
// Parameters used for caching
VAddr addr;
@@ -797,6 +834,9 @@ struct SurfaceParams {
u32 layer_stride;
u32 base_layer;
} rt;
+
+private:
+ std::size_t InnerMemorySize(bool layer_only = false) const;
};
}; // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index fe4d1bd83..81ffb24e4 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -6,6 +6,7 @@
#include <set>
#include <string>
#include <string_view>
+#include <unordered_set>
#include <boost/optional.hpp>
#include <fmt/format.h>
@@ -276,7 +277,8 @@ public:
GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
const Tegra::Shader::Header& header)
- : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header} {
+ : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
+ fixed_pipeline_output_attributes_used{} {
BuildRegisterList();
BuildInputList();
}
@@ -480,7 +482,12 @@ public:
std::to_string(static_cast<u32>(attribute)) + ']' +
GetSwizzle(elem) + " = " + src + ';');
} else {
- shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
+ if (attribute == Attribute::Index::PointSize) {
+ fixed_pipeline_output_attributes_used.insert(attribute);
+ shader.AddLine(dest + " = " + src + ';');
+ } else {
+ shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
+ }
}
}
}
@@ -524,6 +531,7 @@ public:
/// Add declarations.
void GenerateDeclarations(const std::string& suffix) {
+ GenerateVertex();
GenerateRegisters(suffix);
GenerateInternalFlags();
GenerateInputAttrs();
@@ -683,6 +691,20 @@ private:
declarations.AddNewLine();
}
+    /// Declares the `out gl_PerVertex` interface block. Does nothing unless the stage is
+    /// ShaderStage::Vertex. The block always contains gl_Position; gl_PointSize is added only
+    /// when PointSize has been recorded in fixed_pipeline_output_attributes_used.
+    void GenerateVertex() {
+        if (stage != Maxwell3D::Regs::ShaderStage::Vertex) {
+            return;
+        }
+        declarations.AddLine("out gl_PerVertex {");
+        ++declarations.scope;
+        declarations.AddLine("vec4 gl_Position;");
+        // Direct set lookup instead of walking every element to find a single key.
+        if (fixed_pipeline_output_attributes_used.count(Attribute::Index::PointSize) != 0) {
+            declarations.AddLine("float gl_PointSize;");
+        }
+        --declarations.scope;
+        declarations.AddLine("};");
+    }
+
/// Generates code representing a temporary (GPR) register.
std::string GetRegister(const Register& reg, unsigned elem) {
if (reg == Register::ZeroIndex) {
@@ -836,6 +858,8 @@ private:
/// Generates code representing the declaration name of an output attribute register.
std::string GetOutputAttribute(Attribute::Index attribute) {
switch (attribute) {
+ case Attribute::Index::PointSize:
+ return "gl_PointSize";
case Attribute::Index::Position:
return "position";
default:
@@ -870,6 +894,7 @@ private:
const Maxwell3D::Regs::ShaderStage& stage;
const std::string& suffix;
const Tegra::Shader::Header& header;
+ std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
};
class GLSLGenerator {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index e883ffb1d..dfb562706 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -19,9 +19,6 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
out += Decompiler::GetCommonDeclarations();
out += R"(
-out gl_PerVertex {
- vec4 gl_Position;
-};
layout (location = 0) out vec4 position;
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index f1b40e7f5..550ca856c 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -142,7 +142,6 @@ void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
const u32 blocks_on_x = div_ceil(width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
- const u32 blocks = blocks_on_x * blocks_on_y * blocks_on_z;
const u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z;
const u32 xy_block_size = gob_size * block_height;
const u32 block_size = xy_block_size * block_depth;
@@ -320,13 +319,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
if (tiled) {
- const u32 gobs_in_x = 64 / bytes_per_pixel;
+ const u32 gobs_in_x = 64;
const u32 gobs_in_y = 8;
const u32 gobs_in_z = 1;
- const u32 aligned_width = Common::AlignUp(width, gobs_in_x);
+ const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gobs_in_x);
const u32 aligned_height = Common::AlignUp(height, gobs_in_y * block_height);
const u32 aligned_depth = Common::AlignUp(depth, gobs_in_z * block_depth);
- return aligned_width * aligned_height * aligned_depth * bytes_per_pixel;
+ return aligned_width * aligned_height * aligned_depth;
} else {
return width * height * depth * bytes_per_pixel;
}