diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 135 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 40 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 7 |
7 files changed, 137 insertions, 79 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index bca014a4a..78ba29fc1 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -155,7 +155,6 @@ void Maxwell3D::ProcessQueryGet() { ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, "Units other than CROP are unimplemented"); - u32 value = Memory::Read32(*address); u64 result = 0; // TODO(Subv): Support the other query variables diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6cd08d28b..af7756266 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -79,6 +79,7 @@ union Attribute { constexpr explicit Attribute(u64 value) : value(value) {} enum class Index : u64 { + PointSize = 6, Position = 7, Attribute_0 = 8, Attribute_31 = 39, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 9c8925383..591ec7998 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -78,6 +78,29 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { } } +std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const { + const u32 compression_factor{GetCompressionFactor(pixel_format)}; + const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; + u32 m_depth = (layer_only ? 1U : depth); + u32 m_width = std::max(1U, width / compression_factor); + u32 m_height = std::max(1U, height / compression_factor); + std::size_t size = Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, m_width, m_height, + m_depth, block_height, block_depth); + u32 m_block_height = block_height; + u32 m_block_depth = block_depth; + std::size_t block_size_bytes = 512 * block_height * block_depth; // 512 is GOB size + for (u32 i = 1; i < max_mip_level; i++) { + m_width = std::max(1U, m_width / 2); + m_height = std::max(1U, m_height / 2); + m_depth = std::max(1U, m_depth / 2); + m_block_height = std::max(1U, m_block_height / 2); + m_block_depth = std::max(1U, m_block_depth / 2); + size += Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, m_width, m_height, m_depth, + m_block_height, m_block_depth); + } + return is_tiled ? Common::AlignUp(size, block_size_bytes) : size; +} + /*static*/ SurfaceParams SurfaceParams::CreateForTexture( const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) { SurfaceParams params{}; @@ -124,6 +147,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { break; } + params.is_layered = SurfaceTargetIsLayered(params.target); params.max_mip_level = config.tic.max_mip_level + 1; params.rt = {}; @@ -150,6 +174,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.target = SurfaceTarget::Texture2D; params.depth = 1; params.max_mip_level = 0; + params.is_layered = false; // Render target specific parameters, not used for caching params.rt.index = static_cast<u32>(index); @@ -182,6 +207,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { params.target = SurfaceTarget::Texture2D; params.depth = 1; params.max_mip_level = 0; + params.is_layered = false; params.rt = {}; params.InitCacheParameters(zeta_address); @@ -361,10 +387,11 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d } } -static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), - SurfaceParams::MaxPixelFormat> - morton_to_gl_fns = { - // clang-format off +using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), + SurfaceParams::MaxPixelFormat>; + +static constexpr GLConversionArray morton_to_gl_fns = { + // clang-format off MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, MortonCopy<true, PixelFormat::ABGR8UI>, @@ -418,13 +445,11 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32FS8>, - // clang-format on + // clang-format on }; -static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), - SurfaceParams::MaxPixelFormat> - gl_to_morton_fns = { - // clang-format off +static constexpr GLConversionArray gl_to_morton_fns = { + // clang-format off MortonCopy<false, PixelFormat::ABGR8U>, MortonCopy<false, PixelFormat::ABGR8S>, MortonCopy<false, PixelFormat::ABGR8UI>, @@ -479,9 +504,35 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32FS8>, - // clang-format on + // clang-format on }; +void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params, + std::vector<u8>& gl_buffer) { + u32 depth = params.depth; + if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { + // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. + depth = 1U; + } + if (params.is_layered) { + u64 offset = 0; + u64 offset_gl = 0; + u64 layer_size = params.LayerMemorySize(); + u64 gl_size = params.LayerSizeGL(); + for (u32 i = 0; i < depth; i++) { + functions[static_cast<std::size_t>(params.pixel_format)]( + params.width, params.block_height, params.height, params.block_depth, 1, + gl_buffer.data() + offset_gl, gl_size, params.addr + offset); + offset += layer_size; + offset_gl += gl_size; + } + } else { + functions[static_cast<std::size_t>(params.pixel_format)]( + params.width, params.block_height, params.height, params.block_depth, depth, + gl_buffer.data(), gl_buffer.size(), params.addr); + } +} + static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { @@ -881,21 +932,10 @@ void CachedSurface::LoadGLBuffer() { gl_buffer.resize(params.size_in_bytes_gl); if (params.is_tiled) { - u32 depth = params.depth; - u32 block_depth = params.block_depth; - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", params.block_width, static_cast<u32>(params.target)); - if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { - // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. - depth = 1U; - block_depth = 1U; - } - - morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( - params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(), - gl_buffer.size(), params.addr); + SwizzleFunc(morton_to_gl_fns, params, gl_buffer); } else { const auto texture_src_data{Memory::GetPointer(params.addr)}; const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; @@ -929,19 +969,10 @@ void CachedSurface::FlushGLBuffer() { const u8* const texture_src_data = Memory::GetPointer(params.addr); ASSERT(texture_src_data); if (params.is_tiled) { - u32 depth = params.depth; - u32 block_depth = params.block_depth; - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", params.block_width, static_cast<u32>(params.target)); - if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { - // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. - depth = 1U; - } - gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( - params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(), - gl_buffer.size(), GetAddr()); + SwizzleFunc(gl_to_morton_fns, params, gl_buffer); } else { std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes()); } @@ -1179,7 +1210,7 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface) { const auto& src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; - FlushRegion(src_params.addr, dst_params.size_in_bytes); + FlushRegion(src_params.addr, dst_params.MemorySize()); LoadSurface(dst_surface); } @@ -1221,44 +1252,10 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, CopySurface(old_surface, new_surface, copy_pbo.handle); } break; + case SurfaceParams::SurfaceTarget::TextureCubemap: case SurfaceParams::SurfaceTarget::Texture3D: AccurateCopySurface(old_surface, new_surface); break; - case SurfaceParams::SurfaceTarget::TextureCubemap: { - if (old_params.rt.array_mode != 1) { - // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this - // yet (array rendering used as a cubemap texture). - LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode); - UNREACHABLE(); - return new_surface; - } - - // This seems to be used for render-to-cubemap texture - ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected"); - ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected"); - ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented"); - - // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels. - // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild. - const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)}; - - for (std::size_t index = 0; index < new_params.depth; ++index) { - Surface face_surface{TryGetReservedSurface(old_params)}; - ASSERT_MSG(face_surface, "Unexpected"); - - if (is_blit) { - BlitSurface(face_surface, new_surface, read_framebuffer.handle, - draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index, - new_params.rt.index, index); - } else { - CopySurface(face_surface, new_surface, copy_pbo.handle, - face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index); - } - - old_params.addr += byte_stride; - } - break; - } default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast<u32>(new_params.target)); @@ -1266,7 +1263,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, } return new_surface; -} +} // namespace OpenGL Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { return TryGet(addr); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0dd0d90a3..50a7ab47d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -168,6 +168,23 @@ struct SurfaceParams { } } + static bool SurfaceTargetIsLayered(SurfaceTarget target) { + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture3D: + return false; + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + return true; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); + UNREACHABLE(); + return false; + } + } + /** * Gets the compression factor for the specified PixelFormat. This applies to just the * "compressed width" and "compressed height", not the overall compression factor of a @@ -742,6 +759,25 @@ struct SurfaceParams { return size_in_bytes_gl / 6; } + /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps. + std::size_t MemorySize() const { + std::size_t size = InnerMemorySize(is_layered); + if (is_layered) + return size * depth; + return size; + } + + /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including + /// mipmaps. + std::size_t LayerMemorySize() const { + return InnerMemorySize(true); + } + + /// Returns the size of a layer of this surface in OpenGL. + std::size_t LayerSizeGL() const { + return SizeInBytesRaw(true) / depth; + } + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry); @@ -782,6 +818,7 @@ struct SurfaceParams { u32 unaligned_height; SurfaceTarget target; u32 max_mip_level; + bool is_layered; // Parameters used for caching VAddr addr; @@ -797,6 +834,9 @@ struct SurfaceParams { u32 layer_stride; u32 base_layer; } rt; + +private: + std::size_t InnerMemorySize(bool layer_only = false) const; }; }; // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index fe4d1bd83..81ffb24e4 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -6,6 +6,7 @@ #include <set> #include <string> #include <string_view> +#include <unordered_set> #include <boost/optional.hpp> #include <fmt/format.h> @@ -276,7 +277,8 @@ public: GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, const Tegra::Shader::Header& header) - : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header} { + : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, + fixed_pipeline_output_attributes_used{} { BuildRegisterList(); BuildInputList(); } @@ -480,7 +482,12 @@ public: std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) + " = " + src + ';'); } else { - shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); + if (attribute == Attribute::Index::PointSize) { + fixed_pipeline_output_attributes_used.insert(attribute); + shader.AddLine(dest + " = " + src + ';'); + } else { + shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); + } } } } @@ -524,6 +531,7 @@ public: /// Add declarations. void GenerateDeclarations(const std::string& suffix) { + GenerateVertex(); GenerateRegisters(suffix); GenerateInternalFlags(); GenerateInputAttrs(); @@ -683,6 +691,20 @@ private: declarations.AddNewLine(); } + void GenerateVertex() { + if (stage != Maxwell3D::Regs::ShaderStage::Vertex) + return; + declarations.AddLine("out gl_PerVertex {"); + ++declarations.scope; + declarations.AddLine("vec4 gl_Position;"); + for (auto& o : fixed_pipeline_output_attributes_used) { + if (o == Attribute::Index::PointSize) + declarations.AddLine("float gl_PointSize;"); + } + --declarations.scope; + declarations.AddLine("};"); + } + /// Generates code representing a temporary (GPR) register. std::string GetRegister(const Register& reg, unsigned elem) { if (reg == Register::ZeroIndex) { @@ -836,6 +858,8 @@ private: /// Generates code representing the declaration name of an output attribute register. std::string GetOutputAttribute(Attribute::Index attribute) { switch (attribute) { + case Attribute::Index::PointSize: + return "gl_PointSize"; case Attribute::Index::Position: return "position"; default: @@ -870,6 +894,7 @@ private: const Maxwell3D::Regs::ShaderStage& stage; const std::string& suffix; const Tegra::Shader::Header& header; + std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; }; class GLSLGenerator { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index e883ffb1d..dfb562706 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -19,9 +19,6 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup) { out += Decompiler::GetCommonDeclarations(); out += R"( -out gl_PerVertex { - vec4 gl_Position; -}; layout (location = 0) out vec4 position; diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index f1b40e7f5..550ca856c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -142,7 +142,6 @@ void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, const u32 blocks_on_x = div_ceil(width, block_x_elements); const u32 blocks_on_y = div_ceil(height, block_y_elements); const u32 blocks_on_z = div_ceil(depth, block_z_elements); - const u32 blocks = blocks_on_x * blocks_on_y * blocks_on_z; const u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z; const u32 xy_block_size = gob_size * block_height; const u32 block_size = xy_block_size * block_depth; @@ -320,13 +319,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { - const u32 gobs_in_x = 64 / bytes_per_pixel; + const u32 gobs_in_x = 64; const u32 gobs_in_y = 8; const u32 gobs_in_z = 1; - const u32 aligned_width = Common::AlignUp(width, gobs_in_x); + const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gobs_in_x); const u32 aligned_height = Common::AlignUp(height, gobs_in_y * block_height); const u32 aligned_depth = Common::AlignUp(depth, gobs_in_z * block_depth); - return aligned_width * aligned_height * aligned_depth * bytes_per_pixel; + return aligned_width * aligned_height * aligned_depth; } else { return width * height * depth * bytes_per_pixel; } |
