diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.h | 60 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 130 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 85 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 32 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 51 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 1166 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 9 |
13 files changed, 951 insertions, 682 deletions
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp new file mode 100644 index 000000000..7992b82c4 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_global_cache.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/utils.h" + +namespace OpenGL { + +CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { + buffer.Create(); + // Bind and unbind the buffer so it gets allocated by the driver + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); +} + +GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) + : RasterizerCache{rasterizer} {} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h new file mode 100644 index 000000000..406a735bc --- /dev/null +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -0,0 +1,60 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <glad/glad.h> + +#include "common/common_types.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +namespace GLShader { +class GlobalMemoryEntry; +} // namespace GLShader + +class RasterizerOpenGL; +class CachedGlobalRegion; +using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; + +class CachedGlobalRegion final : public RasterizerCacheObject { +public: + explicit CachedGlobalRegion(VAddr addr, u32 size); + + /// Gets the address of the shader in guest memory, required for cache management + VAddr GetAddr() const { + return addr; + } + + /// Gets the size of the shader in guest memory, required for cache management + std::size_t GetSizeInBytes() const { + return size; + } + + /// Gets the GL program handle for the buffer + GLuint GetBufferHandle() const { + return buffer.handle; + } + + // TODO(Rodrigo): When global memory is written (STG), implement flushing + void Flush() override { + UNIMPLEMENTED(); + } + +private: + VAddr addr{}; + u32 size{}; + + OGLBuffer buffer; +}; + +class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { +public: + explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9e93bd609..73567eb8c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -79,9 +79,29 @@ struct DrawParameters { } }; +struct FramebufferCacheKey { + bool is_single_buffer = false; + bool stencil_enable = false; + + std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{}; + u32 colors_count = 0; + + GLuint zeta = 0; + + auto Tie() const { + return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, + zeta); + } + + bool operator<(const FramebufferCacheKey& rhs) const { + return Tie() < rhs.Tie(); + } +}; + RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE) { + buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -90,9 +110,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo OpenGLState::ApplyDefaultState(); - // Create render framebuffer - framebuffer.Create(); - shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); @@ -276,7 +293,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. @@ -359,6 +376,46 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } SyncClipEnabled(clip_distances); + + gpu.dirty_flags.shaders = false; +} + +void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, + OpenGLState& current_state) { + const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); + auto& framebuffer = entry->second; + + if (is_cache_miss) + framebuffer.Create(); + + current_state.draw.draw_framebuffer = framebuffer.handle; + current_state.ApplyFramebufferState(); + + if (!is_cache_miss) + return; + + if (fbkey.is_single_buffer) { + if (fbkey.color_attachments[0] != GL_NONE) { + glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0], + 0); + } + glDrawBuffer(fbkey.color_attachments[0]); + } else { + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + if (fbkey.colors[index]) { + glFramebufferTexture(GL_DRAW_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), + fbkey.colors[index], 0); + } + } + glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data()); + } + + if (fbkey.zeta) { + GLenum zeta_attachment = + fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; + glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0); + } } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -444,10 +501,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0); // Bind the framebuffer surfaces - current_state.draw.draw_framebuffer = framebuffer.handle; - current_state.ApplyFramebufferState(); current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; + FramebufferCacheKey fbkey; + if (using_color_fb) { if (single_color_target) { // Used when just a single color attachment is enabled, e.g. for clearing a color buffer @@ -463,14 +520,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; } - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D, - color_surface != nullptr ? color_surface->Texture().handle : 0, 0); - glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target)); + fbkey.is_single_buffer = true; + fbkey.color_attachments[0] = + GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target); + fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0; } else { // Multiple color attachments are enabled - std::array<GLenum, Maxwell::NumRenderTargets> buffers; for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); @@ -485,22 +540,17 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us color_surface->GetSurfaceParams().srgb_conversion; } - buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), - GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0, - 0); + fbkey.color_attachments[index] = + GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); + fbkey.colors[index] = + color_surface != nullptr ? color_surface->Texture().handle : 0; } - glDrawBuffers(regs.rt_control.count, buffers.data()); + fbkey.is_single_buffer = false; + fbkey.colors_count = regs.rt_control.count; } } else { - // No color attachments are enabled - zero out all of them - for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D, - 0, 0); - } - glDrawBuffer(GL_NONE); + // No color attachments are enabled - leave them as zero + fbkey.is_single_buffer = true; } if (depth_surface) { @@ -508,22 +558,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us // the shader doesn't actually write to it. depth_surface->MarkAsModified(true, res_cache); - if (regs.stencil_enable) { - // Attach both depth and stencil - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->Texture().handle, 0); - } else { - // Attach depth - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->Texture().handle, 0); - // Clear stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - } - } else { - // Clear both depth and stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); + fbkey.zeta = depth_surface->Texture().handle; + fbkey.stencil_enable = regs.stencil_enable; } + + SetupCachedFramebuffer(fbkey, current_state); + SyncViewport(current_state); } @@ -723,6 +763,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); + global_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); } @@ -976,8 +1017,11 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); Surface surface = res_cache.GetTextureSurface(texture, entry); if (surface != nullptr) { - state.texture_units[current_bindpoint].texture = surface->Texture().handle; - state.texture_units[current_bindpoint].target = surface->Target(); + const GLuint handle = + entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; + const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); + state.texture_units[current_bindpoint].texture = handle; + state.texture_units[current_bindpoint].target = target; state.texture_units[current_bindpoint].swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); state.texture_units[current_bindpoint].swizzle.g = diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 988fa3e27..a53edee6d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -40,6 +41,7 @@ namespace OpenGL { struct ScreenInfo; struct DrawParameters; +struct FramebufferCacheKey; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: @@ -65,6 +67,10 @@ public: static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); + static constexpr std::size_t MaxGlobalMemorySize = 0x10000; + static_assert(MaxGlobalMemorySize % sizeof(float) == 0, + "The maximum size of a global memory must be a multiple of the size of float"); + private: class SamplerInfo { public: @@ -104,7 +110,7 @@ private: bool using_depth_fb = true, bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); - /* + /** * Configures the current constbuffers to use for the draw command. * @param stage The shader stage to configure buffers for. * @param shader The shader object that contains the specified stage. @@ -114,7 +120,7 @@ private: u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, GLenum primitive_mode, u32 current_bindpoint); - /* + /** * Configures the current textures to use for the draw command. * @param stage The shader stage to configure textures for. * @param shader The shader object that contains the specified stage. @@ -184,6 +190,7 @@ private: RasterizerCacheOpenGL res_cache; ShaderCacheOpenGL shader_cache; + GlobalRegionCacheOpenGL global_cache; Core::Frontend::EmuWindow& emu_window; @@ -195,11 +202,12 @@ private: OGLVertexArray> vertex_array_cache; + std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; + std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - OGLFramebuffer framebuffer; PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; @@ -214,6 +222,8 @@ private: void SetupShaders(GLenum primitive_mode); + void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state); + enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw = AccelDraw::Disabled; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5f4cdd119..bff0c65cd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -44,6 +44,17 @@ struct FormatTuple { bool compressed; }; +static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) { + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); + if (max_mip_level == 1) { + glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0); + } +} + void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)}; @@ -101,8 +112,18 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), params.srgb_conversion); + + if (params.pixel_format == PixelFormat::R16U && config.tsc.depth_compare_enabled) { + // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled, + // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also + // causes GetFormatType to properly return 'Depth' below). + params.pixel_format = PixelFormat::Z16; + } + params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); + UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled); + params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); params.unaligned_height = config.tic.Height(); @@ -257,7 +278,7 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U - {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, false}, // R11FG11FB10F {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI @@ -278,8 +299,6 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, true}, // BC6H_SF16 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U - {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // G8R8S {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F @@ -433,7 +452,7 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface, const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes); glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); - glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW); + glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY); if (source_format.compressed) { glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment, static_cast<GLsizei>(src_params.size_in_bytes), nullptr); @@ -522,6 +541,9 @@ CachedSurface::CachedSurface(const SurfaceParams& params) glActiveTexture(GL_TEXTURE0); const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); + gl_internal_format = format_tuple.internal_format; + gl_is_compressed = format_tuple.compressed; + if (!format_tuple.compressed) { // Only pre-create the texture for non-compressed textures. switch (params.target) { @@ -550,15 +572,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params) } } - glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL, - params.max_mip_level - 1); - if (params.max_mip_level == 1) { - glTexParameterf(SurfaceTargetToGL(params.target), GL_TEXTURE_LOD_BIAS, 1000.0); - } + ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level); LabelGLObject(GL_TEXTURE, texture.handle, params.addr, SurfaceParams::SurfaceTargetName(params.target)); @@ -610,18 +624,6 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo } } -static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { - constexpr auto bpp{GetBytesPerPixel(PixelFormat::G8R8U)}; - for (std::size_t y = 0; y < height; ++y) { - for (std::size_t x = 0; x < width; ++x) { - const std::size_t offset{bpp * (y * width + x)}; - const u8 temp{data[offset]}; - data[offset] = data[offset + 1]; - data[offset + 1] = temp; - } - } -} - /** * Helper function to perform software conversion (as needed) when loading a buffer from Switch * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with @@ -654,12 +656,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. ConvertS8Z24ToZ24S8(data, width, height, false); break; - - case PixelFormat::G8R8U: - case PixelFormat::G8R8S: - // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8. - ConvertG8R8ToR8G8(data, width, height); - break; } } @@ -671,8 +667,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, u32 width, u32 height) { switch (pixel_format) { - case PixelFormat::G8R8U: - case PixelFormat::G8R8S: case PixelFormat::ASTC_2D_4X4: case PixelFormat::ASTC_2D_8X8: case PixelFormat::ASTC_2D_4X4_SRGB: @@ -876,6 +870,31 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } +void CachedSurface::EnsureTextureView() { + if (texture_view.handle != 0) + return; + // Compressed texture are not being created with immutable storage + UNIMPLEMENTED_IF(gl_is_compressed); + + const GLenum target{TargetLayer()}; + + texture_view.Create(); + glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0, + params.max_mip_level, 0, 1); + + OpenGLState cur_state = OpenGLState::GetCurState(); + const auto& old_tex = cur_state.texture_units[0]; + SCOPE_EXIT({ + cur_state.texture_units[0] = old_tex; + cur_state.Apply(); + }); + cur_state.texture_units[0].texture = texture_view.handle; + cur_state.texture_units[0].target = target; + cur_state.Apply(); + + ApplyTextureDefaults(target, params.max_mip_level); +} + MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { if (params.type == SurfaceType::Fill) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index c710aa245..7223700c4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -293,10 +293,31 @@ public: return texture; } + const OGLTexture& TextureLayer() { + if (params.is_layered) { + return Texture(); + } + EnsureTextureView(); + return texture_view; + } + GLenum Target() const { return gl_target; } + GLenum TargetLayer() const { + using VideoCore::Surface::SurfaceTarget; + switch (params.target) { + case SurfaceTarget::Texture1D: + return GL_TEXTURE_1D_ARRAY; + case SurfaceTarget::Texture2D: + return GL_TEXTURE_2D_ARRAY; + case SurfaceTarget::TextureCubemap: + return GL_TEXTURE_CUBE_MAP_ARRAY; + } + return Target(); + } + const SurfaceParams& GetSurfaceParams() const { return params; } @@ -311,11 +332,16 @@ public: private: void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); + void EnsureTextureView(); + OGLTexture texture; + OGLTexture texture_view; std::vector<std::vector<u8>> gl_buffer; - SurfaceParams params; - GLenum gl_target; - std::size_t cached_size_in_bytes; + SurfaceParams params{}; + GLenum gl_target{}; + GLenum gl_internal_format{}; + bool gl_is_compressed{}; + std::size_t cached_size_in_bytes{}; }; class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 038b25c75..c785fffa3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -2,7 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <boost/functional/hash.hpp> #include "common/assert.h" +#include "common/hash.h" #include "core/core.h" #include "core/memory.h" #include "video_core/engines/maxwell_3d.h" @@ -66,14 +68,17 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) // stage here. setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); case Maxwell::ShaderProgram::VertexB: + CalculateProperties(); program_result = GLShader::GenerateVertexShader(setup); gl_type = GL_VERTEX_SHADER; break; case Maxwell::ShaderProgram::Geometry: + CalculateProperties(); program_result = GLShader::GenerateGeometryShader(setup); gl_type = GL_GEOMETRY_SHADER; break; case Maxwell::ShaderProgram::Fragment: + CalculateProperties(); program_result = GLShader::GenerateFragmentShader(setup); gl_type = GL_FRAGMENT_SHADER; break; @@ -140,9 +145,53 @@ GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, return target_program.handle; }; +static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { + // sched instructions appear once every 4 instructions. + static constexpr std::size_t SchedPeriod = 4; + const std::size_t absolute_offset = offset - main_offset; + return (absolute_offset % SchedPeriod) == 0; +} + +static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { + constexpr std::size_t start_offset = 10; + std::size_t offset = start_offset; + std::size_t size = start_offset * sizeof(u64); + while (offset < program.size()) { + const u64 inst = program[offset]; + if (!IsSchedInstruction(offset, start_offset)) { + if (inst == 0 || (inst >> 52) == 0x50b) { + break; + } + } + size += sizeof(inst); + offset++; + } + return size; +} + +void CachedShader::CalculateProperties() { + setup.program.real_size = CalculateProgramSize(setup.program.code); + setup.program.real_size_b = 0; + setup.program.unique_identifier = Common::CityHash64( + reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size); + if (program_type == Maxwell::ShaderProgram::VertexA) { + std::size_t seed = 0; + boost::hash_combine(seed, setup.program.unique_identifier); + setup.program.real_size_b = CalculateProgramSize(setup.program.code_b); + const u64 identifier_b = Common::CityHash64( + reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b); + boost::hash_combine(seed, identifier_b); + setup.program.unique_identifier = static_cast<u64>(seed); + } +} + ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {} Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { + if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { + return last_shaders[static_cast<u32>(program)]; + } + const VAddr program_addr{GetShaderAddress(program)}; // Look up shader in the cache based on address @@ -154,7 +203,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { Register(shader); } - return shader; + return last_shaders[static_cast<u32>(program)] = shader; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 08f470de3..768747968 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -4,6 +4,7 @@ #pragma once +#include <array> #include <map> #include <memory> @@ -67,6 +68,7 @@ public: 6, "ShaderTrianglesAdjacency"); default: UNREACHABLE_MSG("Unknown primitive mode."); + return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints"); } } @@ -81,6 +83,8 @@ private: GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology, u32 max_vertices, const std::string& debug_name); + void CalculateProperties(); + VAddr addr; std::size_t shader_length; Maxwell::ShaderProgram program_type; @@ -112,6 +116,9 @@ public: /// Gets the current specified shader stage program Shader GetStageProgram(Maxwell::ShaderProgram program); + +private: + std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0c1632bd1..1bb09e61b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -50,6 +50,14 @@ public: using std::runtime_error::runtime_error; }; +/// Generates code to use for a swizzle operation. +static std::string GetSwizzle(u64 elem) { + ASSERT(elem <= 3); + std::string swizzle = "."; + swizzle += "xyzw"[elem]; + return swizzle; +} + /// Translate topology static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { @@ -201,14 +209,53 @@ private: } }; +template <typename T> +class ShaderScopedScope { +public: + explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr) + : writer(writer), end_expr(std::move(end_expr)) { + + if (begin_expr.empty()) { + writer.AddLine('{'); + } else { + writer.AddExpression(begin_expr); + writer.AddLine(" {"); + } + ++writer.scope; + } + + ShaderScopedScope(const ShaderScopedScope&) = delete; + + ~ShaderScopedScope() { + --writer.scope; + if (end_expr.empty()) { + writer.AddLine('}'); + } else { + writer.AddExpression("} "); + writer.AddExpression(end_expr); + writer.AddLine(';'); + } + } + + ShaderScopedScope& operator=(const ShaderScopedScope&) = delete; + +private: + T& writer; + std::string end_expr; +}; + class ShaderWriter { public: - void AddLine(std::string_view text) { + void AddExpression(std::string_view text) { DEBUG_ASSERT(scope >= 0); if (!text.empty()) { AppendIndentation(); } shader_source += text; + } + + void AddLine(std::string_view text) { + AddExpression(text); AddNewLine(); } @@ -228,6 +275,11 @@ public: return std::move(shader_source); } + ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {}, + std::string end_expr = {}) { + return ShaderScopedScope(*this, begin_expr, end_expr); + } + int scope = 0; private: @@ -295,6 +347,15 @@ public: BuildInputList(); } + void SetConditionalCodesFromExpression(const std::string& expresion) { + SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0"); + LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete."); + } + + void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) { + SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem))); + } + /** * Returns code that does an integer size conversion for the specified size. * @param value Value to perform integer size conversion on. @@ -311,7 +372,8 @@ public: // Default - do nothing return value; default: - UNIMPLEMENTED_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); + UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); + return value; } } @@ -348,14 +410,24 @@ public: * @param dest_num_components Number of components in the destination. * @param value_num_components Number of components in the value. * @param is_saturated Optional, when True, saturates the provided value. + * @param sets_cc Optional, when True, sets the corresponding values to the implemented + * condition flags. * @param dest_elem Optional, the destination element to use for the operation. */ void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, u64 dest_num_components, u64 value_num_components, - bool is_saturated = false, u64 dest_elem = 0, bool precise = false) { - - SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, - dest_num_components, value_num_components, dest_elem, precise); + bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0, + bool precise = false) { + const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value; + SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem, + precise); + if (sets_cc) { + if (reg == Register::ZeroIndex) { + SetConditionalCodesFromExpression(clamped_value); + } else { + SetConditionalCodesFromRegister(reg, dest_elem); + } + } } /** @@ -366,25 +438,29 @@ public: * @param dest_num_components Number of components in the destination. * @param value_num_components Number of components in the value. * @param is_saturated Optional, when True, saturates the provided value. + * @param sets_cc Optional, when True, sets the corresponding values to the implemented + * condition flags. * @param dest_elem Optional, the destination element to use for the operation. * @param size Register size to use for conversion instructions. */ void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, const std::string& value, u64 dest_num_components, u64 value_num_components, bool is_saturated = false, - u64 dest_elem = 0, Register::Size size = Register::Size::Word, - bool sets_cc = false) { + bool sets_cc = false, u64 dest_elem = 0, + Register::Size size = Register::Size::Word) { UNIMPLEMENTED_IF(is_saturated); - + const std::string final_value = ConvertIntegerSize(value, size); const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; - SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', - dest_num_components, value_num_components, dest_elem, false); + SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components, + value_num_components, dest_elem, false); if (sets_cc) { - const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; - SetInternalFlag(InternalFlag::ZeroFlag, zero_condition); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete."); + if (reg == Register::ZeroIndex) { + SetConditionalCodesFromExpression(final_value); + } else { + SetConditionalCodesFromRegister(reg, dest_elem); + } } } @@ -417,10 +493,10 @@ public: // pack. I couldn't test this on hardware but it shouldn't really matter since most // of the time when a Mrg_* flag is used both components will be mirrored. That // being said, it deserves a test. - return "((" + GetRegisterAsInteger(reg, 0, false) + + return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))"; case Tegra::Shader::HalfMerge::Mrg_H1: - return "((" + GetRegisterAsInteger(reg, 0, false) + + return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))"; default: UNREACHABLE(); @@ -574,6 +650,7 @@ public: return "floatBitsToInt(" + value + ')'; } else { UNREACHABLE(); + return value; } } @@ -816,14 +893,12 @@ private: } if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + // This avoids optimizations of constant propagation and keeps the code as the original // Sadly using the precise keyword causes "linking" errors on fragment shaders. shader.AddLine("precise float tmp = " + src + ';'); shader.AddLine(dest + " = tmp;"); - --shader.scope; - shader.AddLine('}'); } else { shader.AddLine(dest + " = " + src + ';'); } @@ -878,7 +953,7 @@ private: case Attribute::Index::FrontFacing: // TODO(Subv): Find out what the values are for the other elements. ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); - return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))"; + return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))"; default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; @@ -962,14 +1037,6 @@ private: } } - /// Generates code to use for a swizzle operation. - static std::string GetSwizzle(u64 elem) { - ASSERT(elem <= 3); - std::string swizzle = "."; - swizzle += "xyzw"[elem]; - return swizzle; - } - ShaderWriter& shader; ShaderWriter& declarations; std::vector<GLSLRegister> regs; @@ -1231,7 +1298,7 @@ private: void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, const std::string& op_b, Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate) { + Tegra::Shader::Pred predicate, const bool set_cc) { std::string result{}; switch (logic_op) { case LogicOperation::And: { @@ -1255,7 +1322,7 @@ private: } if (dest != Tegra::Shader::Register::ZeroIndex) { - regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); + regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); } using Tegra::Shader::PredicateResultMode; @@ -1275,7 +1342,8 @@ private: } void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b, - const std::string& op_c, const std::string& imm_lut) { + const std::string& op_c, const std::string& imm_lut, + const bool set_cc) { if (dest == Tegra::Shader::Register::ZeroIndex) { return; } @@ -1298,18 +1366,10 @@ private: result += ')'; - regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); + regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); } - void WriteTexsInstruction(const Instruction& instr, const std::string& coord, - const std::string& texture) { - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; - shader.AddLine(coord); - shader.AddLine("vec4 texture_tmp = " + texture + ';'); - + void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 @@ -1321,19 +1381,49 @@ private: if (written_components < 2) { // Write the first two swizzle components to gpr0 and gpr0+1 - regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false, written_components % 2); } else { ASSERT(instr.texs.HasTwoDestinations()); // Write the rest of the swizzle components to gpr28 and gpr28+1 - regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false, written_components % 2); } ++written_components; } - --shader.scope; - shader.AddLine('}'); + } + + void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) { + // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half + // float instruction). + + std::array<std::string, 4> components; + u32 written_components = 0; + + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + components[written_components++] = texture + GetSwizzle(component); + } + if (written_components == 0) + return; + + const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) { + return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')'; + }; + + regs.SetRegisterToHalfFloat( + instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1), + Tegra::Shader::HalfMerge::H0_H1, 1, 1); + + if (written_components > 2) { + ASSERT(instr.texs.HasTwoDestinations()); + regs.SetRegisterToHalfFloat( + instr.gpr28, 0, + BuildComponent(components[2], components[3], written_components > 3), + Tegra::Shader::HalfMerge::H0_H1, 1, 1); + } } static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { @@ -1356,12 +1446,10 @@ private: * top. */ void EmitPushToFlowStack(u32 target) { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;"); shader.AddLine("flow_stack_top++;"); - --shader.scope; - shader.AddLine('}'); } /* @@ -1369,13 +1457,11 @@ private: * popped address and decrementing the stack top. */ void EmitPopFromFlowStack() { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + shader.AddLine("flow_stack_top--;"); shader.AddLine("jmp_to = flow_stack[flow_stack_top];"); shader.AddLine("break;"); - --shader.scope; - shader.AddLine('}'); } /// Writes the output values from a fragment shader to the corresponding GLSL output variables. @@ -1487,6 +1573,252 @@ private: } } + std::pair<size_t, std::string> ValidateAndGetCoordinateElement( + const Tegra::Shader::TextureType texture_type, const bool depth_compare, + const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) { + const size_t coord_count = TextureCoordinates(texture_type); + + size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); + const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); + if (total_coord_count > max_coords || total_reg_count > max_inputs) { + UNIMPLEMENTED_MSG("Unsupported Texture operation"); + total_coord_count = std::min(total_coord_count, max_coords); + } + // 1D.DC opengl is using a vec3 but 2nd component is ignored later. + total_coord_count += + (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) + ? 1 + : 0; + + constexpr std::array<const char*, 5> coord_container{ + {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(", + "vec4 coord = vec4("}}; + + return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]); + } + + std::string GetTextureCode(const Tegra::Shader::Instruction& instr, + const Tegra::Shader::TextureType texture_type, + const Tegra::Shader::TextureProcessMode process_mode, + const bool depth_compare, const bool is_array, + const size_t bias_offset) { + + if ((texture_type == Tegra::Shader::TextureType::Texture3D && + (is_array || depth_compare)) || + (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && + depth_compare)) { + UNIMPLEMENTED_MSG("This method is not supported."); + } + + const std::string sampler = + GetSampler(instr.sampler, texture_type, is_array, depth_compare); + + const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ || + process_mode == Tegra::Shader::TextureProcessMode::LL || + process_mode == Tegra::Shader::TextureProcessMode::LLA; + + // LOD selection (either via bias or explicit textureLod) not supported in GL for + // sampler2DArrayShadow and samplerCubeArrayShadow. + const bool gl_lod_supported = !( + (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || + (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); + + const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture("; + std::string texture = read_method + sampler + ", coord"; + + UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None && + !gl_lod_supported); + + if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) { + if (process_mode == Tegra::Shader::TextureProcessMode::LZ) { + texture += ", 0.0"; + } else { + // If present, lod or bias are always stored in the register indexed by the + // gpr20 + // field with an offset depending on the usage of the other registers + texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset); + } + } + texture += ")"; + return texture; + } + + std::pair<std::string, std::string> GetTEXCode( + const Instruction& instr, const Tegra::Shader::TextureType texture_type, + const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, + const bool is_array) { + const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && + process_mode != Tegra::Shader::TextureProcessMode::LZ); + + const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::string coord = coord_dcl; + for (size_t i = 0; i < coord_count;) { + coord += regs.GetRegisterAsFloat(coord_register + i); + ++i; + if (i != coord_count) { + coord += ','; + } + } + // 1D.DC in opengl the 2nd component is ignored. + if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) { + coord += ",0.0"; + } + if (is_array) { + coord += ',' + regs.GetRegisterAsInteger(array_register); + } + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 + // or in the next register if lod or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + coord += ',' + regs.GetRegisterAsFloat(depth_register); + } + coord += ");"; + return std::make_pair( + coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0)); + } + + std::pair<std::string, std::string> GetTEXSCode( + const Instruction& instr, const Tegra::Shader::TextureType texture_type, + const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, + const bool is_array) { + const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && + process_mode != Tegra::Shader::TextureProcessMode::LZ); + + const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + const u64 last_coord_register = + (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) + ? static_cast<u64>(instr.gpr20.Value()) + : coord_register + 1; + + std::string coord = coord_dcl; + for (size_t i = 0; i < coord_count; ++i) { + const bool last = (i == (coord_count - 1)) && (coord_count > 1); + coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i); + if (i < coord_count - 1) { + coord += ','; + } + } + + if (is_array) { + coord += ',' + regs.GetRegisterAsInteger(array_register); + } + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 + // or in the next register if lod or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + coord += ',' + regs.GetRegisterAsFloat(depth_register); + } + coord += ");"; + + return std::make_pair(coord, + GetTextureCode(instr, texture_type, process_mode, depth_compare, + is_array, (coord_count > 2 ? 1 : 0))); + } + + std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr, + const Tegra::Shader::TextureType texture_type, + const bool depth_compare, const bool is_array) { + + const size_t coord_count = TextureCoordinates(texture_type); + const size_t total_coord_count = coord_count + (is_array ? 1 : 0); + const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); + + constexpr std::array<const char*, 5> coord_container{ + {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}}; + + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::string coord = coord_container[total_coord_count]; + for (size_t i = 0; i < coord_count;) { + coord += regs.GetRegisterAsFloat(coord_register + i); + ++i; + if (i != coord_count) { + coord += ','; + } + } + + if (is_array) { + coord += ',' + regs.GetRegisterAsInteger(array_register); + } + coord += ");"; + + const std::string sampler = + GetSampler(instr.sampler, texture_type, is_array, depth_compare); + + std::string texture = "textureGather(" + sampler + ", coord, "; + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 + texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')'; + } else { + texture += std::to_string(instr.tld4.component) + ')'; + } + return std::make_pair(coord, texture); + } + + std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr, + const Tegra::Shader::TextureType texture_type, + const bool is_array) { + + const size_t coord_count = TextureCoordinates(texture_type); + const size_t total_coord_count = coord_count + (is_array ? 1 : 0); + const bool lod_enabled = + instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL; + + constexpr std::array<const char*, 4> coord_container{ + {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}}; + + std::string coord = coord_container[total_coord_count]; + + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + + // if is array gpr20 is used + const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); + + const u64 last_coord_register = + ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array + ? static_cast<u64>(instr.gpr20.Value()) + : coord_register + 1; + + for (size_t i = 0; i < coord_count; ++i) { + const bool last = (i == (coord_count - 1)) && (coord_count > 1); + coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i); + if (i < coord_count - 1) { + coord += ','; + } + } + if (is_array) { + coord += ',' + regs.GetRegisterAsInteger(array_register); + } + coord += ");"; + + const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); + + std::string texture = "texelFetch(" + sampler + ", coords"; + + if (lod_enabled) { + // When lod is used always is in grp20 + texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')'; + } else { + texture += ", 0)"; + } + return std::make_pair(coord, texture); + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -1559,33 +1891,44 @@ private: UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", instr.fmul.tab5cb8_2.Value()); - UNIMPLEMENTED_IF_MSG(instr.fmul.tab5c68_1 != 0, - "FMUL tab5cb8_1({}) is not implemented", - instr.fmul.tab5c68_1.Value()); UNIMPLEMENTED_IF_MSG( instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", instr.fmul.tab5c68_0 .Value()); // SMO typical sends 1 here which seems to be the default - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FMUL is not implemented"); op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, - instr.alu.saturate_d, 0, true); + std::string postfactor_op; + if (instr.fmul.postfactor != 0) { + s8 postfactor = static_cast<s8>(instr.fmul.postfactor); + + // postfactor encoded as 3-bit 1's complement in instruction, + // interpreted with below logic. + if (postfactor >= 4) { + postfactor = 7 - postfactor; + } else { + postfactor = 0 - postfactor; + } + + if (postfactor > 0) { + postfactor_op = " * " + std::to_string(1 << postfactor); + } else { + postfactor_op = " / " + std::to_string(1 << -postfactor); + } + } + + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1, + instr.alu.saturate_d, instr.generates_cc, 0, true); break; } case OpCode::Id::FADD_C: case OpCode::Id::FADD_R: case OpCode::Id::FADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FADD is not implemented"); - op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, instr.generates_cc, 0, true); break; } case OpCode::Id::MUFU: { @@ -1593,31 +1936,31 @@ private: switch (instr.sub_op) { case SubOp::Cos: regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Sin: regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Ex2: regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Lg2: regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Rcp: regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Rsq: regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Sqrt: regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; default: UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", @@ -1628,8 +1971,9 @@ private: case OpCode::Id::FMNMX_C: case OpCode::Id::FMNMX_R: case OpCode::Id::FMNMX_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FMNMX is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.generates_cc, + "Condition codes generation in FMNMX is partially implemented"); op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); @@ -1640,7 +1984,7 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, '(' + condition + ") ? min(" + parameters + ") : max(" + parameters + ')', - 1, 1, false, 0, true); + 1, 1, false, instr.generates_cc, 0, true); break; } case OpCode::Id::RRO_C: @@ -1665,18 +2009,16 @@ private: break; } case OpCode::Id::FMUL32_IMM: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in FMUL32 is not implemented"); - - regs.SetRegisterToFloat(instr.gpr0, 0, - regs.GetRegisterAsFloat(instr.gpr8) + " * " + - GetImmediate32(instr), - 1, 1, instr.fmul32.saturate, 0, true); + regs.SetRegisterToFloat( + instr.gpr0, 0, + regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1, + instr.fmul32.saturate, instr.op_32.generates_cc, 0, true); break; } case OpCode::Id::FADD32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in FADD32I is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.op_32.generates_cc, + "Condition codes generation in FADD32I is partially implemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); std::string op_b = GetImmediate32(instr); @@ -1697,7 +2039,8 @@ private: op_b = "-(" + op_b + ')'; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, + instr.op_32.generates_cc, 0, true); break; } } @@ -1711,16 +2054,14 @@ private: switch (opcode->get().GetId()) { case OpCode::Id::BFE_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in BFE is not implemented"); - std::string inner_shift = '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')'; std::string outer_shift = '(' + inner_shift + " >> " + std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')'; - regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false, + instr.generates_cc); break; } default: { @@ -1731,8 +2072,6 @@ private: break; } case OpCode::Type::Bfi: { - UNIMPLEMENTED_IF(instr.generates_cc); - const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> { switch (opcode->get().GetId()) { case OpCode::Id::BFI_IMM_R: @@ -1740,14 +2079,17 @@ private: std::to_string(instr.alu.GetSignedImm20_20())}; default: UNREACHABLE(); + return {regs.GetRegisterAsInteger(instr.gpr39, 0, false), + std::to_string(instr.alu.GetSignedImm20_20())}; } }(); const std::string offset = '(' + packed_shift + " & 0xff)"; const std::string bits = "((" + packed_shift + " >> 8) & 0xff)"; const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false); - regs.SetRegisterToInteger( - instr.gpr0, false, 0, - "bitfieldInsert(" + base + ", " + insert + ", " + offset + ", " + bits + ')', 1, 1); + regs.SetRegisterToInteger(instr.gpr0, false, 0, + "bitfieldInsert(" + base + ", " + insert + ", " + offset + + ", " + bits + ')', + 1, 1, false, instr.generates_cc); break; } case OpCode::Type::Shift: { @@ -1769,9 +2111,6 @@ private: case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in SHR is not implemented"); - if (!instr.shift.is_signed) { // Logical shift right op_a = "uint(" + op_a + ')'; @@ -1779,7 +2118,7 @@ private: // Cast to int is superfluous for arithmetic shift, it's only for a logical shift regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')', - 1, 1); + 1, 1, false, instr.generates_cc); break; } case OpCode::Id::SHL_C: @@ -1787,7 +2126,8 @@ private: case OpCode::Id::SHL_IMM: UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in SHL is not implemented"); - regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false, + instr.generates_cc); break; default: { UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); @@ -1801,18 +2141,17 @@ private: switch (opcode->get().GetId()) { case OpCode::Id::IADD32I: - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in IADD32I is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.op_32.generates_cc, + "Condition codes generation in IADD32I is partially implemented"); if (instr.iadd32i.negate_a) op_a = "-(" + op_a + ')'; regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, - instr.iadd32i.saturate != 0); + instr.iadd32i.saturate, instr.op_32.generates_cc); break; case OpCode::Id::LOP32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in LOP32I is not implemented"); if (instr.alu.lop32i.invert_a) op_a = "~(" + op_a + ')'; @@ -1822,7 +2161,7 @@ private: WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, Tegra::Shader::PredicateResultMode::None, - Tegra::Shader::Pred::UnusedIndex); + Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc); break; } default: { @@ -1851,7 +2190,7 @@ private: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IADD is not implemented"); + "Condition codes generation in IADD is partially implemented"); if (instr.alu_integer.negate_a) op_a = "-(" + op_a + ')'; @@ -1860,14 +2199,15 @@ private: op_b = "-(" + op_b + ')'; regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, instr.generates_cc); break; } case OpCode::Id::IADD3_C: case OpCode::Id::IADD3_R: case OpCode::Id::IADD3_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IADD3 is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.generates_cc, + "Condition codes generation in IADD3 is partially implemented"); std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); @@ -1923,14 +2263,16 @@ private: result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; } - regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false, + instr.generates_cc); break; } case OpCode::Id::ISCADD_C: case OpCode::Id::ISCADD_R: case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.generates_cc, + "Condition codes generation in ISCADD is partially implemented"); if (instr.alu_integer.negate_a) op_a = "-(" + op_a + ')'; @@ -1941,7 +2283,8 @@ private: const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); regs.SetRegisterToInteger(instr.gpr0, true, 0, - "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1); + "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1, + false, instr.generates_cc); break; } case OpCode::Id::POPC_C: @@ -1965,8 +2308,6 @@ private: case OpCode::Id::LOP_C: case OpCode::Id::LOP_R: case OpCode::Id::LOP_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in LOP is not implemented"); if (instr.alu.lop.invert_a) op_a = "~(" + op_a + ')'; @@ -1975,15 +2316,13 @@ private: op_b = "~(" + op_b + ')'; WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); + instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, + instr.generates_cc); break; } case OpCode::Id::LOP3_C: case OpCode::Id::LOP3_R: case OpCode::Id::LOP3_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in LOP3 is not implemented"); - const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); std::string lut; @@ -1993,15 +2332,16 @@ private: lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')'; } - WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut); + WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); break; } case OpCode::Id::IMNMX_C: case OpCode::Id::IMNMX_R: case OpCode::Id::IMNMX_IMM: { UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IMNMX is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.generates_cc, + "Condition codes generation in IMNMX is partially implemented"); const std::string condition = GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0); @@ -2009,7 +2349,7 @@ private: regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0, '(' + condition + ") ? min(" + parameters + ") : max(" + parameters + ')', - 1, 1); + 1, 1, false, instr.generates_cc); break; } case OpCode::Id::LEA_R2: @@ -2070,7 +2410,8 @@ private: UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), "Unhandled LEA Predicate"); const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))"; - regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false, + instr.generates_cc); break; } @@ -2175,7 +2516,7 @@ private: UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FFMA is not implemented"); + "Condition codes generation in FFMA is partially implemented"); switch (opcode->get().GetId()) { case OpCode::Id::FFMA_CR: { @@ -2206,7 +2547,7 @@ private: } regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', - 1, 1, instr.alu.saturate_d, 0, true); + 1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true); break; } case OpCode::Type::Hfma2: { @@ -2277,18 +2618,15 @@ private: } regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, - 1, instr.alu.saturate_d, 0, instr.conversion.dest_size, - instr.generates_cc.Value() != 0); + 1, instr.alu.saturate_d, instr.generates_cc, 0, + instr.conversion.dest_size); break; } case OpCode::Id::I2F_R: case OpCode::Id::I2F_C: { UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.conversion.selector); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); - - std::string op_a{}; + std::string op_a; if (instr.is_b_gpr) { op_a = @@ -2310,14 +2648,12 @@ private: op_a = "-(" + op_a + ')'; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc); break; } case OpCode::Id::F2F_R: { UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); if (instr.conversion.abs_a) { @@ -2349,14 +2685,13 @@ private: break; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d, + instr.generates_cc); break; } case OpCode::Id::F2I_R: case OpCode::Id::F2I_C: { UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); std::string op_a{}; if (instr.is_b_gpr) { @@ -2399,7 +2734,8 @@ private: } regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, - 1, false, 0, instr.conversion.dest_size); + 1, false, instr.generates_cc, 0, + instr.conversion.dest_size); break; } default: { @@ -2444,10 +2780,7 @@ private: case OpCode::Id::LD_C: { UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine("{"); - ++shader.scope; + const auto scope = shader.Scope(); shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); @@ -2473,19 +2806,13 @@ private: UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); } - - --shader.scope; - shader.AddLine("}"); break; } case OpCode::Id::LD_L: { UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", static_cast<unsigned>(instr.ld_l.unknown.Value())); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + std::to_string(instr.smem_imm.Value()) + ')'; @@ -2502,9 +2829,6 @@ private: UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", static_cast<unsigned>(instr.ldst_sl.type.Value())); } - - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::ST_A: { @@ -2539,10 +2863,7 @@ private: UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", static_cast<unsigned>(instr.st_l.unknown.Value())); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + std::to_string(instr.smem_imm.Value()) + ')'; @@ -2557,179 +2878,28 @@ private: UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", static_cast<unsigned>(instr.ldst_sl.type.Value())); } - - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TEX: { Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; - std::string coord; const bool is_array = instr.tex.array != 0; - + const bool depth_compare = + instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); + const auto process_mode = instr.tex.GetTextureProcessMode(); UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), "AOFFI is not implemented"); - const bool depth_compare = - instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); - u32 num_coordinates = TextureCoordinates(texture_type); - u32 start_index = 0; - std::string array_elem; - if (is_array) { - array_elem = regs.GetRegisterAsInteger(instr.gpr8); - start_index = 1; - } - const auto process_mode = instr.tex.GetTextureProcessMode(); - u32 start_index_b = 0; - std::string lod_value; - if (process_mode != Tegra::Shader::TextureProcessMode::LZ && - process_mode != Tegra::Shader::TextureProcessMode::None) { - start_index_b = 1; - lod_value = regs.GetRegisterAsFloat(instr.gpr20); - } - - std::string depth_value; - if (depth_compare) { - depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b); - } - - bool depth_compare_extra = false; + const auto [coord, texture] = + GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array); - switch (num_coordinates) { - case 1: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); - if (is_array) { - if (depth_compare) { - coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " + - array_elem + ");"; - } else { - coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");"; - } - } else { - if (depth_compare) { - coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");"; - } else { - coord = "float coords = " + x + ';'; - } - } - break; - } - case 2: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1); - if (is_array) { - if (depth_compare) { - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value + - ", " + array_elem + ");"; - } else { - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");"; - } - } else { - if (depth_compare) { - coord = - "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");"; - } else { - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - } - } - break; - } - case 3: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1); - const std::string z = - regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2); - if (is_array) { - depth_compare_extra = depth_compare; - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + - array_elem + ");"; - } else { - if (depth_compare) { - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + - depth_value + ");"; - } else { - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - } - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled coordinates number {}", - static_cast<u32>(num_coordinates)); - - // Fallback to interpreting as a 2D texture for now - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - texture_type = Tegra::Shader::TextureType::Texture2D; - } - - const std::string sampler = - GetSampler(instr.sampler, texture_type, is_array, depth_compare); - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); shader.AddLine(coord); - std::string texture; - switch (instr.tex.GetTextureProcessMode()) { - case Tegra::Shader::TextureProcessMode::None: { - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - } - break; - } - case Tegra::Shader::TextureProcessMode::LZ: { - if (!depth_compare_extra) { - texture = "textureLod(" + sampler + ", coords, 0.0)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - } - break; - } - case Tegra::Shader::TextureProcessMode::LB: - case Tegra::Shader::TextureProcessMode::LBA: { - // TODO: Figure if A suffix changes the equation at all. - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords, " + lod_value + ')'; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - LOG_WARNING(HW_GPU, - "OpenGL Limitation: can't set bias value along depth compare"); - } - break; - } - case Tegra::Shader::TextureProcessMode::LL: - case Tegra::Shader::TextureProcessMode::LLA: { - // TODO: Figure if A suffix changes the equation at all. - if (!depth_compare_extra) { - texture = "textureLod(" + sampler + ", coords, " + lod_value + ')'; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - LOG_WARNING(HW_GPU, - "OpenGL Limitation: can't set lod value along depth compare"); - } - break; - } - default: { - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - } - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.tex.GetTextureProcessMode())); - } - } - if (!depth_compare) { + if (depth_compare) { + regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1); + } else { shader.AddLine("vec4 texture_tmp = " + texture + ';'); std::size_t dest_elem{}; for (std::size_t elem = 0; elem < 4; ++elem) { @@ -2737,151 +2907,46 @@ private: // Skip disabled components continue; } - regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, dest_elem); ++dest_elem; } - } else { - regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TEXS: { Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; - bool is_array{instr.texs.IsArrayTexture()}; + const bool is_array{instr.texs.IsArrayTexture()}; + const bool depth_compare = + instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); + const auto process_mode = instr.texs.GetTextureProcessMode(); UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); - const bool depth_compare = - instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); - u32 num_coordinates = TextureCoordinates(texture_type); - const auto process_mode = instr.texs.GetTextureProcessMode(); - std::string lod_value; - std::string coord; - u32 lod_offset = 0; - if (process_mode == Tegra::Shader::TextureProcessMode::LL) { - if (num_coordinates > 2) { - lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); - lod_offset = 2; - } else { - lod_value = regs.GetRegisterAsFloat(instr.gpr20); - lod_offset = 1; - } - } + const auto scope = shader.Scope(); - switch (num_coordinates) { - case 1: { - coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';'; - break; - } - case 2: { - if (is_array) { - if (depth_compare) { - const std::string index = regs.GetRegisterAsInteger(instr.gpr8); - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + - ");"; - } else { - const std::string index = regs.GetRegisterAsInteger(instr.gpr8); - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; - } - } else { - if (lod_offset != 0) { - if (depth_compare) { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string z = - regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - } else { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - } - } else { - if (depth_compare) { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string z = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - } else { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - } - } - } - break; - } - case 3: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string z = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled coordinates number {}", - static_cast<u32>(num_coordinates)); + auto [coord, texture] = + GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array); - // Fallback to interpreting as a 2D texture for now - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - texture_type = Tegra::Shader::TextureType::Texture2D; - is_array = false; - } - const std::string sampler = - GetSampler(instr.sampler, texture_type, is_array, depth_compare); - std::string texture; - switch (process_mode) { - case Tegra::Shader::TextureProcessMode::None: { - texture = "texture(" + sampler + ", coords)"; - break; - } - case Tegra::Shader::TextureProcessMode::LZ: { - if (depth_compare && is_array) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "textureLod(" + sampler + ", coords, 0.0)"; - } - break; - } - case Tegra::Shader::TextureProcessMode::LL: { - texture = "textureLod(" + sampler + ", coords, " + lod_value + ')'; - break; - } - default: { - texture = "texture(" + sampler + ", coords)"; - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.texs.GetTextureProcessMode())); - } + shader.AddLine(coord); + + if (depth_compare) { + texture = "vec4(" + texture + ')'; } - if (!depth_compare) { - WriteTexsInstruction(instr, coord, texture); + shader.AddLine("vec4 texture_tmp = " + texture + ';'); + + if (instr.texs.fp32_flag) { + WriteTexsInstructionFloat(instr, "texture_tmp"); } else { - WriteTexsInstruction(instr, coord, "vec4(" + texture + ')'); + WriteTexsInstructionHalfFloat(instr, "texture_tmp"); } - break; } case OpCode::Id::TLDS: { const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; const bool is_array{instr.tlds.IsArrayTexture()}; - ASSERT(texture_type == Tegra::Shader::TextureType::Texture2D); - ASSERT(is_array == false); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), @@ -2889,63 +2954,16 @@ private: UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), "MZ is not implemented"); - u32 extra_op_offset = 0; - - // Scope to avoid variable name overlaps. - shader.AddLine('{'); - ++shader.scope; - std::string coords; - - switch (texture_type) { - case Tegra::Shader::TextureType::Texture1D: { - const std::string x = regs.GetRegisterAsInteger(instr.gpr8); - coords = "float coords = " + x + ';'; - break; - } - case Tegra::Shader::TextureType::Texture2D: { - UNIMPLEMENTED_IF_MSG(is_array, "Unhandled 2d array texture"); + const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array); - const std::string x = regs.GetRegisterAsInteger(instr.gpr8); - const std::string y = regs.GetRegisterAsInteger(instr.gpr20); - // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); - coords = "ivec2 coords = ivec2(" + x + ", " + y + ");"; - extra_op_offset = 1; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); - } - const std::string sampler = - GetSampler(instr.sampler, texture_type, is_array, false); - std::string texture = "texelFetch(" + sampler + ", coords, 0)"; - switch (instr.tlds.GetTextureProcessMode()) { - case Tegra::Shader::TextureProcessMode::LZ: { - texture = "texelFetch(" + sampler + ", coords, 0)"; - break; - } - case Tegra::Shader::TextureProcessMode::LL: { - shader.AddLine( - "float lod = " + - regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';'); - texture = "texelFetch(" + sampler + ", coords, lod)"; - break; - } - default: { - texture = "texelFetch(" + sampler + ", coords, 0)"; - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.tlds.GetTextureProcessMode())); - } - } - WriteTexsInstruction(instr, coords, texture); + const auto scope = shader.Scope(); - --shader.scope; - shader.AddLine('}'); + shader.AddLine(coord); + shader.AddLine("vec4 texture_tmp = " + texture + ';'); + WriteTexsInstructionFloat(instr, "texture_tmp"); break; } case OpCode::Id::TLD4: { - ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D); - ASSERT(instr.tld4.array == 0); - std::string coord; UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); @@ -2955,64 +2973,30 @@ private: "NDV is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), "PTP is not implemented"); + + auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); - auto texture_type = instr.tld4.texture_type.Value(); - u32 num_coordinates = TextureCoordinates(texture_type); - if (depth_compare) - num_coordinates += 1; - - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; + const bool is_array = instr.tld4.array != 0; - switch (num_coordinates) { - case 2: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); - break; - } - case 3: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); - shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled coordinates number {}", - static_cast<u32>(num_coordinates)); - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); - texture_type = Tegra::Shader::TextureType::Texture2D; - } + const auto [coord, texture] = + GetTLD4Code(instr, texture_type, depth_compare, is_array); - const std::string sampler = - GetSampler(instr.sampler, texture_type, false, depth_compare); + const auto scope = shader.Scope(); - const std::string texture = "textureGather(" + sampler + ", coords, " + - std::to_string(instr.tld4.component) + ')'; + shader.AddLine(coord); + std::size_t dest_elem{}; - if (!depth_compare) { - shader.AddLine("vec4 texture_tmp = " + texture + ';'); - std::size_t dest_elem{}; - for (std::size_t elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, - dest_elem); - ++dest_elem; + shader.AddLine("vec4 texture_tmp = " + texture + ';'); + for (std::size_t elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; } - } else { - regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); + regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, + dest_elem); + ++dest_elem; } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TLD4S: { @@ -3023,45 +3007,42 @@ private: instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), "AOFFI is not implemented"); - // Scope to avoid variable name overlaps. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + std::string coords; const bool depth_compare = instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); - const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); - const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. + const std::string sampler = GetSampler( instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); - if (!depth_compare) { - coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; - } else { - // Note: TLD4S coordinate encoding works just like TEXS's - const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");"; - } - const std::string texture = "textureGather(" + sampler + ", coords, " + - std::to_string(instr.tld4s.component) + ')'; + + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + coords = "vec2 coords = vec2(" + op_a + ", "; + std::string texture = "textureGather(" + sampler + ", coords, "; if (!depth_compare) { - WriteTexsInstruction(instr, coords, texture); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + coords += op_b + ");"; + texture += std::to_string(instr.tld4s.component) + ')'; } else { - WriteTexsInstruction(instr, coords, "vec4(" + texture + ')'); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20); + coords += op_b + ");"; + texture += op_c + ')'; } - - --shader.scope; - shader.AddLine('}'); + shader.AddLine(coords); + shader.AddLine("vec4 texture_tmp = " + texture + ';'); + WriteTexsInstructionFloat(instr, "texture_tmp"); break; } case OpCode::Id::TXQ: { UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); - ++shader.scope; - shader.AddLine('{'); - // TODO: the new commits on the texture refactor, change the way samplers work. + const auto scope = shader.Scope(); + + // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const std::string sampler = @@ -3072,7 +3053,8 @@ private: regs.GetRegisterAsInteger(instr.gpr8) + ')'; const std::string mip_level = "textureQueryLevels(" + sampler + ')'; shader.AddLine("ivec2 sizes = " + texture + ';'); - regs.SetRegisterToInteger(instr.gpr0, true, 0, "sizes.x", 1, 1); + + regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1); @@ -3083,8 +3065,6 @@ private: static_cast<u32>(instr.txq.query_type.Value())); } } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TMML: { @@ -3099,17 +3079,18 @@ private: const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); - // TODO: add coordinates for different samplers once other texture types are + const auto scope = shader.Scope(); + + // TODO: Add coordinates for different samplers once other texture types are // implemented. - std::string coord; switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: { - coord = "float coords = " + x + ';'; + shader.AddLine("float coords = " + x + ';'); break; } case Tegra::Shader::TextureType::Texture2D: { const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); break; } default: @@ -3117,22 +3098,15 @@ private: // Fallback to interpreting as a 2D texture for now const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); texture_type = Tegra::Shader::TextureType::Texture2D; } - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; - shader.AddLine(coord); + const std::string texture = "textureQueryLod(" + sampler + ", coords)"; - const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);"; - shader.AddLine(tmp); + shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);"); regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1); - --shader.scope; - shader.AddLine('}'); break; } default: { @@ -3268,7 +3242,7 @@ private: } case OpCode::Type::PredicateSetRegister: { UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); + "Condition codes generation in PSET is partially implemented"); const std::string op_a = GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); @@ -3285,10 +3259,11 @@ private: const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')'; if (instr.pset.bf == 0) { const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0"; - regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false, + instr.generates_cc); } else { const std::string value = '(' + result + ") ? 1.0 : 0.0"; - regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc); } break; } @@ -3353,6 +3328,7 @@ private: return std::to_string(instr.r2p.immediate_mask); default: UNREACHABLE(); + return std::to_string(instr.r2p.immediate_mask); } }(); const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + @@ -3404,14 +3380,11 @@ private: ") " + combiner + " (" + second_pred + "))"; if (instr.fset.bf) { - regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false, + instr.generates_cc); } else { regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, - 1); - } - if (instr.generates_cc.Value() != 0) { - regs.SetInternalFlag(InternalFlag::ZeroFlag, predicate); - LOG_WARNING(HW_GPU, "FSET Condition Code is incomplete"); + 1, false, instr.generates_cc); } break; } @@ -3498,7 +3471,7 @@ private: UNIMPLEMENTED_IF(instr.xmad.sign_a); UNIMPLEMENTED_IF(instr.xmad.sign_b); UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in XMAD is not implemented"); + "Condition codes generation in XMAD is partially implemented"); std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; std::string op_b; @@ -3584,7 +3557,8 @@ private: sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; } - regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false, + instr.generates_cc); break; } default: { @@ -3788,8 +3762,7 @@ private: } regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, - instr.vmad.saturate == 1, 0, Register::Size::Word, - instr.vmad.cc); + instr.vmad.saturate, instr.vmad.cc); break; } case OpCode::Id::VSETP: { @@ -3816,7 +3789,10 @@ private: } break; } - default: { UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); } + default: { + UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); + break; + } } break; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 23ed91e27..5d0819dc5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <fmt/format.h> #include "common/assert.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -16,6 +17,8 @@ static constexpr u32 PROGRAM_OFFSET{10}; ProgramResult GenerateVertexShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; + const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); + out += "// Shader Unique Id: VS" + id + "\n\n"; out += Decompiler::GetCommonDeclarations(); out += R"( @@ -84,6 +87,8 @@ void main() { ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { // Version is intentionally skipped in shader generation, it's added by the lazy compilation. std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; + const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); + out += "// Shader Unique Id: GS" + id + "\n\n"; out += Decompiler::GetCommonDeclarations(); out += "bool exec_geometry();\n"; @@ -117,6 +122,8 @@ void main() { ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; + const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); + out += "// Shader Unique Id: FS" + id + "\n\n"; out += Decompiler::GetCommonDeclarations(); out += "bool exec_fragment();\n"; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 4fa6d7612..fcc20d3b4 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -177,6 +177,9 @@ struct ShaderSetup { struct { ProgramCode code; ProgramCode code_b; // Used for dual vertex shaders + u64 unique_identifier; + std::size_t real_size; + std::size_t real_size_b; } program; /// Used in scenarios where we have a dual vertex shaders diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 4fd0d66c5..235732d86 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -138,7 +138,12 @@ void RendererOpenGL::SwapBuffers( // Load the framebuffer from memory, draw it to the screen, and swap buffers LoadFBToScreenInfo(*framebuffer); - DrawScreen(); + + if (renderer_settings.screenshot_requested) + CaptureScreenshot(); + + DrawScreen(render_window.GetFramebufferLayout()); + render_window.SwapBuffers(); } @@ -383,14 +388,13 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, /** * Draws the emulated screens to the emulator window. */ -void RendererOpenGL::DrawScreen() { +void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { if (renderer_settings.set_background_color) { // Update background color before drawing glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); } - const auto& layout = render_window.GetFramebufferLayout(); const auto& screen = layout.screen; glViewport(0, 0, layout.width, layout.height); @@ -414,6 +418,37 @@ void RendererOpenGL::DrawScreen() { /// Updates the framerate void RendererOpenGL::UpdateFramerate() {} +void RendererOpenGL::CaptureScreenshot() { + // Draw the current frame to the screenshot framebuffer + screenshot_framebuffer.Create(); + GLuint old_read_fb = state.draw.read_framebuffer; + GLuint old_draw_fb = state.draw.draw_framebuffer; + state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; + state.Apply(); + + Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; + + GLuint renderbuffer; + glGenRenderbuffers(1, &renderbuffer); + glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); + + DrawScreen(layout); + + glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, + renderer_settings.screenshot_bits); + + screenshot_framebuffer.Release(); + state.draw.read_framebuffer = old_read_fb; + state.draw.draw_framebuffer = old_draw_fb; + state.Apply(); + glDeleteRenderbuffers(1, &renderbuffer); + + renderer_settings.screenshot_complete_callback(); + renderer_settings.screenshot_requested = false; +} + static const char* GetSource(GLenum source) { #define RET(s) \ case GL_DEBUG_SOURCE_##s: \ @@ -427,6 +462,7 @@ static const char* GetSource(GLenum source) { RET(OTHER); default: UNREACHABLE(); + return "Unknown source"; } #undef RET } @@ -445,6 +481,7 @@ static const char* GetType(GLenum type) { RET(MARKER); default: UNREACHABLE(); + return "Unknown type"; } #undef RET } diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c0868c0e4..b85cc262f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -16,6 +16,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace Layout { +struct FramebufferLayout; +} + namespace OpenGL { /// Structure used for storing information about the textures for the Switch screen @@ -66,10 +70,12 @@ private: void ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer); - void DrawScreen(); + void DrawScreen(const Layout::FramebufferLayout& layout); void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); void UpdateFramerate(); + void CaptureScreenshot(); + // Loads framebuffer from emulated memory into the display information structure void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); // Fills active OpenGL texture with the given RGBA color. @@ -82,6 +88,7 @@ private: OGLVertexArray vertex_array; OGLBuffer vertex_buffer; OGLProgram shader; + OGLFramebuffer screenshot_framebuffer; /// Display information for Switch screen ScreenInfo screen_info; |
