diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 962 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 378 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 712 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 221 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 188 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_util.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_util.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 75 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 27 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 149 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 47 |
13 files changed, 1813 insertions, 980 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4fdf93a3e..bcd1ae78d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -2,28 +2,28 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cstring> #include <memory> +#include <string> +#include <tuple> +#include <utility> #include <glad/glad.h> +#include "common/assert.h" #include "common/color.h" -#include "common/file_util.h" +#include "common/logging/log.h" #include "common/math_util.h" -#include "common/microprofile.h" -#include "common/profiler.h" +#include "common/vector_math.h" -#include "core/memory.h" -#include "core/settings.h" #include "core/hw/gpu.h" #include "video_core/pica.h" #include "video_core/pica_state.h" -#include "video_core/utils.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/pica_to_gl.h" +#include "video_core/renderer_opengl/renderer_opengl.h" static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace && @@ -36,10 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { stage.GetAlphaMultiplier() == 1); } -RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } -RasterizerOpenGL::~RasterizerOpenGL() { } - -void RasterizerOpenGL::InitObjects() { +RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -61,6 +58,10 @@ void RasterizerOpenGL::InitObjects() { uniform_block_data.dirty = true; + for (unsigned index = 0; index < lighting_luts.size(); index++) { + uniform_block_data.lut_dirty[index] = true; + } + // Set vertex attributes glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); @@ -75,88 +76,47 @@ void RasterizerOpenGL::InitObjects() { glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); + glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); + glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); - SetShader(); - - // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation - fb_color_texture.texture.Create(); - ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1); - - state.texture_units[0].texture_2d = fb_color_texture.texture.handle; - state.Apply(); - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - state.texture_units[0].texture_2d = 0; - state.Apply(); - - fb_depth_texture.texture.Create(); - ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1); - - state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; - state.Apply(); - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE); - - state.texture_units[0].texture_2d = 0; - state.Apply(); - - // Configure OpenGL framebuffer + // Create render framebuffer framebuffer.Create(); - state.draw.framebuffer = framebuffer.handle; + // Allocate and bind lighting lut textures + for (size_t i = 0; i < lighting_luts.size(); ++i) { + lighting_luts[i].Create(); + state.lighting_luts[i].texture_1d = lighting_luts[i].handle; + } state.Apply(); - glActiveTexture(GL_TEXTURE0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); - - for (size_t i = 0; i < lighting_lut.size(); ++i) { - lighting_lut[i].Create(); - state.lighting_lut[i].texture_1d = lighting_lut[i].handle; - - glActiveTexture(GL_TEXTURE3 + i); - glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d); - + for (size_t i = 0; i < lighting_luts.size(); ++i) { + glActiveTexture(static_cast<GLenum>(GL_TEXTURE3 + i)); glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); } - state.Apply(); - GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); - ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE, - "OpenGL rasterizer framebuffer setup failed, status %X", status); -} - -void RasterizerOpenGL::Reset() { + // Sync fixed function OpenGL state SyncCullMode(); - SyncDepthModifiers(); SyncBlendEnabled(); SyncBlendFuncs(); SyncBlendColor(); SyncLogicOp(); SyncStencilTest(); SyncDepthTest(); + SyncColorWriteMask(); + SyncStencilWriteMask(); + SyncDepthWriteMask(); +} - SetShader(); +RasterizerOpenGL::~RasterizerOpenGL() { - res_cache.InvalidateAll(); } /** @@ -193,47 +153,98 @@ void RasterizerOpenGL::DrawTriangles() { if (vertex_batch.empty()) return; - SyncFramebuffer(); - SyncDrawState(); + const auto& regs = Pica::g_state.regs; + + // Sync and bind the framebuffer surfaces + CachedSurface* color_surface; + CachedSurface* depth_surface; + MathUtil::Rectangle<int> rect; + std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer); + + state.draw.draw_framebuffer = framebuffer.handle; + state.Apply(); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); + bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); - if (state.draw.shader_dirty) { + if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + return; + } + + // Sync the viewport + // These registers hold half-width and half-height, so must be multiplied by 2 + GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2; + GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2; + + glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width), + (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height), + (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height)); + + // Sync and bind the texture surfaces + const auto pica_textures = regs.GetTextures(); + for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { + const auto& texture = pica_textures[texture_index]; + + if (texture.enabled) { + texture_samplers[texture_index].SyncWithConfig(texture.config); + CachedSurface* surface = res_cache.GetTextureSurface(texture); + if (surface != nullptr) { + state.texture_units[texture_index].texture_2d = surface->texture.handle; + } else { + // Can occur when texture addr is null or its memory is unmapped/invalid + state.texture_units[texture_index].texture_2d = 0; + } + } else { + state.texture_units[texture_index].texture_2d = 0; + } + } + + // Sync and bind the shader + if (shader_dirty) { SetShader(); - state.draw.shader_dirty = false; + shader_dirty = false; } - for (unsigned index = 0; index < lighting_lut.size(); index++) { + // Sync the lighting luts + for (unsigned index = 0; index < lighting_luts.size(); index++) { if (uniform_block_data.lut_dirty[index]) { SyncLightingLUT(index); uniform_block_data.lut_dirty[index] = false; } } + // Sync the uniform data if (uniform_block_data.dirty) { glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); uniform_block_data.dirty = false; } + state.Apply(); + + // Draw the vertex batch glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); - vertex_batch.clear(); - - // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture - const auto& regs = Pica::g_state.regs; - - u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) - * fb_color_texture.width * fb_color_texture.height; - - u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) - * fb_depth_texture.width * fb_depth_texture.height; + // Mark framebuffer surfaces as dirty + // TODO: Restrict invalidation area to the viewport + if (color_surface != nullptr) { + color_surface->dirty = true; + res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true); + } + if (depth_surface != nullptr) { + depth_surface->dirty = true; + res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true); + } - res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); - res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true); -} + vertex_batch.clear(); -void RasterizerOpenGL::FlushFramebuffer() { - CommitColorBuffer(); - CommitDepthBuffer(); + // Unbind textures for potential future use as framebuffer attachments + for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { + state.texture_units[texture_index].texture_2d = 0; + } + state.Apply(); } void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { @@ -247,8 +258,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Depth modifiers case PICA_REG_INDEX(viewport_depth_range): - case PICA_REG_INDEX(viewport_depth_far_plane): - SyncDepthModifiers(); + SyncDepthScale(); + break; + case PICA_REG_INDEX(viewport_depth_near_plane): + SyncDepthOffset(); + break; + + // Depth buffering + case PICA_REG_INDEX(depthmap_enable): + shader_dirty = true; break; // Blending @@ -265,18 +283,39 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Alpha test case PICA_REG_INDEX(output_merger.alpha_test): SyncAlphaTest(); - state.draw.shader_dirty = true; + shader_dirty = true; break; - // Stencil test + // Sync GL stencil test + stencil write mask + // (Pica stencil test function register also contains a stencil write mask) case PICA_REG_INDEX(output_merger.stencil_test.raw_func): + SyncStencilTest(); + SyncStencilWriteMask(); + break; case PICA_REG_INDEX(output_merger.stencil_test.raw_op): + case PICA_REG_INDEX(framebuffer.depth_format): SyncStencilTest(); break; - // Depth test + // Sync GL depth test + depth and color write mask + // (Pica depth test function register also contains a depth and color write mask) case PICA_REG_INDEX(output_merger.depth_test_enable): SyncDepthTest(); + SyncDepthWriteMask(); + SyncColorWriteMask(); + break; + + // Sync GL depth and stencil write mask + // (This is a dedicated combined depth / stencil write-enable register) + case PICA_REG_INDEX(framebuffer.allow_depth_stencil_write): + SyncDepthWriteMask(); + SyncStencilWriteMask(); + break; + + // Sync GL color write mask + // (This is a dedicated color write-enable register) + case PICA_REG_INDEX(framebuffer.allow_color_write): + SyncColorWriteMask(); break; // Logic op @@ -284,6 +323,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncLogicOp(); break; + // Texture 0 type + case PICA_REG_INDEX(texture0.type): + shader_dirty = true; + break; + // TEV stages case PICA_REG_INDEX(tev_stage0.color_source1): case PICA_REG_INDEX(tev_stage0.color_modifier1): @@ -310,7 +354,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(tev_stage5.color_op): case PICA_REG_INDEX(tev_stage5.color_scale): case PICA_REG_INDEX(tev_combiner_buffer_input): - state.draw.shader_dirty = true; + shader_dirty = true; break; case PICA_REG_INDEX(tev_stage0.const_r): SyncTevConstColor(0, regs.tev_stage0); @@ -497,41 +541,257 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { } } +void RasterizerOpenGL::FlushAll() { + res_cache.FlushAll(); +} + void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { - const auto& regs = Pica::g_state.regs; + res_cache.FlushRegion(addr, size, nullptr, false); +} + +void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { + res_cache.FlushRegion(addr, size, nullptr, true); +} - u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) - * fb_color_texture.width * fb_color_texture.height; +bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + using PixelFormat = CachedSurface::PixelFormat; + using SurfaceType = CachedSurface::SurfaceType; - u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) - * fb_depth_texture.width * fb_depth_texture.height; + if (config.is_texture_copy) { + // TODO(tfarley): Try to hardware accelerate this + return false; + } + + CachedSurface src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.width = config.output_width; + src_params.height = config.output_height; + src_params.is_tiled = !config.input_linear; + src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format); + + CachedSurface dst_params; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); + dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); + dst_params.is_tiled = config.input_linear != config.dont_swizzle; + dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); + + MathUtil::Rectangle<int> src_rect; + CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); + + if (src_surface == nullptr) { + return false; + } - // If source memory region overlaps 3DS framebuffers, commit them before the copy happens - if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) - CommitColorBuffer(); + // Require destination surface to have same resolution scale as source to preserve scaling + dst_params.res_scale_width = src_surface->res_scale_width; + dst_params.res_scale_height = src_surface->res_scale_height; - if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) - CommitDepthBuffer(); + MathUtil::Rectangle<int> dst_rect; + CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect); + + if (dst_surface == nullptr) { + return false; + } + + // Don't accelerate if the src and dst surfaces are the same + if (src_surface == dst_surface) { + return false; + } + + if (config.flip_vertically) { + std::swap(dst_rect.top, dst_rect.bottom); + } + + if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { + return false; + } + + u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; + dst_surface->dirty = true; + res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); + return true; } -void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { - const auto& regs = Pica::g_state.regs; +bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + using PixelFormat = CachedSurface::PixelFormat; + using SurfaceType = CachedSurface::SurfaceType; + + CachedSurface* dst_surface = res_cache.TryGetFillSurface(config); + + if (dst_surface == nullptr) { + return false; + } + + OpenGLState cur_state = OpenGLState::GetCurState(); + + SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format); + + GLuint old_fb = cur_state.draw.draw_framebuffer; + cur_state.draw.draw_framebuffer = framebuffer.handle; + // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected + cur_state.Apply(); + + if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + return false; + } + + GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + + // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases + // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/... + // Currently only handles formats that are multiples of the fill value size + + if (config.fill_24bit) { + switch (dst_surface->pixel_format) { + case PixelFormat::RGB8: + color_values[0] = config.value_24bit_r / 255.0f; + color_values[1] = config.value_24bit_g / 255.0f; + color_values[2] = config.value_24bit_b / 255.0f; + break; + default: + return false; + } + } else if (config.fill_32bit) { + u32 value = config.value_32bit; + + switch (dst_surface->pixel_format) { + case PixelFormat::RGBA8: + color_values[0] = (value >> 24) / 255.0f; + color_values[1] = ((value >> 16) & 0xFF) / 255.0f; + color_values[2] = ((value >> 8) & 0xFF) / 255.0f; + color_values[3] = (value & 0xFF) / 255.0f; + break; + default: + return false; + } + } else { + u16 value_16bit = config.value_16bit.Value(); + Math::Vec4<u8> color; + + switch (dst_surface->pixel_format) { + case PixelFormat::RGBA8: + color_values[0] = (value_16bit >> 8) / 255.0f; + color_values[1] = (value_16bit & 0xFF) / 255.0f; + color_values[2] = color_values[0]; + color_values[3] = color_values[1]; + break; + case PixelFormat::RGB5A1: + color = Color::DecodeRGB5A1((const u8*)&value_16bit); + color_values[0] = color[0] / 31.0f; + color_values[1] = color[1] / 31.0f; + color_values[2] = color[2] / 31.0f; + color_values[3] = color[3]; + break; + case PixelFormat::RGB565: + color = Color::DecodeRGB565((const u8*)&value_16bit); + color_values[0] = color[0] / 31.0f; + color_values[1] = color[1] / 63.0f; + color_values[2] = color[2] / 31.0f; + break; + case PixelFormat::RGBA4: + color = Color::DecodeRGBA4((const u8*)&value_16bit); + color_values[0] = color[0] / 15.0f; + color_values[1] = color[1] / 15.0f; + color_values[2] = color[2] / 15.0f; + color_values[3] = color[3] / 15.0f; + break; + case PixelFormat::IA8: + case PixelFormat::RG8: + color_values[0] = (value_16bit >> 8) / 255.0f; + color_values[1] = (value_16bit & 0xFF) / 255.0f; + break; + default: + return false; + } + } + + cur_state.color_mask.red_enabled = true; + cur_state.color_mask.green_enabled = true; + cur_state.color_mask.blue_enabled = true; + cur_state.color_mask.alpha_enabled = true; + cur_state.Apply(); + glClearBufferfv(GL_COLOR, 0, color_values); + } else if (dst_type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + return false; + } + + GLfloat value_float; + if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) { + value_float = config.value_32bit / 65535.0f; // 2^16 - 1 + } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) { + value_float = config.value_32bit / 16777215.0f; // 2^24 - 1 + } + + cur_state.depth.write_mask = true; + cur_state.Apply(); + glClearBufferfv(GL_DEPTH, 0, &value_float); + } else if (dst_type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); - u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) - * fb_color_texture.width * fb_color_texture.height; + if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + return false; + } - u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) - * fb_depth_texture.width * fb_depth_texture.height; + GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 + GLint value_int = (config.value_32bit >> 24); - // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL - if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) - ReloadColorBuffer(); + cur_state.depth.write_mask = true; + cur_state.stencil.write_mask = true; + cur_state.Apply(); + glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); + } - if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) - ReloadDepthBuffer(); + cur_state.draw.draw_framebuffer = old_fb; + // TODO: Return scissor test to previous value when scissor test is implemented + cur_state.Apply(); - // Notify cache of flush in case the region touches a cached resource - res_cache.InvalidateInRange(addr, size); + dst_surface->dirty = true; + res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true); + return true; +} + +bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { + if (framebuffer_addr == 0) { + return false; + } + + CachedSurface src_params; + src_params.addr = framebuffer_addr; + src_params.width = config.width; + src_params.height = config.height; + src_params.stride = pixel_stride; + src_params.is_tiled = false; + src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format); + + MathUtil::Rectangle<int> src_rect; + CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); + + if (src_surface == nullptr) { + return false; + } + + u32 scaled_width = src_surface->GetScaledWidth(); + u32 scaled_height = src_surface->GetScaledHeight(); + + screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height, + (float)src_rect.left / (float)scaled_width, + (float)src_rect.bottom / (float)scaled_height, + (float)src_rect.right / (float)scaled_width); + + screen_info.display_texture = src_surface->texture.handle; + + return true; } void RasterizerOpenGL::SamplerInfo::Create() { @@ -567,114 +827,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConf if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) { if (border_color != config.border_color.raw) { + border_color = config.border_color.raw; auto gl_color = PicaToGL::ColorRGBA8(border_color); glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data()); } } } -void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) { - GLint internal_format; - - texture.format = format; - texture.width = width; - texture.height = height; - - switch (format) { - case Pica::Regs::ColorFormat::RGBA8: - internal_format = GL_RGBA; - texture.gl_format = GL_RGBA; - texture.gl_type = GL_UNSIGNED_INT_8_8_8_8; - break; - - case Pica::Regs::ColorFormat::RGB8: - // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every - // specific OpenGL type used in this function using native-endian (that is, little-endian - // mostly everywhere) for words or half-words. - // TODO: check how those behave on big-endian processors. - internal_format = GL_RGB; - texture.gl_format = GL_BGR; - texture.gl_type = GL_UNSIGNED_BYTE; - break; - - case Pica::Regs::ColorFormat::RGB5A1: - internal_format = GL_RGBA; - texture.gl_format = GL_RGBA; - texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1; - break; - - case Pica::Regs::ColorFormat::RGB565: - internal_format = GL_RGB; - texture.gl_format = GL_RGB; - texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; - break; - - case Pica::Regs::ColorFormat::RGBA4: - internal_format = GL_RGBA; - texture.gl_format = GL_RGBA; - texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4; - break; - - default: - LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format); - UNIMPLEMENTED(); - break; - } - - state.texture_units[0].texture_2d = texture.texture.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, - texture.gl_format, texture.gl_type, nullptr); - - state.texture_units[0].texture_2d = 0; - state.Apply(); -} - -void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) { - GLint internal_format; - - texture.format = format; - texture.width = width; - texture.height = height; - - switch (format) { - case Pica::Regs::DepthFormat::D16: - internal_format = GL_DEPTH_COMPONENT16; - texture.gl_format = GL_DEPTH_COMPONENT; - texture.gl_type = GL_UNSIGNED_SHORT; - break; - - case Pica::Regs::DepthFormat::D24: - internal_format = GL_DEPTH_COMPONENT24; - texture.gl_format = GL_DEPTH_COMPONENT; - texture.gl_type = GL_UNSIGNED_INT; - break; - - case Pica::Regs::DepthFormat::D24S8: - internal_format = GL_DEPTH24_STENCIL8; - texture.gl_format = GL_DEPTH_STENCIL; - texture.gl_type = GL_UNSIGNED_INT_24_8; - break; - - default: - LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format); - UNIMPLEMENTED(); - break; - } - - state.texture_units[0].texture_2d = texture.texture.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, - texture.gl_format, texture.gl_type, nullptr); - - state.texture_units[0].texture_2d = 0; - state.Apply(); -} - void RasterizerOpenGL::SetShader() { PicaShaderConfig config = PicaShaderConfig::CurrentConfig(); std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>(); @@ -722,6 +881,8 @@ void RasterizerOpenGL::SetShader() { glUniformBlockBinding(current_shader->shader.handle, block_index, 0); // Update uniforms + SyncDepthScale(); + SyncDepthOffset(); SyncAlphaTest(); SyncCombinerColor(); auto& tev_stages = Pica::g_state.regs.GetTevStages(); @@ -730,6 +891,8 @@ void RasterizerOpenGL::SetShader() { SyncGlobalAmbient(); for (int light_index = 0; light_index < 8; light_index++) { + SyncLightSpecular0(light_index); + SyncLightSpecular1(light_index); SyncLightDiffuse(light_index); SyncLightAmbient(light_index); SyncLightPosition(light_index); @@ -737,83 +900,6 @@ void RasterizerOpenGL::SetShader() { } } -void RasterizerOpenGL::SyncFramebuffer() { - const auto& regs = Pica::g_state.regs; - - PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; - - PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; - - bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) || - fb_color_texture.height != static_cast<GLsizei>(regs.framebuffer.GetHeight()); - - bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format || - fb_size_changed; - - bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || - fb_size_changed; - - bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr || - color_fb_prop_changed; - - bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr || - depth_fb_prop_changed; - - // Commit if framebuffer modified in any way - if (color_fb_modified) - CommitColorBuffer(); - - if (depth_fb_modified) - CommitDepthBuffer(); - - // Reconfigure framebuffer textures if any property has changed - if (color_fb_prop_changed) { - ReconfigureColorTexture(fb_color_texture, new_fb_color_format, - regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight()); - } - - if (depth_fb_prop_changed) { - ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format, - regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight()); - - // Only attach depth buffer as stencil if it supports stencil - switch (new_fb_depth_format) { - case Pica::Regs::DepthFormat::D16: - case Pica::Regs::DepthFormat::D24: - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - break; - - case Pica::Regs::DepthFormat::D24S8: - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); - break; - - default: - LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format); - UNIMPLEMENTED(); - break; - } - } - - // Load buffer data again if fb modified in any way - if (color_fb_modified) { - cached_fb_color_addr = new_fb_color_addr; - - ReloadColorBuffer(); - } - - if (depth_fb_modified) { - cached_fb_depth_addr = new_fb_depth_addr; - - ReloadDepthBuffer(); - } - - GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); - ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE, - "OpenGL rasterizer framebuffer setup failed, status %X", status); -} - void RasterizerOpenGL::SyncCullMode() { const auto& regs = Pica::g_state.regs; @@ -839,13 +925,20 @@ void RasterizerOpenGL::SyncCullMode() { } } -void RasterizerOpenGL::SyncDepthModifiers() { - float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); - float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; +void RasterizerOpenGL::SyncDepthScale() { + float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); + if (depth_scale != uniform_block_data.data.depth_scale) { + uniform_block_data.data.depth_scale = depth_scale; + uniform_block_data.dirty = true; + } +} - // TODO: Implement scale modifier - uniform_block_data.data.depth_offset = depth_offset; - uniform_block_data.dirty = true; +void RasterizerOpenGL::SyncDepthOffset() { + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); + if (depth_offset != uniform_block_data.data.depth_offset) { + uniform_block_data.data.depth_offset = depth_offset; + uniform_block_data.dirty = true; + } } void RasterizerOpenGL::SyncBlendEnabled() { @@ -854,6 +947,8 @@ void RasterizerOpenGL::SyncBlendEnabled() { void RasterizerOpenGL::SyncBlendFuncs() { const auto& regs = Pica::g_state.regs; + state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); + state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); @@ -880,13 +975,39 @@ void RasterizerOpenGL::SyncLogicOp() { state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op); } +void RasterizerOpenGL::SyncColorWriteMask() { + const auto& regs = Pica::g_state.regs; + + auto IsColorWriteEnabled = [&](u32 value) { + return (regs.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE : GL_FALSE; + }; + + state.color_mask.red_enabled = IsColorWriteEnabled(regs.output_merger.red_enable); + state.color_mask.green_enabled = IsColorWriteEnabled(regs.output_merger.green_enable); + state.color_mask.blue_enabled = IsColorWriteEnabled(regs.output_merger.blue_enable); + state.color_mask.alpha_enabled = IsColorWriteEnabled(regs.output_merger.alpha_enable); +} + +void RasterizerOpenGL::SyncStencilWriteMask() { + const auto& regs = Pica::g_state.regs; + state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0) + ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask) + : 0; +} + +void RasterizerOpenGL::SyncDepthWriteMask() { + const auto& regs = Pica::g_state.regs; + state.depth.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) + ? GL_TRUE + : GL_FALSE; +} + void RasterizerOpenGL::SyncStencilTest() { const auto& regs = Pica::g_state.regs; state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; - state.stencil.write_mask = regs.output_merger.stencil_test.write_mask; state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); @@ -898,11 +1019,6 @@ void RasterizerOpenGL::SyncDepthTest() { regs.output_merger.depth_write_enable == 1; state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; - state.color_mask.red_enabled = regs.output_merger.red_enable; - state.color_mask.green_enabled = regs.output_merger.green_enable; - state.color_mask.blue_enabled = regs.output_merger.blue_enable; - state.color_mask.alpha_enabled = regs.output_merger.alpha_enable; - state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE; } void RasterizerOpenGL::SyncCombinerColor() { @@ -989,229 +1105,3 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) { uniform_block_data.dirty = true; } } - -void RasterizerOpenGL::SyncDrawState() { - const auto& regs = Pica::g_state.regs; - - // Sync the viewport - GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2; - - // OpenGL uses different y coordinates, so negate corner offset and flip origin - // TODO: Ensure viewport_corner.x should not be negated or origin flipped - // TODO: Use floating-point viewports for accuracy if supported - glViewport((GLsizei)regs.viewport_corner.x, - (GLsizei)regs.viewport_corner.y, - viewport_width, viewport_height); - - // Sync bound texture(s), upload if not cached - const auto pica_textures = regs.GetTextures(); - for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { - const auto& texture = pica_textures[texture_index]; - - if (texture.enabled) { - texture_samplers[texture_index].SyncWithConfig(texture.config); - res_cache.LoadAndBindTexture(state, texture_index, texture); - } else { - state.texture_units[texture_index].texture_2d = 0; - } - } - - state.draw.uniform_buffer = uniform_buffer.handle; - state.Apply(); -} - -MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); - -void RasterizerOpenGL::ReloadColorBuffer() { - u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); - - if (color_buffer == nullptr) - return; - - MICROPROFILE_SCOPE(OpenGL_FramebufferReload); - - u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); - - std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]); - - // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. - for (int y = 0; y < fb_color_texture.height; ++y) { - for (int x = 0; x < fb_color_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel; - - u8* pixel = color_buffer + dst_offset; - memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel); - } - } - - state.texture_units[0].texture_2d = fb_color_texture.texture.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height, - fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get()); - - state.texture_units[0].texture_2d = 0; - state.Apply(); -} - -void RasterizerOpenGL::ReloadDepthBuffer() { - if (cached_fb_depth_addr == 0) - return; - - // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil - u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); - - if (depth_buffer == nullptr) - return; - - MICROPROFILE_SCOPE(OpenGL_FramebufferReload); - - u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); - - // OpenGL needs 4 bpp alignment for D24 - u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel; - - std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]); - - u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get(); - - if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { - for (int y = 0; y < fb_depth_texture.height; ++y) { - for (int x = 0; x < fb_depth_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width); - - u8* pixel = depth_buffer + dst_offset; - u32 depth_stencil = *(u32*)pixel; - ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24); - } - } - } else { - for (int y = 0; y < fb_depth_texture.height; ++y) { - for (int x = 0; x < fb_depth_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp; - - u8* pixel = depth_buffer + dst_offset; - memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel); - } - } - } - - state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { - // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer. - // The bug has been reported to Intel (https://communities.intel.com/message/324464) - glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0, - GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get()); - } else { - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, - fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); - } - - state.texture_units[0].texture_2d = 0; - state.Apply(); -} - -Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit"); -MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); - -void RasterizerOpenGL::CommitColorBuffer() { - if (cached_fb_color_addr != 0) { - u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); - - if (color_buffer != nullptr) { - Common::Profiling::ScopeTimer timer(buffer_commit_category); - MICROPROFILE_SCOPE(OpenGL_FramebufferCommit); - - u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); - - std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]); - - state.texture_units[0].texture_2d = fb_color_texture.texture.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get()); - - state.texture_units[0].texture_2d = 0; - state.Apply(); - - // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. - for (int y = 0; y < fb_color_texture.height; ++y) { - for (int x = 0; x < fb_color_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel; - - u8* pixel = color_buffer + dst_offset; - memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel); - } - } - } - } -} - -void RasterizerOpenGL::CommitDepthBuffer() { - if (cached_fb_depth_addr != 0) { - // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. - u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); - - if (depth_buffer != nullptr) { - Common::Profiling::ScopeTimer timer(buffer_commit_category); - MICROPROFILE_SCOPE(OpenGL_FramebufferCommit); - - u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); - - // OpenGL needs 4 bpp alignment for D24 - u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel; - - std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]); - - state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get()); - - state.texture_units[0].texture_2d = 0; - state.Apply(); - - u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get(); - - if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { - for (int y = 0; y < fb_depth_texture.height; ++y) { - for (int x = 0; x < fb_depth_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width); - - u8* pixel = depth_buffer + dst_offset; - u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index]; - *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24); - } - } - } else { - for (int y = 0; y < fb_depth_texture.height; ++y) { - for (int x = 0; x < fb_depth_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp; - - u8* pixel = depth_buffer + dst_offset; - memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel); - } - } - } - } - } -} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index fc85aa3ff..d70369400 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -4,22 +4,33 @@ #pragma once +#include <array> #include <cstddef> #include <cstring> #include <memory> #include <vector> #include <unordered_map> +#include <glad/glad.h> + +#include "common/bit_field.h" #include "common/common_types.h" #include "common/hash.h" +#include "common/vector_math.h" + +#include "core/hw/gpu.h" #include "video_core/pica.h" #include "video_core/pica_state.h" +#include "video_core/pica_types.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/pica_to_gl.h" -#include "video_core/shader/shader_interpreter.h" +#include "video_core/shader/shader.h" + +struct ScreenInfo; /** * This struct contains all state used to generate the GLSL shader program that emulates the current @@ -28,158 +39,185 @@ * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) * two separate shaders sharing the same key. + * + * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X." + * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X." + * = Bytewise copy instead of memberwise copy. + * This is important because the padding bytes are included in the hash and comparison between objects. */ -struct PicaShaderConfig { +union PicaShaderConfig { + /// Construct a PicaShaderConfig with the current Pica register configuration. static PicaShaderConfig CurrentConfig() { PicaShaderConfig res; + + auto& state = res.state; + std::memset(&state, 0, sizeof(PicaShaderConfig::State)); + const auto& regs = Pica::g_state.regs; - res.alpha_test_func = regs.output_merger.alpha_test.enable ? + state.depthmap_enable = regs.depthmap_enable; + + state.alpha_test_func = regs.output_merger.alpha_test.enable ? regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; - // Copy relevant TevStageConfig fields only. We're doing this manually (instead of calling - // the GetTevStages() function) because BitField explicitly disables copies. - - res.tev_stages[0].sources_raw = regs.tev_stage0.sources_raw; - res.tev_stages[1].sources_raw = regs.tev_stage1.sources_raw; - res.tev_stages[2].sources_raw = regs.tev_stage2.sources_raw; - res.tev_stages[3].sources_raw = regs.tev_stage3.sources_raw; - res.tev_stages[4].sources_raw = regs.tev_stage4.sources_raw; - res.tev_stages[5].sources_raw = regs.tev_stage5.sources_raw; - - res.tev_stages[0].modifiers_raw = regs.tev_stage0.modifiers_raw; - res.tev_stages[1].modifiers_raw = regs.tev_stage1.modifiers_raw; - res.tev_stages[2].modifiers_raw = regs.tev_stage2.modifiers_raw; - res.tev_stages[3].modifiers_raw = regs.tev_stage3.modifiers_raw; - res.tev_stages[4].modifiers_raw = regs.tev_stage4.modifiers_raw; - res.tev_stages[5].modifiers_raw = regs.tev_stage5.modifiers_raw; - - res.tev_stages[0].ops_raw = regs.tev_stage0.ops_raw; - res.tev_stages[1].ops_raw = regs.tev_stage1.ops_raw; - res.tev_stages[2].ops_raw = regs.tev_stage2.ops_raw; - res.tev_stages[3].ops_raw = regs.tev_stage3.ops_raw; - res.tev_stages[4].ops_raw = regs.tev_stage4.ops_raw; - res.tev_stages[5].ops_raw = regs.tev_stage5.ops_raw; - - res.tev_stages[0].scales_raw = regs.tev_stage0.scales_raw; - res.tev_stages[1].scales_raw = regs.tev_stage1.scales_raw; - res.tev_stages[2].scales_raw = regs.tev_stage2.scales_raw; - res.tev_stages[3].scales_raw = regs.tev_stage3.scales_raw; - res.tev_stages[4].scales_raw = regs.tev_stage4.scales_raw; - res.tev_stages[5].scales_raw = regs.tev_stage5.scales_raw; - - res.combiner_buffer_input = + state.texture0_type = regs.texture0.type; + + // Copy relevant tev stages fields. + // We don't sync const_color here because of the high variance, it is a + // shader uniform instead. + const auto& tev_stages = regs.GetTevStages(); + DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); + for (size_t i = 0; i < tev_stages.size(); i++) { + const auto& tev_stage = tev_stages[i]; + state.tev_stages[i].sources_raw = tev_stage.sources_raw; + state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; + state.tev_stages[i].ops_raw = tev_stage.ops_raw; + state.tev_stages[i].scales_raw = tev_stage.scales_raw; + } + + state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; // Fragment lighting - res.lighting.enable = !regs.lighting.disable; - res.lighting.src_num = regs.lighting.num_lights + 1; + state.lighting.enable = !regs.lighting.disable; + state.lighting.src_num = regs.lighting.num_lights + 1; - for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { + for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { unsigned num = regs.lighting.light_enable.GetNum(light_index); const auto& light = regs.lighting.light[num]; - res.lighting.light[light_index].num = num; - res.lighting.light[light_index].directional = light.directional != 0; - res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; - res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); - res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); - res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); + state.lighting.light[light_index].num = num; + state.lighting.light[light_index].directional = light.directional != 0; + state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; + state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); + state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); + state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); } - res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; - res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; - res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); - res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - - res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; - res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; - res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); - res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - - res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; - res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; - res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); - res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - - res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; - res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; - res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); - res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - - res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; - res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; - res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); - res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - - res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; - res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; - res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); - res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); - - res.lighting.config = regs.lighting.config; - res.lighting.fresnel_selector = regs.lighting.fresnel_selector; - res.lighting.bump_mode = regs.lighting.bump_mode; - res.lighting.bump_selector = regs.lighting.bump_selector; - res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; - res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; + state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; + state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; + state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); + state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + + state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; + state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; + state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); + state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + + state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; + state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; + state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); + state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + + state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; + state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; + state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); + state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + + state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; + state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; + state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); + state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + + state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; + state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; + state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); + state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + + state.lighting.config = regs.lighting.config; + state.lighting.fresnel_selector = regs.lighting.fresnel_selector; + state.lighting.bump_mode = regs.lighting.bump_mode; + state.lighting.bump_selector = regs.lighting.bump_selector; + state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; + state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; } bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { - return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); + return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); } bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { - return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); + return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); } bool operator ==(const PicaShaderConfig& o) const { - return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; + return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; }; - Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; - std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; - u8 combiner_buffer_input = 0; + // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC. + // This makes BitField not TC when used in a union or struct so we have to resort + // to this ugly hack. + // Once that bug is fixed we can use Pica::Regs::TevStageConfig here. + // Doesn't include const_color because we don't sync it, see comment in CurrentConfig() + struct TevStageConfigRaw { + u32 sources_raw; + u32 modifiers_raw; + u32 ops_raw; + u32 scales_raw; + explicit operator Pica::Regs::TevStageConfig() const noexcept { + Pica::Regs::TevStageConfig stage; + stage.sources_raw = sources_raw; + stage.modifiers_raw = modifiers_raw; + stage.ops_raw = ops_raw; + stage.const_color = 0; + stage.scales_raw = scales_raw; + return stage; + } + }; - struct { - struct { - unsigned num = 0; - bool directional = false; - bool two_sided_diffuse = false; - bool dist_atten_enable = false; - GLfloat dist_atten_scale = 0.0f; - GLfloat dist_atten_bias = 0.0f; - } light[8]; - - bool enable = false; - unsigned src_num = 0; - Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; - unsigned bump_selector = 0; - bool bump_renorm = false; - bool clamp_highlights = false; - - Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; - Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; + struct State { + + Pica::Regs::CompareFunc alpha_test_func; + Pica::Regs::TextureConfig::TextureType texture0_type; + std::array<TevStageConfigRaw, 6> tev_stages; + u8 combiner_buffer_input; + + Pica::Regs::DepthBuffering depthmap_enable; struct { - bool enable = false; - bool abs_input = false; - Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; - float scale = 1.0f; - } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; - } lighting; + struct { + unsigned num; + bool directional; + bool two_sided_diffuse; + bool dist_atten_enable; + GLfloat dist_atten_scale; + GLfloat dist_atten_bias; + } light[8]; + + bool enable; + unsigned src_num; + Pica::Regs::LightingBumpMode bump_mode; + unsigned bump_selector; + bool bump_renorm; + bool clamp_highlights; + + Pica::Regs::LightingConfig config; + Pica::Regs::LightingFresnelSelector fresnel_selector; + + struct { + bool enable; + bool abs_input; + Pica::Regs::LightingLutInput type; + float scale; + } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; + + } state; }; +#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) +static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable"); +#endif namespace std { template <> struct hash<PicaShaderConfig> { size_t operator()(const PicaShaderConfig& k) const { - return Common::ComputeHash64(&k, sizeof(PicaShaderConfig)); + return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State)); } }; @@ -191,16 +229,17 @@ public: RasterizerOpenGL(); ~RasterizerOpenGL() override; - void InitObjects() override; - void Reset() override; void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) override; void DrawTriangles() override; - void FlushFramebuffer() override; void NotifyPicaRegisterChanged(u32 id) override; + void FlushAll() override; void FlushRegion(PAddr addr, u32 size) override; - void InvalidateRegion(PAddr addr, u32 size) override; + void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; + bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override; /// OpenGL shader generated for a given Pica register state struct PicaShader { @@ -210,26 +249,6 @@ public: private: - /// Structure used for storing information about color textures - struct TextureInfo { - OGLTexture texture; - GLsizei width; - GLsizei height; - Pica::Regs::ColorFormat format; - GLenum gl_format; - GLenum gl_type; - }; - - /// Structure used for storing information about depth textures - struct DepthTextureInfo { - OGLTexture texture; - GLsizei width; - GLsizei height; - Pica::Regs::DepthFormat format; - GLenum gl_format; - GLenum gl_type; - }; - struct SamplerInfo { using TextureConfig = Pica::Regs::TextureConfig; @@ -265,6 +284,7 @@ private: tex_coord1[1] = v.tc1.y.ToFloat32(); tex_coord2[0] = v.tc2.x.ToFloat32(); tex_coord2[1] = v.tc2.y.ToFloat32(); + tex_coord0_w = v.tc0_w.ToFloat32(); normquat[0] = v.quat.x.ToFloat32(); normquat[1] = v.quat.y.ToFloat32(); normquat[2] = v.quat.z.ToFloat32(); @@ -285,6 +305,7 @@ private: GLfloat tex_coord0[2]; GLfloat tex_coord1[2]; GLfloat tex_coord2[2]; + GLfloat tex_coord0_w; GLfloat normquat[4]; GLfloat view[3]; }; @@ -303,6 +324,7 @@ private: GLvec4 const_color[6]; GLvec4 tev_combiner_buffer_color; GLint alphatest_ref; + GLfloat depth_scale; GLfloat depth_offset; alignas(16) GLvec3 lighting_global_ambient; LightSrc light_src[8]; @@ -311,23 +333,17 @@ private: static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); - /// Reconfigure the OpenGL color texture to use the given format and dimensions - void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); - - /// Reconfigure the OpenGL depth texture to use the given format and dimensions - void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height); - /// Sets the OpenGL shader in accordance with the current PICA register state void SetShader(); - /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer - void SyncFramebuffer(); - /// Syncs the cull mode to match the PICA register void SyncCullMode(); - /// Syncs the depth scale and offset to match the PICA registers - void SyncDepthModifiers(); + /// Syncs the depth scale to match the PICA register + void SyncDepthScale(); + + /// Syncs the depth offset to match the PICA register + void SyncDepthOffset(); /// Syncs the blend enabled status to match the PICA register void SyncBlendEnabled(); @@ -344,90 +360,70 @@ private: /// Syncs the logic op states to match the PICA register void SyncLogicOp(); + /// Syncs the color write mask to match the PICA register state + void SyncColorWriteMask(); + + /// Syncs the stencil write mask to match the PICA register state + void SyncStencilWriteMask(); + + /// Syncs the depth write mask to match the PICA register state + void SyncDepthWriteMask(); + /// Syncs the stencil test states to match the PICA register void SyncStencilTest(); /// Syncs the depth test states to match the PICA register void SyncDepthTest(); - /// Syncs the TEV constant color to match the PICA register - void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage); - /// Syncs the TEV combiner color buffer to match the PICA register void SyncCombinerColor(); + /// Syncs the TEV constant color to match the PICA register + void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage); + /// Syncs the lighting global ambient color to match the PICA register void SyncGlobalAmbient(); /// Syncs the lighting lookup tables void SyncLightingLUT(unsigned index); - /// Syncs the specified light's diffuse color to match the PICA register - void SyncLightDiffuse(int light_index); - - /// Syncs the specified light's ambient color to match the PICA register - void SyncLightAmbient(int light_index); - - /// Syncs the specified light's position to match the PICA register - void SyncLightPosition(int light_index); - /// Syncs the specified light's specular 0 color to match the PICA register void SyncLightSpecular0(int light_index); /// Syncs the specified light's specular 1 color to match the PICA register void SyncLightSpecular1(int light_index); - /// Syncs the remaining OpenGL drawing state to match the current PICA state - void SyncDrawState(); - - /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture - void ReloadColorBuffer(); + /// Syncs the specified light's diffuse color to match the PICA register + void SyncLightDiffuse(int light_index); - /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture - void ReloadDepthBuffer(); + /// Syncs the specified light's ambient color to match the PICA register + void SyncLightAmbient(int light_index); - /** - * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory - * Loads the OpenGL framebuffer textures into temporary buffers - * Then copies into the 3DS framebuffer using proper Morton order - */ - void CommitColorBuffer(); + /// Syncs the specified light's position to match the PICA register + void SyncLightPosition(int light_index); - /** - * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory - * Loads the OpenGL framebuffer textures into temporary buffers - * Then copies into the 3DS framebuffer using proper Morton order - */ - void CommitDepthBuffer(); + OpenGLState state; RasterizerCacheOpenGL res_cache; std::vector<HardwareVertex> vertex_batch; - OpenGLState state; - - PAddr cached_fb_color_addr; - PAddr cached_fb_depth_addr; - - // Hardware rasterizer - std::array<SamplerInfo, 3> texture_samplers; - TextureInfo fb_color_texture; - DepthTextureInfo fb_depth_texture; - std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache; const PicaShader* current_shader = nullptr; + bool shader_dirty; struct { UniformData data; bool lut_dirty[6]; bool dirty; - } uniform_block_data; + } uniform_block_data = {}; + std::array<SamplerInfo, 3> texture_samplers; OGLVertexArray vertex_array; OGLBuffer vertex_buffer; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; - std::array<OGLTexture, 6> lighting_lut; - std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; + std::array<OGLTexture, 6> lighting_luts; + std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{}; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 1323c12e4..7efd0038a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -2,9 +2,19 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <memory> +#include <algorithm> +#include <atomic> +#include <cstring> +#include <iterator> +#include <unordered_set> +#include <utility> +#include <vector> -#include "common/hash.h" +#include <glad/glad.h> + +#include "common/bit_field.h" +#include "common/emu_window.h" +#include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" #include "common/vector_math.h" @@ -12,71 +22,693 @@ #include "core/memory.h" #include "video_core/debug_utils/debug_utils.h" +#include "video_core/pica_state.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" -#include "video_core/renderer_opengl/pica_to_gl.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/utils.h" +#include "video_core/video_core.h" + +struct FormatTuple { + GLint internal_format; + GLenum format; + GLenum type; +}; + +static const std::array<FormatTuple, 5> fb_format_tuples = {{ + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8 + { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8 + { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1 + { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565 + { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4 +}}; + +static const std::array<FormatTuple, 4> depth_format_tuples = {{ + { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16 + {}, + { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24 + { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8 +}}; + +RasterizerCacheOpenGL::RasterizerCacheOpenGL() { + transfer_framebuffers[0].Create(); + transfer_framebuffers[1].Create(); +} RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - InvalidateAll(); + FlushAll(); +} + +static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) { + using PixelFormat = CachedSurface::PixelFormat; + + u8* data_ptrs[2]; + u32 depth_stencil_shifts[2] = {24, 8}; + + if (morton_to_gl) { + std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]); + } + + if (pixel_format == PixelFormat::D24S8) { + for (unsigned y = 0; y < height; ++y) { + for (unsigned x = 0; x < width; ++x) { + const u32 coarse_y = y & ~7; + u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; + u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; + + data_ptrs[morton_to_gl] = morton_data + morton_offset; + data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; + + // Swap depth and stencil value ordering since 3DS does not match OpenGL + u32 depth_stencil; + memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); + depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]); + + memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); + } + } + } else { + for (unsigned y = 0; y < height; ++y) { + for (unsigned x = 0; x < width; ++x) { + const u32 coarse_y = y & ~7; + u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; + u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; + + data_ptrs[morton_to_gl] = morton_data + morton_offset; + data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; + + memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); + } + } + } +} + +bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) { + using SurfaceType = CachedSurface::SurfaceType; + + OpenGLState cur_state = OpenGLState::GetCurState(); + + // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components + OpenGLState::ResetTexture(src_tex); + OpenGLState::ResetTexture(dst_tex); + + // Keep track of previous framebuffer bindings + GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer }; + cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; + cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; + cur_state.Apply(); + + u32 buffers = 0; + + if (type == SurfaceType::Color || type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_COLOR_BUFFER_BIT; + } else if (type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_DEPTH_BUFFER_BIT; + } else if (type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); + + buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + if (OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + return false; + } + + if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + return false; + } + + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, + dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, + buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); + + // Restore previous framebuffer bindings + cur_state.draw.read_framebuffer = old_fbs[0]; + cur_state.draw.draw_framebuffer = old_fbs[1]; + cur_state.Apply(); + + return true; +} + +bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) { + using SurfaceType = CachedSurface::SurfaceType; + + if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { + return false; + } + + return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); +} + +static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) { + // Allocate an uninitialized texture of appropriate size and format for the surface + using SurfaceType = CachedSurface::SurfaceType; + + OpenGLState cur_state = OpenGLState::GetCurState(); + + // Keep track of previous texture bindings + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = texture; + cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); + + SurfaceType type = CachedSurface::GetFormatType(pixel_format); + + FormatTuple tuple; + if (type == SurfaceType::Color) { + ASSERT((size_t)pixel_format < fb_format_tuples.size()); + tuple = fb_format_tuples[(unsigned int)pixel_format]; + } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { + size_t tuple_idx = (size_t)pixel_format - 14; + ASSERT(tuple_idx < depth_format_tuples.size()); + tuple = depth_format_tuples[tuple_idx]; + } else { + tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; + } + + glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, + tuple.format, tuple.type, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + // Restore previous texture bindings + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); } -MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); +MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); +CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) { + using PixelFormat = CachedSurface::PixelFormat; + using SurfaceType = CachedSurface::SurfaceType; + + if (params.addr == 0) { + return nullptr; + } + + u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; + + // Check for an exact match in existing surfaces + CachedSurface* best_exact_surface = nullptr; + float exact_surface_goodness = -1.f; + + auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); + auto range = surface_cache.equal_range(surface_interval); + for (auto it = range.first; it != range.second; ++it) { + for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { + CachedSurface* surface = it2->get(); + + // Check if the request matches the surface exactly + if (params.addr == surface->addr && + params.width == surface->width && params.height == surface->height && + params.pixel_format == surface->pixel_format) + { + // Make sure optional param-matching criteria are fulfilled + bool tiling_match = (params.is_tiled == surface->is_tiled); + bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); + if (!match_res_scale || res_scale_match) { + // Prioritize same-tiling and highest resolution surfaces + float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; + if (match_goodness > exact_surface_goodness || surface->dirty) { + exact_surface_goodness = match_goodness; + best_exact_surface = surface; + } + } + } + } + } + + // Return the best exact surface if found + if (best_exact_surface != nullptr) { + return best_exact_surface; + } + + // No matching surfaces found, so create a new one + u8* texture_src_data = Memory::GetPhysicalPointer(params.addr); + if (texture_src_data == nullptr) { + return nullptr; + } + + MICROPROFILE_SCOPE(OpenGL_SurfaceUpload); + + std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>(); -void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info) { - const auto cached_texture = texture_cache.find(info.physical_address); + new_surface->addr = params.addr; + new_surface->size = params_size; - if (cached_texture != texture_cache.end()) { - state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; - state.Apply(); + new_surface->texture.Create(); + new_surface->width = params.width; + new_surface->height = params.height; + new_surface->stride = params.stride; + new_surface->res_scale_width = params.res_scale_width; + new_surface->res_scale_height = params.res_scale_height; + + new_surface->is_tiled = params.is_tiled; + new_surface->pixel_format = params.pixel_format; + new_surface->dirty = false; + + if (!load_if_create) { + // Don't load any data; just allocate the surface's texture + AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); } else { - MICROPROFILE_SCOPE(OpenGL_TextureUpload); + // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game + + Memory::RasterizerFlushRegion(params.addr, params_size); + + // Load data from memory to the new surface + OpenGLState cur_state = OpenGLState::GetCurState(); + + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = new_surface->texture.handle; + cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride); + if (!new_surface->is_tiled) { + // TODO: Ensure this will always be a color format, not a depth or other format + ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size()); + const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format]; + + glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, + tuple.format, tuple.type, texture_src_data); + } else { + SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format); + if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { + FormatTuple tuple; + if ((size_t)params.pixel_format < fb_format_tuples.size()) { + tuple = fb_format_tuples[(unsigned int)params.pixel_format]; + } else { + // Texture + tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; + } + + std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); - std::unique_ptr<CachedTexture> new_texture = std::make_unique<CachedTexture>(); + Pica::DebugUtils::TextureInfo tex_info; + tex_info.width = params.width; + tex_info.height = params.height; + tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; + tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format; + tex_info.physical_address = params.addr; - new_texture->texture.Create(); - state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; - state.Apply(); - glActiveTexture(GL_TEXTURE0 + texture_unit); + for (unsigned y = 0; y < params.height; ++y) { + for (unsigned x = 0; x < params.width; ++x) { + tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info); + } + } - u8* texture_src_data = Memory::GetPhysicalPointer(info.physical_address); + glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); + } else { + // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format + size_t tuple_idx = (size_t)params.pixel_format - 14; + ASSERT(tuple_idx < depth_format_tuples.size()); + const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - new_texture->width = info.width; - new_texture->height = info.height; - new_texture->size = info.stride * info.height; - new_texture->addr = info.physical_address; - new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size); + u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8; - std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); + // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + bool use_4bpp = (params.pixel_format == PixelFormat::D24); - for (int y = 0; y < info.height; ++y) { - for (int x = 0; x < info.width; ++x) { - temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info); + u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; + + std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel); + + u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); + + MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true); + + glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, + tuple.format, tuple.type, temp_fb_depth_buffer.data()); } } + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface + if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) { + OGLTexture scaled_texture; + scaled_texture.Create(); + + AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); + BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format), + MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height), + MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight())); + + new_surface->texture.Release(); + new_surface->texture.handle = scaled_texture.handle; + scaled_texture.handle = 0; + cur_state.texture_units[0].texture_2d = new_surface->texture.handle; + cur_state.Apply(); + } - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - texture_cache.emplace(info.physical_address, std::move(new_texture)); + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); } + + Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); + surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface }))); + return new_surface.get(); } -void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) { - // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound - auto cache_upper_bound = texture_cache.upper_bound(addr + size); +CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) { + if (params.addr == 0) { + return nullptr; + } + + u32 total_pixels = params.width * params.height; + u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8; - for (auto it = texture_cache.begin(); it != cache_upper_bound;) { - const auto& info = *it->second; + // Attempt to find encompassing surfaces + CachedSurface* best_subrect_surface = nullptr; + float subrect_surface_goodness = -1.f; - // Flush the texture only if the memory region intersects and a change is detected - if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && - (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { + auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); + auto cache_upper_bound = surface_cache.upper_bound(surface_interval); + for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { + for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { + CachedSurface* surface = it2->get(); - it = texture_cache.erase(it); + // Check if the request is contained in the surface + if (params.addr >= surface->addr && + params.addr + params_size - 1 <= surface->addr + surface->size - 1 && + params.pixel_format == surface->pixel_format) + { + // Make sure optional param-matching criteria are fulfilled + bool tiling_match = (params.is_tiled == surface->is_tiled); + bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); + if (!match_res_scale || res_scale_match) { + // Prioritize same-tiling and highest resolution surfaces + float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; + if (match_goodness > subrect_surface_goodness || surface->dirty) { + subrect_surface_goodness = match_goodness; + best_subrect_surface = surface; + } + } + } + } + } + + // Return the best subrect surface if found + if (best_subrect_surface != nullptr) { + unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); + + int x0, y0; + + if (!params.is_tiled) { + u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel; + x0 = begin_pixel_index % best_subrect_surface->width; + y0 = begin_pixel_index / best_subrect_surface->width; + + out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height); + } else { + u32 bytes_per_tile = 8 * 8 * bytes_per_pixel; + u32 tiles_per_row = best_subrect_surface->width / 8; + + u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile; + x0 = begin_tile_index % tiles_per_row * 8; + y0 = begin_tile_index / tiles_per_row * 8; + + // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. + out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height)); + } + + out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); + out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width); + out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height); + out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height); + + return best_subrect_surface; + } + + // No subrect found - create and return a new surface + if (!params.is_tiled) { + out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height)); + } else { + out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0); + } + + return GetSurface(params, match_res_scale, load_if_create); +} + +CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) { + Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); + + CachedSurface params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format); + return GetSurface(params, false, true); +} + +std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) { + const auto& regs = Pica::g_state.regs; + + // Make sur that framebuffers don't overlap if both color and depth are being used + u32 fb_area = config.GetWidth() * config.GetHeight(); + bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 && + config.GetDepthBufferPhysicalAddress() != 0 && + MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), + config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format)); + bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; + bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap); + + if (framebuffers_overlap && using_color_fb && using_depth_fb) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + // get color and depth surfaces + CachedSurface color_params; + CachedSurface depth_params; + color_params.width = depth_params.width = config.GetWidth(); + color_params.height = depth_params.height = config.GetHeight(); + color_params.is_tiled = depth_params.is_tiled = true; + if (VideoCore::g_scaled_resolution_enabled) { + auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); + + // Assume same scaling factor for top and bottom screens + color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth; + color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight; + } + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); + + MathUtil::Rectangle<int> color_rect; + CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; + + MathUtil::Rectangle<int> depth_rect; + CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; + + // Sanity check to make sure found surfaces aren't the same + if (using_depth_fb && using_color_fb && color_surface == depth_surface) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); + using_depth_fb = false; + depth_surface = nullptr; + } + + MathUtil::Rectangle<int> rect; + + if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { + // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match + if (color_rect.left != 0 || color_rect.top != 0) { + color_surface = GetSurface(color_params, true, true); + } + + if (depth_rect.left != 0 || depth_rect.top != 0) { + depth_surface = GetSurface(depth_params, true, true); + } + + if (!color_surface->is_tiled) { + rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height)); } else { - ++it; + rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0); } + } else if (color_surface != nullptr) { + rect = color_rect; + } else if (depth_surface != nullptr) { + rect = depth_rect; + } else { + rect = MathUtil::Rectangle<int>(0, 0, 0, 0); } + + return std::make_tuple(color_surface, depth_surface, rect); } -void RasterizerCacheOpenGL::InvalidateAll() { - texture_cache.clear(); +CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress()); + auto range = surface_cache.equal_range(surface_interval); + for (auto it = range.first; it != range.second; ++it) { + for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { + int bits_per_value = 0; + if (config.fill_24bit) { + bits_per_value = 24; + } else if (config.fill_32bit) { + bits_per_value = 32; + } else { + bits_per_value = 16; + } + + CachedSurface* surface = it2->get(); + + if (surface->addr == config.GetStartAddress() && + CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && + (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress())) + { + return surface; + } + } + } + + return nullptr; +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64)); +void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { + using PixelFormat = CachedSurface::PixelFormat; + using SurfaceType = CachedSurface::SurfaceType; + + if (!surface->dirty) { + return; + } + + MICROPROFILE_SCOPE(OpenGL_SurfaceDownload); + + u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr); + if (dst_buffer == nullptr) { + return; + } + + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_tex = cur_state.texture_units[0].texture_2d; + + OGLTexture unscaled_tex; + GLuint texture_to_flush = surface->texture.handle; + + // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush + if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { + unscaled_tex.Create(); + + AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height); + BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format), + MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), + MathUtil::Rectangle<int>(0, 0, surface->width, surface->height)); + + texture_to_flush = unscaled_tex.handle; + } + + cur_state.texture_units[0].texture_2d = texture_to_flush; + cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); + + glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->stride); + if (!surface->is_tiled) { + // TODO: Ensure this will always be a color format, not a depth or other format + ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); + const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; + + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer); + } else { + SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format); + if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { + ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); + const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; + + u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; + + std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel); + + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); + + // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. + MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false); + } else { + // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format + size_t tuple_idx = (size_t)surface->pixel_format - 14; + ASSERT(tuple_idx < depth_format_tuples.size()); + const FormatTuple& tuple = depth_format_tuples[tuple_idx]; + + u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; + + // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + bool use_4bpp = (surface->pixel_format == PixelFormat::D24); + + u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; + + std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel); + + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); + + u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); + + MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false); + } + } + glPixelStorei(GL_PACK_ROW_LENGTH, 0); + + surface->dirty = false; + + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); +} + +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) { + if (size == 0) { + return; + } + + // Gather up unique surfaces that touch the region + std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces; + + auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size); + auto cache_upper_bound = surface_cache.upper_bound(surface_interval); + for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { + std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()), + [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); }); + } + + // Flush and invalidate surfaces + for (auto surface : touching_surfaces) { + FlushSurface(surface.get()); + if (invalidate) { + Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); + surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface }))); + } + } +} + +void RasterizerCacheOpenGL::FlushAll() { + for (auto& surfaces : surface_cache) { + for (auto& surface : surfaces.second) { + FlushSurface(surface.get()); + } + } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index b69651427..225596415 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -4,40 +4,219 @@ #pragma once -#include <map> +#include <array> #include <memory> +#include <set> +#include <tuple> + +#include <boost/icl/interval_map.hpp> +#include <glad/glad.h> + +#include "common/assert.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +#include "core/hw/gpu.h" #include "video_core/pica.h" -#include "video_core/debug_utils/debug_utils.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" + +namespace MathUtil { +template <class T> struct Rectangle; +} + +struct CachedSurface; + +using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>; + +struct CachedSurface { + enum class PixelFormat { + // First 5 formats are shared between textures and color buffers + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, + + // Texture-only formats + IA8 = 5, + RG8 = 6, + I8 = 7, + A8 = 8, + IA4 = 9, + I4 = 10, + A4 = 11, + ETC1 = 12, + ETC1A4 = 13, + + // Depth buffer-only formats + D16 = 14, + // gap + D24 = 16, + D24S8 = 17, + + Invalid = 255, + }; + + enum class SurfaceType { + Color = 0, + Texture = 1, + Depth = 2, + DepthStencil = 3, + Invalid = 4, + }; + + static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { + static const std::array<unsigned int, 18> bpp_table = { + 32, // RGBA8 + 24, // RGB8 + 16, // RGB5A1 + 16, // RGB565 + 16, // RGBA4 + 16, // IA8 + 16, // RG8 + 8, // I8 + 8, // A8 + 8, // IA4 + 4, // I4 + 4, // A4 + 4, // ETC1 + 8, // ETC1A4 + 16, // D16 + 0, + 24, // D24 + 32, // D24S8 + }; + + ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table)); + return bpp_table[(unsigned int)format]; + } + + static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) { + return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) { + return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) { + return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { + switch (format) { + // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat + case GPU::Regs::PixelFormat::RGB565: + return PixelFormat::RGB565; + case GPU::Regs::PixelFormat::RGB5A1: + return PixelFormat::RGB5A1; + default: + return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; + } + } + + static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { + SurfaceType a_type = GetFormatType(pixel_format_a); + SurfaceType b_type = GetFormatType(pixel_format_b); + + if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { + return true; + } + + if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { + return true; + } + + if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { + return true; + } + + return false; + } + + static SurfaceType GetFormatType(PixelFormat pixel_format) { + if ((unsigned int)pixel_format < 5) { + return SurfaceType::Color; + } + + if ((unsigned int)pixel_format < 14) { + return SurfaceType::Texture; + } + + if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) { + return SurfaceType::Depth; + } + + if (pixel_format == PixelFormat::D24S8) { + return SurfaceType::DepthStencil; + } + + return SurfaceType::Invalid; + } + + u32 GetScaledWidth() const { + return (u32)(width * res_scale_width); + } + + u32 GetScaledHeight() const { + return (u32)(height * res_scale_height); + } + + PAddr addr; + u32 size; + + PAddr min_valid; + PAddr max_valid; + + OGLTexture texture; + u32 width; + u32 height; + u32 stride = 0; + float res_scale_width = 1.f; + float res_scale_height = 1.f; + + bool is_tiled; + PixelFormat pixel_format; + bool dirty; +}; class RasterizerCacheOpenGL : NonCopyable { public: + RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL(); + /// Blits one texture to another + bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect); + + /// Attempt to blit one surface's texture to another + bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect); + /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) - void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info); + CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create); - void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { - LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format)); - } + /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached) + CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect); - /// Invalidate any cached resource intersecting the specified region. - void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false); + /// Gets a surface based on the texture configuration + CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config); - /// Invalidate all cached OpenGL resources tracked by this cache manager - void InvalidateAll(); + /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration + std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config); -private: - struct CachedTexture { - OGLTexture texture; - GLuint width; - GLuint height; - u32 size; - u64 hash; - PAddr addr; - }; + /// Attempt to get a surface that exactly matches the fill region and format + CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); + + /// Write the surface back to memory + void FlushSurface(CachedSurface* surface); - std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; + /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache + void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); + + /// Flush all cached resources tracked by this cache manager + void FlushAll(); + +private: + SurfaceCache surface_cache; + OGLFramebuffer transfer_framebuffers[2]; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index ee4b54ab9..71d60e69c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -2,9 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <cstddef> + +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/logging/log.h" + #include "video_core/pica.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_shader_util.h" using Pica::Regs; using TevStageConfig = Regs::TevStageConfig; @@ -24,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) { } /// Writes the specified TEV stage source component(s) -static void AppendSource(std::string& out, TevStageConfig::Source source, +static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source, const std::string& index_name) { + const auto& state = config.state; using Source = TevStageConfig::Source; switch (source) { case Source::PrimaryColor: @@ -38,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, out += "secondary_fragment_color"; break; case Source::Texture0: - out += "texture(tex[0], texcoord[0])"; + // Only unit 0 respects the texturing type (according to 3DBrew) + switch(state.texture0_type) { + case Pica::Regs::TextureConfig::Texture2D: + out += "texture(tex[0], texcoord[0])"; + break; + case Pica::Regs::TextureConfig::Projection2D: + out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))"; + break; + default: + out += "texture(tex[0], texcoord[0])"; + LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type)); + UNIMPLEMENTED(); + break; + } break; case Source::Texture1: out += "texture(tex[1], texcoord[1])"; @@ -63,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, } /// Writes the color components to use for the specified TEV stage color modifier -static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier, +static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier, TevStageConfig::Source source, const std::string& index_name) { using ColorModifier = TevStageConfig::ColorModifier; switch (modifier) { case ColorModifier::SourceColor: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rgb"; break; case ColorModifier::OneMinusSourceColor: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rgb"; break; case ColorModifier::SourceAlpha: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".aaa"; break; case ColorModifier::OneMinusSourceAlpha: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".aaa"; break; case ColorModifier::SourceRed: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rrr"; break; case ColorModifier::OneMinusSourceRed: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rrr"; break; case ColorModifier::SourceGreen: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".ggg"; break; case ColorModifier::OneMinusSourceGreen: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".ggg"; break; case ColorModifier::SourceBlue: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".bbb"; break; case ColorModifier::OneMinusSourceBlue: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".bbb"; break; default: @@ -120,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier } /// Writes the alpha component to use for the specified TEV stage alpha modifier -static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier, +static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier, TevStageConfig::Source source, const std::string& index_name) { using AlphaModifier = TevStageConfig::AlphaModifier; switch (modifier) { case AlphaModifier::SourceAlpha: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".a"; break; case AlphaModifier::OneMinusSourceAlpha: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".a"; break; case AlphaModifier::SourceRed: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".r"; break; case AlphaModifier::OneMinusSourceRed: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".r"; break; case AlphaModifier::SourceGreen: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".g"; break; case AlphaModifier::OneMinusSourceGreen: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".g"; break; case AlphaModifier::SourceBlue: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".b"; break; case AlphaModifier::OneMinusSourceBlue: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".b"; break; default: @@ -198,6 +220,9 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper case Operation::AddThenMultiply: out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; break; + case Operation::Dot3_RGB: + out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)"; + break; default: out += "vec3(0.0)"; LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation); @@ -276,16 +301,16 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { /// Writes the code to emulate the specified TEV stage static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { - auto& stage = config.tev_stages[index]; + const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); if (!IsPassThroughTevStage(stage)) { std::string index_name = std::to_string(index); out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; - AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name); + AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); out += ", "; - AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name); + AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); out += ", "; - AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name); + AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); out += ");\n"; out += "vec3 color_output_" + index_name + " = "; @@ -293,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi out += ";\n"; out += "float alpha_results_" + index_name + "[3] = float[3]("; - AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name); out += ", "; - AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name); out += ", "; - AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name); out += ");\n"; out += "float alpha_output_" + index_name + " = "; @@ -320,6 +345,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi /// Writes the code to emulate fragment lighting static void WriteLighting(std::string& out, const PicaShaderConfig& config) { + const auto& lighting = config.state.lighting; + // Define lighting globals out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" @@ -327,17 +354,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "vec3 refl_value = vec3(0.0);\n"; // Compute fragment normals - if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { + if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture - std::string bump_selector = std::to_string(config.lighting.bump_selector); + std::string bump_selector = std::to_string(lighting.bump_selector); out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result - if (config.lighting.bump_renorm) { + if (lighting.bump_renorm) { std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; } - } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { + } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { // Bump mapping is enabled using a tangent map LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); UNIMPLEMENTED(); @@ -350,7 +377,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; // Gets the index into the specified lookup table for specular lighting - auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { + auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) { const std::string half_angle = "normalize(normalize(view) + light_vector)"; std::string index; switch (input) { @@ -378,7 +405,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (abs) { // LUT index is in the range of (0.0, 1.0) - index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; } else { // LUT index is in the range of (-1.0, 1.0) @@ -396,8 +423,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { }; // Write the code to emulate each enabled light - for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { - const auto& light_config = config.lighting.light[light_index]; + for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { + const auto& light_config = lighting.light[light_index]; std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; // Compute light vector (directional or positional) @@ -421,39 +448,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } // If enabled, clamp specular component if lighting result is negative - std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; // Specular 0 component std::string d0_lut_value = "1.0"; - if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { + if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); - d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; } std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; // If enabled, lookup ReflectRed value, otherwise, 1.0 is used - if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; + if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; out += "refl_value.r = " + value + ";\n"; } else { out += "refl_value.r = 1.0;\n"; } // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used - if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; + if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; out += "refl_value.g = " + value + ";\n"; } else { out += "refl_value.g = refl_value.r;\n"; } // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used - if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; + if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; out += "refl_value.b = " + value + ";\n"; } else { out += "refl_value.b = refl_value.r;\n"; @@ -461,27 +488,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Specular 1 component std::string d1_lut_value = "1.0"; - if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { + if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); - d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; } std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; // Fresnel - if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { + if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); + std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; // Enabled for difffuse lighting alpha component - if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || - config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || + lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) out += "diffuse_sum.a *= " + value + ";\n"; // Enabled for the specular lighting alpha component - if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || - config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || + lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) out += "specular_sum.a *= " + value + ";\n"; } @@ -499,6 +526,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } std::string GenerateFragmentShader(const PicaShaderConfig& config) { + const auto& state = config.state; + std::string out = R"( #version 330 core #define NUM_TEV_STAGES 6 @@ -508,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { in vec4 primary_color; in vec2 texcoord[3]; +in float texcoord0_w; in vec4 normquat; in vec3 view; @@ -525,6 +555,7 @@ layout (std140) uniform shader_data { vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; int alphatest_ref; + float depth_scale; float depth_offset; vec3 lighting_global_ambient; LightSrc light_src[NUM_LIGHTS]; @@ -544,29 +575,37 @@ vec4 secondary_fragment_color = vec4(0.0); )"; // Do not do any sort of processing if it's obvious we're not going to pass the alpha test - if (config.alpha_test_func == Regs::CompareFunc::Never) { + if (state.alpha_test_func == Regs::CompareFunc::Never) { out += "discard; }"; return out; } - if (config.lighting.enable) + if (state.lighting.enable) WriteLighting(out, config); out += "vec4 combiner_buffer = vec4(0.0);\n"; out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; out += "vec4 last_tex_env_out = vec4(0.0);\n"; - for (size_t index = 0; index < config.tev_stages.size(); ++index) + for (size_t index = 0; index < state.tev_stages.size(); ++index) WriteTevStage(out, config, (unsigned)index); - if (config.alpha_test_func != Regs::CompareFunc::Always) { + if (state.alpha_test_func != Regs::CompareFunc::Always) { out += "if ("; - AppendAlphaTestCondition(out, config.alpha_test_func); + AppendAlphaTestCondition(out, state.alpha_test_func); out += ") discard;\n"; } out += "color = last_tex_env_out;\n"; - out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; + + out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; + out += "float depth = z_over_w * depth_scale + depth_offset;\n"; + if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + out += "depth /= gl_FragCoord.w;\n"; + } + out += "gl_FragDepth = depth;\n"; + + out += "}"; return out; } @@ -574,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0); std::string GenerateVertexShader() { std::string out = "#version 330 core\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; out += R"( out vec4 primary_color; out vec2 texcoord[3]; +out float texcoord0_w; out vec4 normquat; out vec3 view; @@ -593,6 +634,7 @@ void main() { texcoord[0] = vert_texcoord0; texcoord[1] = vert_texcoord1; texcoord[2] = vert_texcoord2; + texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 0ca9d2879..bef3249cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -6,7 +6,7 @@ #include <string> -#include "video_core/renderer_opengl/gl_rasterizer.h" +union PicaShaderConfig; namespace GLShader { diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index e3f7a5868..dded3db46 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -2,9 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <algorithm> #include <vector> +#include <glad/glad.h> + #include "common/logging/log.h" #include "video_core/renderer_opengl/gl_shader_util.h" diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 097242f6f..f59912f79 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -14,6 +14,7 @@ enum Attributes { ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW, }; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 08e4d0b54..fa141fc9a 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -2,7 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "video_core/pica.h" +#include <glad/glad.h> + +#include "common/common_funcs.h" +#include "common/logging/log.h" + #include "video_core/renderer_opengl/gl_state.h" OpenGLState OpenGLState::cur_state; @@ -32,6 +36,8 @@ OpenGLState::OpenGLState() { stencil.action_stencil_fail = GL_KEEP; blend.enabled = false; + blend.rgb_equation = GL_FUNC_ADD; + blend.a_equation = GL_FUNC_ADD; blend.src_rgb_func = GL_ONE; blend.dst_rgb_func = GL_ZERO; blend.src_a_func = GL_ONE; @@ -48,17 +54,19 @@ OpenGLState::OpenGLState() { texture_unit.sampler = 0; } - for (auto& lut : lighting_lut) { + for (auto& lut : lighting_luts) { lut.texture_1d = 0; } - draw.framebuffer = 0; + draw.read_framebuffer = 0; + draw.draw_framebuffer = 0; draw.vertex_array = 0; draw.vertex_buffer = 0; + draw.uniform_buffer = 0; draw.shader_program = 0; } -void OpenGLState::Apply() { +void OpenGLState::Apply() const { // Culling if (cull.enabled != cur_state.cull.enabled) { if (cull.enabled) { @@ -159,6 +167,11 @@ void OpenGLState::Apply() { blend.src_a_func, blend.dst_a_func); } + if (blend.rgb_equation != cur_state.blend.rgb_equation || + blend.a_equation != cur_state.blend.a_equation) { + glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); + } + if (logic_op != cur_state.logic_op) { glLogicOp(logic_op); } @@ -175,16 +188,19 @@ void OpenGLState::Apply() { } // Lighting LUTs - for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { - if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { + for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) { + if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) { glActiveTexture(GL_TEXTURE3 + i); - glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); + glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d); } } // Framebuffer - if (draw.framebuffer != cur_state.draw.framebuffer) { - glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); + if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); + } + if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); } // Vertex array @@ -210,45 +226,58 @@ void OpenGLState::Apply() { cur_state = *this; } -void OpenGLState::ResetTexture(GLuint id) { +GLenum OpenGLState::CheckFBStatus(GLenum target) { + GLenum fb_status = glCheckFramebufferStatus(target); + if (fb_status != GL_FRAMEBUFFER_COMPLETE) { + const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK")); + LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status); + } + + return fb_status; +} + +void OpenGLState::ResetTexture(GLuint handle) { for (auto& unit : cur_state.texture_units) { - if (unit.texture_2d == id) { + if (unit.texture_2d == handle) { unit.texture_2d = 0; } } } -void OpenGLState::ResetSampler(GLuint id) { +void OpenGLState::ResetSampler(GLuint handle) { for (auto& unit : cur_state.texture_units) { - if (unit.sampler == id) { + if (unit.sampler == handle) { unit.sampler = 0; } } } -void OpenGLState::ResetProgram(GLuint id) { - if (cur_state.draw.shader_program == id) { +void OpenGLState::ResetProgram(GLuint handle) { + if (cur_state.draw.shader_program == handle) { cur_state.draw.shader_program = 0; } } -void OpenGLState::ResetBuffer(GLuint id) { - if (cur_state.draw.vertex_buffer == id) { +void OpenGLState::ResetBuffer(GLuint handle) { + if (cur_state.draw.vertex_buffer == handle) { cur_state.draw.vertex_buffer = 0; } - if (cur_state.draw.uniform_buffer == id) { + if (cur_state.draw.uniform_buffer == handle) { cur_state.draw.uniform_buffer = 0; } } -void OpenGLState::ResetVertexArray(GLuint id) { - if (cur_state.draw.vertex_array == id) { +void OpenGLState::ResetVertexArray(GLuint handle) { + if (cur_state.draw.vertex_array == handle) { cur_state.draw.vertex_array = 0; } } -void OpenGLState::ResetFramebuffer(GLuint id) { - if (cur_state.draw.framebuffer == id) { - cur_state.draw.framebuffer = 0; +void OpenGLState::ResetFramebuffer(GLuint handle) { + if (cur_state.draw.read_framebuffer == handle) { + cur_state.draw.read_framebuffer = 0; + } + if (cur_state.draw.draw_framebuffer == handle) { + cur_state.draw.draw_framebuffer = 0; } } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index e848058d7..228727054 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -40,6 +40,8 @@ public: struct { bool enabled; // GL_BLEND + GLenum rgb_equation; // GL_BLEND_EQUATION_RGB + GLenum a_equation; // GL_BLEND_EQUATION_ALPHA GLenum src_rgb_func; // GL_BLEND_SRC_RGB GLenum dst_rgb_func; // GL_BLEND_DST_RGB GLenum src_a_func; // GL_BLEND_SRC_ALPHA @@ -63,15 +65,15 @@ public: struct { GLuint texture_1d; // GL_TEXTURE_BINDING_1D - } lighting_lut[6]; + } lighting_luts[6]; struct { - GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING + GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING + GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING GLuint shader_program; // GL_CURRENT_PROGRAM - bool shader_dirty; } draw; OpenGLState(); @@ -82,14 +84,18 @@ public: } /// Apply this state as the current OpenGL state - void Apply(); - - static void ResetTexture(GLuint id); - static void ResetSampler(GLuint id); - static void ResetProgram(GLuint id); - static void ResetBuffer(GLuint id); - static void ResetVertexArray(GLuint id); - static void ResetFramebuffer(GLuint id); + void Apply() const; + + /// Check the status of the current OpenGL read or draw framebuffer configuration + static GLenum CheckFBStatus(GLenum target); + + /// Resets and unbinds any references to the given resource in the current OpenGL state + static void ResetTexture(GLuint handle); + static void ResetSampler(GLuint handle); + static void ResetProgram(GLuint handle); + static void ResetBuffer(GLuint handle); + static void ResetVertexArray(GLuint handle); + static void ResetFramebuffer(GLuint handle); private: static OpenGLState cur_state; diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index fd3617d77..6dc2758c5 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -4,9 +4,16 @@ #pragma once +#include <array> +#include <cstddef> + #include <glad/glad.h> +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/common_funcs.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/pica.h" @@ -71,6 +78,26 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { return gl_mode; } +inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) { + static const GLenum blend_equation_table[] = { + GL_FUNC_ADD, // BlendEquation::Add + GL_FUNC_SUBTRACT, // BlendEquation::Subtract + GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract + GL_MIN, // BlendEquation::Min + GL_MAX, // BlendEquation::Max + }; + + // Range check table for input + if (static_cast<size_t>(equation) >= ARRAY_SIZE(blend_equation_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown blend equation %d", equation); + UNREACHABLE(); + + return GL_FUNC_ADD; + } + + return blend_equation_table[(unsigned)equation]; +} + inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { static const GLenum blend_func_table[] = { GL_ZERO, // BlendFactor::Zero diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 11c4d0daf..8f424a435 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,23 +5,28 @@ #include <algorithm> #include <cstddef> #include <cstdlib> +#include <memory> + +#include <glad/glad.h> #include "common/assert.h" +#include "common/bit_field.h" #include "common/emu_window.h" #include "common/logging/log.h" #include "common/profiler_reporting.h" +#include "common/synchronized_wrapper.h" -#include "core/memory.h" -#include "core/settings.h" #include "core/hw/gpu.h" #include "core/hw/hw.h" #include "core/hw/lcd.h" +#include "core/memory.h" +#include "core/settings.h" +#include "core/tracer/recorder.h" -#include "video_core/video_core.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/video_core.h" static const char vertex_shader[] = R"( #version 150 core @@ -107,7 +112,7 @@ void RendererOpenGL::SwapBuffers() { OpenGLState prev_state = OpenGLState::GetCurState(); state.Apply(); - for(int i : {0, 1}) { + for (int i : {0, 1}) { const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 @@ -117,25 +122,25 @@ void RendererOpenGL::SwapBuffers() { LCD::Read(color_fill.raw, lcd_color_addr); if (color_fill.is_enabled) { - LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]); + LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture); // Resize the texture in case the framebuffer size has changed - textures[i].width = 1; - textures[i].height = 1; + screen_infos[i].texture.width = 1; + screen_infos[i].texture.height = 1; } else { - if (textures[i].width != (GLsizei)framebuffer.width || - textures[i].height != (GLsizei)framebuffer.height || - textures[i].format != framebuffer.color_format) { + if (screen_infos[i].texture.width != (GLsizei)framebuffer.width || + screen_infos[i].texture.height != (GLsizei)framebuffer.height || + screen_infos[i].texture.format != framebuffer.color_format) { // Reallocate texture if the framebuffer size has changed. // This is expected to not happen very often and hence should not be a // performance problem. - ConfigureFramebufferTexture(textures[i], framebuffer); + ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer); } - LoadFBToActiveGLTexture(framebuffer, textures[i]); + LoadFBToScreenInfo(framebuffer, screen_infos[i]); // Resize the texture in case the framebuffer size has changed - textures[i].width = framebuffer.width; - textures[i].height = framebuffer.height; + screen_infos[i].texture.width = framebuffer.width; + screen_infos[i].texture.height = framebuffer.height; } } @@ -166,8 +171,8 @@ void RendererOpenGL::SwapBuffers() { /** * Loads framebuffer from emulated memory into the active OpenGL texture. */ -void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, - const TextureInfo& texture) { +void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info) { const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2; @@ -177,8 +182,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer_addr, (int)framebuffer.width, (int)framebuffer.height, (int)framebuffer.format); - const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr); - int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); size_t pixel_stride = framebuffer.stride / bpp; @@ -189,24 +192,34 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& // only allows rows to have a memory alignement of 4. ASSERT(pixel_stride % 4 == 0); - state.texture_units[0].texture_2d = texture.handle; - state.Apply(); + if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) { + // Reset the screen info's display texture to its own permanent texture + screen_info.display_texture = screen_info.texture.resource.handle; + screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); - glActiveTexture(GL_TEXTURE0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); + Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); - // Update existing texture - // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they - // differ from the LCD resolution. - // TODO: Applications could theoretically crash Citra here by specifying too large - // framebuffer sizes. We should make sure that this cannot happen. - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, - texture.gl_format, texture.gl_type, framebuffer_data); + const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + state.texture_units[0].texture_2d = screen_info.texture.resource.handle; + state.Apply(); - state.texture_units[0].texture_2d = 0; - state.Apply(); + glActiveTexture(GL_TEXTURE0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); + + // Update existing texture + // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they + // differ from the LCD resolution. + // TODO: Applications could theoretically crash Citra here by specifying too large + // framebuffer sizes. We should make sure that this cannot happen. + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, + screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + state.texture_units[0].texture_2d = 0; + state.Apply(); + } } /** @@ -216,7 +229,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& */ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture) { - state.texture_units[0].texture_2d = texture.handle; + state.texture_units[0].texture_2d = texture.resource.handle; state.Apply(); glActiveTexture(GL_TEXTURE0); @@ -224,6 +237,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color // Update existing texture glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } /** @@ -233,20 +249,22 @@ void RendererOpenGL::InitOpenGLObjects() { glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); // Link shaders and get variable locations - program_id = GLShader::LoadProgram(vertex_shader, fragment_shader); - uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix"); - uniform_color_texture = glGetUniformLocation(program_id, "color_texture"); - attrib_position = glGetAttribLocation(program_id, "vert_position"); - attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord"); + shader.Create(vertex_shader, fragment_shader); + state.draw.shader_program = shader.handle; + state.Apply(); + uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); + uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); + attrib_position = glGetAttribLocation(shader.handle, "vert_position"); + attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord"); // Generate VBO handle for drawing - glGenBuffers(1, &vertex_buffer_handle); + vertex_buffer.Create(); // Generate VAO - glGenVertexArrays(1, &vertex_array_handle); + vertex_array.Create(); - state.draw.vertex_array = vertex_array_handle; - state.draw.vertex_buffer = vertex_buffer_handle; + state.draw.vertex_array = vertex_array.handle; + state.draw.vertex_buffer = vertex_buffer.handle; state.draw.uniform_buffer = 0; state.Apply(); @@ -258,13 +276,13 @@ void RendererOpenGL::InitOpenGLObjects() { glEnableVertexAttribArray(attrib_tex_coord); // Allocate textures for each screen - for (auto& texture : textures) { - glGenTextures(1, &texture.handle); + for (auto& screen_info : screen_infos) { + screen_info.texture.resource.Create(); // Allocation of storage is deferred until the first frame, when we // know the framebuffer size. - state.texture_units[0].texture_2d = texture.handle; + state.texture_units[0].texture_2d = screen_info.texture.resource.handle; state.Apply(); glActiveTexture(GL_TEXTURE0); @@ -273,6 +291,8 @@ void RendererOpenGL::InitOpenGLObjects() { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + screen_info.display_texture = screen_info.texture.resource.handle; } state.texture_units[0].texture_2d = 0; @@ -327,30 +347,38 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, UNIMPLEMENTED(); } - state.texture_units[0].texture_2d = texture.handle; + state.texture_units[0].texture_2d = texture.resource.handle; state.Apply(); glActiveTexture(GL_TEXTURE0); glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, texture.gl_format, texture.gl_type, nullptr); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } /** * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. */ -void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) { +void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) { + auto& texcoords = screen_info.display_texcoords; + std::array<ScreenRectVertex, 4> vertices = {{ - ScreenRectVertex(x, y, 1.f, 0.f), - ScreenRectVertex(x+w, y, 1.f, 1.f), - ScreenRectVertex(x, y+h, 0.f, 0.f), - ScreenRectVertex(x+w, y+h, 0.f, 1.f), + ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), + ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x, y+h, texcoords.top, texcoords.left), + ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right), }}; - state.texture_units[0].texture_2d = texture.handle; + state.texture_units[0].texture_2d = screen_info.display_texture; state.Apply(); glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } /** @@ -362,9 +390,6 @@ void RendererOpenGL::DrawScreens() { glViewport(0, 0, layout.width, layout.height); glClear(GL_COLOR_BUFFER_BIT); - state.draw.shader_program = program_id; - state.Apply(); - // Set projection matrix std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, (float)layout.height); @@ -374,9 +399,9 @@ void RendererOpenGL::DrawScreens() { glActiveTexture(GL_TEXTURE0); glUniform1i(uniform_color_texture, 0); - DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, + DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top, (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); - DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, + DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); m_current_frame++; @@ -448,12 +473,6 @@ static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, bool RendererOpenGL::Init() { render_window->MakeCurrent(); - // TODO: Make frontends initialize this, so they can use gladLoadGLLoader with their own loaders - if (!gladLoadGL()) { - LOG_CRITICAL(Render_OpenGL, "Failed to initialize GL functions! Exiting..."); - exit(-1); - } - if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(DebugHandler, nullptr); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index fe4d142a5..00e1044ab 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -8,13 +8,34 @@ #include <glad/glad.h> +#include "common/common_types.h" +#include "common/math_util.h" + #include "core/hw/gpu.h" #include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_state.h" class EmuWindow; +/// Structure used for storing information about the textures for each 3DS screen +struct TextureInfo { + OGLTexture resource; + GLsizei width; + GLsizei height; + GPU::Regs::PixelFormat format; + GLenum gl_format; + GLenum gl_type; +}; + +/// Structure used for storing information about the display target for each 3DS screen +struct ScreenInfo { + GLuint display_texture; + MathUtil::Rectangle<float> display_texcoords; + TextureInfo texture; +}; + class RendererOpenGL : public RendererBase { public: @@ -37,26 +58,16 @@ public: void ShutDown() override; private: - /// Structure used for storing information about the textures for each 3DS screen - struct TextureInfo { - GLuint handle; - GLsizei width; - GLsizei height; - GPU::Regs::PixelFormat format; - GLenum gl_format; - GLenum gl_type; - }; - void InitOpenGLObjects(); void ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer); void DrawScreens(); - void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); + void DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h); void UpdateFramerate(); - // Loads framebuffer from emulated memory into the active OpenGL texture. - void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, - const TextureInfo& texture); + // Loads framebuffer from emulated memory into the display information structure + void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info); // Fills active OpenGL texture with the given RGB color. void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); @@ -69,10 +80,10 @@ private: OpenGLState state; // OpenGL object IDs - GLuint vertex_array_handle; - GLuint vertex_buffer_handle; - GLuint program_id; - std::array<TextureInfo, 2> textures; ///< Textures for top and bottom screens respectively + OGLVertexArray vertex_array; + OGLBuffer vertex_buffer; + OGLShader shader; + std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively // Shader uniform location indices GLuint uniform_modelview_matrix; GLuint uniform_color_texture; |
