diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 74 | ||||
| -rw-r--r-- | src/video_core/renderer_base.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_base.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 41 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 125 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 6 |
7 files changed, 226 insertions, 30 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 9176a8dbc..58f2904ce 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -254,6 +254,15 @@ enum class TextureQueryType : u64 { BorderColor = 22, }; +enum class TextureProcessMode : u64 { + None = 0, + LZ = 1, // Unknown, appears to be the same as none. + LB = 2, // Load Bias. + LL = 3, // Load LOD (LevelOfDetail) + LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB + LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL +}; + enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; @@ -424,6 +433,45 @@ union Instruction { } bfe; union { + BitField<48, 3, u64> pred48; + + union { + BitField<20, 20, u64> entry_a; + BitField<39, 5, u64> entry_b; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> uses_cc; + } imm; + + union { + BitField<20, 14, u64> cb_index; + BitField<34, 5, u64> cb_offset; + BitField<56, 1, u64> neg; + BitField<57, 1, u64> uses_cc; + } hi; + + union { + BitField<20, 14, u64> cb_index; + BitField<34, 5, u64> cb_offset; + BitField<39, 5, u64> entry_a; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> uses_cc; + } rz; + + union { + BitField<39, 5, u64> entry_a; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> uses_cc; + } r1; + + union { + BitField<28, 8, u64> entry_a; + BitField<37, 1, u64> neg; + BitField<38, 1, u64> uses_cc; + } r2; + + } lea; + + union { BitField<0, 5, FlowCondition> cond; } flow; @@ -478,6 +526,18 @@ union Instruction { } psetp; union { + BitField<12, 3, u64> pred12; + BitField<15, 1, u64> neg_pred12; + BitField<24, 2, PredOperation> cond; + BitField<29, 3, u64> pred29; + BitField<32, 1, u64> neg_pred29; + BitField<39, 3, u64> pred39; + BitField<42, 1, u64> neg_pred39; + BitField<44, 1, u64> bf; + BitField<45, 2, PredOperation> op; + } 
pset; + + union { BitField<39, 3, u64> pred39; BitField<42, 1, u64> neg_pred; BitField<43, 1, u64> neg_a; @@ -522,6 +582,7 @@ union Instruction { BitField<28, 1, u64> array; BitField<29, 2, TextureType> texture_type; BitField<31, 4, u64> component_mask; + BitField<55, 3, TextureProcessMode> process_mode; bool IsComponentEnabled(size_t component) const { return ((1ull << component) & component_mask) != 0; @@ -726,6 +787,11 @@ public: ISCADD_C, // Scale and Add ISCADD_R, ISCADD_IMM, + LEA_R1, + LEA_R2, + LEA_RZ, + LEA_IMM, + LEA_HI, POPC_C, POPC_R, POPC_IMM, @@ -784,6 +850,7 @@ public: ISET_C, ISET_IMM, PSETP, + PSET, XMAD_IMM, XMAD_CR, XMAD_RC, @@ -807,6 +874,7 @@ public: IntegerSet, IntegerSetPredicate, PredicateSetPredicate, + PredicateSetRegister, Conversion, Xmad, Unknown, @@ -958,6 +1026,11 @@ private: INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), + INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), + INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), + INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), + INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), + INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), @@ -1012,6 +1085,7 @@ private: INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), + INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 
INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index be17a2b9c..0df3725c2 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -19,6 +19,7 @@ void RendererBase::RefreshBaseSettings() { UpdateCurrentFramebufferLayout(); renderer_settings.use_framelimiter = Settings::values.use_frame_limit; + renderer_settings.set_background_color = true; } void RendererBase::UpdateCurrentFramebufferLayout() { diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 2a357f9d0..2cd0738ff 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -19,6 +19,7 @@ namespace VideoCore { struct RendererSettings { std::atomic_bool use_framelimiter{false}; + std::atomic_bool set_background_color{false}; }; class RendererBase : NonCopyable { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 29d61eccd..fb56decc0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -53,8 +53,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); params.unaligned_height = config.tic.Height(); - params.cache_width = Common::AlignUp(params.width, 8); - params.cache_height = Common::AlignUp(params.height, 8); params.target = SurfaceTargetFromTextureType(config.tic.texture_type); switch (params.target) { @@ -89,8 +87,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.width = config.width; params.height = config.height; params.unaligned_height = config.height; - params.cache_width = 
Common::AlignUp(params.width, 8); - params.cache_height = Common::AlignUp(params.height, 8); params.target = SurfaceTarget::Texture2D; params.depth = 1; params.size_in_bytes = params.SizeInBytes(); @@ -110,8 +106,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.width = zeta_width; params.height = zeta_height; params.unaligned_height = zeta_height; - params.cache_width = Common::AlignUp(params.width, 8); - params.cache_height = Common::AlignUp(params.height, 8); params.target = SurfaceTarget::Texture2D; params.depth = 1; params.size_in_bytes = params.SizeInBytes(); @@ -477,30 +471,27 @@ CachedSurface::CachedSurface(const SurfaceParams& params) // Only pre-create the texture for non-compressed textures. switch (params.target) { case SurfaceParams::SurfaceTarget::Texture1D: - glTexImage1D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format, - rect.GetWidth(), 0, format_tuple.format, format_tuple.type, nullptr); + glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, + rect.GetWidth()); break; case SurfaceParams::SurfaceTarget::Texture2D: - glTexImage2D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format, - rect.GetWidth(), rect.GetHeight(), 0, format_tuple.format, - format_tuple.type, nullptr); + glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, + rect.GetWidth(), rect.GetHeight()); break; case SurfaceParams::SurfaceTarget::Texture3D: case SurfaceParams::SurfaceTarget::Texture2DArray: - glTexImage3D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format, - rect.GetWidth(), rect.GetHeight(), params.depth, 0, format_tuple.format, - format_tuple.type, nullptr); + glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, + rect.GetWidth(), rect.GetHeight(), params.depth); break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast<u32>(params.target)); UNREACHABLE(); - glTexImage2D(GL_TEXTURE_2D, 0, 
format_tuple.internal_format, rect.GetWidth(), - rect.GetHeight(), 0, format_tuple.format, format_tuple.type, nullptr); + glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(), + rect.GetHeight()); } } - glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); @@ -817,16 +808,20 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, // Get a new surface with the new parameters, and blit the previous surface to it Surface new_surface{GetUncachedSurface(new_params)}; - // If format is unchanged, we can do a faster blit without reinterpreting pixel data - if (params.pixel_format == new_params.pixel_format) { + if (params.pixel_format == new_params.pixel_format || + !Settings::values.use_accurate_framebuffers) { + // If the format is the same, just do a framebuffer blit. This is significantly faster than + // using PBOs. This is also likely less accurate, as textures will be converted rather than + // reinterpreted. + BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, params.GetRect(), params.type, read_framebuffer.handle, draw_framebuffer.handle); - return new_surface; - } + } else { + // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy, + // where pixels are reinterpreted as a new format (without conversion). This code path uses + // OpenGL PBOs and is quite slow.
- // When using accurate framebuffers, always copy old data to new surface, regardless of format - if (Settings::values.use_accurate_framebuffers) { auto source_format = GetFormatTuple(params.pixel_format, params.component_type); auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e660998d0..57ea8593b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -680,8 +680,8 @@ struct SurfaceParams { /// Checks if surfaces are compatible for caching bool IsCompatibleSurface(const SurfaceParams& other) const { - return std::tie(pixel_format, type, cache_width, cache_height) == - std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height); + return std::tie(pixel_format, type, width, height) == + std::tie(other.pixel_format, other.type, other.width, other.height); } VAddr addr; @@ -696,10 +696,6 @@ struct SurfaceParams { u32 unaligned_height; size_t size_in_bytes; SurfaceTarget target; - - // Parameters used for caching only - u32 cache_width; - u32 cache_height; }; }; // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e350113f1..2d56370c7 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1505,6 +1505,73 @@ private: 1, 1); break; } + case OpCode::Id::LEA_R2: + case OpCode::Id::LEA_R1: + case OpCode::Id::LEA_IMM: + case OpCode::Id::LEA_RZ: + case OpCode::Id::LEA_HI: { + std::string op_a; + std::string op_b; + std::string op_c; + + switch (opcode->GetId()) { + case OpCode::Id::LEA_R2: { + op_a = regs.GetRegisterAsInteger(instr.gpr20); + op_b = regs.GetRegisterAsInteger(instr.gpr39); + op_c = std::to_string(instr.lea.r2.entry_a); + break; + } 
+ + case OpCode::Id::LEA_R1: { + const bool neg = instr.lea.r1.neg != 0; + op_a = regs.GetRegisterAsInteger(instr.gpr8); + if (neg) + op_a = "-(" + op_a + ')'; + op_b = regs.GetRegisterAsInteger(instr.gpr20); + op_c = std::to_string(instr.lea.r1.entry_a); + break; + } + + case OpCode::Id::LEA_IMM: { + const bool neg = instr.lea.imm.neg != 0; + op_b = regs.GetRegisterAsInteger(instr.gpr8); + if (neg) + op_b = "-(" + op_b + ')'; + op_a = std::to_string(instr.lea.imm.entry_a); + op_c = std::to_string(instr.lea.imm.entry_b); + break; + } + + case OpCode::Id::LEA_RZ: { + const bool neg = instr.lea.rz.neg != 0; + op_b = regs.GetRegisterAsInteger(instr.gpr8); + if (neg) + op_b = "-(" + op_b + ')'; + op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset, + GLSLRegister::Type::Integer); + op_c = std::to_string(instr.lea.rz.entry_a); + + break; + } + + case OpCode::Id::LEA_HI: + default: { + op_b = regs.GetRegisterAsInteger(instr.gpr8); + op_a = std::to_string(instr.lea.imm.entry_a); + op_c = std::to_string(instr.lea.imm.entry_b); + LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) { + LOG_ERROR(HW_GPU, "Unhandled LEA Predicate"); + UNREACHABLE(); + } + const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))"; + regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1); + + break; + } default: { LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}", opcode->GetName()); @@ -1786,15 +1853,47 @@ private: coord = "vec2 coords = vec2(" + x + ", " + y + ");"; texture_type = Tegra::Shader::TextureType::Texture2D; } + // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias + // or lod. 
+ const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20); const std::string sampler = GetSampler(instr.sampler, texture_type, false); // Add an extra scope and declare the texture coords inside to prevent // overwriting them in case they are used as outputs of the texs instruction. + shader.AddLine("{"); ++shader.scope; shader.AddLine(coord); - const std::string texture = "texture(" + sampler + ", coords)"; + std::string texture; + switch (instr.tex.process_mode) { + case Tegra::Shader::TextureProcessMode::None: { + texture = "texture(" + sampler + ", coords)"; + break; + } + case Tegra::Shader::TextureProcessMode::LZ: { + texture = "textureLod(" + sampler + ", coords, 0.0)"; + break; + } + case Tegra::Shader::TextureProcessMode::LB: + case Tegra::Shader::TextureProcessMode::LBA: { + // TODO: Figure if A suffix changes the equation at all. + texture = "texture(" + sampler + ", coords, " + op_c + ')'; + break; + } + case Tegra::Shader::TextureProcessMode::LL: + case Tegra::Shader::TextureProcessMode::LLA: { + // TODO: Figure if A suffix changes the equation at all. 
+ texture = "textureLod(" + sampler + ", coords, " + op_c + ')'; + break; + } + default: { + texture = "texture(" + sampler + ", coords)"; + LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}", + static_cast<u32>(instr.tex.process_mode.Value())); + UNREACHABLE(); + } + } size_t dest_elem{}; for (size_t elem = 0; elem < 4; ++elem) { if (!instr.tex.IsComponentEnabled(elem)) { @@ -2087,6 +2186,30 @@ private: } break; } + case OpCode::Type::PredicateSetRegister: { + const std::string op_a = + GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); + const std::string op_b = + GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0); + + const std::string second_pred = + GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0); + + const std::string combiner = GetPredicateCombiner(instr.pset.op); + + const std::string predicate = + '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')'; + const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')'; + if (instr.pset.bf == 0) { + const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0"; + regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1); + } else { + const std::string value = '(' + result + ") ? 1.0 : 0.0"; + regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1); + } + + break; + } case OpCode::Type::PredicateSetPredicate: { const std::string op_a = GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index ccff3e342..96d916b07 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -369,6 +369,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, * Draws the emulated screens to the emulator window. 
*/ void RendererOpenGL::DrawScreen() { + if (renderer_settings.set_background_color) { + // Update background color before drawing + glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, + 0.0f); + } + const auto& layout = render_window.GetFramebufferLayout(); const auto& screen = layout.screen; |
