aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h74
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_base.h1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp41
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp125
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp6
7 files changed, 226 insertions, 30 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 9176a8dbc..58f2904ce 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -254,6 +254,15 @@ enum class TextureQueryType : u64 {
BorderColor = 22,
};
+enum class TextureProcessMode : u64 {
+ None = 0,
+ LZ = 1, // Load LOD of zero; the decompiler emits textureLod(..., 0.0) for this mode.
+ LB = 2, // Load Bias.
+ LL = 3, // Load LOD (LevelOfDetail)
+ LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
+ LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
+};
+
enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
@@ -424,6 +433,45 @@ union Instruction {
} bfe;
union {
+ BitField<48, 3, u64> pred48;
+
+ union {
+ BitField<20, 20, u64> entry_a;
+ BitField<39, 5, u64> entry_b;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } imm;
+
+ union {
+ BitField<20, 14, u64> cb_index;
+ BitField<34, 5, u64> cb_offset;
+ BitField<56, 1, u64> neg;
+ BitField<57, 1, u64> uses_cc;
+ } hi;
+
+ union {
+ BitField<20, 14, u64> cb_index;
+ BitField<34, 5, u64> cb_offset;
+ BitField<39, 5, u64> entry_a;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } rz;
+
+ union {
+ BitField<39, 5, u64> entry_a;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } r1;
+
+ union {
+ BitField<28, 8, u64> entry_a;
+ BitField<37, 1, u64> neg;
+ BitField<38, 1, u64> uses_cc;
+ } r2;
+
+ } lea;
+
+ union {
BitField<0, 5, FlowCondition> cond;
} flow;
@@ -478,6 +526,18 @@ union Instruction {
} psetp;
union {
+ BitField<12, 3, u64> pred12;
+ BitField<15, 1, u64> neg_pred12;
+ BitField<24, 2, PredOperation> cond;
+ BitField<29, 3, u64> pred29;
+ BitField<32, 1, u64> neg_pred29;
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred39;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, PredOperation> op;
+ } pset;
+
+ union {
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
BitField<43, 1, u64> neg_a;
@@ -522,6 +582,7 @@ union Instruction {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
BitField<31, 4, u64> component_mask;
+ BitField<55, 3, TextureProcessMode> process_mode;
bool IsComponentEnabled(size_t component) const {
return ((1ull << component) & component_mask) != 0;
@@ -726,6 +787,11 @@ public:
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
+ LEA_R1,
+ LEA_R2,
+ LEA_RZ,
+ LEA_IMM,
+ LEA_HI,
POPC_C,
POPC_R,
POPC_IMM,
@@ -784,6 +850,7 @@ public:
ISET_C,
ISET_IMM,
PSETP,
+ PSET,
XMAD_IMM,
XMAD_CR,
XMAD_RC,
@@ -807,6 +874,7 @@ public:
IntegerSet,
IntegerSetPredicate,
PredicateSetPredicate,
+ PredicateSetRegister,
Conversion,
Xmad,
Unknown,
@@ -958,6 +1026,11 @@ private:
INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
+ INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
+ INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
+ INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
+ INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
+ INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -1012,6 +1085,7 @@ private:
INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
+ INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index be17a2b9c..0df3725c2 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -19,6 +19,7 @@ void RendererBase::RefreshBaseSettings() {
UpdateCurrentFramebufferLayout();
renderer_settings.use_framelimiter = Settings::values.use_frame_limit;
+ renderer_settings.set_background_color = true;
}
void RendererBase::UpdateCurrentFramebufferLayout() {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 2a357f9d0..2cd0738ff 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -19,6 +19,7 @@ namespace VideoCore {
struct RendererSettings {
std::atomic_bool use_framelimiter{false};
+ std::atomic_bool set_background_color{false};
};
class RendererBase : NonCopyable {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 29d61eccd..fb56decc0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -53,8 +53,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
params.unaligned_height = config.tic.Height();
- params.cache_width = Common::AlignUp(params.width, 8);
- params.cache_height = Common::AlignUp(params.height, 8);
params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
switch (params.target) {
@@ -89,8 +87,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.width = config.width;
params.height = config.height;
params.unaligned_height = config.height;
- params.cache_width = Common::AlignUp(params.width, 8);
- params.cache_height = Common::AlignUp(params.height, 8);
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.size_in_bytes = params.SizeInBytes();
@@ -110,8 +106,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
- params.cache_width = Common::AlignUp(params.width, 8);
- params.cache_height = Common::AlignUp(params.height, 8);
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.size_in_bytes = params.SizeInBytes();
@@ -477,30 +471,27 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
// Only pre-create the texture for non-compressed textures.
switch (params.target) {
case SurfaceParams::SurfaceTarget::Texture1D:
- glTexImage1D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
- rect.GetWidth(), 0, format_tuple.format, format_tuple.type, nullptr);
+ glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+ rect.GetWidth());
break;
case SurfaceParams::SurfaceTarget::Texture2D:
- glTexImage2D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
- rect.GetWidth(), rect.GetHeight(), 0, format_tuple.format,
- format_tuple.type, nullptr);
+ glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+ rect.GetWidth(), rect.GetHeight());
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray:
- glTexImage3D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
- rect.GetWidth(), rect.GetHeight(), params.depth, 0, format_tuple.format,
- format_tuple.type, nullptr);
+ glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+ rect.GetWidth(), rect.GetHeight(), params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
- glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, rect.GetWidth(),
- rect.GetHeight(), 0, format_tuple.format, format_tuple.type, nullptr);
+ glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
+ rect.GetHeight());
}
}
- glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
@@ -817,16 +808,20 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
// Get a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{GetUncachedSurface(new_params)};
- // If format is unchanged, we can do a faster blit without reinterpreting pixel data
- if (params.pixel_format == new_params.pixel_format) {
+ if (params.pixel_format == new_params.pixel_format ||
+ !Settings::values.use_accurate_framebuffers) {
+ // If the format is the same, or accurate framebuffers are disabled, just do a framebuffer
+ // blit. This is significantly faster than using PBOs. This is also likely less accurate, as
+ // textures will be converted rather than reinterpreted.
+
BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
params.GetRect(), params.type, read_framebuffer.handle,
draw_framebuffer.handle);
- return new_surface;
- }
+ } else {
+ // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
+ // where pixels are reinterpreted as a new format (without conversion). This code path uses
+ // OpenGL PBOs and is quite slow.
- // When using accurate framebuffers, always copy old data to new surface, regardless of format
- if (Settings::values.use_accurate_framebuffers) {
auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index e660998d0..57ea8593b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -680,8 +680,8 @@ struct SurfaceParams {
/// Checks if surfaces are compatible for caching
bool IsCompatibleSurface(const SurfaceParams& other) const {
- return std::tie(pixel_format, type, cache_width, cache_height) ==
- std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height);
+ return std::tie(pixel_format, type, width, height) ==
+ std::tie(other.pixel_format, other.type, other.width, other.height);
}
VAddr addr;
@@ -696,10 +696,6 @@ struct SurfaceParams {
u32 unaligned_height;
size_t size_in_bytes;
SurfaceTarget target;
-
- // Parameters used for caching only
- u32 cache_width;
- u32 cache_height;
};
}; // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e350113f1..2d56370c7 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1505,6 +1505,73 @@ private:
1, 1);
break;
}
+ case OpCode::Id::LEA_R2:
+ case OpCode::Id::LEA_R1:
+ case OpCode::Id::LEA_IMM:
+ case OpCode::Id::LEA_RZ:
+ case OpCode::Id::LEA_HI: {
+ std::string op_a;
+ std::string op_b;
+ std::string op_c;
+
+ switch (opcode->GetId()) {
+ case OpCode::Id::LEA_R2: {
+ op_a = regs.GetRegisterAsInteger(instr.gpr20);
+ op_b = regs.GetRegisterAsInteger(instr.gpr39);
+ op_c = std::to_string(instr.lea.r2.entry_a);
+ break;
+ }
+
+ case OpCode::Id::LEA_R1: {
+ const bool neg = instr.lea.r1.neg != 0;
+ op_a = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_a = "-(" + op_a + ')';
+ op_b = regs.GetRegisterAsInteger(instr.gpr20);
+ op_c = std::to_string(instr.lea.r1.entry_a);
+ break;
+ }
+
+ case OpCode::Id::LEA_IMM: {
+ const bool neg = instr.lea.imm.neg != 0;
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_b = "-(" + op_b + ')';
+ op_a = std::to_string(instr.lea.imm.entry_a);
+ op_c = std::to_string(instr.lea.imm.entry_b);
+ break;
+ }
+
+ case OpCode::Id::LEA_RZ: {
+ const bool neg = instr.lea.rz.neg != 0;
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_b = "-(" + op_b + ')';
+ op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
+ GLSLRegister::Type::Integer);
+ op_c = std::to_string(instr.lea.rz.entry_a);
+
+ break;
+ }
+
+ case OpCode::Id::LEA_HI:
+ default: {
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ op_a = std::to_string(instr.lea.imm.entry_a);
+ op_c = std::to_string(instr.lea.imm.entry_b);
+ LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+ if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) {
+ LOG_ERROR(HW_GPU, "Unhandled LEA Predicate");
+ UNREACHABLE();
+ }
+ const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
+
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
opcode->GetName());
@@ -1786,15 +1853,47 @@ private:
coord = "vec2 coords = vec2(" + x + ", " + y + ");";
texture_type = Tegra::Shader::TextureType::Texture2D;
}
+ // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
+ // or lod.
+ const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
const std::string sampler = GetSampler(instr.sampler, texture_type, false);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
+
shader.AddLine("{");
++shader.scope;
shader.AddLine(coord);
- const std::string texture = "texture(" + sampler + ", coords)";
+ std::string texture;
+ switch (instr.tex.process_mode) {
+ case Tegra::Shader::TextureProcessMode::None: {
+ texture = "texture(" + sampler + ", coords)";
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LZ: {
+ texture = "textureLod(" + sampler + ", coords, 0.0)";
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LB:
+ case Tegra::Shader::TextureProcessMode::LBA: {
+ // TODO: Figure out if the A suffix changes the equation at all.
+ texture = "texture(" + sampler + ", coords, " + op_c + ')';
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LL:
+ case Tegra::Shader::TextureProcessMode::LLA: {
+ // TODO: Figure out if the A suffix changes the equation at all.
+ texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
+ break;
+ }
+ default: {
+ texture = "texture(" + sampler + ", coords)";
+ LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}",
+ static_cast<u32>(instr.tex.process_mode.Value()));
+ UNREACHABLE();
+ }
+ }
size_t dest_elem{};
for (size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
@@ -2087,6 +2186,30 @@ private:
}
break;
}
+ case OpCode::Type::PredicateSetRegister: {
+ const std::string op_a =
+ GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+ const std::string op_b =
+ GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+
+ const std::string second_pred =
+ GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+ const std::string combiner = GetPredicateCombiner(instr.pset.op);
+
+ const std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
+ const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
+ if (instr.pset.bf == 0) {
+ const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
+ regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1);
+ } else {
+ const std::string value = '(' + result + ") ? 1.0 : 0.0";
+ regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
+ }
+
+ break;
+ }
case OpCode::Type::PredicateSetPredicate: {
const std::string op_a =
GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index ccff3e342..96d916b07 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -369,6 +369,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
* Draws the emulated screens to the emulator window.
*/
void RendererOpenGL::DrawScreen() {
+ if (renderer_settings.set_background_color) {
+ // Update background color before drawing
+ glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
+ 0.0f);
+ }
+
const auto& layout = render_window.GetFramebufferLayout();
const auto& screen = layout.screen;