diff options
Diffstat (limited to 'src/video_core')
20 files changed, 760 insertions, 237 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 98ed11ec5..2a3ff234a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -74,8 +74,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { regs.reg_array[method] = value; -#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32)) - switch (method) { case MAXWELL3D_REG_INDEX(code_address.code_address_high): case MAXWELL3D_REG_INDEX(code_address.code_address_low): { @@ -136,7 +134,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { break; } -#undef MAXWELL3D_REG_INDEX + VideoCore::g_renderer->Rasterizer()->NotifyMaxwellRegisterChanged(method); if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr); @@ -165,6 +163,7 @@ void Maxwell3D::ProcessQueryGet() { void Maxwell3D::DrawArrays() { LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(), regs.vertex_buffer.count); + ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); @@ -176,7 +175,8 @@ void Maxwell3D::DrawArrays() { debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); } - VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/); + const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; + VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed); } void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { @@ -218,10 +218,12 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); - ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear, - "TIC versions other than BlockLinear are unimplemented"); + ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || + tic_entry.header_version == Texture::TICHeaderVersion::Pitch, + "TIC versions other than BlockLinear or Pitch are unimplemented"); - ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D, + ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) || + (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap), "Texture types other than Texture2D are unimplemented"); auto r_type = tic_entry.r_type.Value(); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1fae41cb2..d4fcedace 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -20,6 +20,9 @@ namespace Tegra { namespace Engines { +#define MAXWELL3D_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) + class Maxwell3D final { public: explicit Maxwell3D(MemoryManager& memory_manager); @@ -248,6 +251,52 @@ public: Patches = 0xe, }; + enum class IndexFormat : u32 { + UnsignedByte = 0x0, + UnsignedShort = 0x1, + UnsignedInt = 0x2, + }; + + struct Blend { + enum class Equation : u32 { + Add = 1, + Subtract = 2, + ReverseSubtract = 3, + Min = 4, + Max = 5, + }; + + enum class Factor : u32 { + Zero = 0x1, + One = 0x2, + SourceColor = 0x3, + OneMinusSourceColor = 0x4, + SourceAlpha = 0x5, + OneMinusSourceAlpha = 0x6, + DestAlpha = 0x7, + OneMinusDestAlpha = 0x8, + DestColor = 0x9, + OneMinusDestColor = 0xa, + SourceAlphaSaturate = 0xb, + Source1Color = 0x10, + OneMinusSource1Color = 0x11, + Source1Alpha = 0x12, + OneMinusSource1Alpha = 0x13, + ConstantColor = 0x61, + OneMinusConstantColor = 0x62, + ConstantAlpha = 0x63, + OneMinusConstantAlpha = 0x64, + }; + + u32 separate_alpha; + Equation equation_rgb; + Factor factor_source_rgb; + Factor factor_dest_rgb; + Equation equation_a; + Factor factor_source_a; + Factor factor_dest_a; + }; + union { struct { INSERT_PADDING_WORDS(0x200); @@ -270,7 +319,15 @@ public: } } rt[NumRenderTargets]; - INSERT_PADDING_WORDS(0x80); + struct { + f32 scale_x; + f32 scale_y; + f32 scale_z; + u32 translate_x; + u32 translate_y; + u32 translate_z; + INSERT_PADDING_WORDS(2); + } viewport_transform[NumViewports]; struct { union { @@ -375,7 +432,42 @@ public: }; } draw; - INSERT_PADDING_WORDS(0x139); + INSERT_PADDING_WORDS(0x6B); + + struct { + u32 start_addr_high; + u32 start_addr_low; + u32 end_addr_high; + u32 end_addr_low; + IndexFormat format; + u32 first; + u32 count; + + unsigned FormatSizeInBytes() const { + switch (format) { + case IndexFormat::UnsignedByte: + return 1; + case IndexFormat::UnsignedShort: + return 2; + case IndexFormat::UnsignedInt: + return 4; + } + UNREACHABLE(); + } + + GPUVAddr StartAddress() const { + return static_cast<GPUVAddr>( + (static_cast<GPUVAddr>(start_addr_high) << 32) | start_addr_low); + } + + GPUVAddr EndAddress() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) | + end_addr_low); + } + } index_array; + + INSERT_PADDING_WORDS(0xC7); + struct { u32 query_address_high; u32 query_address_low; @@ -410,7 +502,9 @@ public: } } vertex_array[NumVertexArrays]; - INSERT_PADDING_WORDS(0x40); + Blend blend; + + INSERT_PADDING_WORDS(0x39); struct { u32 limit_high; @@ -563,6 +657,7 @@ private: "Field " #field_name " has invalid position") ASSERT_REG_POSITION(rt, 0x200); +ASSERT_REG_POSITION(viewport_transform[0], 0x280); ASSERT_REG_POSITION(viewport, 0x300); ASSERT_REG_POSITION(vertex_buffer, 0x35D); ASSERT_REG_POSITION(zeta, 0x3F8); @@ -572,8 +667,10 @@ ASSERT_REG_POSITION(tsc, 0x557); ASSERT_REG_POSITION(tic, 0x55D); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); +ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(query, 0x6C0); ASSERT_REG_POSITION(vertex_array[0], 0x700); +ASSERT_REG_POSITION(blend, 0x780); ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0); ASSERT_REG_POSITION(shader_config[0], 0x800); ASSERT_REG_POSITION(const_buffer, 0x8E0); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index eff0c35a1..7cd125f05 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -4,6 +4,7 @@ #pragma once +#include <cstring> #include <map> #include <string> #include "common/bit_field.h" @@ -12,14 +13,10 @@ namespace Tegra { namespace Shader { struct Register { - Register() = default; + constexpr Register() = default; constexpr Register(u64 value) : value(value) {} - constexpr u64 GetIndex() const { - return value; - } - constexpr operator u64() const { return value; } @@ -43,13 +40,13 @@ struct Register { } private: - u64 value; + u64 value{}; }; union Attribute { Attribute() = default; - constexpr Attribute(u64 value) : value(value) {} + constexpr explicit Attribute(u64 value) : value(value) {} enum class Index : u64 { Position = 7, @@ -68,7 +65,20 @@ union Attribute { } fmt28; BitField<39, 8, u64> reg; - u64 value; + u64 value{}; +}; + +union Sampler { + Sampler() = default; + + constexpr explicit Sampler(u64 value) : value(value) {} + + enum class Index : u64 { + Sampler_0 = 8, + }; + + BitField<36, 13, Index> index; + u64 value{}; }; union Uniform { @@ -80,6 +90,7 @@ union OpCode { enum class Id : u64 { TEXS = 0x6C, IPA = 0xE0, + FMUL32_IMM = 0x1E, FFMA_IMM = 0x65, FFMA_CR = 0x93, FFMA_RC = 0xA3, @@ -132,6 +143,7 @@ union OpCode { switch (op2) { case Id::IPA: + case Id::FMUL32_IMM: return op2; } @@ -225,6 +237,7 @@ union OpCode { info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"}; info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"}; info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"}; + info_table[Id::FMUL32_IMM] = {Type::Arithmetic, "fmul32_imm"}; info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"}; info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"}; info_table[Id::EXIT] = {Type::Trivial, "exit"}; @@ -238,7 +251,7 @@ union OpCode { BitField<55, 9, Id> op3; BitField<52, 12, Id> op4; BitField<51, 13, Id> op5; - u64 value; + u64 value{}; }; static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); @@ -280,6 +293,7 @@ enum class SubOp : u64 { Lg2 = 0x3, Rcp = 0x4, Rsq = 0x5, + Min = 0x8, }; union Instruction { @@ -295,15 +309,33 @@ union Instruction { BitField<20, 8, Register> gpr20; BitField<20, 7, SubOp> sub_op; BitField<28, 8, Register> gpr28; - BitField<36, 13, u64> imm36; BitField<39, 8, Register> gpr39; union { + BitField<20, 19, u64> imm20_19; + BitField<20, 32, u64> imm20_32; BitField<45, 1, u64> negate_b; BitField<46, 1, u64> abs_a; BitField<48, 1, u64> negate_a; BitField<49, 1, u64> abs_b; BitField<50, 1, u64> abs_d; + BitField<56, 1, u64> negate_imm; + + float GetImm20_19() const { + float result{}; + u32 imm{static_cast<u32>(imm20_19)}; + imm <<= 12; + imm |= negate_imm ? 0x80000000 : 0; + std::memcpy(&result, &imm, sizeof(imm)); + return result; + } + + float GetImm20_32() const { + float result{}; + u32 imm{static_cast<u32>(imm20_32)}; + std::memcpy(&result, &imm, sizeof(imm)); + return result; + } } alu; union { @@ -311,11 +343,13 @@ union Instruction { BitField<49, 1, u64> negate_c; } ffma; + BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; Attribute attribute; Uniform uniform; + Sampler sampler; u64 hex; }; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 71a8661b4..2888daedc 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -15,7 +15,10 @@ namespace Tegra { enum class RenderTargetFormat : u32 { NONE = 0x0, + RGBA16_FLOAT = 0xCA, + RGB10_A2_UNORM = 0xD1, RGBA8_UNORM = 0xD5, + RGBA8_SRGB = 0xD6, }; class DebugContext; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 35d262189..36629dd11 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -19,7 +19,7 @@ public: virtual void DrawArrays() = 0; /// Notify rasterizer that the specified Maxwell register has been changed - virtual void NotifyMaxwellRegisterChanged(u32 id) = 0; + virtual void NotifyMaxwellRegisterChanged(u32 method) = 0; /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 28abc563a..13e2a77ce 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -97,7 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL() { state.draw.vertex_buffer = stream_buffer->GetHandle(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); - state.draw.shader_program = 0; state.draw.vertex_array = hw_vao.handle; state.Apply(); @@ -128,17 +127,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { - const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - - if (is_indexed) { - UNREACHABLE(); - } - - // TODO(bunnei): Add support for 1+ vertex arrays - vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride; -} - void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; @@ -150,6 +138,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { // TODO(bunnei): Add support for 1+ vertex arrays const auto& vertex_array{regs.vertex_array[0]}; + const auto& vertex_array_limit{regs.vertex_array_limit[0]}; ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { @@ -162,6 +151,10 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { // to avoid OpenGL errors. for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; + NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", + index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), + attrib.offset.Value(), attrib.IsNormalized()); + glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); @@ -170,7 +163,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { } // Copy vertex array data - const u32 data_size{vertex_array.stride * regs.vertex_buffer.count}; + const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; res_cache.FlushRegion(data_addr, data_size, nullptr); Memory::ReadBlock(data_addr, array_ptr, data_size); @@ -333,13 +326,18 @@ void RasterizerOpenGL::DrawArrays() { // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; - AnalyzeVertexArray(is_indexed); + const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; + const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; + + // TODO(bunnei): Add support for 1+ vertex arrays + vs_input_size = vertex_num * regs.vertex_array[0].stride; + state.draw.vertex_buffer = stream_buffer->GetHandle(); state.Apply(); size_t buffer_size = static_cast<size_t>(vs_input_size); if (is_indexed) { - UNREACHABLE(); + buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; } // Uniform space for the 5 shader stages @@ -354,9 +352,18 @@ void RasterizerOpenGL::DrawArrays() { SetupVertexArray(buffer_ptr, buffer_offset); ptr_pos += vs_input_size; + // If indexed mode, copy the index buffer GLintptr index_buffer_offset = 0; if (is_indexed) { - UNREACHABLE(); + ptr_pos = Common::AlignUp(ptr_pos, 4); + + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + const VAddr index_data_addr{ + memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; + Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); + + index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + ptr_pos += index_buffer_size; } SetupShaders(buffer_ptr, buffer_offset, ptr_pos); @@ -366,11 +373,16 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); + const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; if (is_indexed) { - UNREACHABLE(); + const GLint index_min{static_cast<GLint>(regs.index_array.first)}; + const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)}; + glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count, + MaxwellToGL::IndexFormat(regs.index_array.format), + reinterpret_cast<const void*>(index_buffer_offset), + -index_min); } else { - glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0, - regs.vertex_buffer.count); + glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count); } // Disable scissor test @@ -434,7 +446,32 @@ void RasterizerOpenGL::BindTextures() { } } -void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} +void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + switch (method) { + case MAXWELL3D_REG_INDEX(blend.separate_alpha): + ASSERT_MSG(false, "unimplemented"); + break; + case MAXWELL3D_REG_INDEX(blend.equation_rgb): + state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.factor_source_rgb): + state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb): + state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.equation_a): + state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a); + break; + case MAXWELL3D_REG_INDEX(blend.factor_source_a): + state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a); + break; + case MAXWELL3D_REG_INDEX(blend.factor_dest_a): + state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a); + break; + } +} void RasterizerOpenGL::FlushAll() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); @@ -486,9 +523,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu src_params.width = std::min(framebuffer.width, pixel_stride); src_params.height = framebuffer.height; src_params.stride = pixel_stride; - src_params.is_tiled = false; + src_params.is_tiled = true; + src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); + src_params.component_type = + SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); src_params.UpdateParams(); MathUtil::Rectangle<u32> src_rect; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 548ce0453..9ece415f7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -32,7 +32,7 @@ public: ~RasterizerOpenGL() override; void DrawArrays() override; - void NotifyMaxwellRegisterChanged(u32 id) override; + void NotifyMaxwellRegisterChanged(u32 method) override; void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; @@ -155,7 +155,6 @@ private: GLsizeiptr vs_input_size; - void AnalyzeVertexArray(bool is_indexed); void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 213b20a21..561c6913d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -36,6 +36,7 @@ using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = SurfaceParams::PixelFormat; +using ComponentType = SurfaceParams::ComponentType; struct FormatTuple { GLint internal_format; @@ -47,26 +48,24 @@ struct FormatTuple { u32 compression_factor; }; -static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8 +static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8 + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5 + {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45 }}; -static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8 - {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1 -}}; - -static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { +static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); - if (type == SurfaceType::Color) { - ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size()); - return fb_format_tuples[static_cast<unsigned int>(pixel_format)]; + if (type == SurfaceType::ColorTexture) { + ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); + // For now only UNORM components are supported + ASSERT(component_type == ComponentType::UNorm); + return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { // TODO(Subv): Implement depth formats ASSERT_MSG(false, "Unimplemented"); - } else if (type == SurfaceType::Texture) { - ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } UNREACHABLE(); @@ -85,56 +84,42 @@ static u16 GetResolutionScaleFactor() { } template <bool morton_to_gl, PixelFormat format> -static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start, + VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - for (u32 y = 0; y < 8; ++y) { - for (u32 x = 0; x < 8; ++x) { - u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; - u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; - if (morton_to_gl) { - std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); - } else { - std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); - } - } - } -} - -template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the - // configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(base), gl_buffer, morton_to_gl); -} -template <> -void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base, - VAddr start, VAddr end) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1); - - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the - // configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - auto data = - Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height); - std::memcpy(gl_buffer, data.data(), data.size()); + if (morton_to_gl) { + auto data = Tegra::Texture::UnswizzleTexture( + base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, + block_height); + std::memcpy(gl_buffer, data.data(), data.size()); + } else { + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check + // the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(base), gl_buffer, morton_to_gl); + } } -static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::RGBA8>, - MortonCopy<true, PixelFormat::DXT1>, +static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), + SurfaceParams::MaxPixelFormat> + morton_to_gl_fns = { + MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, + MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, }; -static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::RGBA8>, - MortonCopy<false, PixelFormat::DXT1>, +static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), + SurfaceParams::MaxPixelFormat> + gl_to_morton_fns = { + MortonCopy<false, PixelFormat::ABGR8>, + MortonCopy<false, PixelFormat::B5G6R5>, + // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported + nullptr, + nullptr, + nullptr, }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -183,7 +168,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec u32 buffers = 0; - if (type == SurfaceType::Color || type == SurfaceType::Texture) { + if (type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, @@ -311,15 +296,18 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == - std::tie(addr, width, height, stride, pixel_format, is_tiled) && + other_surface.stride, other_surface.block_height, other_surface.pixel_format, + other_surface.component_type, + other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, + pixel_format, component_type, is_tiled) && pixel_format != PixelFormat::Invalid; } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { return sub_surface.addr >= addr && sub_surface.end <= end && sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && + sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && + sub_surface.component_type == component_type && (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && GetSubRect(sub_surface).left + sub_surface.width <= stride; @@ -328,7 +316,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && + component_type == expanded_surface.component_type && stride == expanded_surface.stride && (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % BytesInPixels(stride * (is_tiled ? 8 : 1)) == 0; @@ -339,6 +328,10 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { end < texcopy_params.end) { return false; } + if (texcopy_params.block_height != block_height || + texcopy_params.component_type != component_type) + return false; + if (texcopy_params.width != texcopy_params.stride) { const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && @@ -481,18 +474,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { const u64 start_offset = load_start - addr; if (!is_tiled) { - ASSERT(type == SurfaceType::Color); const u32 bytes_per_pixel{GetFormatBpp() >> 3}; - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check - // the configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, - texture_src_data + start_offset, &gl_buffer[start_offset], - true); + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, + bytes_per_pixel * width * height); } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, - load_start, load_end); + morton_to_gl_fns[static_cast<size_t>(pixel_format)]( + stride, block_height, height, &gl_buffer[0], addr, load_start, load_end); } } @@ -533,11 +521,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { if (backup_bytes) std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); } else if (!is_tiled) { - ASSERT(type == SurfaceType::Color); std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); } else { - gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, - flush_start, flush_end); + gl_to_morton_fns[static_cast<size_t>(pixel_format)]( + stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); } } @@ -556,7 +543,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint GLint y0 = static_cast<GLint>(rect.bottom); size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); - const FormatTuple& tuple = GetFormatTuple(pixel_format); + const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); GLuint target_tex = texture.handle; // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in @@ -629,7 +616,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - const FormatTuple& tuple = GetFormatTuple(pixel_format); + const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); // Ensure no bad interactions with GL_PACK_ALIGNMENT ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); @@ -662,7 +649,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui state.draw.read_framebuffer = read_fb_handle; state.Apply(); - if (type == SurfaceType::Color || type == SurfaceType::Texture) { + if (type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, @@ -1041,9 +1028,25 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu params.height = config.tic.Height(); params.is_tiled = config.tic.IsTiled(); params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); + + // TODO(Subv): Different types per component are not supported. + ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && + config.tic.r_type.Value() == config.tic.b_type.Value() && + config.tic.r_type.Value() == config.tic.a_type.Value()); + + params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); + + if (config.tic.IsTiled()) { + params.block_height = config.tic.BlockHeight(); + } else { + // Use the texture-provided stride value if the texture isn't tiled. + params.stride = params.PixelsInBytes(config.tic.Pitch()); + } + params.UpdateParams(); - if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) { + if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 || + params.stride != params.width) { Surface src_surface; MathUtil::Rectangle<u32> rect; std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); @@ -1094,10 +1097,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.res_scale = resolution_scale_factor; color_params.width = config.width; color_params.height = config.height; + // TODO(Subv): Can framebuffers use a different block height? + color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; SurfaceParams depth_params = color_params; color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); + color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); color_params.UpdateParams(); ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); @@ -1293,7 +1299,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface const SurfaceInterval invalid_interval(addr, addr + size); if (region_owner != nullptr) { - ASSERT(region_owner->type != SurfaceType::Texture); ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); // Surfaces can't have a gap ASSERT(region_owner->width == region_owner->stride); @@ -1355,7 +1360,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { surface->gl_buffer_size = 0; surface->invalid_regions.insert(surface->GetInterval()); - AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format), + AllocateSurfaceTexture(surface->texture.handle, + GetFormatTuple(surface->pixel_format, surface->component_type), surface->GetScaledWidth(), surface->GetScaledHeight()); return surface; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e7ce506cf..6861efe16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -52,27 +52,45 @@ enum class ScaleMatch { struct SurfaceParams { enum class PixelFormat { - RGBA8 = 0, - DXT1 = 1, + ABGR8 = 0, + B5G6R5 = 1, + DXT1 = 2, + DXT23 = 3, + DXT45 = 4, + + Max, Invalid = 255, }; + static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max); + + enum class ComponentType { + Invalid = 0, + SNorm = 1, + UNorm = 2, + SInt = 3, + UInt = 4, + Float = 5, + }; + enum class SurfaceType { - Color = 0, - Texture = 1, - Depth = 2, - DepthStencil = 3, - Fill = 4, - Invalid = 5 + ColorTexture = 0, + Depth = 1, + DepthStencil = 2, + Fill = 3, + Invalid = 4, }; static constexpr unsigned int GetFormatBpp(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; - constexpr std::array<unsigned int, 2> bpp_table = { - 32, // RGBA8 - 64, // DXT1 + constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = { + 32, // ABGR8 + 16, // B5G6R5 + 64, // DXT1 + 128, // DXT23 + 128, // DXT45 }; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -85,8 +103,9 @@ struct SurfaceParams { static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } @@ -94,8 +113,9 @@ struct SurfaceParams { static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } @@ -104,10 +124,69 @@ struct SurfaceParams { // TODO(Subv): Properly implement this switch (format) { case Tegra::Texture::TextureFormat::A8R8G8B8: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; + case Tegra::Texture::TextureFormat::B5G6R5: + return PixelFormat::B5G6R5; case Tegra::Texture::TextureFormat::DXT1: return PixelFormat::DXT1; + case Tegra::Texture::TextureFormat::DXT23: + return PixelFormat::DXT23; + case Tegra::Texture::TextureFormat::DXT45: + return PixelFormat::DXT45; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } + } + + static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { + // TODO(Subv): Properly implement this + switch (format) { + case PixelFormat::ABGR8: + return Tegra::Texture::TextureFormat::A8R8G8B8; + case PixelFormat::B5G6R5: + return Tegra::Texture::TextureFormat::B5G6R5; + case PixelFormat::DXT1: + return Tegra::Texture::TextureFormat::DXT1; + case PixelFormat::DXT23: + return Tegra::Texture::TextureFormat::DXT23; + case PixelFormat::DXT45: + return Tegra::Texture::TextureFormat::DXT45; + default: + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { + // TODO(Subv): Implement more component types + switch (type) { + case Tegra::Texture::ComponentType::UNORM: + return ComponentType::UNorm; default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type)); + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) { + // TODO(Subv): Implement more render targets + switch (format) { + case Tegra::RenderTargetFormat::RGBA8_UNORM: + case Tegra::RenderTargetFormat::RGB10_A2_UNORM: + return ComponentType::UNorm; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromGPUPixelFormat( + Tegra::FramebufferConfig::PixelFormat format) { + switch (format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: + return ComponentType::UNorm; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } @@ -116,8 +195,7 @@ struct SurfaceParams { SurfaceType a_type = GetFormatType(pixel_format_a); SurfaceType b_type = GetFormatType(pixel_format_b); - if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && - (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { + if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { return true; } @@ -133,12 +211,8 @@ struct SurfaceParams { } static SurfaceType GetFormatType(PixelFormat pixel_format) { - if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::RGBA8)) { - return SurfaceType::Color; - } - - if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) { - return SurfaceType::Texture; + if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { + return SurfaceType::ColorTexture; } // TODO(Subv): Implement the other formats @@ -210,11 +284,13 @@ struct SurfaceParams { u32 width = 0; u32 height = 0; u32 stride = 0; + u32 block_height = 0; u16 res_scale = 1; bool is_tiled = false; PixelFormat pixel_format = PixelFormat::Invalid; SurfaceType type = SurfaceType::Invalid; + ComponentType component_type = ComponentType::Invalid; }; struct CachedSurface : SurfaceParams { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e11711533..de137558d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -5,6 +5,7 @@ #include <map> #include <set> #include <string> +#include <string_view> #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -17,6 +18,7 @@ using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::Sampler; using Tegra::Shader::SubOp; using Tegra::Shader::Uniform; @@ -108,12 +110,25 @@ private: class ShaderWriter { public: - void AddLine(const std::string& text) { + void AddLine(std::string_view text) { DEBUG_ASSERT(scope >= 0); if (!text.empty()) { - shader_source += std::string(static_cast<size_t>(scope) * 4, ' '); + AppendIndentation(); } - shader_source += text + '\n'; + shader_source += text; + AddNewLine(); + } + + void AddLine(char character) { + DEBUG_ASSERT(scope >= 0); + AppendIndentation(); + shader_source += character; + AddNewLine(); + } + + void AddNewLine() { + DEBUG_ASSERT(scope >= 0); + shader_source += '\n'; } std::string GetResult() { @@ -123,6 +138,10 @@ public: int scope = 0; private: + void AppendIndentation() { + shader_source.append(static_cast<size_t>(scope) * 4, ' '); + } + std::string shader_source; }; @@ -155,23 +174,27 @@ private: /// Generates code representing an input attribute register. std::string GetInputAttribute(Attribute::Index attribute) { - declr_input_attribute.insert(attribute); + switch (attribute) { + case Attribute::Index::Position: + return "position"; + default: + const u32 index{static_cast<u32>(attribute) - + static_cast<u32>(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + declr_input_attribute.insert(attribute); + return "input_attribute_" + std::to_string(index); + } - const u32 index{static_cast<u32>(attribute) - - static_cast<u32>(Attribute::Index::Attribute_0)}; - if (attribute >= Attribute::Index::Attribute_0) { - return "input_attribute_" + std::to_string(index); + NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); + UNREACHABLE(); } - - LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index); - UNREACHABLE(); } /// Generates code representing an output attribute register. std::string GetOutputAttribute(Attribute::Index attribute) { switch (attribute) { case Attribute::Index::Position: - return "gl_Position"; + return "position"; default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; @@ -180,22 +203,47 @@ private: return "output_attribute_" + std::to_string(index); } - LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index); + NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); UNREACHABLE(); } } + /// Generates code representing a 19-bit immediate value + static std::string GetImmediate19(const Instruction& instr) { + return std::to_string(instr.alu.GetImm20_19()); + } + + /// Generates code representing a 32-bit immediate value + static std::string GetImmediate32(const Instruction& instr) { + return std::to_string(instr.alu.GetImm20_32()); + } + /// Generates code representing a temporary (GPR) register. - std::string GetRegister(const Register& reg) { - return *declr_register.insert("register_" + std::to_string(reg)).first; + std::string GetRegister(const Register& reg, unsigned elem = 0) { + if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) { + // GPRs 0-3 are output color for the fragment shader + return std::string{"color."} + "rgba"[(reg + elem) & 3]; + } + + return *declr_register.insert("register_" + std::to_string(reg + elem)).first; } /// Generates code representing a uniform (C buffer) register. std::string GetUniform(const Uniform& reg) { - declr_const_buffers[reg.index].MarkAsUsed(reg.index, reg.offset, stage); + declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index), + static_cast<unsigned>(reg.offset), stage); return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']'; } + /// Generates code representing a texture sampler. + std::string GetSampler(const Sampler& sampler) const { + // TODO(Subv): Support more than just texture sampler 0 + ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported"); + const unsigned index{static_cast<unsigned>(sampler.index.Value()) - + static_cast<unsigned>(Sampler::Index::Sampler_0)}; + return "tex[" + std::to_string(index) + "]"; + } + /** * Adds code that calls a subroutine. * @param subroutine the subroutine to call. @@ -217,12 +265,13 @@ private: * @param value the code representing the value to assign. */ void SetDest(u64 elem, const std::string& reg, const std::string& value, - u64 dest_num_components, u64 value_num_components) { + u64 dest_num_components, u64 value_num_components, bool is_abs = false) { std::string swizzle = "."; swizzle += "xyzw"[elem]; std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); + src = is_abs ? "abs(" + src + ")" : src; shader.AddLine(dest + " = " + src + ";"); } @@ -240,8 +289,6 @@ private: switch (OpCode::GetInfo(instr.opcode).type) { case OpCode::Type::Arithmetic: { - ASSERT(!instr.alu.abs_d); - std::string dest = GetRegister(instr.gpr0); std::string op_a = instr.alu.negate_a ? "-" : ""; op_a += GetRegister(instr.gpr8); @@ -250,63 +297,114 @@ private: } std::string op_b = instr.alu.negate_b ? "-" : ""; - if (instr.is_b_gpr) { - op_b += GetRegister(instr.gpr20); + + if (instr.is_b_imm) { + op_b += GetImmediate19(instr); } else { - op_b += GetUniform(instr.uniform); + if (instr.is_b_gpr) { + op_b += GetRegister(instr.gpr20); + } else { + op_b += GetUniform(instr.uniform); + } } + if (instr.alu.abs_b) { op_b = "abs(" + op_b + ")"; } switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: { - SetDest(0, dest, op_a + " * " + op_b, 1, 1); + case OpCode::Id::FMUL_R: + case OpCode::Id::FMUL_IMM: { + SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d); + break; + } + case OpCode::Id::FMUL32_IMM: { + // fmul32i doesn't have abs or neg bits. + SetDest(0, dest, GetRegister(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1); break; } case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: { - SetDest(0, dest, op_a + " + " + op_b, 1, 1); + case OpCode::Id::FADD_R: + case OpCode::Id::FADD_IMM: { + SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d); break; } - default: { - LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", - static_cast<unsigned>(instr.opcode.EffectiveOpCode()), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - throw DecompileFail("Unhandled instruction"); + case OpCode::Id::MUFU: { + switch (instr.sub_op) { + case SubOp::Cos: + SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Sin: + SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Ex2: + SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Lg2: + SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Rcp: + SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); + break; + case SubOp::Rsq: + SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Min: + SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d); + break; + default: + NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}", + static_cast<unsigned>(instr.sub_op.Value())); + UNREACHABLE(); + } break; } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name, instr.hex); + UNREACHABLE(); + } } break; } case OpCode::Type::Ffma: { - ASSERT_MSG(!instr.ffma.negate_b, "untested"); - ASSERT_MSG(!instr.ffma.negate_c, "untested"); - std::string dest = GetRegister(instr.gpr0); std::string op_a = GetRegister(instr.gpr8); - std::string op_b = instr.ffma.negate_b ? "-" : ""; - op_b += GetUniform(instr.uniform); - std::string op_c = instr.ffma.negate_c ? "-" : ""; - op_c += GetRegister(instr.gpr39); switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::FFMA_CR: { - SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); + op_b += GetUniform(instr.uniform); + op_c += GetRegister(instr.gpr39); break; } - - default: { - LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", - static_cast<unsigned>(instr.opcode.EffectiveOpCode()), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - throw DecompileFail("Unhandled instruction"); + case OpCode::Id::FFMA_RR: { + op_b += GetRegister(instr.gpr20); + op_c += GetRegister(instr.gpr39); + break; + } + case OpCode::Id::FFMA_RC: { + op_b += GetRegister(instr.gpr39); + op_c += GetUniform(instr.uniform); + break; + } + case OpCode::Id::FFMA_IMM: { + op_b += GetImmediate19(instr); + op_c += GetRegister(instr.gpr39); break; } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name, instr.hex); + UNREACHABLE(); + } } + + SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); break; } case OpCode::Type::Memory: { @@ -315,22 +413,40 @@ private: switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::LD_A: { - ASSERT(instr.attribute.fmt20.size == 0); + ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); break; } case OpCode::Id::ST_A: { - ASSERT(instr.attribute.fmt20.size == 0); + ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); break; } - default: { - LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", - static_cast<unsigned>(instr.opcode.EffectiveOpCode()), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - throw DecompileFail("Unhandled instruction"); + case OpCode::Id::TEXS: { + ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); + const std::string op_a = GetRegister(instr.gpr8); + const std::string op_b = GetRegister(instr.gpr20); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + // Add an extra scope and declare the texture coords inside to prevent overwriting + // them in case they are used as outputs of the texs instruction. + shader.AddLine("{"); + ++shader.scope; + shader.AddLine(coord); + const std::string texture = "texture(" + sampler + ", coords)"; + for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) { + SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4); + } + --shader.scope; + shader.AddLine("}"); break; } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name, instr.hex); + UNREACHABLE(); + } } break; } @@ -342,14 +458,18 @@ private: offset = PROGRAM_END - 1; break; } - - default: { - LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - static_cast<unsigned>(instr.opcode.EffectiveOpCode()), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - throw DecompileFail("Unhandled instruction"); + case OpCode::Id::IPA: { + const auto& attribute = instr.attribute.fmt28; + std::string dest = GetRegister(instr.gpr0); + SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4); break; } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name, instr.hex); + UNREACHABLE(); + } } break; @@ -379,7 +499,7 @@ private: for (const auto& subroutine : subroutines) { shader.AddLine("bool " + subroutine.GetName() + "();"); } - shader.AddLine(""); + shader.AddNewLine(); // Add the main entry point shader.AddLine("bool exec_shader() {"); @@ -422,14 +542,14 @@ private: } --shader.scope; - shader.AddLine("}"); + shader.AddLine('}'); } shader.AddLine("default: return false;"); - shader.AddLine("}"); + shader.AddLine('}'); --shader.scope; - shader.AddLine("}"); + shader.AddLine('}'); shader.AddLine("return false;"); } @@ -456,7 +576,7 @@ private: for (const auto& reg : declr_register) { declarations.AddLine("float " + reg + " = 0.0;"); } - declarations.AddLine(""); + declarations.AddNewLine(); for (const auto& index : declr_input_attribute) { // TODO(bunnei): Use proper number of elements for these @@ -465,7 +585,7 @@ private: static_cast<u32>(Attribute::Index::Attribute_0)) + ") in vec4 " + GetInputAttribute(index) + ";"); } - declarations.AddLine(""); + declarations.AddNewLine(); for (const auto& index : declr_output_attribute) { // TODO(bunnei): Use proper number of elements for these @@ -474,15 +594,15 @@ private: static_cast<u32>(Attribute::Index::Attribute_0)) + ") out vec4 " + GetOutputAttribute(index) + ";"); } - declarations.AddLine(""); + declarations.AddNewLine(); unsigned const_buffer_layout = 0; for (const auto& entry : GetConstBuffersDeclarations()) { declarations.AddLine("layout(std430) buffer " + entry.GetName()); - declarations.AddLine("{"); + declarations.AddLine('{'); declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];"); declarations.AddLine("};"); - declarations.AddLine(""); + declarations.AddNewLine(); ++const_buffer_layout; } } @@ -501,7 +621,7 @@ private: std::set<Attribute::Index> declr_input_attribute; std::set<Attribute::Index> declr_output_attribute; std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; -}; +}; // namespace Decompiler std::string GetCommonDeclarations() { return "bool exec_shader();"; @@ -514,7 +634,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, GLSLGenerator generator(subroutines, program_code, main_offset, stage); return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; } catch (const DecompileFail& exception) { - LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); + NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); } return boost::none; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 9f6e0ef58..382c76b7a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <array> #include <functional> #include <string> diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index aeea1c805..254f6e2c3 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -27,10 +27,19 @@ out gl_PerVertex { vec4 gl_Position; }; +out vec4 position; + +layout (std140) uniform vs_config { + vec4 viewport_flip; +}; + void main() { exec_shader(); -} + // Viewport can be flipped, which is unsupported by glViewport + position.xy *= viewport_flip.xy; + gl_Position = position; +} )"; out += program.first; return {out, program.second}; @@ -46,8 +55,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo .get_value_or({}); out += R"( +in vec4 position; out vec4 color; +layout (std140) uniform fs_config { + vec4 viewport_flip; +}; + uniform sampler2D tex[32]; void main() { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 85b838faa..17b3925a0 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -53,6 +53,12 @@ void SetShaderSamplerBindings(GLuint shader) { } // namespace Impl -void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {} +void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + // TODO(bunnei): Support more than one viewport + viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0; + viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0; +} } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index be63320e0..e963b4b7e 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -30,10 +30,9 @@ void SetShaderSamplerBindings(GLuint shader); // Not following that rule will cause problems on some AMD drivers. struct MaxwellUniformData { void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); - // TODO(Subv): Use this for something. + alignas(16) GLvec4 viewport_flip; }; -// static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is -// incorrect"); +static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 7b8a15ed2..f91dfe36a 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -2,8 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <iterator> #include <glad/glad.h> -#include "common/common_funcs.h" #include "common/logging/log.h" #include "video_core/renderer_opengl/gl_state.h" @@ -192,7 +192,7 @@ void OpenGLState::Apply() const { } // Textures - for (unsigned i = 0; i < ARRAY_SIZE(texture_units); ++i) { + for (size_t i = 0; i < std::size(texture_units); ++i) { if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) { glActiveTexture(TextureUnits::MaxwellTexture(i).Enum()); glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 4bc2f52e0..e78dc5784 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <memory> #include <glad/glad.h> #include "common/common_types.h" diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 7909dcfc3..a49265b38 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -31,7 +31,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_BYTE; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str()); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); UNREACHABLE(); return {}; } @@ -40,7 +40,21 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_FLOAT; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str()); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); + UNREACHABLE(); + return {}; +} + +inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { + switch (index_format) { + case Maxwell::IndexFormat::UnsignedByte: + return GL_UNSIGNED_BYTE; + case Maxwell::IndexFormat::UnsignedShort: + return GL_UNSIGNED_SHORT; + case Maxwell::IndexFormat::UnsignedInt: + return GL_UNSIGNED_INT; + } + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format)); UNREACHABLE(); return {}; } @@ -52,7 +66,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { case Maxwell::PrimitiveTopology::TriangleStrip: return GL_TRIANGLE_STRIP; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); UNREACHABLE(); return {}; } @@ -64,18 +78,90 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) { case Tegra::Texture::TextureFilter::Nearest: return GL_NEAREST; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode=%u", - static_cast<u32>(filter_mode)); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}", + static_cast<u32>(filter_mode)); UNREACHABLE(); return {}; } inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { switch (wrap_mode) { + case Tegra::Texture::WrapMode::Wrap: + return GL_REPEAT; case Tegra::Texture::WrapMode::ClampToEdge: return GL_CLAMP_TO_EDGE; + case Tegra::Texture::WrapMode::ClampOGL: + // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use + // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to + // manually mix them. However the shader part of this is not yet implemented. + return GL_CLAMP_TO_BORDER; + } + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", + static_cast<u32>(wrap_mode)); + UNREACHABLE(); + return {}; +} + +inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { + switch (equation) { + case Maxwell::Blend::Equation::Add: + return GL_FUNC_ADD; + case Maxwell::Blend::Equation::Subtract: + return GL_FUNC_SUBTRACT; + case Maxwell::Blend::Equation::ReverseSubtract: + return GL_FUNC_REVERSE_SUBTRACT; + case Maxwell::Blend::Equation::Min: + return GL_MIN; + case Maxwell::Blend::Equation::Max: + return GL_MAX; + } + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); + UNREACHABLE(); + return {}; +} + +inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { + switch (factor) { + case Maxwell::Blend::Factor::Zero: + return GL_ZERO; + case Maxwell::Blend::Factor::One: + return GL_ONE; + case Maxwell::Blend::Factor::SourceColor: + return GL_SRC_COLOR; + case Maxwell::Blend::Factor::OneMinusSourceColor: + return GL_ONE_MINUS_SRC_COLOR; + case Maxwell::Blend::Factor::SourceAlpha: + return GL_SRC_ALPHA; + case Maxwell::Blend::Factor::OneMinusSourceAlpha: + return GL_ONE_MINUS_SRC_ALPHA; + case Maxwell::Blend::Factor::DestAlpha: + return GL_DST_ALPHA; + case Maxwell::Blend::Factor::OneMinusDestAlpha: + return GL_ONE_MINUS_DST_ALPHA; + case Maxwell::Blend::Factor::DestColor: + return GL_DST_COLOR; + case Maxwell::Blend::Factor::OneMinusDestColor: + return GL_ONE_MINUS_DST_COLOR; + case Maxwell::Blend::Factor::SourceAlphaSaturate: + return GL_SRC_ALPHA_SATURATE; + case Maxwell::Blend::Factor::Source1Color: + return GL_SRC1_COLOR; + case Maxwell::Blend::Factor::OneMinusSource1Color: + return GL_ONE_MINUS_SRC1_COLOR; + case Maxwell::Blend::Factor::Source1Alpha: + return GL_SRC1_ALPHA; + case Maxwell::Blend::Factor::OneMinusSource1Alpha: + return GL_ONE_MINUS_SRC1_ALPHA; + case Maxwell::Blend::Factor::ConstantColor: + return GL_CONSTANT_COLOR; + case Maxwell::Blend::Factor::OneMinusConstantColor: + return GL_ONE_MINUS_CONSTANT_COLOR; + case Maxwell::Blend::Factor::ConstantAlpha: + return GL_CONSTANT_ALPHA; + case Maxwell::Blend::Factor::OneMinusConstantAlpha: + return GL_ONE_MINUS_CONSTANT_ALPHA; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode=%u", static_cast<u32>(wrap_mode)); + NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); UNREACHABLE(); return {}; } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 2e87281eb..4df687786 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -48,31 +48,39 @@ u32 BytesPerPixel(TextureFormat format) { case TextureFormat::DXT1: // In this case a 'pixel' actually refers to a 4x4 tile. return 8; + case TextureFormat::DXT23: + case TextureFormat::DXT45: + // In this case a 'pixel' actually refers to a 4x4 tile. + return 16; case TextureFormat::A8R8G8B8: return 4; + case TextureFormat::B5G6R5: + return 2; default: UNIMPLEMENTED_MSG("Format not implemented"); break; } } -std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) { +std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, + u32 block_height) { u8* data = Memory::GetPointer(address); u32 bytes_per_pixel = BytesPerPixel(format); - static constexpr u32 DefaultBlockHeight = 16; - std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); switch (format) { case TextureFormat::DXT1: - // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values. + case TextureFormat::DXT23: + case TextureFormat::DXT45: + // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values. CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, DefaultBlockHeight); + unswizzled_data.data(), true, block_height); break; case TextureFormat::A8R8G8B8: + case TextureFormat::B5G6R5: CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, DefaultBlockHeight); + unswizzled_data.data(), true, block_height); break; default: UNIMPLEMENTED_MSG("Format not implemented"); @@ -89,7 +97,10 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat // TODO(Subv): Implement. switch (format) { case TextureFormat::DXT1: + case TextureFormat::DXT23: + case TextureFormat::DXT45: case TextureFormat::A8R8G8B8: + case TextureFormat::B5G6R5: // TODO(Subv): For the time being just forward the same data without any decoding. rgba_data = texture_data; break; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 0c21694ff..a700911cf 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -14,7 +14,8 @@ namespace Texture { /** * Unswizzles a swizzled texture without changing its format. */ -std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height); +std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, + u32 block_height = TICEntry::DefaultBlockHeight); /** * Decodes an unswizzled texture into a A8R8G8B8 texture. diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c12ed6e1d..86e45aa88 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -4,6 +4,7 @@ #pragma once +#include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -13,8 +14,11 @@ namespace Tegra { namespace Texture { enum class TextureFormat : u32 { - A8R8G8B8 = 8, + A8R8G8B8 = 0x8, + B5G6R5 = 0x15, DXT1 = 0x24, + DXT23 = 0x25, + DXT45 = 0x26, }; enum class TextureType : u32 { @@ -55,6 +59,8 @@ union TextureHandle { static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); struct TICEntry { + static constexpr u32 DefaultBlockHeight = 16; + union { u32 raw; BitField<0, 7, TextureFormat> format; @@ -68,7 +74,12 @@ struct TICEntry { BitField<0, 16, u32> address_high; BitField<21, 3, TICHeaderVersion> header_version; }; - INSERT_PADDING_BYTES(4); + union { + BitField<3, 3, u32> block_height; + + // High 16 bits of the pitch value + BitField<0, 16, u32> pitch_high; + }; union { BitField<0, 16, u32> width_minus_1; BitField<23, 4, TextureType> texture_type; @@ -80,6 +91,13 @@ struct TICEntry { return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); } + u32 Pitch() const { + ASSERT(header_version == TICHeaderVersion::Pitch || + header_version == TICHeaderVersion::PitchColorKey); + // The pitch value is 21 bits, and is 32B aligned. + return pitch_high << 5; + } + u32 Width() const { return width_minus_1 + 1; } @@ -88,6 +106,13 @@ struct TICEntry { return height_minus_1 + 1; } + u32 BlockHeight() const { + ASSERT(header_version == TICHeaderVersion::BlockLinear || + header_version == TICHeaderVersion::BlockLinearColorKey); + // The block height is stored in log2 format. + return 1 << block_height; + } + bool IsTiled() const { return header_version == TICHeaderVersion::BlockLinear || header_version == TICHeaderVersion::BlockLinearColorKey; |
