diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/color.h | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 100 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/pica.h | 152 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 70 | ||||
| -rw-r--r-- | src/video_core/renderer_base.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/utils.h | 6 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 66 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.h | 1 | ||||
| -rw-r--r-- | src/video_core/video_core.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/video_core.h | 1 |
12 files changed, 306 insertions, 125 deletions
diff --git a/src/video_core/color.h b/src/video_core/color.h index 43d635e2c..4d2026eb0 100644 --- a/src/video_core/color.h +++ b/src/video_core/color.h @@ -5,6 +5,8 @@ #pragma once #include "common/common_types.h" +#include "common/swap.h" + #include "video_core/math.h" namespace Color { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index e031871e8..1ea7cad07 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -27,6 +27,10 @@ static int float_regs_counter = 0; static u32 uniform_write_buffer[4]; +static int default_attr_counter = 0; + +static u32 default_attr_write_buffer[3]; + Common::Profiling::TimingCategory category_drawing("Drawing"); static inline void WritePicaReg(u32 id, u32 value, u32 mask) { @@ -71,12 +75,9 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { u32 vertex_attribute_sources[16]; boost::fill(vertex_attribute_sources, 0xdeadbeef); u32 vertex_attribute_strides[16]; - u32 vertex_attribute_formats[16]; + Regs::VertexAttributeFormat vertex_attribute_formats[16]; - // HACK: Initialize vertex_attribute_elements to zero to prevent infinite loops below. - // This is one of the hacks required to deal with uninitalized vertex attributes. - // TODO: Fix this properly. - u32 vertex_attribute_elements[16] = {}; + u32 vertex_attribute_elements[16]; u32 vertex_attribute_element_size[16]; // Setup attribute data from loaders @@ -90,7 +91,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { u32 attribute_index = loader_config.GetComponent(component); vertex_attribute_sources[attribute_index] = load_address; vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); - vertex_attribute_formats[attribute_index] = static_cast<u32>(attribute_config.GetFormat(attribute_index)); + vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); load_address += attribute_config.GetStride(attribute_index); @@ -101,7 +102,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); const auto& index_info = registers.index_array; - const u8* index_address_8 = Memory::GetPointer(PAddrToVAddr(base_address + index_info.offset)); + const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); const u16* index_address_16 = (u16*)index_address_8; bool index_u16 = index_info.format != 0; @@ -126,26 +127,29 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { input.attr[0].w = debug_token; for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i])); - - // TODO(neobrain): Ocarina of Time 3D has GetNumTotalAttributes return 8, - // yet only provides 2 valid source data addresses. Need to figure out - // what's wrong there, until then we just continue when address lookup fails - if (srcdata == nullptr) - continue; - - const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata : - (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata : - (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata : - *(float*)srcdata; - input.attr[i][comp] = float24::FromFloat32(srcval); - LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", - comp, i, vertex, index, - attribute_config.GetPhysicalBaseAddress(), - vertex_attribute_sources[i] - base_address, - vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], - input.attr[i][comp].ToFloat32()); + if (attribute_config.IsDefaultAttribute(i)) { + input.attr[i] = VertexShader::GetDefaultAttribute(i); + LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", + i, vertex, index, + input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), + input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); + } else { + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + const u8* srcdata = Memory::GetPhysicalPointer(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]); + + const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata : + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata : + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata : + *(float*)srcdata; + + input.attr[i][comp] = float24::FromFloat32(srcval); + LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", + comp, i, vertex, index, + attribute_config.GetPhysicalBaseAddress(), + vertex_attribute_sources[i] - base_address, + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], + input.attr[i][comp].ToFloat32()); + } } } @@ -224,7 +228,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // it directly write the values? uniform_write_buffer[float_regs_counter++] = value; - // Uniforms are written in a packed format such that 4 float24 values are encoded in + // Uniforms are written in a packed format such that four float24 values are encoded in // three 32-bit numbers. We write to internal memory once a full such vector is // written. if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || @@ -259,6 +263,46 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } break; } + + // Load default vertex input attributes + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): + { + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? + default_attr_write_buffer[default_attr_counter++] = value; + + // Default attributes are written in a packed format such that four float24 values are encoded in + // three 32-bit numbers. We write to internal memory once a full such vector is + // written. + if (default_attr_counter >= 3) { + default_attr_counter = 0; + + auto& setup = registers.vs_default_attributes_setup; + + if (setup.index >= 16) { + LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); + break; + } + + Math::Vec4<float24>& attribute = VertexShader::GetDefaultAttribute(setup.index); + + // NOTE: The destination component order indeed is "backwards" + attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); + + LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, + attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), + attribute.w.ToFloat32()); + + // TODO: Verify that this actually modifies the register! + setup.index = setup.index + 1; + } + break; + } // Load shader program code case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 83982b4f2..883df48a5 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -393,6 +393,17 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture } } + case Regs::TextureFormat::I4: + { + u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); + const u8* source_ptr = source + morton_offset / 2; + + u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); + i = Color::Convert4To8(i); + + return { i, i, i, 255 }; + } + case Regs::TextureFormat::A4: { u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); @@ -507,7 +518,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture // Add modifier unsigned table_index = (x < 2) ? table_index_1.Value() : table_index_2.Value(); - static const auto etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{ + static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{ { 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 }, { 18, 60 }, { 24, 80 }, { 33, 106 }, { 47, 183 } }}; @@ -597,7 +608,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { png_init_io(png_ptr, fp.GetHandle()); - // Write header (8 bit colour depth) + // Write header (8 bit color depth) png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); diff --git a/src/video_core/pica.h b/src/video_core/pica.h index fe20cd77d..3fbf95721 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -10,10 +10,11 @@ #include <map> #include <vector> +#include "common/assert.h" #include "common/bit_field.h" +#include "common/common_funcs.h" #include "common/common_types.h" - -#include "core/mem_map.h" +#include "common/logging/log.h" namespace Pica { @@ -153,7 +154,7 @@ struct Regs { I8 = 7, A8 = 8, IA4 = 9, - + I4 = 10, A4 = 11, ETC1 = 12, // compressed ETC1A4 = 13, // compressed @@ -223,7 +224,8 @@ struct Regs { Texture1 = 0x4, Texture2 = 0x5, Texture3 = 0x6, - // 0x7-0xc = primary color?? + + PreviousBuffer = 0xd, Constant = 0xe, Previous = 0xf, }; @@ -296,7 +298,18 @@ struct Regs { BitField<24, 8, u32> const_a; }; - INSERT_PADDING_WORDS(0x1); + union { + BitField< 0, 2, u32> color_scale; + BitField<16, 2, u32> alpha_scale; + }; + + inline unsigned GetColorMultiplier() const { + return (color_scale < 3) ? (1 << color_scale) : 1; + } + + inline unsigned GetAlphaMultiplier() const { + return (alpha_scale < 3) ? (1 << alpha_scale) : 1; + } }; TevStageConfig tev_stage0; @@ -306,11 +319,36 @@ struct Regs { TevStageConfig tev_stage2; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage3; - INSERT_PADDING_WORDS(0x13); + INSERT_PADDING_WORDS(0x3); + + union { + // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in + // these masks are set + BitField< 8, 4, u32> update_mask_rgb; + BitField<12, 4, u32> update_mask_a; + + bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + return (stage_index < 4) && (update_mask_rgb & (1 << stage_index)); + } + + bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + return (stage_index < 4) && (update_mask_a & (1 << stage_index)); + } + } tev_combiner_buffer_input; + + INSERT_PADDING_WORDS(0xf); TevStageConfig tev_stage4; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage5; - INSERT_PADDING_WORDS(0x3); + + union { + BitField< 0, 8, u32> r; + BitField< 8, 8, u32> g; + BitField<16, 8, u32> b; + BitField<24, 8, u32> a; + } tev_combiner_buffer_color; + + INSERT_PADDING_WORDS(0x2); const std::array<Regs::TevStageConfig,6> GetTevStages() const { return { tev_stage0, tev_stage1, @@ -423,9 +461,7 @@ struct Regs { D24S8 = 3 }; - /* - * Returns the number of bytes in the specified depth format - */ + // Returns the number of bytes in the specified depth format static u32 BytesPerDepthPixel(DepthFormat format) { switch (format) { case DepthFormat::D16: @@ -440,6 +476,20 @@ struct Regs { } } + // Returns the number of bits per depth component of the specified depth format + static u32 DepthBitsPerPixel(DepthFormat format) { + switch (format) { + case DepthFormat::D16: + return 16; + case DepthFormat::D24: + case DepthFormat::D24S8: + return 24; + default: + LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); + UNIMPLEMENTED(); + } + } + struct { // Components are laid out in reverse byte order, most significant bits first. enum ColorFormat : u32 { @@ -489,14 +539,14 @@ struct Regs { INSERT_PADDING_WORDS(0xe0); - struct { - enum class Format : u64 { - BYTE = 0, - UBYTE = 1, - SHORT = 2, - FLOAT = 3, - }; + enum class VertexAttributeFormat : u64 { + BYTE = 0, + UBYTE = 1, + SHORT = 2, + FLOAT = 3, + }; + struct { BitField<0, 29, u32> base_address; u32 GetPhysicalBaseAddress() const { @@ -505,29 +555,29 @@ struct Regs { // Descriptor for internal vertex attributes union { - BitField< 0, 2, Format> format0; // size of one element + BitField< 0, 2, VertexAttributeFormat> format0; // size of one element BitField< 2, 2, u64> size0; // number of elements minus 1 - BitField< 4, 2, Format> format1; + BitField< 4, 2, VertexAttributeFormat> format1; BitField< 6, 2, u64> size1; - BitField< 8, 2, Format> format2; + BitField< 8, 2, VertexAttributeFormat> format2; BitField<10, 2, u64> size2; - BitField<12, 2, Format> format3; + BitField<12, 2, VertexAttributeFormat> format3; BitField<14, 2, u64> size3; - BitField<16, 2, Format> format4; + BitField<16, 2, VertexAttributeFormat> format4; BitField<18, 2, u64> size4; - BitField<20, 2, Format> format5; + BitField<20, 2, VertexAttributeFormat> format5; BitField<22, 2, u64> size5; - BitField<24, 2, Format> format6; + BitField<24, 2, VertexAttributeFormat> format6; BitField<26, 2, u64> size6; - BitField<28, 2, Format> format7; + BitField<28, 2, VertexAttributeFormat> format7; BitField<30, 2, u64> size7; - BitField<32, 2, Format> format8; + BitField<32, 2, VertexAttributeFormat> format8; BitField<34, 2, u64> size8; - BitField<36, 2, Format> format9; + BitField<36, 2, VertexAttributeFormat> format9; BitField<38, 2, u64> size9; - BitField<40, 2, Format> format10; + BitField<40, 2, VertexAttributeFormat> format10; BitField<42, 2, u64> size10; - BitField<44, 2, Format> format11; + BitField<44, 2, VertexAttributeFormat> format11; BitField<46, 2, u64> size11; BitField<48, 12, u64> attribute_mask; @@ -536,8 +586,8 @@ struct Regs { BitField<60, 4, u64> num_extra_attributes; }; - inline Format GetFormat(int n) const { - Format formats[] = { + inline VertexAttributeFormat GetFormat(int n) const { + VertexAttributeFormat formats[] = { format0, format1, format2, format3, format4, format5, format6, format7, format8, format9, format10, format11 @@ -555,14 +605,18 @@ struct Regs { } inline int GetElementSizeInBytes(int n) const { - return (GetFormat(n) == Format::FLOAT) ? 4 : - (GetFormat(n) == Format::SHORT) ? 2 : 1; + return (GetFormat(n) == VertexAttributeFormat::FLOAT) ? 4 : + (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1; } inline int GetStride(int n) const { return GetNumElements(n) * GetElementSizeInBytes(n); } + inline bool IsDefaultAttribute(int id) const { + return (id >= 12) || (attribute_mask & (1 << id)) != 0; + } + inline int GetNumTotalAttributes() const { return (int)num_extra_attributes+1; } @@ -625,7 +679,18 @@ struct Regs { u32 trigger_draw; u32 trigger_draw_indexed; - INSERT_PADDING_WORDS(0x2e); + INSERT_PADDING_WORDS(0x2); + + // These registers are used to setup the default "fall-back" vertex shader attributes + struct { + // Index of the current default attribute + u32 index; + + // Writing to these registers sets the "current" default attribute. + u32 set_value[3]; + } vs_default_attributes_setup; + + INSERT_PADDING_WORDS(0x28); enum class TriangleTopology : u32 { List = 0, @@ -669,7 +734,7 @@ struct Regs { BitField<56, 4, u64> attribute14_register; BitField<60, 4, u64> attribute15_register; - int GetRegisterForAttribute(int attribute_index) { + int GetRegisterForAttribute(int attribute_index) const { u64 fields[] = { attribute0_register, attribute1_register, attribute2_register, attribute3_register, attribute4_register, attribute5_register, attribute6_register, attribute7_register, @@ -766,8 +831,10 @@ struct Regs { ADD_FIELD(tev_stage1); ADD_FIELD(tev_stage2); ADD_FIELD(tev_stage3); + ADD_FIELD(tev_combiner_buffer_input); ADD_FIELD(tev_stage4); ADD_FIELD(tev_stage5); + ADD_FIELD(tev_combiner_buffer_color); ADD_FIELD(output_merger); ADD_FIELD(framebuffer); ADD_FIELD(vertex_attributes); @@ -775,6 +842,7 @@ struct Regs { ADD_FIELD(num_vertices); ADD_FIELD(trigger_draw); ADD_FIELD(trigger_draw_indexed); + ADD_FIELD(vs_default_attributes_setup); ADD_FIELD(triangle_topology); ADD_FIELD(vs_bool_uniforms); ADD_FIELD(vs_int_uniforms); @@ -840,8 +908,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0); ASSERT_REG_POSITION(tev_stage1, 0xc8); ASSERT_REG_POSITION(tev_stage2, 0xd0); ASSERT_REG_POSITION(tev_stage3, 0xd8); +ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0); ASSERT_REG_POSITION(tev_stage4, 0xf0); ASSERT_REG_POSITION(tev_stage5, 0xf8); +ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); ASSERT_REG_POSITION(output_merger, 0x100); ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(vertex_attributes, 0x200); @@ -849,6 +919,7 @@ ASSERT_REG_POSITION(index_array, 0x227); ASSERT_REG_POSITION(num_vertices, 0x228); ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); +ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); @@ -978,15 +1049,4 @@ union CommandHeader { BitField<31, 1, u32> group_commands; }; -// TODO: Ugly, should fix PhysicalToVirtualAddress instead -inline static u32 PAddrToVAddr(u32 addr) { - if (addr >= Memory::VRAM_PADDR && addr < Memory::VRAM_PADDR + Memory::VRAM_SIZE) { - return addr - Memory::VRAM_PADDR + Memory::VRAM_VADDR; - } else if (addr >= Memory::FCRAM_PADDR && addr < Memory::FCRAM_PADDR + Memory::FCRAM_SIZE) { - return addr - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR; - } else { - return 0; - } -} - } // namespace diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index dd46f0ec3..59eff48f9 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -6,8 +6,11 @@ #include "common/common_types.h" #include "common/math_util.h" +#include "common/profiler.h" #include "core/hw/gpu.h" +#include "core/memory.h" + #include "debug_utils/debug_utils.h" #include "math.h" #include "color.h" @@ -30,7 +33,7 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { const u32 coarse_y = y & ~7; u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; - u8* dst_pixel = Memory::GetPointer(PAddrToVAddr(addr)) + dst_offset; + u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; switch (registers.framebuffer.color_format) { case registers.framebuffer.RGBA8: @@ -67,7 +70,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { const u32 coarse_y = y & ~7; u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; - u8* src_pixel = Memory::GetPointer(PAddrToVAddr(addr)) + src_offset; + u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; switch (registers.framebuffer.color_format) { case registers.framebuffer.RGBA8: @@ -90,12 +93,12 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { UNIMPLEMENTED(); } - return {}; + return {0, 0, 0, 0}; } static u32 GetDepth(int x, int y) { const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); - u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); y = (registers.framebuffer.height - y); @@ -122,7 +125,7 @@ static u32 GetDepth(int x, int y) { static void SetDepth(int x, int y, u32 value) { const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); - u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); y = (registers.framebuffer.height - y); @@ -186,6 +189,8 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, return Math::Cross(vec1, vec2).z; }; +static Common::Profiling::TimingCategory rasterization_category("Rasterization"); + /** * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing * culling via recursion. @@ -195,6 +200,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, const VertexShader::OutputVertex& v2, bool reversed = false) { + Common::Profiling::ScopeTimer timer(rasterization_category); + // vertex positions in rasterizer coordinates static auto FloatToFix = [](float24 flt) { // TODO: Rounding here is necessary to prevent garbage pixels at @@ -342,10 +349,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, case Regs::TextureConfig::MirroredRepeat: { - int coord = (int)((unsigned)val % (2 * size)); + unsigned int coord = ((unsigned)val % (2 * size)); if (coord >= size) coord = 2 * size - 1 - coord; - return coord; + return (int)coord; } default: @@ -361,7 +368,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); - u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); @@ -376,7 +383,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, // with some basic arithmetic. Alpha combiners can be configured separately but work // analogously. Math::Vec4<u8> combiner_output; - for (const auto& tev_stage : tev_stages) { + Math::Vec4<u8> combiner_buffer = { + registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g, + registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a + }; + + for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { + const auto& tev_stage = tev_stages[tev_stage_index]; using Source = Regs::TevStageConfig::Source; using ColorModifier = Regs::TevStageConfig::ColorModifier; using AlphaModifier = Regs::TevStageConfig::AlphaModifier; @@ -398,6 +411,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, case Source::Texture2: return texture_color[2]; + case Source::PreviousBuffer: + return combiner_buffer; + case Source::Constant: return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; @@ -407,7 +423,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, default: LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); UNIMPLEMENTED(); - return {}; + return {0, 0, 0, 0}; } }; @@ -490,6 +506,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, return result.Cast<u8>(); } + case Operation::AddSigned: + { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct + auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); + result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); + result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); + result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); + return result.Cast<u8>(); + } + case Operation::Lerp: return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); @@ -524,7 +550,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, default: LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); UNIMPLEMENTED(); - return {}; + return {0, 0, 0}; } }; @@ -578,7 +604,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, }; auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); - combiner_output = Math::MakeVec(color_output, alpha_output); + combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); + combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); + combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); + combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); + + if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { + combiner_buffer.r() = combiner_output.r(); + combiner_buffer.g() = combiner_output.g(); + combiner_buffer.b() = combiner_output.b(); + } + + if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { + combiner_buffer.a() = combiner_output.a(); + } } if (registers.output_merger.alpha_test.enable) { @@ -624,9 +663,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, // TODO: Does depth indeed only get written even if depth testing is enabled? if (registers.output_merger.depth_test_enable) { - u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + - v1.screenpos[2].ToFloat32() * w1 + - v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); + unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format); + u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); u32 ref_z = GetDepth(x >> 4, y >> 4); bool pass = false; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index b77f29c11..b62409538 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -4,7 +4,7 @@ #pragma once -#include "common/common.h" +#include "common/common_types.h" class RendererBase : NonCopyable { public: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 4273a177f..71ceb021b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,9 +5,11 @@ #include "core/hw/gpu.h" #include "core/hw/hw.h" #include "core/hw/lcd.h" -#include "core/mem_map.h" +#include "core/memory.h" +#include "core/settings.h" #include "common/emu_window.h" +#include "common/logging/log.h" #include "common/profiler_reporting.h" #include "video_core/video_core.h" @@ -117,15 +119,15 @@ void RendererOpenGL::SwapBuffers() { void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, const TextureInfo& texture) { - const VAddr framebuffer_vaddr = Memory::PhysicalToVirtualAddress( - framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2); + const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? + framebuffer.address_left1 : framebuffer.address_left2; LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x", framebuffer.stride * framebuffer.height, - framebuffer_vaddr, (int)framebuffer.width, + framebuffer_addr, (int)framebuffer.width, (int)framebuffer.height, (int)framebuffer.format); - const u8* framebuffer_data = Memory::GetPointer(framebuffer_vaddr); + const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr); int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); size_t pixel_stride = framebuffer.stride / bpp; @@ -172,7 +174,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color * Initializes the OpenGL state and creates persistent objects. */ void RendererOpenGL::InitOpenGLObjects() { - glClearColor(1.0f, 1.0f, 1.0f, 0.0f); + glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); glDisable(GL_DEPTH_TEST); // Link shaders and get variable locations diff --git a/src/video_core/utils.h b/src/video_core/utils.h index bda793fa5..ffb3e73a3 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -13,10 +13,10 @@ namespace VideoCore { /// Structure for the TGA texture format (for dumping) struct TGAHeader { char idlength; - char colourmaptype; + char colormaptype; char datatypecode; - short int colourmaporigin; - short int colourmaplength; + short int colormaporigin; + short int colormaplength; short int x_origin; short int y_origin; short width; diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index e8d865172..981d1a356 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -8,10 +8,9 @@ #include <common/file_util.h> -#include <core/mem_map.h> - #include <nihstro/shader_bytecode.h> +#include "common/profiler.h" #include "pica.h" #include "vertex_shader.h" @@ -35,6 +34,8 @@ static struct { std::array<Math::Vec4<u8>,4> i; } shader_uniforms; +static Math::Vec4<float24> vs_default_attributes[16]; + // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! // For now, we just keep these local arrays around. static std::array<u32, 1024> shader_memory; @@ -60,6 +61,10 @@ Math::Vec4<u8>& GetIntUniform(u32 index) { return shader_uniforms.i[index]; } +Math::Vec4<float24>& GetDefaultAttribute(u32 index) { + return vs_default_attributes[index]; +} + const std::array<u32, 1024>& GetShaderBinary() { return shader_memory; } @@ -229,6 +234,15 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case OpCode::Id::FLR: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); + } + break; + case OpCode::Id::MAX: for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -360,12 +374,15 @@ static void ProcessShaderCode(VertexShaderState& state) { case OpCode::Type::MultiplyAdd: { - if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) { + if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || + (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; - const float24* src1_ = LookupSourceRegister(instr.mad.src1); - const float24* src2_ = LookupSourceRegister(instr.mad.src2); - const float24* src3_ = LookupSourceRegister(instr.mad.src3); + bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); + + const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted)); + const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); @@ -556,7 +573,11 @@ static void ProcessShaderCode(VertexShaderState& state) { } } +static Common::Profiling::TimingCategory shader_category("Vertex Shader"); + OutputVertex RunShader(const InputVertex& input, int num_attributes) { + Common::Profiling::ScopeTimer timer(shader_category); + VertexShaderState state; const u32* main = &shader_memory[registers.vs_main_offset]; @@ -568,22 +589,23 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { const auto& attribute_register_map = registers.vs_input_register_map; float24 dummy_register; boost::fill(state.input_register_table, &dummy_register); - if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; - if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; - if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; - if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x; - if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x; - if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x; - if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x; - if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x; - if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x; - if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x; - if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x; - if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x; - if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x; - if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x; - if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; - if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; + + if (num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; + if (num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; + if (num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; + if (num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x; + if (num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x; + if (num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x; + if (num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x; + if (num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x; + if (num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x; + if (num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x; + if (num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x; + if (num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x; + if (num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x; + if (num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x; + if (num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; + if (num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; state.conditional_code[0] = false; state.conditional_code[1] = false; diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index 3a68a3409..c26709bbc 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -74,6 +74,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes); Math::Vec4<float24>& GetFloatUniform(u32 index); bool& GetBoolUniform(u32 index); Math::Vec4<u8>& GetIntUniform(u32 index); +Math::Vec4<float24>& GetDefaultAttribute(u32 index); const std::array<u32, 1024>& GetShaderBinary(); const std::array<u32, 1024>& GetSwizzlePatterns(); diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index b9d4ede3a..42e3bdd5b 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/common.h" +#include "common/logging/log.h" #include "common/emu_window.h" #include "core/core.h" diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 1b51d39bf..f885bec21 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -4,7 +4,6 @@ #pragma once -#include "common/common.h" #include "common/emu_window.h" #include "renderer_base.h" |
