aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/color.h2
-rw-r--r--src/video_core/command_processor.cpp100
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp15
-rw-r--r--src/video_core/pica.h152
-rw-r--r--src/video_core/rasterizer.cpp70
-rw-r--r--src/video_core/renderer_base.h2
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp14
-rw-r--r--src/video_core/utils.h6
-rw-r--r--src/video_core/vertex_shader.cpp66
-rw-r--r--src/video_core/vertex_shader.h1
-rw-r--r--src/video_core/video_core.cpp2
-rw-r--r--src/video_core/video_core.h1
12 files changed, 306 insertions, 125 deletions
diff --git a/src/video_core/color.h b/src/video_core/color.h
index 43d635e2c..4d2026eb0 100644
--- a/src/video_core/color.h
+++ b/src/video_core/color.h
@@ -5,6 +5,8 @@
#pragma once
#include "common/common_types.h"
+#include "common/swap.h"
+
#include "video_core/math.h"
namespace Color {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index e031871e8..1ea7cad07 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -27,6 +27,10 @@ static int float_regs_counter = 0;
static u32 uniform_write_buffer[4];
+static int default_attr_counter = 0;
+
+static u32 default_attr_write_buffer[3];
+
Common::Profiling::TimingCategory category_drawing("Drawing");
static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
@@ -71,12 +75,9 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
u32 vertex_attribute_sources[16];
boost::fill(vertex_attribute_sources, 0xdeadbeef);
u32 vertex_attribute_strides[16];
- u32 vertex_attribute_formats[16];
+ Regs::VertexAttributeFormat vertex_attribute_formats[16];
- // HACK: Initialize vertex_attribute_elements to zero to prevent infinite loops below.
- // This is one of the hacks required to deal with uninitalized vertex attributes.
- // TODO: Fix this properly.
- u32 vertex_attribute_elements[16] = {};
+ u32 vertex_attribute_elements[16];
u32 vertex_attribute_element_size[16];
// Setup attribute data from loaders
@@ -90,7 +91,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
u32 attribute_index = loader_config.GetComponent(component);
vertex_attribute_sources[attribute_index] = load_address;
vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
- vertex_attribute_formats[attribute_index] = static_cast<u32>(attribute_config.GetFormat(attribute_index));
+ vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index);
load_address += attribute_config.GetStride(attribute_index);
@@ -101,7 +102,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
const auto& index_info = registers.index_array;
- const u8* index_address_8 = Memory::GetPointer(PAddrToVAddr(base_address + index_info.offset));
+ const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
const u16* index_address_16 = (u16*)index_address_8;
bool index_u16 = index_info.format != 0;
@@ -126,26 +127,29 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
input.attr[0].w = debug_token;
for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
- for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
- const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]));
-
- // TODO(neobrain): Ocarina of Time 3D has GetNumTotalAttributes return 8,
- // yet only provides 2 valid source data addresses. Need to figure out
- // what's wrong there, until then we just continue when address lookup fails
- if (srcdata == nullptr)
- continue;
-
- const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata :
- (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata :
- (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata :
- *(float*)srcdata;
- input.attr[i][comp] = float24::FromFloat32(srcval);
- LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
- comp, i, vertex, index,
- attribute_config.GetPhysicalBaseAddress(),
- vertex_attribute_sources[i] - base_address,
- vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
- input.attr[i][comp].ToFloat32());
+ if (attribute_config.IsDefaultAttribute(i)) {
+ input.attr[i] = VertexShader::GetDefaultAttribute(i);
+ LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
+ i, vertex, index,
+ input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
+ input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
+ } else {
+ for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
+ const u8* srcdata = Memory::GetPhysicalPointer(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]);
+
+ const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
+ (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
+ (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata :
+ *(float*)srcdata;
+
+ input.attr[i][comp] = float24::FromFloat32(srcval);
+ LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
+ comp, i, vertex, index,
+ attribute_config.GetPhysicalBaseAddress(),
+ vertex_attribute_sources[i] - base_address,
+ vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
+ input.attr[i][comp].ToFloat32());
+ }
}
}
@@ -224,7 +228,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
// it directly write the values?
uniform_write_buffer[float_regs_counter++] = value;
- // Uniforms are written in a packed format such that 4 float24 values are encoded in
+ // Uniforms are written in a packed format such that four float24 values are encoded in
// three 32-bit numbers. We write to internal memory once a full such vector is
// written.
if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
@@ -259,6 +263,46 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
}
break;
}
+
+ // Load default vertex input attributes
+ case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233):
+ case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234):
+ case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235):
+ {
+ // TODO: Does actual hardware indeed keep an intermediate buffer or does
+ // it directly write the values?
+ default_attr_write_buffer[default_attr_counter++] = value;
+
+ // Default attributes are written in a packed format such that four float24 values are encoded in
+ // three 32-bit numbers. We write to internal memory once a full such vector is
+ // written.
+ if (default_attr_counter >= 3) {
+ default_attr_counter = 0;
+
+ auto& setup = registers.vs_default_attributes_setup;
+
+ if (setup.index >= 16) {
+ LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
+ break;
+ }
+
+ Math::Vec4<float24>& attribute = VertexShader::GetDefaultAttribute(setup.index);
+
+ // NOTE: The destination component order indeed is "backwards"
+ attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8);
+ attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
+ attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
+ attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF);
+
+ LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
+ attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
+ attribute.w.ToFloat32());
+
+ // TODO: Verify that this actually modifies the register!
+ setup.index = setup.index + 1;
+ }
+ break;
+ }
// Load shader program code
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc):
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 83982b4f2..883df48a5 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -393,6 +393,17 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
}
}
+ case Regs::TextureFormat::I4:
+ {
+ u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
+ const u8* source_ptr = source + morton_offset / 2;
+
+ u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
+ i = Color::Convert4To8(i);
+
+ return { i, i, i, 255 };
+ }
+
case Regs::TextureFormat::A4:
{
u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
@@ -507,7 +518,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
// Add modifier
unsigned table_index = (x < 2) ? table_index_1.Value() : table_index_2.Value();
- static const auto etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{
+ static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{
{ 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 },
{ 18, 60 }, { 24, 80 }, { 33, 106 }, { 47, 183 }
}};
@@ -597,7 +608,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
png_init_io(png_ptr, fp.GetHandle());
- // Write header (8 bit colour depth)
+ // Write header (8 bit color depth)
png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height,
8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index fe20cd77d..3fbf95721 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -10,10 +10,11 @@
#include <map>
#include <vector>
+#include "common/assert.h"
#include "common/bit_field.h"
+#include "common/common_funcs.h"
#include "common/common_types.h"
-
-#include "core/mem_map.h"
+#include "common/logging/log.h"
namespace Pica {
@@ -153,7 +154,7 @@ struct Regs {
I8 = 7,
A8 = 8,
IA4 = 9,
-
+ I4 = 10,
A4 = 11,
ETC1 = 12, // compressed
ETC1A4 = 13, // compressed
@@ -223,7 +224,8 @@ struct Regs {
Texture1 = 0x4,
Texture2 = 0x5,
Texture3 = 0x6,
- // 0x7-0xc = primary color??
+
+ PreviousBuffer = 0xd,
Constant = 0xe,
Previous = 0xf,
};
@@ -296,7 +298,18 @@ struct Regs {
BitField<24, 8, u32> const_a;
};
- INSERT_PADDING_WORDS(0x1);
+ union {
+ BitField< 0, 2, u32> color_scale;
+ BitField<16, 2, u32> alpha_scale;
+ };
+
+ inline unsigned GetColorMultiplier() const {
+ return (color_scale < 3) ? (1 << color_scale) : 1;
+ }
+
+ inline unsigned GetAlphaMultiplier() const {
+ return (alpha_scale < 3) ? (1 << alpha_scale) : 1;
+ }
};
TevStageConfig tev_stage0;
@@ -306,11 +319,36 @@ struct Regs {
TevStageConfig tev_stage2;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage3;
- INSERT_PADDING_WORDS(0x13);
+ INSERT_PADDING_WORDS(0x3);
+
+ union {
+ // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
+ // these masks are set
+ BitField< 8, 4, u32> update_mask_rgb;
+ BitField<12, 4, u32> update_mask_a;
+
+ bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
+ return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
+ }
+
+ bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
+ return (stage_index < 4) && (update_mask_a & (1 << stage_index));
+ }
+ } tev_combiner_buffer_input;
+
+ INSERT_PADDING_WORDS(0xf);
TevStageConfig tev_stage4;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage5;
- INSERT_PADDING_WORDS(0x3);
+
+ union {
+ BitField< 0, 8, u32> r;
+ BitField< 8, 8, u32> g;
+ BitField<16, 8, u32> b;
+ BitField<24, 8, u32> a;
+ } tev_combiner_buffer_color;
+
+ INSERT_PADDING_WORDS(0x2);
const std::array<Regs::TevStageConfig,6> GetTevStages() const {
return { tev_stage0, tev_stage1,
@@ -423,9 +461,7 @@ struct Regs {
D24S8 = 3
};
- /*
- * Returns the number of bytes in the specified depth format
- */
+ // Returns the number of bytes in the specified depth format
static u32 BytesPerDepthPixel(DepthFormat format) {
switch (format) {
case DepthFormat::D16:
@@ -440,6 +476,20 @@ struct Regs {
}
}
+ // Returns the number of bits per depth component of the specified depth format
+ static u32 DepthBitsPerPixel(DepthFormat format) {
+ switch (format) {
+ case DepthFormat::D16:
+ return 16;
+ case DepthFormat::D24:
+ case DepthFormat::D24S8:
+ return 24;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
+ UNIMPLEMENTED();
+ }
+ }
+
struct {
// Components are laid out in reverse byte order, most significant bits first.
enum ColorFormat : u32 {
@@ -489,14 +539,14 @@ struct Regs {
INSERT_PADDING_WORDS(0xe0);
- struct {
- enum class Format : u64 {
- BYTE = 0,
- UBYTE = 1,
- SHORT = 2,
- FLOAT = 3,
- };
+ enum class VertexAttributeFormat : u64 {
+ BYTE = 0,
+ UBYTE = 1,
+ SHORT = 2,
+ FLOAT = 3,
+ };
+ struct {
BitField<0, 29, u32> base_address;
u32 GetPhysicalBaseAddress() const {
@@ -505,29 +555,29 @@ struct Regs {
// Descriptor for internal vertex attributes
union {
- BitField< 0, 2, Format> format0; // size of one element
+ BitField< 0, 2, VertexAttributeFormat> format0; // size of one element
BitField< 2, 2, u64> size0; // number of elements minus 1
- BitField< 4, 2, Format> format1;
+ BitField< 4, 2, VertexAttributeFormat> format1;
BitField< 6, 2, u64> size1;
- BitField< 8, 2, Format> format2;
+ BitField< 8, 2, VertexAttributeFormat> format2;
BitField<10, 2, u64> size2;
- BitField<12, 2, Format> format3;
+ BitField<12, 2, VertexAttributeFormat> format3;
BitField<14, 2, u64> size3;
- BitField<16, 2, Format> format4;
+ BitField<16, 2, VertexAttributeFormat> format4;
BitField<18, 2, u64> size4;
- BitField<20, 2, Format> format5;
+ BitField<20, 2, VertexAttributeFormat> format5;
BitField<22, 2, u64> size5;
- BitField<24, 2, Format> format6;
+ BitField<24, 2, VertexAttributeFormat> format6;
BitField<26, 2, u64> size6;
- BitField<28, 2, Format> format7;
+ BitField<28, 2, VertexAttributeFormat> format7;
BitField<30, 2, u64> size7;
- BitField<32, 2, Format> format8;
+ BitField<32, 2, VertexAttributeFormat> format8;
BitField<34, 2, u64> size8;
- BitField<36, 2, Format> format9;
+ BitField<36, 2, VertexAttributeFormat> format9;
BitField<38, 2, u64> size9;
- BitField<40, 2, Format> format10;
+ BitField<40, 2, VertexAttributeFormat> format10;
BitField<42, 2, u64> size10;
- BitField<44, 2, Format> format11;
+ BitField<44, 2, VertexAttributeFormat> format11;
BitField<46, 2, u64> size11;
BitField<48, 12, u64> attribute_mask;
@@ -536,8 +586,8 @@ struct Regs {
BitField<60, 4, u64> num_extra_attributes;
};
- inline Format GetFormat(int n) const {
- Format formats[] = {
+ inline VertexAttributeFormat GetFormat(int n) const {
+ VertexAttributeFormat formats[] = {
format0, format1, format2, format3,
format4, format5, format6, format7,
format8, format9, format10, format11
@@ -555,14 +605,18 @@ struct Regs {
}
inline int GetElementSizeInBytes(int n) const {
- return (GetFormat(n) == Format::FLOAT) ? 4 :
- (GetFormat(n) == Format::SHORT) ? 2 : 1;
+ return (GetFormat(n) == VertexAttributeFormat::FLOAT) ? 4 :
+ (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1;
}
inline int GetStride(int n) const {
return GetNumElements(n) * GetElementSizeInBytes(n);
}
+ inline bool IsDefaultAttribute(int id) const {
+ return (id >= 12) || (attribute_mask & (1 << id)) != 0;
+ }
+
inline int GetNumTotalAttributes() const {
return (int)num_extra_attributes+1;
}
@@ -625,7 +679,18 @@ struct Regs {
u32 trigger_draw;
u32 trigger_draw_indexed;
- INSERT_PADDING_WORDS(0x2e);
+ INSERT_PADDING_WORDS(0x2);
+
+ // These registers are used to setup the default "fall-back" vertex shader attributes
+ struct {
+ // Index of the current default attribute
+ u32 index;
+
+ // Writing to these registers sets the "current" default attribute.
+ u32 set_value[3];
+ } vs_default_attributes_setup;
+
+ INSERT_PADDING_WORDS(0x28);
enum class TriangleTopology : u32 {
List = 0,
@@ -669,7 +734,7 @@ struct Regs {
BitField<56, 4, u64> attribute14_register;
BitField<60, 4, u64> attribute15_register;
- int GetRegisterForAttribute(int attribute_index) {
+ int GetRegisterForAttribute(int attribute_index) const {
u64 fields[] = {
attribute0_register, attribute1_register, attribute2_register, attribute3_register,
attribute4_register, attribute5_register, attribute6_register, attribute7_register,
@@ -766,8 +831,10 @@ struct Regs {
ADD_FIELD(tev_stage1);
ADD_FIELD(tev_stage2);
ADD_FIELD(tev_stage3);
+ ADD_FIELD(tev_combiner_buffer_input);
ADD_FIELD(tev_stage4);
ADD_FIELD(tev_stage5);
+ ADD_FIELD(tev_combiner_buffer_color);
ADD_FIELD(output_merger);
ADD_FIELD(framebuffer);
ADD_FIELD(vertex_attributes);
@@ -775,6 +842,7 @@ struct Regs {
ADD_FIELD(num_vertices);
ADD_FIELD(trigger_draw);
ADD_FIELD(trigger_draw_indexed);
+ ADD_FIELD(vs_default_attributes_setup);
ADD_FIELD(triangle_topology);
ADD_FIELD(vs_bool_uniforms);
ADD_FIELD(vs_int_uniforms);
@@ -840,8 +908,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0);
ASSERT_REG_POSITION(tev_stage1, 0xc8);
ASSERT_REG_POSITION(tev_stage2, 0xd0);
ASSERT_REG_POSITION(tev_stage3, 0xd8);
+ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);
ASSERT_REG_POSITION(tev_stage4, 0xf0);
ASSERT_REG_POSITION(tev_stage5, 0xf8);
+ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
ASSERT_REG_POSITION(output_merger, 0x100);
ASSERT_REG_POSITION(framebuffer, 0x110);
ASSERT_REG_POSITION(vertex_attributes, 0x200);
@@ -849,6 +919,7 @@ ASSERT_REG_POSITION(index_array, 0x227);
ASSERT_REG_POSITION(num_vertices, 0x228);
ASSERT_REG_POSITION(trigger_draw, 0x22e);
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
+ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
ASSERT_REG_POSITION(triangle_topology, 0x25e);
ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1);
@@ -978,15 +1049,4 @@ union CommandHeader {
BitField<31, 1, u32> group_commands;
};
-// TODO: Ugly, should fix PhysicalToVirtualAddress instead
-inline static u32 PAddrToVAddr(u32 addr) {
- if (addr >= Memory::VRAM_PADDR && addr < Memory::VRAM_PADDR + Memory::VRAM_SIZE) {
- return addr - Memory::VRAM_PADDR + Memory::VRAM_VADDR;
- } else if (addr >= Memory::FCRAM_PADDR && addr < Memory::FCRAM_PADDR + Memory::FCRAM_SIZE) {
- return addr - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR;
- } else {
- return 0;
- }
-}
-
} // namespace
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index dd46f0ec3..59eff48f9 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -6,8 +6,11 @@
#include "common/common_types.h"
#include "common/math_util.h"
+#include "common/profiler.h"
#include "core/hw/gpu.h"
+#include "core/memory.h"
+
#include "debug_utils/debug_utils.h"
#include "math.h"
#include "color.h"
@@ -30,7 +33,7 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
const u32 coarse_y = y & ~7;
u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
- u8* dst_pixel = Memory::GetPointer(PAddrToVAddr(addr)) + dst_offset;
+ u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
switch (registers.framebuffer.color_format) {
case registers.framebuffer.RGBA8:
@@ -67,7 +70,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
const u32 coarse_y = y & ~7;
u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
- u8* src_pixel = Memory::GetPointer(PAddrToVAddr(addr)) + src_offset;
+ u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
switch (registers.framebuffer.color_format) {
case registers.framebuffer.RGBA8:
@@ -90,12 +93,12 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
UNIMPLEMENTED();
}
- return {};
+ return {0, 0, 0, 0};
}
static u32 GetDepth(int x, int y) {
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
- u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
+ u8* depth_buffer = Memory::GetPhysicalPointer(addr);
y = (registers.framebuffer.height - y);
@@ -122,7 +125,7 @@ static u32 GetDepth(int x, int y) {
static void SetDepth(int x, int y, u32 value) {
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
- u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
+ u8* depth_buffer = Memory::GetPhysicalPointer(addr);
y = (registers.framebuffer.height - y);
@@ -186,6 +189,8 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
return Math::Cross(vec1, vec2).z;
};
+static Common::Profiling::TimingCategory rasterization_category("Rasterization");
+
/**
* Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
* culling via recursion.
@@ -195,6 +200,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
const VertexShader::OutputVertex& v2,
bool reversed = false)
{
+ Common::Profiling::ScopeTimer timer(rasterization_category);
+
// vertex positions in rasterizer coordinates
static auto FloatToFix = [](float24 flt) {
// TODO: Rounding here is necessary to prevent garbage pixels at
@@ -342,10 +349,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
case Regs::TextureConfig::MirroredRepeat:
{
- int coord = (int)((unsigned)val % (2 * size));
+ unsigned int coord = ((unsigned)val % (2 * size));
if (coord >= size)
coord = 2 * size - 1 - coord;
- return coord;
+ return (int)coord;
}
default:
@@ -361,7 +368,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
- u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
+ u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
@@ -376,7 +383,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
// with some basic arithmetic. Alpha combiners can be configured separately but work
// analogously.
Math::Vec4<u8> combiner_output;
- for (const auto& tev_stage : tev_stages) {
+ Math::Vec4<u8> combiner_buffer = {
+ registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g,
+ registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a
+ };
+
+ for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
+ const auto& tev_stage = tev_stages[tev_stage_index];
using Source = Regs::TevStageConfig::Source;
using ColorModifier = Regs::TevStageConfig::ColorModifier;
using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
@@ -398,6 +411,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
case Source::Texture2:
return texture_color[2];
+ case Source::PreviousBuffer:
+ return combiner_buffer;
+
case Source::Constant:
return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
@@ -407,7 +423,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
default:
LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
UNIMPLEMENTED();
- return {};
+ return {0, 0, 0, 0};
}
};
@@ -490,6 +506,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
return result.Cast<u8>();
}
+ case Operation::AddSigned:
+ {
+ // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
+ auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128);
+ result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
+ result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
+ result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
+ return result.Cast<u8>();
+ }
+
case Operation::Lerp:
return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
@@ -524,7 +550,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
default:
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
UNIMPLEMENTED();
- return {};
+ return {0, 0, 0};
}
};
@@ -578,7 +604,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
};
auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
- combiner_output = Math::MakeVec(color_output, alpha_output);
+ combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
+ combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier());
+ combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
+ combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
+
+ if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
+ combiner_buffer.r() = combiner_output.r();
+ combiner_buffer.g() = combiner_output.g();
+ combiner_buffer.b() = combiner_output.b();
+ }
+
+ if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
+ combiner_buffer.a() = combiner_output.a();
+ }
}
if (registers.output_merger.alpha_test.enable) {
@@ -624,9 +663,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
// TODO: Does depth indeed only get written even if depth testing is enabled?
if (registers.output_merger.depth_test_enable) {
- u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
- v1.screenpos[2].ToFloat32() * w1 +
- v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
+ unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format);
+ u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 +
+ v1.screenpos[2].ToFloat32() * w1 +
+ v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
u32 ref_z = GetDepth(x >> 4, y >> 4);
bool pass = false;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index b77f29c11..b62409538 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -4,7 +4,7 @@
#pragma once
-#include "common/common.h"
+#include "common/common_types.h"
class RendererBase : NonCopyable {
public:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 4273a177f..71ceb021b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,9 +5,11 @@
#include "core/hw/gpu.h"
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
-#include "core/mem_map.h"
+#include "core/memory.h"
+#include "core/settings.h"
#include "common/emu_window.h"
+#include "common/logging/log.h"
#include "common/profiler_reporting.h"
#include "video_core/video_core.h"
@@ -117,15 +119,15 @@ void RendererOpenGL::SwapBuffers() {
void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer,
const TextureInfo& texture) {
- const VAddr framebuffer_vaddr = Memory::PhysicalToVirtualAddress(
- framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2);
+ const PAddr framebuffer_addr = framebuffer.active_fb == 0 ?
+ framebuffer.address_left1 : framebuffer.address_left2;
LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x",
framebuffer.stride * framebuffer.height,
- framebuffer_vaddr, (int)framebuffer.width,
+ framebuffer_addr, (int)framebuffer.width,
(int)framebuffer.height, (int)framebuffer.format);
- const u8* framebuffer_data = Memory::GetPointer(framebuffer_vaddr);
+ const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
size_t pixel_stride = framebuffer.stride / bpp;
@@ -172,7 +174,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
* Initializes the OpenGL state and creates persistent objects.
*/
void RendererOpenGL::InitOpenGLObjects() {
- glClearColor(1.0f, 1.0f, 1.0f, 0.0f);
+ glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f);
glDisable(GL_DEPTH_TEST);
// Link shaders and get variable locations
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index bda793fa5..ffb3e73a3 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -13,10 +13,10 @@ namespace VideoCore {
/// Structure for the TGA texture format (for dumping)
struct TGAHeader {
char idlength;
- char colourmaptype;
+ char colormaptype;
char datatypecode;
- short int colourmaporigin;
- short int colourmaplength;
+ short int colormaporigin;
+ short int colormaplength;
short int x_origin;
short int y_origin;
short width;
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index e8d865172..981d1a356 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -8,10 +8,9 @@
#include <common/file_util.h>
-#include <core/mem_map.h>
-
#include <nihstro/shader_bytecode.h>
+#include "common/profiler.h"
#include "pica.h"
#include "vertex_shader.h"
@@ -35,6 +34,8 @@ static struct {
std::array<Math::Vec4<u8>,4> i;
} shader_uniforms;
+static Math::Vec4<float24> vs_default_attributes[16];
+
// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
// For now, we just keep these local arrays around.
static std::array<u32, 1024> shader_memory;
@@ -60,6 +61,10 @@ Math::Vec4<u8>& GetIntUniform(u32 index) {
return shader_uniforms.i[index];
}
+Math::Vec4<float24>& GetDefaultAttribute(u32 index) {
+ return vs_default_attributes[index];
+}
+
const std::array<u32, 1024>& GetShaderBinary() {
return shader_memory;
}
@@ -229,6 +234,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
+ case OpCode::Id::FLR:
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32()));
+ }
+ break;
+
case OpCode::Id::MAX:
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -360,12 +374,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
case OpCode::Type::MultiplyAdd:
{
- if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) {
+ if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
+ (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
- const float24* src1_ = LookupSourceRegister(instr.mad.src1);
- const float24* src2_ = LookupSourceRegister(instr.mad.src2);
- const float24* src3_ = LookupSourceRegister(instr.mad.src3);
+ bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
+
+ const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
+ const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted));
+ const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
@@ -556,7 +573,11 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
}
+static Common::Profiling::TimingCategory shader_category("Vertex Shader");
+
OutputVertex RunShader(const InputVertex& input, int num_attributes) {
+ Common::Profiling::ScopeTimer timer(shader_category);
+
VertexShaderState state;
const u32* main = &shader_memory[registers.vs_main_offset];
@@ -568,22 +589,23 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
const auto& attribute_register_map = registers.vs_input_register_map;
float24 dummy_register;
boost::fill(state.input_register_table, &dummy_register);
- if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
- if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
- if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
- if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
- if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
- if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
- if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
- if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
- if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
- if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
- if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
- if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
- if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
- if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
- if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
- if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
+
+ if (num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
+ if (num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
+ if (num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
+ if (num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
+ if (num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
+ if (num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
+ if (num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
+ if (num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
+ if (num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
+ if (num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
+ if (num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
+ if (num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
+ if (num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
+ if (num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
+ if (num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
+ if (num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
state.conditional_code[0] = false;
state.conditional_code[1] = false;
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h
index 3a68a3409..c26709bbc 100644
--- a/src/video_core/vertex_shader.h
+++ b/src/video_core/vertex_shader.h
@@ -74,6 +74,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes);
Math::Vec4<float24>& GetFloatUniform(u32 index);
bool& GetBoolUniform(u32 index);
Math::Vec4<u8>& GetIntUniform(u32 index);
+Math::Vec4<float24>& GetDefaultAttribute(u32 index);
const std::array<u32, 1024>& GetShaderBinary();
const std::array<u32, 1024>& GetSwizzlePatterns();
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index b9d4ede3a..42e3bdd5b 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -2,7 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "common/common.h"
+#include "common/logging/log.h"
#include "common/emu_window.h"
#include "core/core.h"
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 1b51d39bf..f885bec21 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -4,7 +4,6 @@
#pragma once
-#include "common/common.h"
#include "common/emu_window.h"
#include "renderer_base.h"