aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt 1
-rw-r--r-- src/video_core/color.h 132
-rw-r--r-- src/video_core/command_processor.cpp 6
-rw-r--r-- src/video_core/debug_utils/debug_utils.cpp 84
-rw-r--r-- src/video_core/pica.h 18
-rw-r--r-- src/video_core/rasterizer.cpp 84
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 60
-rw-r--r-- src/video_core/utils.h 50
-rw-r--r-- src/video_core/vertex_shader.cpp 80
-rw-r--r-- src/video_core/video_core.cpp 3
-rw-r--r-- src/video_core/video_core.h 1
11 files changed, 338 insertions, 181 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 13c3f7b22..4c1e6449a 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -19,6 +19,7 @@ set(HEADERS
renderer_opengl/gl_shaders.h
renderer_opengl/renderer_opengl.h
clipper.h
+ color.h
command_processor.h
gpu_debugger.h
math.h
diff --git a/src/video_core/color.h b/src/video_core/color.h
index e86ac1265..35da901f2 100644
--- a/src/video_core/color.h
+++ b/src/video_core/color.h
@@ -5,28 +5,152 @@
#pragma once
#include "common/common_types.h"
+#include "video_core/math.h"
namespace Color {
/// Convert a 1-bit color component to 8 bit
-static inline u8 Convert1To8(u8 value) {
+inline u8 Convert1To8(u8 value) {
return value * 255;
}
/// Convert a 4-bit color component to 8 bit
-static inline u8 Convert4To8(u8 value) {
+inline u8 Convert4To8(u8 value) {
return (value << 4) | value;
}
/// Convert a 5-bit color component to 8 bit
-static inline u8 Convert5To8(u8 value) {
+inline u8 Convert5To8(u8 value) {
return (value << 3) | (value >> 2);
}
/// Convert a 6-bit color component to 8 bit
-static inline u8 Convert6To8(u8 value) {
+inline u8 Convert6To8(u8 value) {
return (value << 2) | (value >> 4);
}
+/// Convert an 8-bit color component to 1 bit (keeps only the most significant bit)
+inline u8 Convert8To1(u8 value) {
+    return static_cast<u8>(value >> 7);
+}
+
+/// Convert an 8-bit color component to 4 bit (keeps the upper nibble)
+inline u8 Convert8To4(u8 value) {
+    return static_cast<u8>(value >> 4);
+}
+
+/// Convert an 8-bit color component to 5 bit (drops the three least significant bits)
+inline u8 Convert8To5(u8 value) {
+    return static_cast<u8>(value >> 3);
+}
+
+/// Convert an 8-bit color component to 6 bit (drops the two least significant bits)
+inline u8 Convert8To6(u8 value) {
+    return static_cast<u8>(value >> 2);
+}
+
+/**
+ * Decode a color stored in RGBA8 format.
+ * The four components are read in reverse byte order: byte 3 is red, byte 0 is alpha.
+ * @param bytes Pointer to encoded source color (4 bytes are read)
+ * @return Result color decoded as Math::Vec4<u8>
+ */
+inline const Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
+    const u8 red = bytes[3];
+    const u8 green = bytes[2];
+    const u8 blue = bytes[1];
+    const u8 alpha = bytes[0];
+    return { red, green, blue, alpha };
+}
+
+/**
+ * Decode a color stored in RGB8 format.
+ * The three components are read in reverse byte order (byte 2 is red, byte 0 is blue);
+ * alpha is always reported as fully opaque.
+ * @param bytes Pointer to encoded source color (3 bytes are read)
+ * @return Result color decoded as Math::Vec4<u8>
+ */
+inline const Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
+    const u8 red = bytes[2];
+    const u8 green = bytes[1];
+    const u8 blue = bytes[0];
+    return { red, green, blue, 255 };
+}
+
+/**
+ * Decode a color stored in RGB565 format.
+ * Bit layout of the 16-bit value: red in bits 15-11, green in bits 10-5, blue in bits 4-0.
+ * Alpha is always reported as fully opaque.
+ * @param bytes Pointer to encoded source color (read as one u16_le)
+ * @return Result color decoded as Math::Vec4<u8>
+ */
+inline const Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
+    // NOTE(review): presumably bytes is suitably aligned for a 16-bit load - confirm with callers
+    const u16_le packed = *reinterpret_cast<const u16_le*>(bytes);
+    const u8 red = Convert5To8((packed >> 11) & 0x1F);
+    const u8 green = Convert6To8((packed >> 5) & 0x3F);
+    const u8 blue = Convert5To8(packed & 0x1F);
+    return { red, green, blue, 255 };
+}
+
+/**
+ * Decode a color stored in RGB5A1 format.
+ * Bit layout of the 16-bit value: red in bits 15-11, green in bits 10-6, blue in bits 5-1,
+ * alpha in bit 0.
+ * @param bytes Pointer to encoded source color (read as one u16_le)
+ * @return Result color decoded as Math::Vec4<u8>
+ */
+inline const Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
+    // NOTE(review): presumably bytes is suitably aligned for a 16-bit load - confirm with callers
+    const u16_le packed = *reinterpret_cast<const u16_le*>(bytes);
+    const u8 red = Convert5To8((packed >> 11) & 0x1F);
+    const u8 green = Convert5To8((packed >> 6) & 0x1F);
+    const u8 blue = Convert5To8((packed >> 1) & 0x1F);
+    const u8 alpha = Convert1To8(packed & 0x1);
+    return { red, green, blue, alpha };
+}
+
+/**
+ * Decode a color stored in RGBA4 format.
+ * Bit layout of the 16-bit value: red in bits 15-12, green in bits 11-8, blue in bits 7-4,
+ * alpha in bits 3-0.
+ * @param bytes Pointer to encoded source color (read as one u16_le)
+ * @return Result color decoded as Math::Vec4<u8>
+ */
+inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
+    // NOTE(review): presumably bytes is suitably aligned for a 16-bit load - confirm with callers
+    const u16_le packed = *reinterpret_cast<const u16_le*>(bytes);
+    const u8 red = Convert4To8((packed >> 12) & 0xF);
+    const u8 green = Convert4To8((packed >> 8) & 0xF);
+    const u8 blue = Convert4To8((packed >> 4) & 0xF);
+    const u8 alpha = Convert4To8(packed & 0xF);
+    return { red, green, blue, alpha };
+}
+
+/**
+ * Encode a color as RGBA8 format.
+ * Components are written in reverse byte order: red goes to byte 3, alpha to byte 0.
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color (4 bytes are written)
+ */
+inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
+    const u8 red = color.r();
+    const u8 green = color.g();
+    const u8 blue = color.b();
+    const u8 alpha = color.a();
+
+    bytes[3] = red;
+    bytes[2] = green;
+    bytes[1] = blue;
+    bytes[0] = alpha;
+}
+
+/**
+ * Encode a color as RGB8 format.
+ * Components are written in reverse byte order (red to byte 2, blue to byte 0);
+ * the alpha component of the source color is not stored.
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color (3 bytes are written)
+ */
+inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
+    const u8 red = color.r();
+    const u8 green = color.g();
+    const u8 blue = color.b();
+
+    bytes[2] = red;
+    bytes[1] = green;
+    bytes[0] = blue;
+}
+
+/**
+ * Encode a color as RGB565 format.
+ * Red occupies bits 15-11, green bits 10-5 and blue bits 4-0 of the stored 16-bit value;
+ * the alpha component of the source color is not stored.
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color (written as one u16_le)
+ */
+inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
+    const int red_field = Convert8To5(color.r()) << 11;
+    const int green_field = Convert8To6(color.g()) << 5;
+    const int blue_field = Convert8To5(color.b());
+
+    // NOTE(review): presumably bytes is suitably aligned for a 16-bit store - confirm with callers
+    *reinterpret_cast<u16_le*>(bytes) = red_field | green_field | blue_field;
+}
+
+/**
+ * Encode a color as RGB5A1 format.
+ * Red occupies bits 15-11, green bits 10-6, blue bits 5-1 and alpha bit 0 of the stored
+ * 16-bit value. Alpha is reduced to its most significant bit.
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color (written as one u16_le)
+ */
+inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
+    const int red_field = Convert8To5(color.r()) << 11;
+    const int green_field = Convert8To5(color.g()) << 6;
+    const int blue_field = Convert8To5(color.b()) << 1;
+    const int alpha_field = Convert8To1(color.a());
+
+    // NOTE(review): presumably bytes is suitably aligned for a 16-bit store - confirm with callers
+    *reinterpret_cast<u16_le*>(bytes) = red_field | green_field | blue_field | alpha_field;
+}
+
+/**
+ * Encode a color as RGBA4 format.
+ * Red occupies bits 15-12, green bits 11-8, blue bits 7-4 and alpha bits 3-0 of the stored
+ * 16-bit value. Each component is reduced to its upper nibble.
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color (written as one u16_le)
+ */
+inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
+    const int red_field = Convert8To4(color.r()) << 12;
+    const int green_field = Convert8To4(color.g()) << 8;
+    const int blue_field = Convert8To4(color.b()) << 4;
+    const int alpha_field = Convert8To4(color.a());
+
+    // NOTE(review): presumably bytes is suitably aligned for a 16-bit store - confirm with callers
+    *reinterpret_cast<u16_le*>(bytes) = red_field | green_field | blue_field | alpha_field;
+}
} // namespace
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 586ad62b6..e031871e8 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -4,6 +4,8 @@
#include <boost/range/algorithm/fill.hpp>
+#include "common/profiler.h"
+
#include "clipper.h"
#include "command_processor.h"
#include "math.h"
@@ -25,6 +27,8 @@ static int float_regs_counter = 0;
static u32 uniform_write_buffer[4];
+Common::Profiling::TimingCategory category_drawing("Drawing");
+
static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
if (id >= registers.NumIds())
@@ -53,6 +57,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(trigger_draw):
case PICA_REG_INDEX(trigger_draw_indexed):
{
+ Common::Profiling::ScopeTimer scope_timer(category_drawing);
+
DebugUtils::DumpTevStageConfig(registers.GetTevStages());
if (g_debug_context)
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 0beb72e6b..745c4f4ed 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -23,6 +23,7 @@
#include "video_core/color.h"
#include "video_core/math.h"
#include "video_core/pica.h"
+#include "video_core/utils.h"
#include "debug_utils.h"
@@ -306,111 +307,69 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
}
const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
- // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
- // of which is composed of four 2x2 subtiles each of which is composed of four texels.
- // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
- // texels are laid out in a 2x2 subtile like this:
- // 2 3
- // 0 1
- //
- // The full 8x8 tile has the texels arranged like this:
- //
- // 42 43 46 47 58 59 62 63
- // 40 41 44 45 56 57 60 61
- // 34 35 38 39 50 51 54 55
- // 32 33 36 37 48 49 52 53
- // 10 11 14 15 26 27 30 31
- // 08 09 12 13 24 25 28 29
- // 02 03 06 07 18 19 22 23
- // 00 01 04 05 16 17 20 21
-
- const unsigned int block_width = 8;
- const unsigned int block_height = 8;
-
const unsigned int coarse_x = x & ~7;
const unsigned int coarse_y = y & ~7;
- // Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
- // arranged in a Z-order curve. More details on the bit manipulation at:
- // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
- unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210
- i = (i ^ (i << 2)) & 0x1313; // ---2 --10
- i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0
- i = (i | (i >> 7)) & 0x3F;
-
if (info.format != Regs::TextureFormat::ETC1 &&
info.format != Regs::TextureFormat::ETC1A4) {
// TODO(neobrain): Fix code design to unify vertical block offsets!
source += coarse_y * info.stride;
}
- const unsigned int offset = coarse_x * block_height;
-
+
// TODO: Assert that width/height are multiples of block dimensions
switch (info.format) {
case Regs::TextureFormat::RGBA8:
{
- const u8* source_ptr = source + offset * 4 + i * 4;
- return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };
+ auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4));
+ return { res.r(), res.g(), res.b(), disable_alpha ? 255 : res.a() };
}
case Regs::TextureFormat::RGB8:
{
- const u8* source_ptr = source + offset * 3 + i * 3;
- return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };
+ auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3));
+ return { res.r(), res.g(), res.b(), 255 };
}
- case Regs::TextureFormat::RGBA5551:
+ case Regs::TextureFormat::RGB5A1:
{
- const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2);
- u8 r = (source_ptr >> 11) & 0x1F;
- u8 g = ((source_ptr) >> 6) & 0x1F;
- u8 b = (source_ptr >> 1) & 0x1F;
- u8 a = source_ptr & 1;
- return Math::MakeVec<u8>(Color::Convert5To8(r), Color::Convert5To8(g),
- Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a));
+ auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2));
+ return { res.r(), res.g(), res.b(), disable_alpha ? 255 : res.a() };
}
case Regs::TextureFormat::RGB565:
{
- const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2);
- u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);
- u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);
- u8 b = Color::Convert5To8((source_ptr) & 0x1F);
- return Math::MakeVec<u8>(r, g, b, 255);
+ auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2));
+ return { res.r(), res.g(), res.b(), 255 };
}
case Regs::TextureFormat::RGBA4:
{
- const u8* source_ptr = source + offset * 2 + i * 2;
- u8 r = Color::Convert4To8(source_ptr[1] >> 4);
- u8 g = Color::Convert4To8(source_ptr[1] & 0xF);
- u8 b = Color::Convert4To8(source_ptr[0] >> 4);
- u8 a = Color::Convert4To8(source_ptr[0] & 0xF);
- return { r, g, b, disable_alpha ? (u8)255 : a };
+ auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2));
+ return { res.r(), res.g(), res.b(), disable_alpha ? 255 : res.a() };
}
case Regs::TextureFormat::IA8:
{
- const u8* source_ptr = source + offset * 2 + i * 2;
+ const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
if (disable_alpha) {
// Show intensity as red, alpha as green
return { source_ptr[1], source_ptr[0], 0, 255 };
} else {
- return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
+ return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0] };
}
}
case Regs::TextureFormat::I8:
{
- const u8* source_ptr = source + offset + i;
+ const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
return { *source_ptr, *source_ptr, *source_ptr, 255 };
}
case Regs::TextureFormat::A8:
{
- const u8* source_ptr = source + offset + i;
+ const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
if (disable_alpha) {
return { *source_ptr, *source_ptr, *source_ptr, 255 };
@@ -421,7 +380,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
case Regs::TextureFormat::IA4:
{
- const u8* source_ptr = source + offset + i;
+ const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
u8 a = Color::Convert4To8((*source_ptr) & 0xF);
@@ -436,9 +395,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
case Regs::TextureFormat::A4:
{
- const u8* source_ptr = source + offset / 2 + i / 2;
+ u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
+ const u8* source_ptr = source + morton_offset / 2;
- u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4);
+ u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
a = Color::Convert4To8(a);
if (disable_alpha) {
@@ -545,7 +505,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
}
// Add modifier
- unsigned table_index = (x < 2) ? table_index_2.Value() : table_index_1.Value();
+ unsigned table_index = (x < 2) ? table_index_1.Value() : table_index_2.Value();
static const auto etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{
{ 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 },
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index e4a5ef78e..b14de9278 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -39,13 +39,6 @@ namespace Pica {
struct Regs {
-// helper macro to properly align structure members.
-// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121",
-// depending on the current source line to make sure variable names are unique.
-#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
-#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
-#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)];
-
INSERT_PADDING_WORDS(0x10);
u32 trigger_irq;
@@ -152,7 +145,7 @@ struct Regs {
enum class TextureFormat : u32 {
RGBA8 = 0,
RGB8 = 1,
- RGBA5551 = 2,
+ RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
IA8 = 5,
@@ -174,7 +167,7 @@ struct Regs {
case TextureFormat::RGB8:
return 6;
- case TextureFormat::RGBA5551:
+ case TextureFormat::RGB5A1:
case TextureFormat::RGB565:
case TextureFormat::RGBA4:
case TextureFormat::IA8:
@@ -416,10 +409,11 @@ struct Regs {
} output_merger;
struct {
+ // Components are laid out in reverse byte order, most significant bits first.
enum ColorFormat : u32 {
RGBA8 = 0,
RGB8 = 1,
- RGBA5551 = 2,
+ RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
};
@@ -709,10 +703,6 @@ struct Regs {
INSERT_PADDING_WORDS(0x22);
-#undef INSERT_PADDING_WORDS_HELPER1
-#undef INSERT_PADDING_WORDS_HELPER2
-#undef INSERT_PADDING_WORDS
-
// Map register indices to names readable by humans
// Used for debugging purposes, so performance is not an issue here
static std::string GetCommandName(int index) {
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 81df09baf..5861c1926 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -7,12 +7,14 @@
#include "common/common_types.h"
#include "common/math_util.h"
+#include "core/hw/gpu.h"
+#include "debug_utils/debug_utils.h"
#include "math.h"
+#include "color.h"
#include "pica.h"
#include "rasterizer.h"
#include "vertex_shader.h"
-
-#include "debug_utils/debug_utils.h"
+#include "video_core/utils.h"
namespace Pica {
@@ -20,59 +22,101 @@ namespace Rasterizer {
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
- u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
// Similarly to textures, the render framebuffer is laid out from bottom to top, too.
// NOTE: The framebuffer height register contains the actual FB height minus one.
y = (registers.framebuffer.height - y);
+ const u32 coarse_y = y & ~7;
+ u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
+ u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
+ u8* dst_pixel = Memory::GetPointer(PAddrToVAddr(addr)) + dst_offset;
+
switch (registers.framebuffer.color_format) {
case registers.framebuffer.RGBA8:
- {
- u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
- *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value;
+ Color::EncodeRGBA8(color, dst_pixel);
+ break;
+
+ case registers.framebuffer.RGB8:
+ Color::EncodeRGB8(color, dst_pixel);
+ break;
+
+ case registers.framebuffer.RGB5A1:
+ Color::EncodeRGB5A1(color, dst_pixel);
+ break;
+
+ case registers.framebuffer.RGB565:
+ Color::EncodeRGB565(color, dst_pixel);
+ break;
+
+ case registers.framebuffer.RGBA4:
+ Color::EncodeRGBA4(color, dst_pixel);
break;
- }
default:
- LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
+ LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
UNIMPLEMENTED();
}
}
static const Math::Vec4<u8> GetPixel(int x, int y) {
const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
- u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
y = (registers.framebuffer.height - y);
- u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth());
- Math::Vec4<u8> ret;
- ret.a() = value >> 24;
- ret.r() = (value >> 16) & 0xFF;
- ret.g() = (value >> 8) & 0xFF;
- ret.b() = value & 0xFF;
- return ret;
+ const u32 coarse_y = y & ~7;
+ u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
+ u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
+ u8* src_pixel = Memory::GetPointer(PAddrToVAddr(addr)) + src_offset;
+
+ switch (registers.framebuffer.color_format) {
+ case registers.framebuffer.RGBA8:
+ return Color::DecodeRGBA8(src_pixel);
+
+ case registers.framebuffer.RGB8:
+ return Color::DecodeRGB8(src_pixel);
+
+ case registers.framebuffer.RGB5A1:
+ return Color::DecodeRGB5A1(src_pixel);
+
+ case registers.framebuffer.RGB565:
+ return Color::DecodeRGB565(src_pixel);
+
+ case registers.framebuffer.RGBA4:
+ return Color::DecodeRGBA4(src_pixel);
+
+ default:
+ LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
+ UNIMPLEMENTED();
+ }
+
+ return {};
}
static u32 GetDepth(int x, int y) {
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
- u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
+ u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
y = (registers.framebuffer.height - y);
+
+ const u32 coarse_y = y & ~7;
+ u32 stride = registers.framebuffer.width * 2;
// Assuming 16-bit depth buffer format until actual format handling is implemented
- return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
+ return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
}
static void SetDepth(int x, int y, u16 value) {
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
- u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
+ u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
y = (registers.framebuffer.height - y);
+ const u32 coarse_y = y & ~7;
+ u32 stride = registers.framebuffer.width * 2;
+
// Assuming 16-bit depth buffer format until actual format handling is implemented
- *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
+ *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
}
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 272695174..95ab96340 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -4,7 +4,10 @@
#include "core/hw/gpu.h"
#include "core/mem_map.h"
+
#include "common/emu_window.h"
+#include "common/profiler_reporting.h"
+
#include "video_core/video_core.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -75,9 +78,18 @@ void RendererOpenGL::SwapBuffers() {
DrawScreens();
+ auto& profiler = Common::Profiling::GetProfilingManager();
+ profiler.FinishFrame();
+ {
+ auto aggregator = Common::Profiling::GetTimingResultsAggregator();
+ aggregator->AddFrame(profiler.GetPreviousFrameResults());
+ }
+
// Swap buffers
render_window->PollEvents();
render_window->SwapBuffers();
+
+ profiler.BeginFrame();
}
/**
@@ -242,28 +254,26 @@ void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x
* Draws the emulated screens to the emulator window.
*/
void RendererOpenGL::DrawScreens() {
- auto viewport_extent = GetViewportExtent();
- glViewport(viewport_extent.left, viewport_extent.top, viewport_extent.GetWidth(), viewport_extent.GetHeight()); // TODO: Or bottom?
+ auto layout = render_window->GetFramebufferLayout();
+
+ glViewport(0, 0, layout.width, layout.height);
glClear(GL_COLOR_BUFFER_BIT);
glUseProgram(program_id);
// Set projection matrix
- std::array<GLfloat, 3*2> ortho_matrix = MakeOrthographicMatrix((float)resolution_width, (float)resolution_height);
+ std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
+ (float)layout.height);
glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
// Bind texture in Texture Unit 0
glActiveTexture(GL_TEXTURE0);
glUniform1i(uniform_color_texture, 0);
- const float max_width = std::max((float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenBottomWidth);
- const float top_x = 0.5f * (max_width - VideoCore::kScreenTopWidth);
- const float bottom_x = 0.5f * (max_width - VideoCore::kScreenBottomWidth);
-
- DrawSingleScreenRotated(textures[0], top_x, 0,
- (float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight);
- DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight,
- (float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight);
+ DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
+ (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
+ DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
+ (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
m_current_frame++;
}
@@ -280,34 +290,6 @@ void RendererOpenGL::SetWindow(EmuWindow* window) {
render_window = window;
}
-MathUtil::Rectangle<unsigned> RendererOpenGL::GetViewportExtent() {
- unsigned framebuffer_width;
- unsigned framebuffer_height;
- std::tie(framebuffer_width, framebuffer_height) = render_window->GetFramebufferSize();
-
- float window_aspect_ratio = static_cast<float>(framebuffer_height) / framebuffer_width;
- float emulation_aspect_ratio = static_cast<float>(resolution_height) / resolution_width;
-
- MathUtil::Rectangle<unsigned> viewport_extent;
- if (window_aspect_ratio > emulation_aspect_ratio) {
- // Window is narrower than the emulation content => apply borders to the top and bottom
- unsigned viewport_height = static_cast<unsigned>(std::round(emulation_aspect_ratio * framebuffer_width));
- viewport_extent.left = 0;
- viewport_extent.top = (framebuffer_height - viewport_height) / 2;
- viewport_extent.right = viewport_extent.left + framebuffer_width;
- viewport_extent.bottom = viewport_extent.top + viewport_height;
- } else {
- // Otherwise, apply borders to the left and right sides of the window.
- unsigned viewport_width = static_cast<unsigned>(std::round(framebuffer_height / emulation_aspect_ratio));
- viewport_extent.left = (framebuffer_width - viewport_width) / 2;
- viewport_extent.top = 0;
- viewport_extent.right = viewport_extent.left + viewport_width;
- viewport_extent.bottom = viewport_extent.top + framebuffer_height;
- }
-
- return viewport_extent;
-}
-
/// Initialize the renderer
void RendererOpenGL::Init() {
render_window->MakeCurrent();
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 6fd640425..bda793fa5 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -35,4 +35,54 @@ struct TGAHeader {
*/
void DumpTGA(std::string filename, short width, short height, u8* raw_data);
+/**
+ * Interleave the lower 3 bits of each coordinate to get the intra-block offset of a texel,
+ * which follows a Z-order curve: the bits of x end up on the even bit positions and the bits
+ * of y on the odd ones, giving a value in the range 0..63. Background on Morton codes:
+ * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
+ */
+static inline u32 MortonInterleave(u32 x, u32 y) {
+    // Result bit layout, most significant bit first: y2 x2 y1 x1 y0 x0
+    const u32 x_spread = (x & 1) | ((x & 2) << 1) | ((x & 4) << 2); // ---2 -1-0
+    const u32 y_spread = (y & 1) | ((y & 2) << 1) | ((y & 4) << 2); // ---2 -1-0
+    return x_spread | (y_spread << 1);
+}
+
+/**
+ * Calculates the byte offset of a pixel inside one row of 8x8 tiles stored in Morton order.
+ *
+ * Only the horizontal tile position and the position inside the tile are accounted for. The
+ * offset of the tile row itself (coarse_y * stride) is not included and has to be added by
+ * the caller.
+ *
+ * @param x Horizontal pixel coordinate
+ * @param y Vertical pixel coordinate (only its lower 3 bits are used)
+ * @param bytes_per_pixel Size of one pixel in bytes
+ * @return Byte offset of the pixel relative to the start of its row of tiles
+ */
+static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
+    // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
+    // of which is composed of four 2x2 subtiles each of which is composed of four texels.
+    // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
+    // texels are laid out in a 2x2 subtile like this:
+    // 2 3
+    // 0 1
+    //
+    // The full 8x8 tile has the texels arranged like this:
+    //
+    // 42 43 46 47 58 59 62 63
+    // 40 41 44 45 56 57 60 61
+    // 34 35 38 39 50 51 54 55
+    // 32 33 36 37 48 49 52 53
+    // 10 11 14 15 26 27 30 31
+    // 08 09 12 13 24 25 28 29
+    // 02 03 06 07 18 19 22 23
+    // 00 01 04 05 16 17 20 21
+    //
+    // This pattern is what's called Z-order curve, or Morton order.
+
+    const unsigned int block_height = 8;
+
+    // X coordinate of the tile's first column
+    const unsigned int coarse_x = x & ~7;
+
+    // Index of the texel inside its tile
+    const u32 i = VideoCore::MortonInterleave(x, y);
+
+    // Number of texels stored in the tiles to the left of this one
+    const unsigned int offset = coarse_x * block_height;
+
+    return (i + offset) * bytes_per_pixel;
+}
+
} // namespace
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index def868ac7..4eb3e743e 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -17,6 +17,7 @@
#include "vertex_shader.h"
#include "debug_utils/debug_utils.h"
+using nihstro::OpCode;
using nihstro::Instruction;
using nihstro::RegisterType;
using nihstro::SourceRegister;
@@ -90,6 +91,7 @@ struct VertexShaderState {
u8 repeat_counter; // How often to repeat until this call stack element is removed
u8 loop_increment; // Which value to add to the loop counter after an iteration
// TODO: Should this be a signed value? Does it even matter?
+ u32 loop_address; // The address where we'll return to after each loop iteration
};
// TODO: Is there a maximal size for this?
@@ -115,6 +117,8 @@ static void ProcessShaderCode(VertexShaderState& state) {
if (top.repeat_counter-- == 0) {
state.program_counter = &shader_memory[top.return_address];
state.call_stack.pop();
+ } else {
+ state.program_counter = &shader_memory[top.loop_address];
}
// TODO: Is "trying again" accurate to hardware?
@@ -129,7 +133,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions,
u32 return_offset, u8 repeat_count, u8 loop_increment) {
state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
- state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment });
+ state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
};
u32 binary_offset = state.program_counter - shader_memory.data();
@@ -151,10 +155,10 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
};
- switch (instr.opcode.GetInfo().type) {
- case Instruction::OpCodeType::Arithmetic:
+ switch (instr.opcode.Value().GetInfo().type) {
+ case OpCode::Type::Arithmetic:
{
- bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed);
+ bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed);
// TODO: We don't really support this properly: For instance, the address register
// offset needs to be applied to SRC2 instead, etc.
// For now, we just abort in this situation.
@@ -194,15 +198,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
src2[3] = src2[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
- : (instr.common.dest < 0x10) ? dummy_vec4_float24
- : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
+ float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()]
+ : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24
+ : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
- switch (instr.opcode.EffectiveOpCode()) {
- case Instruction::OpCode::ADD:
+ switch (instr.opcode.Value().EffectiveOpCode()) {
+ case OpCode::Id::ADD:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -214,7 +218,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::MUL:
+ case OpCode::Id::MUL:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -226,7 +230,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::MAX:
+ case OpCode::Id::MAX:
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
@@ -235,11 +239,11 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
break;
- case Instruction::OpCode::DP3:
- case Instruction::OpCode::DP4:
+ case OpCode::Id::DP3:
+ case OpCode::Id::DP4:
{
float24 dot = float24::FromFloat32(0.f);
- int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
+ int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4;
for (int i = 0; i < num_components; ++i)
dot = dot + src1[i] * src2[i];
@@ -253,7 +257,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
// Reciprocal
- case Instruction::OpCode::RCP:
+ case OpCode::Id::RCP:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -268,7 +272,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
// Reciprocal Square Root
- case Instruction::OpCode::RSQ:
+ case OpCode::Id::RSQ:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -282,7 +286,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::MOVA:
+ case OpCode::Id::MOVA:
{
for (int i = 0; i < 2; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -295,7 +299,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::MOV:
+ case OpCode::Id::MOV:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -306,7 +310,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::CMP:
+ case OpCode::Id::CMP:
for (int i = 0; i < 2; ++i) {
// TODO: Can you restrict to one compare via dest masking?
@@ -347,7 +351,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
DEBUG_ASSERT(false);
break;
}
@@ -355,9 +359,9 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCodeType::MultiplyAdd:
+ case OpCode::Type::MultiplyAdd:
{
- if (instr.opcode.EffectiveOpCode() == Instruction::OpCode::MAD) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) {
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
const float24* src1_ = LookupSourceRegister(instr.mad.src1);
@@ -405,9 +409,9 @@ static void ProcessShaderCode(VertexShaderState& state) {
src3[3] = src3[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.mad.dest < 0x08) ? state.output_register_table[4*instr.mad.dest.GetIndex()]
- : (instr.mad.dest < 0x10) ? dummy_vec4_float24
- : (instr.mad.dest < 0x20) ? &state.temporary_registers[instr.mad.dest.GetIndex()][0]
+ float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()]
+ : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24
+ : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
for (int i = 0; i < 4; ++i) {
@@ -418,7 +422,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
} else {
LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
}
break;
}
@@ -445,31 +449,31 @@ static void ProcessShaderCode(VertexShaderState& state) {
};
// Handle each instruction on its own
- switch (instr.opcode) {
- case Instruction::OpCode::END:
+ switch (instr.opcode.Value()) {
+ case OpCode::Id::END:
exit_loop = true;
break;
- case Instruction::OpCode::JMPC:
+ case OpCode::Id::JMPC:
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1;
}
break;
- case Instruction::OpCode::JMPU:
+ case OpCode::Id::JMPU:
if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1;
}
break;
- case Instruction::OpCode::CALL:
+ case OpCode::Id::CALL:
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
binary_offset + 1, 0, 0);
break;
- case Instruction::OpCode::CALLU:
+ case OpCode::Id::CALLU:
if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
call(state,
instr.flow_control.dest_offset,
@@ -478,7 +482,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
break;
- case Instruction::OpCode::CALLC:
+ case OpCode::Id::CALLC:
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
call(state,
instr.flow_control.dest_offset,
@@ -487,10 +491,10 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
break;
- case Instruction::OpCode::NOP:
+ case OpCode::Id::NOP:
break;
- case Instruction::OpCode::IFU:
+ case OpCode::Id::IFU:
if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
call(state,
binary_offset + 1,
@@ -505,7 +509,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
- case Instruction::OpCode::IFC:
+ case OpCode::Id::IFC:
{
// TODO: Do we need to consider swizzlers here?
@@ -524,7 +528,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::LOOP:
+ case OpCode::Id::LOOP:
{
state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y;
@@ -539,7 +543,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
break;
}
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 0a236595c..b9d4ede3a 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -18,7 +18,6 @@ namespace VideoCore {
EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window
RendererBase* g_renderer = nullptr; ///< Renderer plugin
-int g_current_frame = 0;
/// Initialize the video core
void Init(EmuWindow* emu_window) {
@@ -27,8 +26,6 @@ void Init(EmuWindow* emu_window) {
g_renderer->SetWindow(g_emu_window);
g_renderer->Init();
- g_current_frame = 0;
-
LOG_DEBUG(Render, "initialized OK");
}
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index b782f17bd..1b51d39bf 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -30,7 +30,6 @@ static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height
// ---------------------
extern RendererBase* g_renderer; ///< Renderer plugin
-extern int g_current_frame; ///< Current frame
extern EmuWindow* g_emu_window; ///< Emu window
/// Start the video core