diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/color.h | 132 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/pica.h | 18 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 60 | ||||
| -rw-r--r-- | src/video_core/utils.h | 50 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 80 | ||||
| -rw-r--r-- | src/video_core/video_core.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/video_core.h | 1 |
11 files changed, 338 insertions, 181 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 13c3f7b22..4c1e6449a 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -19,6 +19,7 @@ set(HEADERS renderer_opengl/gl_shaders.h renderer_opengl/renderer_opengl.h clipper.h + color.h command_processor.h gpu_debugger.h math.h diff --git a/src/video_core/color.h b/src/video_core/color.h index e86ac1265..35da901f2 100644 --- a/src/video_core/color.h +++ b/src/video_core/color.h @@ -5,28 +5,152 @@ #pragma once #include "common/common_types.h" +#include "video_core/math.h" namespace Color { /// Convert a 1-bit color component to 8 bit -static inline u8 Convert1To8(u8 value) { +inline u8 Convert1To8(u8 value) { return value * 255; } /// Convert a 4-bit color component to 8 bit -static inline u8 Convert4To8(u8 value) { +inline u8 Convert4To8(u8 value) { return (value << 4) | value; } /// Convert a 5-bit color component to 8 bit -static inline u8 Convert5To8(u8 value) { +inline u8 Convert5To8(u8 value) { return (value << 3) | (value >> 2); } /// Convert a 6-bit color component to 8 bit -static inline u8 Convert6To8(u8 value) { +inline u8 Convert6To8(u8 value) { return (value << 2) | (value >> 4); } +/// Convert a 8-bit color component to 1 bit +inline u8 Convert8To1(u8 value) { + return value >> 7; +} + +/// Convert a 8-bit color component to 4 bit +inline u8 Convert8To4(u8 value) { + return value >> 4; +} + +/// Convert a 8-bit color component to 5 bit +inline u8 Convert8To5(u8 value) { + return value >> 3; +} + +/// Convert a 8-bit color component to 6 bit +inline u8 Convert8To6(u8 value) { + return value >> 2; +} + +/** + * Decode a color stored in RGBA8 format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Math::Vec4<u8> + */ +inline const Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { + return { bytes[3], bytes[2], bytes[1], bytes[0] }; +} + +/** + * Decode a color stored in RGB8 format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Math::Vec4<u8> + */ +inline const Math::Vec4<u8> DecodeRGB8(const u8* bytes) { + return { bytes[2], bytes[1], bytes[0], 255 }; +} + +/** + * Decode a color stored in RGB565 format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Math::Vec4<u8> + */ +inline const Math::Vec4<u8> DecodeRGB565(const u8* bytes) { + const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes); + return { Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), + Convert5To8(pixel & 0x1F), 255 }; +} + +/** + * Decode a color stored in RGB5A1 format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Math::Vec4<u8> + */ +inline const Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { + const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes); + return { Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), + Convert5To8((pixel >> 1) & 0x1F), Convert1To8(pixel & 0x1) }; +} + +/** + * Decode a color stored in RGBA4 format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Math::Vec4<u8> + */ +inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { + const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes); + return { Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), + Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF) }; +} + +/** + * Encode a color as RGBA8 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { + bytes[3] = color.r(); + bytes[2] = color.g(); + bytes[1] = color.b(); + bytes[0] = color.a(); +} + +/** + * Encode a color as RGB8 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { + bytes[2] = color.r(); + bytes[1] = color.g(); + bytes[0] = color.b(); +} + +/** + * Encode a color as RGB565 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { + *reinterpret_cast<u16_le*>(bytes) = (Convert8To5(color.r()) << 11) | + (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); +} + +/** + * Encode a color as RGB5A1 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { + *reinterpret_cast<u16_le*>(bytes) = (Convert8To5(color.r()) << 11) | + (Convert8To5(color.g()) << 6) | (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); +} + +/** + * Encode a color as RGBA4 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { + *reinterpret_cast<u16_le*>(bytes) = (Convert8To4(color.r()) << 12) | + (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); +} } // namespace diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 586ad62b6..e031871e8 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -4,6 +4,8 @@ #include <boost/range/algorithm/fill.hpp> +#include "common/profiler.h" + #include "clipper.h" #include "command_processor.h" #include "math.h" @@ -25,6 +27,8 @@ static int float_regs_counter = 0; static u32 uniform_write_buffer[4]; +Common::Profiling::TimingCategory category_drawing("Drawing"); + static inline void WritePicaReg(u32 id, u32 value, u32 mask) { if (id >= registers.NumIds()) @@ -53,6 +57,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(trigger_draw): case PICA_REG_INDEX(trigger_draw_indexed): { + Common::Profiling::ScopeTimer scope_timer(category_drawing); + DebugUtils::DumpTevStageConfig(registers.GetTevStages()); if (g_debug_context) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 0beb72e6b..745c4f4ed 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -23,6 +23,7 @@ #include "video_core/color.h" #include "video_core/math.h" #include "video_core/pica.h" +#include "video_core/utils.h" #include "debug_utils.h" @@ -306,111 +307,69 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() } const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { - // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each - // of which is composed of four 2x2 subtiles each of which is composed of four texels. - // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. - // texels are laid out in a 2x2 subtile like this: - // 2 3 - // 0 1 - // - // The full 8x8 tile has the texels arranged like this: - // - // 42 43 46 47 58 59 62 63 - // 40 41 44 45 56 57 60 61 - // 34 35 38 39 50 51 54 55 - // 32 33 36 37 48 49 52 53 - // 10 11 14 15 26 27 30 31 - // 08 09 12 13 24 25 28 29 - // 02 03 06 07 18 19 22 23 - // 00 01 04 05 16 17 20 21 - - const unsigned int block_width = 8; - const unsigned int block_height = 8; - const unsigned int coarse_x = x & ~7; const unsigned int coarse_y = y & ~7; - // Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are - // arranged in a Z-order curve. More details on the bit manipulation at: - // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ - unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210 - i = (i ^ (i << 2)) & 0x1313; // ---2 --10 - i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 - i = (i | (i >> 7)) & 0x3F; - if (info.format != Regs::TextureFormat::ETC1 && info.format != Regs::TextureFormat::ETC1A4) { // TODO(neobrain): Fix code design to unify vertical block offsets! source += coarse_y * info.stride; } - const unsigned int offset = coarse_x * block_height; - + // TODO: Assert that width/height are multiples of block dimensions switch (info.format) { case Regs::TextureFormat::RGBA8: { - const u8* source_ptr = source + offset * 4 + i * 4; - return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; + auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4)); + return { res.r(), res.g(), res.b(), disable_alpha ? 255 : res.a() }; } case Regs::TextureFormat::RGB8: { - const u8* source_ptr = source + offset * 3 + i * 3; - return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; + auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3)); + return { res.r(), res.g(), res.b(), 255 }; } - case Regs::TextureFormat::RGBA5551: + case Regs::TextureFormat::RGB5A1: { - const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2); - u8 r = (source_ptr >> 11) & 0x1F; - u8 g = ((source_ptr) >> 6) & 0x1F; - u8 b = (source_ptr >> 1) & 0x1F; - u8 a = source_ptr & 1; - return Math::MakeVec<u8>(Color::Convert5To8(r), Color::Convert5To8(g), - Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a)); + auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2)); + return { res.r(), res.g(), res.b(), disable_alpha ? 255 : res.a() }; } case Regs::TextureFormat::RGB565: { - const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2); - u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); - u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); - u8 b = Color::Convert5To8((source_ptr) & 0x1F); - return Math::MakeVec<u8>(r, g, b, 255); + auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2)); + return { res.r(), res.g(), res.b(), 255 }; } case Regs::TextureFormat::RGBA4: { - const u8* source_ptr = source + offset * 2 + i * 2; - u8 r = Color::Convert4To8(source_ptr[1] >> 4); - u8 g = Color::Convert4To8(source_ptr[1] & 0xF); - u8 b = Color::Convert4To8(source_ptr[0] >> 4); - u8 a = Color::Convert4To8(source_ptr[0] & 0xF); - return { r, g, b, disable_alpha ? (u8)255 : a }; + auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2)); + return { res.r(), res.g(), res.b(), disable_alpha ? 255 : res.a() }; } case Regs::TextureFormat::IA8: { - const u8* source_ptr = source + offset * 2 + i * 2; + const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); if (disable_alpha) { // Show intensity as red, alpha as green return { source_ptr[1], source_ptr[0], 0, 255 }; } else { - return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; + return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0] }; } } case Regs::TextureFormat::I8: { - const u8* source_ptr = source + offset + i; + const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); return { *source_ptr, *source_ptr, *source_ptr, 255 }; } case Regs::TextureFormat::A8: { - const u8* source_ptr = source + offset + i; + const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); if (disable_alpha) { return { *source_ptr, *source_ptr, *source_ptr, 255 }; @@ -421,7 +380,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture case Regs::TextureFormat::IA4: { - const u8* source_ptr = source + offset + i; + const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); u8 a = Color::Convert4To8((*source_ptr) & 0xF); @@ -436,9 +395,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture case Regs::TextureFormat::A4: { - const u8* source_ptr = source + offset / 2 + i / 2; + u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); + const u8* source_ptr = source + morton_offset / 2; - u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4); + u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); a = Color::Convert4To8(a); if (disable_alpha) { @@ -545,7 +505,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture } // Add modifier - unsigned table_index = (x < 2) ? table_index_2.Value() : table_index_1.Value(); + unsigned table_index = (x < 2) ? table_index_1.Value() : table_index_2.Value(); static const auto etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{ { 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 }, diff --git a/src/video_core/pica.h b/src/video_core/pica.h index e4a5ef78e..b14de9278 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -39,13 +39,6 @@ namespace Pica { struct Regs { -// helper macro to properly align structure members. -// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121", -// depending on the current source line to make sure variable names are unique. -#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y -#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y) -#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)]; - INSERT_PADDING_WORDS(0x10); u32 trigger_irq; @@ -152,7 +145,7 @@ struct Regs { enum class TextureFormat : u32 { RGBA8 = 0, RGB8 = 1, - RGBA5551 = 2, + RGB5A1 = 2, RGB565 = 3, RGBA4 = 4, IA8 = 5, @@ -174,7 +167,7 @@ struct Regs { case TextureFormat::RGB8: return 6; - case TextureFormat::RGBA5551: + case TextureFormat::RGB5A1: case TextureFormat::RGB565: case TextureFormat::RGBA4: case TextureFormat::IA8: @@ -416,10 +409,11 @@ struct Regs { } output_merger; struct { + // Components are laid out in reverse byte order, most significant bits first. enum ColorFormat : u32 { RGBA8 = 0, RGB8 = 1, - RGBA5551 = 2, + RGB5A1 = 2, RGB565 = 3, RGBA4 = 4, }; @@ -709,10 +703,6 @@ struct Regs { INSERT_PADDING_WORDS(0x22); -#undef INSERT_PADDING_WORDS_HELPER1 -#undef INSERT_PADDING_WORDS_HELPER2 -#undef INSERT_PADDING_WORDS - // Map register indices to names readable by humans // Used for debugging purposes, so performance is not an issue here static std::string GetCommandName(int index) { diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 81df09baf..5861c1926 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -7,12 +7,14 @@ #include "common/common_types.h" #include "common/math_util.h" +#include "core/hw/gpu.h" +#include "debug_utils/debug_utils.h" #include "math.h" +#include "color.h" #include "pica.h" #include "rasterizer.h" #include "vertex_shader.h" - -#include "debug_utils/debug_utils.h" +#include "video_core/utils.h" namespace Pica { @@ -20,59 +22,101 @@ namespace Rasterizer { static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); - u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); // Similarly to textures, the render framebuffer is laid out from bottom to top, too. // NOTE: The framebuffer height register contains the actual FB height minus one. y = (registers.framebuffer.height - y); + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; + u8* dst_pixel = Memory::GetPointer(PAddrToVAddr(addr)) + dst_offset; + switch (registers.framebuffer.color_format) { case registers.framebuffer.RGBA8: - { - u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); - *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; + Color::EncodeRGBA8(color, dst_pixel); + break; + + case registers.framebuffer.RGB8: + Color::EncodeRGB8(color, dst_pixel); + break; + + case registers.framebuffer.RGB5A1: + Color::EncodeRGB5A1(color, dst_pixel); + break; + + case registers.framebuffer.RGB565: + Color::EncodeRGB565(color, dst_pixel); + break; + + case registers.framebuffer.RGBA4: + Color::EncodeRGBA4(color, dst_pixel); break; - } default: - LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); + LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); UNIMPLEMENTED(); } } static const Math::Vec4<u8> GetPixel(int x, int y) { const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); - u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); y = (registers.framebuffer.height - y); - u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); - Math::Vec4<u8> ret; - ret.a() = value >> 24; - ret.r() = (value >> 16) & 0xFF; - ret.g() = (value >> 8) & 0xFF; - ret.b() = value & 0xFF; - return ret; + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; + u8* src_pixel = Memory::GetPointer(PAddrToVAddr(addr)) + src_offset; + + switch (registers.framebuffer.color_format) { + case registers.framebuffer.RGBA8: + return Color::DecodeRGBA8(src_pixel); + + case registers.framebuffer.RGB8: + return Color::DecodeRGB8(src_pixel); + + case registers.framebuffer.RGB5A1: + return Color::DecodeRGB5A1(src_pixel); + + case registers.framebuffer.RGB565: + return Color::DecodeRGB565(src_pixel); + + case registers.framebuffer.RGBA4: + return Color::DecodeRGBA4(src_pixel); + + default: + LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); + UNIMPLEMENTED(); + } + + return {}; } static u32 GetDepth(int x, int y) { const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); - u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); + u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); y = (registers.framebuffer.height - y); + + const u32 coarse_y = y & ~7; + u32 stride = registers.framebuffer.width * 2; // Assuming 16-bit depth buffer format until actual format handling is implemented - return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); + return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride); } static void SetDepth(int x, int y, u16 value) { const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); - u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); + u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); y = (registers.framebuffer.height - y); + const u32 coarse_y = y & ~7; + u32 stride = registers.framebuffer.width * 2; + // Assuming 16-bit depth buffer format until actual format handling is implemented - *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; + *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value; } // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 272695174..95ab96340 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -4,7 +4,10 @@ #include "core/hw/gpu.h" #include "core/mem_map.h" + #include "common/emu_window.h" +#include "common/profiler_reporting.h" + #include "video_core/video_core.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -75,9 +78,18 @@ void RendererOpenGL::SwapBuffers() { DrawScreens(); + auto& profiler = Common::Profiling::GetProfilingManager(); + profiler.FinishFrame(); + { + auto aggregator = Common::Profiling::GetTimingResultsAggregator(); + aggregator->AddFrame(profiler.GetPreviousFrameResults()); + } + // Swap buffers render_window->PollEvents(); render_window->SwapBuffers(); + + profiler.BeginFrame(); } /** @@ -242,28 +254,26 @@ void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x * Draws the emulated screens to the emulator window. */ void RendererOpenGL::DrawScreens() { - auto viewport_extent = GetViewportExtent(); - glViewport(viewport_extent.left, viewport_extent.top, viewport_extent.GetWidth(), viewport_extent.GetHeight()); // TODO: Or bottom? + auto layout = render_window->GetFramebufferLayout(); + + glViewport(0, 0, layout.width, layout.height); glClear(GL_COLOR_BUFFER_BIT); glUseProgram(program_id); // Set projection matrix - std::array<GLfloat, 3*2> ortho_matrix = MakeOrthographicMatrix((float)resolution_width, (float)resolution_height); + std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, + (float)layout.height); glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data()); // Bind texture in Texture Unit 0 glActiveTexture(GL_TEXTURE0); glUniform1i(uniform_color_texture, 0); - const float max_width = std::max((float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenBottomWidth); - const float top_x = 0.5f * (max_width - VideoCore::kScreenTopWidth); - const float bottom_x = 0.5f * (max_width - VideoCore::kScreenBottomWidth); - - DrawSingleScreenRotated(textures[0], top_x, 0, - (float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight); - DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight, - (float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight); + DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, + (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); + DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, + (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); m_current_frame++; } @@ -280,34 +290,6 @@ void RendererOpenGL::SetWindow(EmuWindow* window) { render_window = window; } -MathUtil::Rectangle<unsigned> RendererOpenGL::GetViewportExtent() { - unsigned framebuffer_width; - unsigned framebuffer_height; - std::tie(framebuffer_width, framebuffer_height) = render_window->GetFramebufferSize(); - - float window_aspect_ratio = static_cast<float>(framebuffer_height) / framebuffer_width; - float emulation_aspect_ratio = static_cast<float>(resolution_height) / resolution_width; - - MathUtil::Rectangle<unsigned> viewport_extent; - if (window_aspect_ratio > emulation_aspect_ratio) { - // Window is narrower than the emulation content => apply borders to the top and bottom - unsigned viewport_height = static_cast<unsigned>(std::round(emulation_aspect_ratio * framebuffer_width)); - viewport_extent.left = 0; - viewport_extent.top = (framebuffer_height - viewport_height) / 2; - viewport_extent.right = viewport_extent.left + framebuffer_width; - viewport_extent.bottom = viewport_extent.top + viewport_height; - } else { - // Otherwise, apply borders to the left and right sides of the window. - unsigned viewport_width = static_cast<unsigned>(std::round(framebuffer_height / emulation_aspect_ratio)); - viewport_extent.left = (framebuffer_width - viewport_width) / 2; - viewport_extent.top = 0; - viewport_extent.right = viewport_extent.left + viewport_width; - viewport_extent.bottom = viewport_extent.top + framebuffer_height; - } - - return viewport_extent; -} - /// Initialize the renderer void RendererOpenGL::Init() { render_window->MakeCurrent(); diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 6fd640425..bda793fa5 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -35,4 +35,54 @@ struct TGAHeader { */ void DumpTGA(std::string filename, short width, short height, u8* raw_data); +/** + * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are + * arranged in a Z-order curve. More details on the bit manipulation at: + * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ + */ +static inline u32 MortonInterleave(u32 x, u32 y) { + u32 i = (x & 7) | ((y & 7) << 8); // ---- -210 + i = (i ^ (i << 2)) & 0x1313; // ---2 --10 + i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 + i = (i | (i >> 7)) & 0x3F; + return i; +} + +/** + * Calculates the offset of the position of the pixel in Morton order + */ +static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) { + // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each + // of which is composed of four 2x2 subtiles each of which is composed of four texels. + // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. + // texels are laid out in a 2x2 subtile like this: + // 2 3 + // 0 1 + // + // The full 8x8 tile has the texels arranged like this: + // + // 42 43 46 47 58 59 62 63 + // 40 41 44 45 56 57 60 61 + // 34 35 38 39 50 51 54 55 + // 32 33 36 37 48 49 52 53 + // 10 11 14 15 26 27 30 31 + // 08 09 12 13 24 25 28 29 + // 02 03 06 07 18 19 22 23 + // 00 01 04 05 16 17 20 21 + // + // This pattern is what's called Z-order curve, or Morton order. + + const unsigned int block_width = 8; + const unsigned int block_height = 8; + + const unsigned int coarse_x = x & ~7; + const unsigned int coarse_y = y & ~7; + + u32 i = VideoCore::MortonInterleave(x, y); + + const unsigned int offset = coarse_x * block_height; + + return (i + offset) * bytes_per_pixel; +} + } // namespace diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index def868ac7..4eb3e743e 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -17,6 +17,7 @@ #include "vertex_shader.h" #include "debug_utils/debug_utils.h" +using nihstro::OpCode; using nihstro::Instruction; using nihstro::RegisterType; using nihstro::SourceRegister; @@ -90,6 +91,7 @@ struct VertexShaderState { u8 repeat_counter; // How often to repeat until this call stack element is removed u8 loop_increment; // Which value to add to the loop counter after an iteration // TODO: Should this be a signed value? Does it even matter? + u32 loop_address; // The address where we'll return to after each loop iteration }; // TODO: Is there a maximal size for this? @@ -115,6 +117,8 @@ static void ProcessShaderCode(VertexShaderState& state) { if (top.repeat_counter-- == 0) { state.program_counter = &shader_memory[top.return_address]; state.call_stack.pop(); + } else { + state.program_counter = &shader_memory[top.loop_address]; } // TODO: Is "trying again" accurate to hardware? @@ -129,7 +133,7 @@ static void ProcessShaderCode(VertexShaderState& state) { static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset, u8 repeat_count, u8 loop_increment) { state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset - state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment }); + state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); }; u32 binary_offset = state.program_counter - shader_memory.data(); @@ -151,10 +155,10 @@ static void ProcessShaderCode(VertexShaderState& state) { } }; - switch (instr.opcode.GetInfo().type) { - case Instruction::OpCodeType::Arithmetic: + switch (instr.opcode.Value().GetInfo().type) { + case OpCode::Type::Arithmetic: { - bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed); // TODO: We don't really support this properly: For instance, the address register // offset needs to be applied to SRC2 instead, etc. // For now, we just abort in this situation. @@ -194,15 +198,15 @@ static void ProcessShaderCode(VertexShaderState& state) { src2[3] = src2[3] * float24::FromFloat32(-1); } - float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] - : (instr.common.dest < 0x10) ? dummy_vec4_float24 - : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] + float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()] + : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24 + : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] : dummy_vec4_float24; state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); - switch (instr.opcode.EffectiveOpCode()) { - case Instruction::OpCode::ADD: + switch (instr.opcode.Value().EffectiveOpCode()) { + case OpCode::Id::ADD: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -214,7 +218,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::MUL: + case OpCode::Id::MUL: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -226,7 +230,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::MAX: + case OpCode::Id::MAX: for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -235,11 +239,11 @@ static void ProcessShaderCode(VertexShaderState& state) { } break; - case Instruction::OpCode::DP3: - case Instruction::OpCode::DP4: + case OpCode::Id::DP3: + case OpCode::Id::DP4: { float24 dot = float24::FromFloat32(0.f); - int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; + int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; for (int i = 0; i < num_components; ++i) dot = dot + src1[i] * src2[i]; @@ -253,7 +257,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } // Reciprocal - case Instruction::OpCode::RCP: + case OpCode::Id::RCP: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -268,7 +272,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } // Reciprocal Square Root - case Instruction::OpCode::RSQ: + case OpCode::Id::RSQ: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -282,7 +286,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::MOVA: + case OpCode::Id::MOVA: { for (int i = 0; i < 2; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -295,7 +299,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::MOV: + case OpCode::Id::MOV: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -306,7 +310,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::CMP: + case OpCode::Id::CMP: for (int i = 0; i < 2; ++i) { // TODO: Can you restrict to one compare via dest masking? @@ -347,7 +351,7 @@ static void ProcessShaderCode(VertexShaderState& state) { default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); DEBUG_ASSERT(false); break; } @@ -355,9 +359,9 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCodeType::MultiplyAdd: + case OpCode::Type::MultiplyAdd: { - if (instr.opcode.EffectiveOpCode() == Instruction::OpCode::MAD) { + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) { const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; const float24* src1_ = LookupSourceRegister(instr.mad.src1); @@ -405,9 +409,9 @@ static void ProcessShaderCode(VertexShaderState& state) { src3[3] = src3[3] * float24::FromFloat32(-1); } - float24* dest = (instr.mad.dest < 0x08) ? state.output_register_table[4*instr.mad.dest.GetIndex()] - : (instr.mad.dest < 0x10) ? dummy_vec4_float24 - : (instr.mad.dest < 0x20) ? &state.temporary_registers[instr.mad.dest.GetIndex()][0] + float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()] + : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24 + : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] : dummy_vec4_float24; for (int i = 0; i < 4; ++i) { @@ -418,7 +422,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } } else { LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); } break; } @@ -445,31 +449,31 @@ static void ProcessShaderCode(VertexShaderState& state) { }; // Handle each instruction on its own - switch (instr.opcode) { - case Instruction::OpCode::END: + switch (instr.opcode.Value()) { + case OpCode::Id::END: exit_loop = true; break; - case Instruction::OpCode::JMPC: + case OpCode::Id::JMPC: if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; } break; - case Instruction::OpCode::JMPU: + case OpCode::Id::JMPU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; } break; - case Instruction::OpCode::CALL: + case OpCode::Id::CALL: call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, binary_offset + 1, 0, 0); break; - case Instruction::OpCode::CALLU: + case OpCode::Id::CALLU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { call(state, instr.flow_control.dest_offset, @@ -478,7 +482,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } break; - case Instruction::OpCode::CALLC: + case OpCode::Id::CALLC: if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { call(state, instr.flow_control.dest_offset, @@ -487,10 +491,10 @@ static void ProcessShaderCode(VertexShaderState& state) { } break; - case Instruction::OpCode::NOP: + case OpCode::Id::NOP: break; - case Instruction::OpCode::IFU: + case OpCode::Id::IFU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { call(state, binary_offset + 1, @@ -505,7 +509,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; - case Instruction::OpCode::IFC: + case OpCode::Id::IFC: { // TODO: Do we need to consider swizzlers here? @@ -524,7 +528,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::LOOP: + case OpCode::Id::LOOP: { state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y; @@ -539,7 +543,7 @@ static void ProcessShaderCode(VertexShaderState& state) { default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); break; } diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 0a236595c..b9d4ede3a 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -18,7 +18,6 @@ namespace VideoCore { EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window RendererBase* g_renderer = nullptr; ///< Renderer plugin -int g_current_frame = 0; /// Initialize the video core void Init(EmuWindow* emu_window) { @@ -27,8 +26,6 @@ void Init(EmuWindow* emu_window) { g_renderer->SetWindow(g_emu_window); g_renderer->Init(); - g_current_frame = 0; - LOG_DEBUG(Render, "initialized OK"); } diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index b782f17bd..1b51d39bf 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -30,7 +30,6 @@ static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height // --------------------- extern RendererBase* g_renderer; ///< Renderer plugin -extern int g_current_frame; ///< Current frame extern EmuWindow* g_emu_window; ///< Emu window /// Start the video core |
