aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/gpu.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/gpu.h')
-rw-r--r--src/video_core/gpu.h249
1 files changed, 175 insertions, 74 deletions
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 1a2d747be..21410e125 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -13,14 +13,15 @@
#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
+#include "video_core/cdma_pusher.h"
#include "video_core/dma_pusher.h"
using CacheAddr = std::uintptr_t;
-inline CacheAddr ToCacheAddr(const void* host_ptr) {
+[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { // Reinterprets a host pointer as an integer CacheAddr (std::uintptr_t).
return reinterpret_cast<CacheAddr>(host_ptr);
}
-inline u8* FromCacheAddr(CacheAddr cache_addr) {
+[[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) { // Reinterprets an integer CacheAddr as a u8* (reverse of ToCacheAddr).
return reinterpret_cast<u8*>(cache_addr);
}
@@ -33,58 +34,68 @@ class System;
namespace VideoCore {
class RendererBase;
+class ShaderNotify; // Forward declaration only; this header holds it via std::unique_ptr and hands out references.
} // namespace VideoCore
namespace Tegra {
enum class RenderTargetFormat : u32 {
NONE = 0x0,
- RGBA32_FLOAT = 0xC0,
- RGBA32_UINT = 0xC2,
- RGBA16_UNORM = 0xC6,
- RGBA16_SNORM = 0xC7,
- RGBA16_UINT = 0xC9,
- RGBA16_FLOAT = 0xCA,
- RG32_FLOAT = 0xCB,
- RG32_UINT = 0xCD,
- RGBX16_FLOAT = 0xCE,
- BGRA8_UNORM = 0xCF,
- BGRA8_SRGB = 0xD0,
- RGB10_A2_UNORM = 0xD1,
- RGBA8_UNORM = 0xD5,
- RGBA8_SRGB = 0xD6,
- RGBA8_SNORM = 0xD7,
- RGBA8_UINT = 0xD9,
- RG16_UNORM = 0xDA,
- RG16_SNORM = 0xDB,
- RG16_SINT = 0xDC,
- RG16_UINT = 0xDD,
- RG16_FLOAT = 0xDE,
- R11G11B10_FLOAT = 0xE0,
+ R32B32G32A32_FLOAT = 0xC0, // NOTE(review): spelled R32B32G32..., unlike the R32G32B32A32_* entries below - likely a typo; a rename must update every user of this enumerator.
+ R32G32B32A32_SINT = 0xC1,
+ R32G32B32A32_UINT = 0xC2,
+ R16G16B16A16_UNORM = 0xC6,
+ R16G16B16A16_SNORM = 0xC7,
+ R16G16B16A16_SINT = 0xC8,
+ R16G16B16A16_UINT = 0xC9,
+ R16G16B16A16_FLOAT = 0xCA,
+ R32G32_FLOAT = 0xCB,
+ R32G32_SINT = 0xCC,
+ R32G32_UINT = 0xCD,
+ R16G16B16X16_FLOAT = 0xCE,
+ B8G8R8A8_UNORM = 0xCF,
+ B8G8R8A8_SRGB = 0xD0,
+ A2B10G10R10_UNORM = 0xD1,
+ A2B10G10R10_UINT = 0xD2,
+ A8B8G8R8_UNORM = 0xD5,
+ A8B8G8R8_SRGB = 0xD6,
+ A8B8G8R8_SNORM = 0xD7,
+ A8B8G8R8_SINT = 0xD8,
+ A8B8G8R8_UINT = 0xD9,
+ R16G16_UNORM = 0xDA,
+ R16G16_SNORM = 0xDB,
+ R16G16_SINT = 0xDC,
+ R16G16_UINT = 0xDD,
+ R16G16_FLOAT = 0xDE,
+ B10G11R11_FLOAT = 0xE0,
R32_SINT = 0xE3,
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
- B5G6R5_UNORM = 0xE8,
- BGR5A1_UNORM = 0xE9,
- RG8_UNORM = 0xEA,
- RG8_SNORM = 0xEB,
+ R5G6B5_UNORM = 0xE8,
+ A1R5G5B5_UNORM = 0xE9,
+ R8G8_UNORM = 0xEA,
+ R8G8_SNORM = 0xEB,
+ R8G8_SINT = 0xEC,
+ R8G8_UINT = 0xED,
R16_UNORM = 0xEE,
R16_SNORM = 0xEF,
R16_SINT = 0xF0,
R16_UINT = 0xF1,
R16_FLOAT = 0xF2,
R8_UNORM = 0xF3,
+ R8_SNORM = 0xF4,
+ R8_SINT = 0xF5,
R8_UINT = 0xF6,
};
enum class DepthFormat : u32 {
- Z32_FLOAT = 0xA,
- Z16_UNORM = 0x13,
- S8_Z24_UNORM = 0x14,
- Z24_X8_UNORM = 0x15,
- Z24_S8_UNORM = 0x16,
- Z24_C8_UNORM = 0x18,
- Z32_S8_X24_FLOAT = 0x19,
+ D32_FLOAT = 0xA,
+ D16_UNORM = 0x13,
+ S8_UINT_Z24_UNORM = 0x14, // NOTE(review): keeps "Z24" while every other renamed entry uses a "D" prefix - confirm this is intended.
+ D24X8_UNORM = 0x15,
+ D24S8_UNORM = 0x16,
+ D24C8_UNORM = 0x18,
+ D32_FLOAT_S8X24_UINT = 0x19,
};
struct CommandListHeader;
@@ -95,9 +106,9 @@ class DebugContext;
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
- ABGR8 = 1,
- RGB565 = 4,
- BGRA8 = 5,
+ A8B8G8R8_UNORM = 1,
+ RGB565_UNORM = 4, // NOTE(review): siblings spell out per-channel widths (A8B8G8R8, B8G8R8A8); this one keeps the packed "RGB565" form.
+ B8G8R8A8_UNORM = 5,
};
VAddr address;
@@ -132,60 +143,102 @@ class MemoryManager;
class GPU {
public:
- explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- bool is_async);
-
- virtual ~GPU();
-
struct MethodCall {
u32 method{};
u32 argument{};
u32 subchannel{};
u32 method_count{};
- bool IsLastCall() const {
- return method_count <= 1;
- }
-
MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0)
: method(method), argument(argument), subchannel(subchannel),
method_count(method_count) {}
+
+ [[nodiscard]] bool IsLastCall() const { // True when method_count is 0 or 1; body unchanged, only moved below the constructor.
+ return method_count <= 1;
+ }
};
+ explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
+ virtual ~GPU();
+
+ /// Binds a renderer to the GPU.
+ void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
+
/// Calls a GPU method.
void CallMethod(const MethodCall& method_call);
+ /// Calls a GPU multivalue method.
+ void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending);
+
+ /// Flush all current written commands into the host GPU for execution.
void FlushCommands();
+ /// Synchronizes CPU writes with Host GPU memory.
+ void SyncGuestHost();
+ /// Signal the ending of command list.
+ virtual void OnCommandListEnd();
+
+ /// Request a host GPU memory flush from the CPU.
+ [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
+
+ /// Returns the current flush request fence id, read from current_flush_fence with a relaxed atomic load.
+ [[nodiscard]] u64 CurrentFlushRequestFence() const {
+ return current_flush_fence.load(std::memory_order_relaxed);
+ }
+
+ /// Tick pending requests within the GPU.
+ void TickWork();
/// Returns a reference to the Maxwell3D GPU engine.
- Engines::Maxwell3D& Maxwell3D();
+ [[nodiscard]] Engines::Maxwell3D& Maxwell3D();
/// Returns a const reference to the Maxwell3D GPU engine.
- const Engines::Maxwell3D& Maxwell3D() const;
+ [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const;
/// Returns a reference to the KeplerCompute GPU engine.
- Engines::KeplerCompute& KeplerCompute();
+ [[nodiscard]] Engines::KeplerCompute& KeplerCompute();
/// Returns a reference to the KeplerCompute GPU engine.
- const Engines::KeplerCompute& KeplerCompute() const;
+ [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const;
/// Returns a reference to the GPU memory manager.
- Tegra::MemoryManager& MemoryManager();
+ [[nodiscard]] Tegra::MemoryManager& MemoryManager();
/// Returns a const reference to the GPU memory manager.
- const Tegra::MemoryManager& MemoryManager() const;
+ [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const;
/// Returns a reference to the GPU DMA pusher.
- Tegra::DmaPusher& DmaPusher();
+ [[nodiscard]] Tegra::DmaPusher& DmaPusher();
- VideoCore::RendererBase& Renderer() {
+ /// Returns a const reference to the GPU DMA pusher.
+ [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const;
+
+ /// Returns a reference to the GPU CDMA pusher.
+ [[nodiscard]] Tegra::CDmaPusher& CDmaPusher();
+
+ /// Returns a const reference to the GPU CDMA pusher.
+ [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;
+
+ /// Returns a reference to the underlying renderer. NOTE(review): dereferences the renderer unique_ptr unchecked; presumably only valid after BindRenderer() - confirm.
+ [[nodiscard]] VideoCore::RendererBase& Renderer() {
return *renderer;
}
- const VideoCore::RendererBase& Renderer() const {
+ /// Returns a const reference to the underlying renderer. NOTE(review): dereferences the renderer unique_ptr unchecked; presumably only valid after BindRenderer() - confirm.
+ [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
return *renderer;
}
+ /// Returns a reference to the shader notifier.
+ [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
+ return *shader_notify;
+ }
+
+ /// Returns a const reference to the shader notifier.
+ [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
+ return *shader_notify;
+ }
+
// Waits for the GPU to finish working
virtual void WaitIdle() const = 0;
@@ -194,27 +247,46 @@ public:
void IncrementSyncPoint(u32 syncpoint_id);
- u32 GetSyncpointValue(u32 syncpoint_id) const;
+ [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;
void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
- bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
+ [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
- u64 GetTicks() const;
+ [[nodiscard]] u64 GetTicks() const;
- std::unique_lock<std::mutex> LockSync() {
+ [[nodiscard]] std::unique_lock<std::mutex> LockSync() {
return std::unique_lock{sync_mutex};
}
- bool IsAsync() const {
+ [[nodiscard]] bool IsAsync() const {
return is_async;
}
- /// Returns a const reference to the GPU DMA pusher.
- const Tegra::DmaPusher& DmaPusher() const;
+ [[nodiscard]] bool UseNvdec() const {
+ return use_nvdec;
+ }
+
+ enum class FenceOperation : u32 { // Value type of the FenceAction::op bit field.
+ Acquire = 0,
+ Increment = 1,
+ };
+
+ union FenceAction { // One 32-bit word viewed either raw or through the bit fields below.
+ u32 raw; // Whole word; this is what Build() returns inside a CommandHeader.
+ BitField<0, 1, FenceOperation> op; // Presumably bit position 0, width 1 - confirm against BitField's parameter order.
+ BitField<8, 24, u32> syncpoint_id; // Presumably bit position 8, width 24 - confirm against BitField's parameter order.
+
+ [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) { // Packs op and syncpoint_id into one word.
+ FenceAction result{};
+ result.op.Assign(op);
+ result.syncpoint_id.Assign(syncpoint_id);
+ return {result.raw}; // Brace-initializes the returned CommandHeader from the packed word.
+ }
+ };
struct Regs {
- static constexpr size_t NUM_REGS = 0x100;
+ static constexpr size_t NUM_REGS = 0x40; // NOTE(review): shrunk from 0x100, yet this struct still contains INSERT_UNION_PADDING_WORDS(0xE2) further down - confirm nothing sized by NUM_REGS is expected to span the whole layout.
union {
struct {
@@ -223,7 +295,7 @@ public:
u32 address_high;
u32 address_low;
- GPUVAddr SemaphoreAddress() const {
+ [[nodiscard]] GPUVAddr SemaphoreAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
@@ -233,7 +305,7 @@ public:
u32 semaphore_trigger;
INSERT_UNION_PADDING_WORDS(0xC);
- // The puser and the puller share the reference counter, the pusher only has read
+ // The pusher and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
INSERT_UNION_PADDING_WORDS(0x5);
@@ -241,10 +313,7 @@ public:
u32 semaphore_acquire;
u32 semaphore_release;
u32 fence_value;
- union {
- BitField<4, 4, u32> operation;
- BitField<8, 8, u32> id;
- } fence_action;
+ FenceAction fence_action;
INSERT_UNION_PADDING_WORDS(0xE2);
// Puller state
@@ -263,9 +332,18 @@ public:
/// core timing events.
virtual void Start() = 0;
+ /// Obtain the CPU Context
+ virtual void ObtainContext() = 0;
+
+ /// Release the CPU Context
+ virtual void ReleaseContext() = 0;
+
/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+ /// Push GPU command buffer entries to be processed
+ virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0;
+
/// Swap buffers (render frame)
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
@@ -283,6 +361,8 @@ protected:
private:
void ProcessBindMethod(const MethodCall& method_call);
+ void ProcessFenceActionMethod();
+ void ProcessWaitForInterruptMethod();
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
@@ -293,17 +373,22 @@ private:
/// Calls a GPU engine method.
void CallEngineMethod(const MethodCall& method_call);
+ /// Calls a GPU engine multivalue method.
+ void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending);
+
/// Determines where the method should be executed.
- bool ExecuteMethodOnEngine(const MethodCall& method_call);
+ [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
protected:
- std::unique_ptr<Tegra::DmaPusher> dma_pusher;
Core::System& system;
+ std::unique_ptr<Tegra::MemoryManager> memory_manager;
+ std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+ std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
+ const bool use_nvdec;
private:
- std::unique_ptr<Tegra::MemoryManager> memory_manager;
-
/// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {};
/// 3D engine
@@ -316,15 +401,31 @@ private:
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+ /// Shader build notifier
+ std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
std::mutex sync_mutex;
+ std::mutex device_mutex;
std::condition_variable sync_cv;
+ struct FlushRequest { // Element type of flush_requests: a fence id plus an (addr, size) range; presumably one entry per RequestFlush() call - confirm.
+ FlushRequest(u64 fence, VAddr addr, std::size_t size)
+ : fence{fence}, addr{addr}, size{size} {}
+ u64 fence; // Same type as RequestFlush()'s return value and current_flush_fence.
+ VAddr addr;
+ std::size_t size;
+ };
+
+ std::list<FlushRequest> flush_requests;
+ std::atomic<u64> current_flush_fence{};
+ u64 last_flush_fence{};
+ std::mutex flush_request_mutex;
+
const bool is_async;
};