aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h18
-rw-r--r--src/video_core/engines/fermi_2d.cpp30
-rw-r--r--src/video_core/engines/fermi_2d.h12
-rw-r--r--src/video_core/engines/kepler_compute.h18
-rw-r--r--src/video_core/engines/kepler_memory.h4
-rw-r--r--src/video_core/engines/maxwell_3d.h116
-rw-r--r--src/video_core/engines/maxwell_dma.h10
-rw-r--r--src/video_core/engines/shader_header.h50
-rw-r--r--src/video_core/gpu.h8
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h20
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp26
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h3
-rw-r--r--src/video_core/shader/control_flow.cpp214
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp11
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp4
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp8
-rw-r--r--src/video_core/shader/decode/ffma.cpp4
-rw-r--r--src/video_core/shader/decode/half_set.cpp4
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp4
-rw-r--r--src/video_core/shader/decode/image.cpp50
-rw-r--r--src/video_core/shader/decode/texture.cpp101
-rw-r--r--src/video_core/shader/node.h101
-rw-r--r--src/video_core/shader/shader_ir.h14
-rw-r--r--src/video_core/textures/texture.h2
28 files changed, 461 insertions, 423 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2442ddfd6..4408b5001 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -12,6 +12,10 @@
#include <utility>
#include <vector>
+#include <boost/icl/interval_map.hpp>
+#include <boost/icl/interval_set.hpp>
+#include <boost/range/iterator_range.hpp>
+
#include "common/alignment.h"
#include "common/common_types.h"
#include "core/core.h"
@@ -30,7 +34,7 @@ public:
using BufferInfo = std::pair<const TBufferType*, u64>;
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
- bool is_written = false) {
+ bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
auto& memory_manager = system.GPU().MemoryManager();
@@ -43,9 +47,13 @@ public:
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
- if (size < max_stream_size) {
+ if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
- return StreamBufferUpload(host_ptr, size, alignment);
+ if (use_fast_cbuf) {
+ return ConstBufferUpload(host_ptr, size);
+ } else {
+ return StreamBufferUpload(host_ptr, size, alignment);
+ }
}
}
@@ -152,6 +160,10 @@ protected:
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) = 0;
+ virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
+ return {};
+ }
+
/// Register an object into the cache
void Register(const MapInterval& new_map, bool inherit_written = false) {
const CacheAddr cache_ptr = new_map->GetStart();
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 7ff44f06d..85d308e26 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -28,6 +28,13 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
}
}
+std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
+ const u32 line_a = src_2 - src_1;
+ const u32 line_b = dst_2 - dst_1;
+ const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
+ return {line_b - (excess * line_b) / line_a, excess};
+}
+
void Fermi2D::HandleSurfaceCopy() {
LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
static_cast<u32>(regs.operation));
@@ -47,10 +54,27 @@ void Fermi2D::HandleSurfaceCopy() {
src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
}
+ u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
+ u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
+ const auto [new_dst_w, src_excess_x] =
+ DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
+ const auto [new_dst_h, src_excess_y] =
+ DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
+ dst_blit_x2 = new_dst_w + regs.blit_dst_x;
+ src_blit_x2 = src_blit_x2 - src_excess_x;
+ dst_blit_y2 = new_dst_h + regs.blit_dst_y;
+ src_blit_y2 = src_blit_y2 - src_excess_y;
+ const auto [new_src_w, dst_excess_x] =
+ DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
+ const auto [new_src_h, dst_excess_y] =
+ DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
+ src_blit_x2 = new_src_w + src_blit_x1;
+ dst_blit_x2 = dst_blit_x2 - dst_excess_x;
+ src_blit_y2 = new_src_h + src_blit_y1;
+ dst_blit_y2 = dst_blit_y2 - dst_excess_y;
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
- const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
- regs.blit_dst_x + regs.blit_dst_width,
- regs.blit_dst_y + regs.blit_dst_height};
+ const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
+ dst_blit_y2};
Config copy_config;
copy_config.operation = regs.operation;
copy_config.filter = regs.blit_control.filter;
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0901cf2fa..dba342c70 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -99,19 +99,19 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x80);
+ INSERT_UNION_PADDING_WORDS(0x80);
Surface dst;
- INSERT_PADDING_WORDS(2);
+ INSERT_UNION_PADDING_WORDS(2);
Surface src;
- INSERT_PADDING_WORDS(0x15);
+ INSERT_UNION_PADDING_WORDS(0x15);
Operation operation;
- INSERT_PADDING_WORDS(0x177);
+ INSERT_UNION_PADDING_WORDS(0x177);
union {
u32 raw;
@@ -119,7 +119,7 @@ public:
BitField<4, 1, Filter> filter;
} blit_control;
- INSERT_PADDING_WORDS(0x8);
+ INSERT_UNION_PADDING_WORDS(0x8);
u32 blit_dst_x;
u32 blit_dst_y;
@@ -130,7 +130,7 @@ public:
u64 blit_src_x;
u64 blit_src_y;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index b185c98c7..5259d92bd 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -51,7 +51,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
Upload::Registers upload;
@@ -63,7 +63,7 @@ public:
u32 data_upload;
- INSERT_PADDING_WORDS(0x3F);
+ INSERT_UNION_PADDING_WORDS(0x3F);
struct {
u32 address;
@@ -72,11 +72,11 @@ public:
}
} launch_desc_loc;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 launch;
- INSERT_PADDING_WORDS(0x4A7);
+ INSERT_UNION_PADDING_WORDS(0x4A7);
struct {
u32 address_high;
@@ -88,7 +88,7 @@ public:
}
} tsc;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
struct {
u32 address_high;
@@ -100,7 +100,7 @@ public:
}
} tic;
- INSERT_PADDING_WORDS(0x22);
+ INSERT_UNION_PADDING_WORDS(0x22);
struct {
u32 address_high;
@@ -111,11 +111,11 @@ public:
}
} code_loc;
- INSERT_PADDING_WORDS(0x3FE);
+ INSERT_UNION_PADDING_WORDS(0x3FE);
u32 tex_cb_index;
- INSERT_PADDING_WORDS(0x374);
+ INSERT_UNION_PADDING_WORDS(0x374);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -179,7 +179,7 @@ public:
};
INSERT_PADDING_WORDS(0x11);
- } launch_description;
+ } launch_description{};
struct {
u32 write_offset = 0;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index e0e25c321..396fb6e86 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -45,7 +45,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
Upload::Registers upload;
@@ -57,7 +57,7 @@ public:
u32 data;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8cc842684..1aa7c274f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -496,7 +496,7 @@ public:
Equation equation_a;
Factor factor_source_a;
Factor factor_dest_a;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
};
struct RenderTargetConfig {
@@ -517,7 +517,7 @@ public:
};
u32 layer_stride;
u32 base_layer;
- INSERT_PADDING_WORDS(7);
+ INSERT_UNION_PADDING_WORDS(7);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
@@ -542,7 +542,7 @@ public:
f32 translate_x;
f32 translate_y;
f32 translate_z;
- INSERT_PADDING_WORDS(2);
+ INSERT_UNION_PADDING_WORDS(2);
Common::Rectangle<s32> GetRect() const {
return {
@@ -606,7 +606,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x45);
+ INSERT_UNION_PADDING_WORDS(0x45);
struct {
u32 upload_address;
@@ -615,7 +615,7 @@ public:
u32 bind;
} macros;
- INSERT_PADDING_WORDS(0x17);
+ INSERT_UNION_PADDING_WORDS(0x17);
Upload::Registers upload;
struct {
@@ -626,7 +626,7 @@ public:
u32 data_upload;
- INSERT_PADDING_WORDS(0x44);
+ INSERT_UNION_PADDING_WORDS(0x44);
struct {
union {
@@ -636,11 +636,11 @@ public:
};
} sync_info;
- INSERT_PADDING_WORDS(0x11E);
+ INSERT_UNION_PADDING_WORDS(0x11E);
u32 tfb_enabled;
- INSERT_PADDING_WORDS(0x2E);
+ INSERT_UNION_PADDING_WORDS(0x2E);
std::array<RenderTargetConfig, NumRenderTargets> rt;
@@ -648,49 +648,49 @@ public:
std::array<ViewPort, NumViewports> viewports;
- INSERT_PADDING_WORDS(0x1D);
+ INSERT_UNION_PADDING_WORDS(0x1D);
struct {
u32 first;
u32 count;
} vertex_buffer;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
float clear_color[4];
float clear_depth;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
s32 clear_stencil;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
u32 polygon_offset_point_enable;
u32 polygon_offset_line_enable;
u32 polygon_offset_fill_enable;
- INSERT_PADDING_WORDS(0xD);
+ INSERT_UNION_PADDING_WORDS(0xD);
std::array<ScissorTest, NumViewports> scissor_test;
- INSERT_PADDING_WORDS(0x15);
+ INSERT_UNION_PADDING_WORDS(0x15);
s32 stencil_back_func_ref;
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0xC);
u32 color_mask_common;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
u32 rt_separate_frag_data;
f32 depth_bounds[2];
- INSERT_PADDING_WORDS(0xA);
+ INSERT_UNION_PADDING_WORDS(0xA);
struct {
u32 address_high;
@@ -710,7 +710,7 @@ public:
}
} zeta;
- INSERT_PADDING_WORDS(0x41);
+ INSERT_UNION_PADDING_WORDS(0x41);
union {
BitField<0, 4, u32> stencil;
@@ -719,11 +719,11 @@ public:
BitField<12, 4, u32> viewport;
} clear_flags;
- INSERT_PADDING_WORDS(0x19);
+ INSERT_UNION_PADDING_WORDS(0x19);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
- INSERT_PADDING_WORDS(0xF);
+ INSERT_UNION_PADDING_WORDS(0xF);
struct {
union {
@@ -746,16 +746,16 @@ public:
}
} rt_control;
- INSERT_PADDING_WORDS(0x2);
+ INSERT_UNION_PADDING_WORDS(0x2);
u32 zeta_width;
u32 zeta_height;
- INSERT_PADDING_WORDS(0x27);
+ INSERT_UNION_PADDING_WORDS(0x27);
u32 depth_test_enable;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x5);
u32 independent_blend_enable;
@@ -763,7 +763,7 @@ public:
u32 alpha_test_enabled;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
u32 d3d_cull_mode;
@@ -777,7 +777,7 @@ public:
float b;
float a;
} blend_color;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
struct {
u32 separate_alpha;
@@ -786,7 +786,7 @@ public:
Blend::Factor factor_dest_rgb;
Blend::Equation equation_a;
Blend::Factor factor_source_a;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
Blend::Factor factor_dest_a;
u32 enable_common;
@@ -802,7 +802,7 @@ public:
u32 stencil_front_func_mask;
u32 stencil_front_mask;
- INSERT_PADDING_WORDS(0x2);
+ INSERT_UNION_PADDING_WORDS(0x2);
u32 frag_color_clamp;
@@ -811,12 +811,12 @@ public:
BitField<4, 1, u32> triangle_rast_flip;
} screen_y_control;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
u32 vb_element_base;
u32 vb_base_instance;
- INSERT_PADDING_WORDS(0x35);
+ INSERT_UNION_PADDING_WORDS(0x35);
union {
BitField<0, 1, u32> c0;
@@ -829,11 +829,11 @@ public:
BitField<7, 1, u32> c7;
} clip_distance_enabled;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
float point_size;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
u32 zeta_enable;
@@ -842,7 +842,7 @@ public:
BitField<4, 1, u32> alpha_to_one;
} multisample_control;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
struct {
u32 address_high;
@@ -866,11 +866,11 @@ public:
}
} tsc;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
float polygon_offset_factor;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
struct {
u32 tic_address_high;
@@ -883,7 +883,7 @@ public:
}
} tic;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x5);
u32 stencil_two_side_enable;
StencilOp stencil_back_op_fail;
@@ -891,13 +891,13 @@ public:
StencilOp stencil_back_op_zpass;
ComparisonOp stencil_back_func_func;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
u32 framebuffer_srgb;
float polygon_offset_units;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
union {
BitField<2, 1, u32> coord_origin;
@@ -913,7 +913,7 @@ public:
(static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
}
} code_address;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
struct {
u32 vertex_end_gl;
@@ -925,14 +925,14 @@ public:
};
} draw;
- INSERT_PADDING_WORDS(0xA);
+ INSERT_UNION_PADDING_WORDS(0xA);
struct {
u32 enabled;
u32 index;
} primitive_restart;
- INSERT_PADDING_WORDS(0x5F);
+ INSERT_UNION_PADDING_WORDS(0x5F);
struct {
u32 start_addr_high;
@@ -973,9 +973,9 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
- INSERT_PADDING_WORDS(0x1F);
+ INSERT_UNION_PADDING_WORDS(0x1F);
float polygon_offset_clamp;
@@ -989,17 +989,17 @@ public:
}
} instanced_arrays;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
Cull cull;
u32 pixel_center_integer;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 viewport_transform_enabled;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
union {
BitField<0, 1, u32> depth_range_0_1;
@@ -1007,13 +1007,13 @@ public:
BitField<4, 1, u32> depth_clamp_far;
} view_volume_clip_control;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
struct {
u32 enable;
LogicOperation operation;
} logic_op;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
union {
u32 raw;
@@ -1026,9 +1026,9 @@ public:
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
- INSERT_PADDING_WORDS(0xB);
+ INSERT_UNION_PADDING_WORDS(0xB);
std::array<ColorMask, NumRenderTargets> color_mask;
- INSERT_PADDING_WORDS(0x38);
+ INSERT_UNION_PADDING_WORDS(0x38);
struct {
u32 query_address_high;
@@ -1050,7 +1050,7 @@ public:
}
} query;
- INSERT_PADDING_WORDS(0x3C);
+ INSERT_UNION_PADDING_WORDS(0x3C);
struct {
union {
@@ -1090,10 +1090,10 @@ public:
BitField<4, 4, ShaderProgram> program;
};
u32 offset;
- INSERT_PADDING_WORDS(14);
+ INSERT_UNION_PADDING_WORDS(14);
} shader_config[MaxShaderProgram];
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
u32 firmware[0x20];
@@ -1110,7 +1110,7 @@ public:
}
} const_buffer;
- INSERT_PADDING_WORDS(0x10);
+ INSERT_UNION_PADDING_WORDS(0x10);
struct {
union {
@@ -1118,14 +1118,14 @@ public:
BitField<0, 1, u32> valid;
BitField<4, 5, u32> index;
};
- INSERT_PADDING_WORDS(7);
+ INSERT_UNION_PADDING_WORDS(7);
} cb_bind[MaxShaderStage];
- INSERT_PADDING_WORDS(0x56);
+ INSERT_UNION_PADDING_WORDS(0x56);
u32 tex_cb_index;
- INSERT_PADDING_WORDS(0x395);
+ INSERT_UNION_PADDING_WORDS(0x395);
struct {
/// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1137,14 +1137,14 @@ public:
}
} ssbo_info;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
struct {
u32 address[MaxShaderStage];
u32 size[MaxShaderStage];
} tex_info_buffers;
- INSERT_PADDING_WORDS(0xCC);
+ INSERT_UNION_PADDING_WORDS(0xCC);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 93808a9bb..4f40d1d1f 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -94,7 +94,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0xC0);
+ INSERT_UNION_PADDING_WORDS(0xC0);
struct {
union {
@@ -112,7 +112,7 @@ public:
};
} exec;
- INSERT_PADDING_WORDS(0x3F);
+ INSERT_UNION_PADDING_WORDS(0x3F);
struct {
u32 address_high;
@@ -139,7 +139,7 @@ public:
u32 x_count;
u32 y_count;
- INSERT_PADDING_WORDS(0xB8);
+ INSERT_UNION_PADDING_WORDS(0xB8);
u32 const0;
u32 const1;
@@ -162,11 +162,11 @@ public:
Parameters dst_params;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
Parameters src_params;
- INSERT_PADDING_WORDS(0x13);
+ INSERT_UNION_PADDING_WORDS(0x13);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index e86a7f04a..bc80661d8 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -38,37 +38,37 @@ struct Header {
BitField<26, 1, u32> does_load_or_store;
BitField<27, 1, u32> does_fp64;
BitField<28, 4, u32> stream_out_mask;
- } common0;
+ } common0{};
union {
BitField<0, 24, u32> shader_local_memory_low_size;
BitField<24, 8, u32> per_patch_attribute_count;
- } common1;
+ } common1{};
union {
BitField<0, 24, u32> shader_local_memory_high_size;
BitField<24, 8, u32> threads_per_input_primitive;
- } common2;
+ } common2{};
union {
BitField<0, 24, u32> shader_local_memory_crs_size;
BitField<24, 4, OutputTopology> output_topology;
BitField<28, 4, u32> reserved;
- } common3;
+ } common3{};
union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<24, 4, u32> reserved;
BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
- } common4;
+ } common4{};
union {
struct {
- INSERT_PADDING_BYTES(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES(1); // ImapSystemValuesB
- INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
- INSERT_PADDING_BYTES(2); // ImapColor
+ INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32]
+ INSERT_UNION_PADDING_BYTES(2); // ImapColor
union {
BitField<0, 8, u16> clip_distances;
BitField<8, 1, u16> point_sprite_s;
@@ -79,20 +79,20 @@ struct Header {
BitField<14, 1, u16> instance_id;
BitField<15, 1, u16> vertex_id;
};
- INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES(1); // ImapReserved
- INSERT_PADDING_BYTES(3); // OmapSystemValuesA
- INSERT_PADDING_BYTES(1); // OmapSystemValuesB
- INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
- INSERT_PADDING_BYTES(2); // OmapColor
- INSERT_PADDING_BYTES(2); // OmapSystemValuesC
- INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10]
- INSERT_PADDING_BYTES(1); // OmapReserved
+ INSERT_UNION_PADDING_BYTES(5); // ImapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(1); // ImapReserved
+ INSERT_UNION_PADDING_BYTES(3); // OmapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // OmapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32]
+ INSERT_UNION_PADDING_BYTES(2); // OmapColor
+ INSERT_UNION_PADDING_BYTES(2); // OmapSystemValuesC
+ INSERT_UNION_PADDING_BYTES(5); // OmapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(1); // OmapReserved
} vtg;
struct {
- INSERT_PADDING_BYTES(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
@@ -100,10 +100,10 @@ struct Header {
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
- INSERT_PADDING_BYTES(2); // ImapColor
- INSERT_PADDING_BYTES(2); // ImapSystemValuesC
- INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES(2); // ImapReserved
+ INSERT_UNION_PADDING_BYTES(2); // ImapColor
+ INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC
+ INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(2); // ImapReserved
struct {
u32 target;
union {
@@ -139,6 +139,8 @@ struct Header {
return result;
}
} ps;
+
+ std::array<u32, 0xF> raw{};
};
u64 GetLocalMemorySize() const {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index dbca19f35..ecc338ae9 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -207,7 +207,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
struct {
u32 address_high;
u32 address_low;
@@ -220,12 +220,12 @@ public:
u32 semaphore_sequence;
u32 semaphore_trigger;
- INSERT_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0xC);
// The puser and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x5);
u32 semaphore_acquire;
u32 semaphore_release;
@@ -234,7 +234,7 @@ public:
BitField<4, 4, u32> operation;
BitField<8, 8, u32> id;
} fence_action;
- INSERT_PADDING_WORDS(0xE2);
+ INSERT_UNION_PADDING_WORDS(0xE2);
// Puller state
u32 acquire_mode;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index f8a807c84..0375fca17 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,13 +8,17 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
@@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz
CachedBufferBlock::~CachedBufferBlock() = default;
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- std::size_t stream_size)
- : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
- rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
+ const Device& device, std::size_t stream_size)
+ : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
+ if (!device.HasFastBufferSubData()) {
+ return;
+ }
+
+ static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
+ glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+ for (const GLuint cbuf : cbufs) {
+ glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
+ }
+}
-OGLBufferCache::~OGLBufferCache() = default;
+OGLBufferCache::~OGLBufferCache() {
+ glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+}
Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cache_addr, size);
@@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
static_cast<GLsizeiptr>(size));
}
+OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
+ std::size_t size) {
+ DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
+ const GLuint& cbuf = cbufs[cbuf_cursor++];
+ glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
+ return {&cbuf, 0};
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 022e7bfa9..8c7145443 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,10 +4,12 @@
#pragma once
+#include <array>
#include <memory>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -18,12 +20,14 @@ class System;
namespace OpenGL {
+class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class CachedBufferBlock;
using Buffer = std::shared_ptr<CachedBufferBlock>;
+using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class CachedBufferBlock : public VideoCommon::BufferBlock {
public:
@@ -38,14 +42,18 @@ private:
OGLBuffer gl_buffer{};
};
-class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
+class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- std::size_t stream_size);
+ const Device& device, std::size_t stream_size);
~OGLBufferCache();
const GLuint* GetEmptyBuffer(std::size_t) override;
+ void Acquire() noexcept {
+ cbuf_cursor = 0;
+ }
+
protected:
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
@@ -61,6 +69,14 @@ protected:
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
+
+ BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+
+private:
+ std::size_t cbuf_cursor = 0;
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+ Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
+ cbufs;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 64de7e425..c65b24c69 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -51,8 +51,11 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
} // Anonymous namespace
Device::Device() {
+ const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::vector extensions = GetExtensions();
+ const bool is_nvidia = vendor == "NVIDIA Corporation";
+
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@@ -64,6 +67,7 @@ Device::Device() {
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = TestComponentIndexingBug();
has_precise_bug = TestPreciseBug();
+ has_fast_buffer_sub_data = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bb273c3d6..bf35bd0b6 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -54,6 +54,10 @@ public:
return has_precise_bug;
}
+ bool HasFastBufferSubData() const {
+ return has_fast_buffer_sub_data;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestComponentIndexingBug();
@@ -69,6 +73,7 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
+ bool has_fast_buffer_sub_data{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6a4d2c83a..e560d70d5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -67,7 +67,7 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
- system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
+ system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
@@ -558,6 +558,8 @@ void RasterizerOpenGL::DrawPrelude() {
SyncPolygonOffset();
SyncAlphaTest();
+ buffer_cache.Acquire();
+
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -879,7 +881,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
const auto alignment = device.GetUniformBufferAlignment();
- const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
+ const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
+ device.HasFastBufferSubData());
bind_ubo_pushbuffer.Push(cbuf, offset, size);
}
@@ -935,10 +938,9 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag
if (!entry.IsBindless()) {
return maxwell3d.GetStageTexture(stage, entry.GetOffset());
}
- const auto cbuf = entry.GetBindlessCBuf();
- Tegra::Texture::TextureHandle tex_handle;
- Tegra::Engines::ShaderType shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
- tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second);
+ const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
+ const Tegra::Texture::TextureHandle tex_handle =
+ maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
return maxwell3d.GetTextureInfo(tex_handle);
}();
@@ -966,10 +968,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
if (!entry.IsBindless()) {
return compute.GetTexture(entry.GetOffset());
}
- const auto cbuf = entry.GetBindlessCBuf();
- Tegra::Texture::TextureHandle tex_handle;
- tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
- cbuf.first, cbuf.second);
+ const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
+ Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
return compute.GetTextureInfo(tex_handle);
}();
@@ -1012,10 +1012,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
if (!entry.IsBindless()) {
return compute.GetTexture(entry.GetOffset()).tic;
}
- const auto cbuf = entry.GetBindlessCBuf();
- Tegra::Texture::TextureHandle tex_handle;
- tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
- cbuf.first, cbuf.second);
+ const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
+ Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
return compute.GetTextureInfo(tex_handle).tic;
}();
SetupImage(bindpoint, tic, entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8a514cb8a..0ce59a852 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -735,7 +735,7 @@ private:
void DeclareImages() {
const auto& images{ir.GetImages()};
- for (const auto& [offset, image] : images) {
+ for (const auto& image : images) {
std::string qualifier = "coherent volatile";
if (image.IsRead() && !image.IsWritten()) {
qualifier += " readonly";
@@ -2466,16 +2466,16 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
cbuf.first);
}
+ for (const auto& [base, usage] : ir.GetGlobalMemory()) {
+ entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
+ usage.is_written);
+ }
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
- for (const auto& [offset, image] : ir.GetImages()) {
+ for (const auto& image : ir.GetImages()) {
entries.images.emplace_back(image);
}
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
- usage.is_written);
- }
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
return entries;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index fead2a51e..b1e75e6cc 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -82,10 +82,9 @@ private:
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
+ std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers;
- std::vector<SamplerEntry> bindless_samplers;
std::vector<ImageEntry> images;
- std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
};
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index d47c63d9f..b427ac873 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -16,7 +16,9 @@
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
+
namespace {
+
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -68,15 +70,15 @@ struct CFGRebuildState {
const ProgramCode& program_code;
ConstBufferLocker& locker;
u32 start{};
- std::vector<BlockInfo> block_info{};
- std::list<u32> inspect_queries{};
- std::list<Query> queries{};
- std::unordered_map<u32, u32> registered{};
- std::set<u32> labels{};
- std::map<u32, u32> ssy_labels{};
- std::map<u32, u32> pbk_labels{};
- std::unordered_map<u32, BlockStack> stacks{};
- ASTManager* manager;
+ std::vector<BlockInfo> block_info;
+ std::list<u32> inspect_queries;
+ std::list<Query> queries;
+ std::unordered_map<u32, u32> registered;
+ std::set<u32> labels;
+ std::map<u32, u32> ssy_labels;
+ std::map<u32, u32> pbk_labels;
+ std::unordered_map<u32, BlockStack> stacks;
+ ASTManager* manager{};
};
enum class BlockCollision : u32 { None, Found, Inside };
@@ -109,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
}
Pred GetPredicate(u32 index, bool negated) {
- return static_cast<Pred>(index + (negated ? 8 : 0));
+ return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
}
/**
@@ -136,15 +138,13 @@ struct BranchIndirectInfo {
s32 relative_position{};
};
-std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state,
- u32 start_address, u32 current_position) {
- const u32 shader_start = state.start;
- u32 pos = current_position;
- BranchIndirectInfo result{};
- u64 track_register = 0;
+struct BufferInfo {
+ u32 index;
+ u32 offset;
+};
- // Step 0 Get BRX Info
- const Instruction instr = {state.program_code[pos]};
+std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
+ const Instruction instr = state.program_code[pos];
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() != OpCode::Id::BRX) {
return std::nullopt;
@@ -152,86 +152,94 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState&
if (instr.brx.constant_buffer != 0) {
return std::nullopt;
}
- track_register = instr.gpr8.Value();
- result.relative_position = instr.brx.GetBranchExtend();
- pos--;
- bool found_track = false;
+ --pos;
+ return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
+}
- // Step 1 Track LDC
- while (pos >= shader_start) {
- if (IsSchedInstruction(pos, shader_start)) {
- pos--;
+template <typename Result, typename TestCallable, typename PackCallable>
+// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
+// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
+std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
+ PackCallable pack) {
+ for (; pos >= state.start; --pos) {
+ if (IsSchedInstruction(pos, state.start)) {
continue;
}
- const Instruction instr = {state.program_code[pos]};
+ const Instruction instr = state.program_code[pos];
const auto opcode = OpCode::Decode(instr);
- if (opcode->get().GetId() == OpCode::Id::LD_C) {
- if (instr.gpr0.Value() == track_register &&
- instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) {
- result.buffer = instr.cbuf36.index.Value();
- result.offset = static_cast<u32>(instr.cbuf36.GetOffset());
- track_register = instr.gpr8.Value();
- pos--;
- found_track = true;
- break;
- }
+ if (!opcode) {
+ continue;
+ }
+ if (test(instr, opcode->get())) {
+ --pos;
+ return std::make_optional(pack(instr, opcode->get()));
}
- pos--;
}
+ return std::nullopt;
+}
- if (!found_track) {
- return std::nullopt;
- }
- found_track = false;
+std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
+ u64 brx_tracked_register) {
+ return TrackInstruction<std::pair<BufferInfo, u64>>(
+ state, pos,
+ [brx_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::LD_C &&
+ instr.gpr0.Value() == brx_tracked_register &&
+ instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
+ },
+ [](auto instr, const auto& opcode) {
+ const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
+ static_cast<u32>(instr.cbuf36.GetOffset())};
+ return std::make_pair(info, instr.gpr8.Value());
+ });
+}
- // Step 2 Track SHL
- while (pos >= shader_start) {
- if (IsSchedInstruction(pos, shader_start)) {
- pos--;
- continue;
- }
- const Instruction instr = state.program_code[pos];
- const auto opcode = OpCode::Decode(instr);
- if (opcode->get().GetId() == OpCode::Id::SHL_IMM) {
- if (instr.gpr0.Value() == track_register) {
- track_register = instr.gpr8.Value();
- pos--;
- found_track = true;
- break;
- }
- }
- pos--;
+std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
+ u64 ldc_tracked_register) {
+ return TrackInstruction<u64>(state, pos,
+ [ldc_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::SHL_IMM &&
+ instr.gpr0.Value() == ldc_tracked_register;
+ },
+ [](auto instr, const auto&) { return instr.gpr8.Value(); });
+}
+
+std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
+ u64 shl_tracked_register) {
+ return TrackInstruction<u32>(state, pos,
+ [shl_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+ instr.gpr0.Value() == shl_tracked_register;
+ },
+ [](auto instr, const auto&) {
+ return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+ });
+}
+
+std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
+ const auto brx_info = GetBRXInfo(state, pos);
+ if (!brx_info) {
+ return std::nullopt;
}
+ const auto [relative_position, brx_tracked_register] = *brx_info;
- if (!found_track) {
+ const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
+ if (!ldc_info) {
return std::nullopt;
}
- found_track = false;
+ const auto [buffer_info, ldc_tracked_register] = *ldc_info;
- // Step 3 Track IMNMX
- while (pos >= shader_start) {
- if (IsSchedInstruction(pos, shader_start)) {
- pos--;
- continue;
- }
- const Instruction instr = state.program_code[pos];
- const auto opcode = OpCode::Decode(instr);
- if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
- if (instr.gpr0.Value() == track_register) {
- track_register = instr.gpr8.Value();
- result.entries = instr.alu.GetSignedImm20_20() + 1;
- pos--;
- found_track = true;
- break;
- }
- }
- pos--;
+ const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
+ if (!shl_tracked_register) {
+ return std::nullopt;
}
- if (!found_track) {
+ const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
+ if (!entries) {
return std::nullopt;
}
- return result;
+
+ return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
}
std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
@@ -420,30 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
break;
}
case OpCode::Id::BRX: {
- auto tmp = TrackBranchIndirectInfo(state, address, offset);
- if (tmp) {
- auto result = *tmp;
- std::vector<CaseBranch> branches{};
- s32 pc_target = offset + result.relative_position;
- for (u32 i = 0; i < result.entries; i++) {
- auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
- if (!k) {
- return {ParseResult::AbnormalFlow, parse_info};
- }
- u32 value = *k;
- u32 target = static_cast<u32>((value >> 3) + pc_target);
- insert_label(state, target);
- branches.emplace_back(value, target);
- }
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<MultiBranch>(
- static_cast<u32>(instr.gpr8.Value()), std::move(branches));
-
- return {ParseResult::ControlCaught, parse_info};
- } else {
+ const auto tmp = TrackBranchIndirectInfo(state, offset);
+ if (!tmp) {
LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
+ return {ParseResult::AbnormalFlow, parse_info};
}
- return {ParseResult::AbnormalFlow, parse_info};
+
+ const auto result = *tmp;
+ const s32 pc_target = offset + result.relative_position;
+ std::vector<CaseBranch> branches;
+ for (u32 i = 0; i < result.entries; i++) {
+ auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ if (!key) {
+ return {ParseResult::AbnormalFlow, parse_info};
+ }
+ u32 value = *key;
+ u32 target = static_cast<u32>((value >> 3) + pc_target);
+ insert_label(state, target);
+ branches.emplace_back(value, target);
+ }
+ parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<MultiBranch>(
+ static_cast<u32>(instr.gpr8.Value()), std::move(branches));
+
+ return {ParseResult::ControlCaught, parse_info};
}
default:
break;
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 1473c282a..fcedd2af6 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
if (instr.fmul.tab5cb8_2 != 0) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
- instr.fmul.tab5cb8_2.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+ instr.fmul.tab5cb8_2.Value());
}
if (instr.fmul.tab5c68_0 != 1) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
- instr.fmul.tab5c68_0.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
+ instr.fmul.tab5c68_0.Value());
}
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
@@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
+ LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
+
// Currently RRO is only implemented as a register move.
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
SetRegister(bb, instr.gpr0, op_b);
- LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default:
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index b06cbe441..ee7d9a29d 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
opcode->get().GetId() == OpCode::Id::HADD2_R) {
- if (instr.alu_half.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 6466fc011..d179b9873 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
- if (instr.alu_half_imm.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
} else {
- if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index ca2f39e8d..5973588d6 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
if (instr.ffma.tab5980_0 != 1) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
}
if (instr.ffma.tab5980_1 != 0) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
}
const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 48ca7a4af..848e46874 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.hset2.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index fec8f2dbe..310655619 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -19,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- LOG_DEBUG(HW_GPU, "ftz={}", static_cast<u32>(instr.hsetp2.ftz));
+ if (instr.hsetp2.ftz != 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index b02d2cb95..d2fe4ec5d 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -143,39 +143,37 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
}
Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
- const auto offset{static_cast<std::size_t>(image.index.Value())};
- if (const auto existing_image = TryUseExistingImage(offset, type)) {
- return *existing_image;
+ const auto offset = static_cast<u32>(image.index.Value());
+
+ const auto it =
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [offset](const Image& entry) { return entry.GetOffset() == offset; });
+ if (it != std::end(used_images)) {
+ ASSERT(!it->IsBindless() && it->GetType() == it->GetType());
+ return *it;
}
- const std::size_t next_index{used_images.size()};
- return used_images.emplace(offset, Image{offset, next_index, type}).first->second;
+ const auto next_index = static_cast<u32>(used_images.size());
+ return used_images.emplace_back(next_index, offset, type);
}
Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
- const Node image_register{GetRegister(reg)};
- const auto [base_image, cbuf_index, cbuf_offset]{
- TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
- const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
-
- if (const auto image = TryUseExistingImage(cbuf_key, type)) {
- return *image;
- }
-
- const std::size_t next_index{used_images.size()};
- return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type})
- .first->second;
-}
-
-Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) {
- auto it = used_images.find(offset);
- if (it == used_images.end()) {
- return nullptr;
+ const Node image_register = GetRegister(reg);
+ const auto [base_image, buffer, offset] =
+ TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
+
+ const auto it =
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [buffer = buffer, offset = offset](const Image& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != std::end(used_images)) {
+ ASSERT(it->IsBindless() && it->GetType() == it->GetType());
+ return *it;
}
- auto& image = it->second;
- ASSERT(image.GetType() == type);
- return &image;
+ const auto next_index = static_cast<u32>(used_images.size());
+ return used_images.emplace_back(next_index, offset, buffer, type);
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0599ef34f..bb926a132 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
bool is_bindless = false;
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex_b.texture_type};
const bool is_array = instr.tex_b.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
- if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
- }
-
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
@@ -107,10 +95,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
- if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
- }
-
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
: instr.tld4.UsesMiscMode(TextureMiscMode::DC);
@@ -119,15 +103,12 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
: instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
WriteTexInstructionFloat(
bb, instr,
- GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless), true);
+ GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
- }
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
@@ -164,10 +145,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
is_bindless = true;
[[fallthrough]];
case OpCode::Id::TXQ: {
- if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
- }
-
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
@@ -205,10 +182,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
- if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
- }
-
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler =
@@ -254,25 +227,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
- if (instr.tld.nodep_flag) {
- LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
- }
-
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
break;
}
case OpCode::Id::TLDS: {
- const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+ const TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
- if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
- }
-
const Node4 components = GetTldsCode(instr, texture_type, is_array);
if (instr.tlds.fp32_flag) {
@@ -293,84 +258,86 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
std::optional<SamplerInfo> sampler_info) {
const auto offset = static_cast<u32>(sampler.index.Value());
- Tegra::Shader::TextureType type;
+ TextureType type;
bool is_array;
bool is_shadow;
if (sampler_info) {
type = sampler_info->type;
is_array = sampler_info->is_array;
is_shadow = sampler_info->is_shadow;
- } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
+ } else if (const auto sampler = locker.ObtainBoundSampler(offset)) {
type = sampler->texture_type.Value();
is_array = sampler->is_array.Value() != 0;
is_shadow = sampler->is_shadow.Value() != 0;
} else {
- type = Tegra::Shader::TextureType::Texture2D;
+ LOG_WARNING(HW_GPU, "Unknown sampler info");
+ type = TextureType::Texture2D;
is_array = false;
is_shadow = false;
}
// If this sampler has already been used, return the existing mapping.
- const auto itr =
+ const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
- [&](const Sampler& entry) { return entry.GetOffset() == offset; });
- if (itr != used_samplers.end()) {
- ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
- itr->IsShadow() == is_shadow);
- return *itr;
+ [offset](const Sampler& entry) { return entry.GetOffset() == offset; });
+ if (it != used_samplers.end()) {
+ ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+ it->IsShadow() == is_shadow);
+ return *it;
}
// Otherwise create a new mapping for this sampler
- const std::size_t next_index = used_samplers.size();
- const Sampler entry{offset, next_index, type, is_array, is_shadow};
- return *used_samplers.emplace(entry).first;
-} // namespace VideoCommon::Shader
+ const auto next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow));
+}
const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
- const auto [base_sampler, cbuf_index, cbuf_offset] =
+ const auto [base_sampler, buffer, offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
- const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
- Tegra::Shader::TextureType type;
+
+ TextureType type;
bool is_array;
bool is_shadow;
if (sampler_info) {
type = sampler_info->type;
is_array = sampler_info->is_array;
is_shadow = sampler_info->is_shadow;
- } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
+ } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) {
type = sampler->texture_type.Value();
is_array = sampler->is_array.Value() != 0;
is_shadow = sampler->is_shadow.Value() != 0;
} else {
- type = Tegra::Shader::TextureType::Texture2D;
+ LOG_WARNING(HW_GPU, "Unknown sampler info");
+ type = TextureType::Texture2D;
is_array = false;
is_shadow = false;
}
// If this sampler has already been used, return the existing mapping.
- const auto itr =
+ const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
- [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
- if (itr != used_samplers.end()) {
- ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
- itr->IsShadow() == is_shadow);
- return *itr;
+ [buffer = buffer, offset = offset](const Sampler& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != used_samplers.end()) {
+ ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+ it->IsShadow() == is_shadow);
+ return *it;
}
// Otherwise create a new mapping for this sampler
- const std::size_t next_index = used_samplers.size();
- const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
- return *used_samplers.emplace(entry).first;
+ const auto next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(
+ Sampler(next_index, offset, buffer, type, is_array, is_shadow));
}
-void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
- bool is_tld4) {
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
- if (!is_tld4 && !instr.tex.IsComponentEnabled(elem)) {
+ if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 447fb5c1d..4300d9ff4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -230,62 +230,49 @@ using NodeBlock = std::vector<Node>;
class Sampler {
public:
/// This constructor is for bound samplers
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{false} {}
+ constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
/// This constructor is for bindless samplers
- explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
-
- /// This constructor is for serialization/deserialization
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_bindless)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{is_bindless} {}
-
- std::size_t GetOffset() const {
+ constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
+ is_shadow{is_shadow}, is_bindless{true} {}
+
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
return offset;
}
- std::size_t GetIndex() const {
- return index;
+ constexpr u32 GetBuffer() const {
+ return buffer;
}
- Tegra::Shader::TextureType GetType() const {
+ constexpr Tegra::Shader::TextureType GetType() const {
return type;
}
- bool IsArray() const {
+ constexpr bool IsArray() const {
return is_array;
}
- bool IsShadow() const {
+ constexpr bool IsShadow() const {
return is_shadow;
}
- bool IsBindless() const {
+ constexpr bool IsBindless() const {
return is_bindless;
}
- std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- bool operator<(const Sampler& rhs) const {
- return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
- std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
- rhs.is_bindless);
- }
-
private:
- /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
- /// instruction.
- std::size_t offset{};
- std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
+ u32 index{}; ///< Emulated index given for the this sampler.
+ u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
+ u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
@@ -294,18 +281,13 @@ private:
class Image final {
public:
- constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
- : offset{offset}, index{index}, type{type}, is_bindless{false} {}
-
- constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::ImageType type)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_bindless{true} {}
+ /// This constructor is for bound images
+ constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
+ : index{index}, offset{offset}, type{type} {}
- constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
- bool is_bindless, bool is_written, bool is_read, bool is_atomic)
- : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
- is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {}
+ /// This constructor is for bindless samplers
+ constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
+ : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
void MarkWrite() {
is_written = true;
@@ -321,12 +303,16 @@ public:
is_atomic = true;
}
- constexpr std::size_t GetOffset() const {
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
return offset;
}
- constexpr std::size_t GetIndex() const {
- return index;
+ constexpr u32 GetBuffer() const {
+ return buffer;
}
constexpr Tegra::Shader::ImageType GetType() const {
@@ -349,18 +335,11 @@ public:
return is_atomic;
}
- constexpr std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- constexpr bool operator<(const Image& rhs) const {
- return std::tie(offset, index, type, is_bindless) <
- std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
- }
-
private:
- u64 offset{};
- std::size_t index{};
+ u32 index{};
+ u32 offset{};
+ u32 buffer{};
+
Tegra::Shader::ImageType type{};
bool is_bindless{};
bool is_written{};
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 7582999a5..26c8fde22 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <list>
#include <map>
#include <optional>
#include <set>
@@ -95,11 +96,11 @@ public:
return used_cbufs;
}
- const std::set<Sampler>& GetSamplers() const {
+ const std::list<Sampler>& GetSamplers() const {
return used_samplers;
}
- const std::map<u64, Image>& GetImages() const {
+ const std::list<Image>& GetImages() const {
return used_images;
}
@@ -316,9 +317,6 @@ private:
/// Access a bindless image sampler.
Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
- /// Tries to access an existing image, updating it's state as needed
- Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type);
-
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -326,7 +324,7 @@ private:
Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components, bool is_tld4 = false);
+ const Node4& components);
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components, bool ignore_mask = false);
@@ -402,8 +400,8 @@ private:
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
- std::set<Sampler> used_samplers;
- std::map<u64, Image> used_images;
+ std::list<Sampler> used_samplers;
+ std::list<Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_layer{};
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 0429af9c1..27c8ce975 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -132,6 +132,8 @@ enum class SwizzleSource : u32 {
};
union TextureHandle {
+ TextureHandle(u32 raw) : raw{raw} {}
+
u32 raw;
BitField<0, 20, u32> tic_id;
BitField<20, 12, u32> tsc_id;