aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt63
-rw-r--r--src/video_core/buffer_cache/buffer_base.h13
-rw-r--r--src/video_core/buffer_cache/buffer_cache.cpp5
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h259
-rw-r--r--src/video_core/cdma_pusher.cpp50
-rw-r--r--src/video_core/cdma_pusher.h24
-rw-r--r--src/video_core/command_classes/host1x.cpp30
-rw-r--r--src/video_core/command_classes/sync_manager.cpp60
-rw-r--r--src/video_core/command_classes/sync_manager.h64
-rw-r--r--src/video_core/compatible_formats.cpp11
-rw-r--r--src/video_core/compatible_formats.h5
-rw-r--r--src/video_core/control/channel_state.cpp40
-rw-r--r--src/video_core/control/channel_state.h68
-rw-r--r--src/video_core/control/channel_state_cache.cpp14
-rw-r--r--src/video_core/control/channel_state_cache.h101
-rw-r--r--src/video_core/control/channel_state_cache.inc86
-rw-r--r--src/video_core/control/scheduler.cpp32
-rw-r--r--src/video_core/control/scheduler.h37
-rw-r--r--src/video_core/delayed_destruction_ring.h5
-rw-r--r--src/video_core/dirty_flags.cpp5
-rw-r--r--src/video_core/dirty_flags.h5
-rw-r--r--src/video_core/dma_pusher.cpp32
-rw-r--r--src/video_core/dma_pusher.h44
-rw-r--r--src/video_core/engines/const_buffer_info.h5
-rw-r--r--src/video_core/engines/engine_interface.h5
-rw-r--r--src/video_core/engines/engine_upload.cpp51
-rw-r--r--src/video_core/engines/engine_upload.h11
-rw-r--r--src/video_core/engines/fermi_2d.cpp5
-rw-r--r--src/video_core/engines/fermi_2d.h6
-rw-r--r--src/video_core/engines/kepler_compute.cpp19
-rw-r--r--src/video_core/engines/kepler_compute.h6
-rw-r--r--src/video_core/engines/kepler_memory.cpp20
-rw-r--r--src/video_core/engines/kepler_memory.h7
-rw-r--r--src/video_core/engines/maxwell_3d.cpp168
-rw-r--r--src/video_core/engines/maxwell_3d.h75
-rw-r--r--src/video_core/engines/maxwell_dma.cpp125
-rw-r--r--src/video_core/engines/maxwell_dma.h15
-rw-r--r--src/video_core/engines/puller.cpp306
-rw-r--r--src/video_core/engines/puller.h177
-rw-r--r--src/video_core/fence_manager.h111
-rw-r--r--src/video_core/framebuffer_config.h34
-rw-r--r--src/video_core/gpu.cpp711
-rw-r--r--src/video_core/gpu.h99
-rw-r--r--src/video_core/gpu_thread.cpp33
-rw-r--r--src/video_core/gpu_thread.h21
-rw-r--r--src/video_core/host1x/codecs/codec.cpp (renamed from src/video_core/command_classes/codecs/codec.cpp)87
-rw-r--r--src/video_core/host1x/codecs/codec.h (renamed from src/video_core/command_classes/codecs/codec.h)24
-rw-r--r--src/video_core/host1x/codecs/h264.cpp (renamed from src/video_core/command_classes/codecs/h264.cpp)38
-rw-r--r--src/video_core/host1x/codecs/h264.h (renamed from src/video_core/command_classes/codecs/h264.h)37
-rw-r--r--src/video_core/host1x/codecs/vp8.cpp (renamed from src/video_core/command_classes/codecs/vp8.cpp)18
-rw-r--r--src/video_core/host1x/codecs/vp8.h (renamed from src/video_core/command_classes/codecs/vp8.h)20
-rw-r--r--src/video_core/host1x/codecs/vp9.cpp (renamed from src/video_core/command_classes/codecs/vp9.cpp)36
-rw-r--r--src/video_core/host1x/codecs/vp9.h (renamed from src/video_core/command_classes/codecs/vp9.h)27
-rw-r--r--src/video_core/host1x/codecs/vp9_types.h (renamed from src/video_core/command_classes/codecs/vp9_types.h)7
-rw-r--r--src/video_core/host1x/control.cpp33
-rw-r--r--src/video_core/host1x/control.h (renamed from src/video_core/command_classes/host1x.h)23
-rw-r--r--src/video_core/host1x/host1x.cpp17
-rw-r--r--src/video_core/host1x/host1x.h57
-rw-r--r--src/video_core/host1x/nvdec.cpp (renamed from src/video_core/command_classes/nvdec.cpp)16
-rw-r--r--src/video_core/host1x/nvdec.h (renamed from src/video_core/command_classes/nvdec.h)19
-rw-r--r--src/video_core/host1x/nvdec_common.h (renamed from src/video_core/command_classes/nvdec_common.h)9
-rw-r--r--src/video_core/host1x/sync_manager.cpp50
-rw-r--r--src/video_core/host1x/sync_manager.h53
-rw-r--r--src/video_core/host1x/syncpoint_manager.cpp96
-rw-r--r--src/video_core/host1x/syncpoint_manager.h98
-rw-r--r--src/video_core/host1x/vic.cpp (renamed from src/video_core/command_classes/vic.cpp)43
-rw-r--r--src/video_core/host1x/vic.h (renamed from src/video_core/command_classes/vic.h)18
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt5
-rw-r--r--src/video_core/host_shaders/StringShaderHeader.cmake3
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp7
-rw-r--r--src/video_core/host_shaders/block_linear_unswizzle_2d.comp5
-rw-r--r--src/video_core/host_shaders/block_linear_unswizzle_3d.comp5
-rw-r--r--src/video_core/host_shaders/convert_abgr8_to_d24s8.frag5
-rw-r--r--src/video_core/host_shaders/convert_d24s8_to_abgr8.frag5
-rw-r--r--src/video_core/host_shaders/convert_depth_to_float.frag5
-rw-r--r--src/video_core/host_shaders/convert_float_to_depth.frag5
-rw-r--r--src/video_core/host_shaders/convert_s8d24_to_abgr8.frag22
-rw-r--r--src/video_core/host_shaders/fidelityfx_fsr.comp5
-rw-r--r--src/video_core/host_shaders/full_screen_triangle.vert5
-rw-r--r--src/video_core/host_shaders/fxaa.frag5
-rw-r--r--src/video_core/host_shaders/fxaa.vert5
-rw-r--r--src/video_core/host_shaders/opengl_convert_s8d24.comp17
-rw-r--r--src/video_core/host_shaders/opengl_copy_bc4.comp5
-rw-r--r--src/video_core/host_shaders/opengl_present.frag5
-rw-r--r--src/video_core/host_shaders/opengl_present.vert5
-rw-r--r--src/video_core/host_shaders/opengl_present_scaleforce.frag23
-rw-r--r--src/video_core/host_shaders/pitch_unswizzle.comp5
-rw-r--r--src/video_core/host_shaders/present_bicubic.frag5
-rw-r--r--src/video_core/host_shaders/present_gaussian.frag5
-rw-r--r--src/video_core/host_shaders/source_shader.h.in3
-rw-r--r--src/video_core/host_shaders/vulkan_blit_color_float.frag5
-rw-r--r--src/video_core/host_shaders/vulkan_blit_depth_stencil.frag5
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp5
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp5
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp5
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp5
-rw-r--r--src/video_core/host_shaders/vulkan_present.frag5
-rw-r--r--src/video_core/host_shaders/vulkan_present.vert5
-rw-r--r--src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag3
-rw-r--r--src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag3
-rw-r--r--src/video_core/host_shaders/vulkan_quad_indexed.comp5
-rw-r--r--src/video_core/host_shaders/vulkan_uint8.comp5
-rw-r--r--src/video_core/macro/macro.cpp40
-rw-r--r--src/video_core/macro/macro.h8
-rw-r--r--src/video_core/macro/macro_hle.cpp68
-rw-r--r--src/video_core/macro/macro_hle.h5
-rw-r--r--src/video_core/macro/macro_interpreter.cpp6
-rw-r--r--src/video_core/macro/macro_interpreter.h5
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp93
-rw-r--r--src/video_core/macro/macro_jit_x64.h5
-rw-r--r--src/video_core/memory_manager.cpp766
-rw-r--r--src/video_core/memory_manager.h179
-rw-r--r--src/video_core/query_cache.h41
-rw-r--r--src/video_core/rasterizer_accelerated.cpp22
-rw-r--r--src/video_core/rasterizer_accelerated.h5
-rw-r--r--src/video_core/rasterizer_interface.h25
-rw-r--r--src/video_core/renderer_base.cpp10
-rw-r--r--src/video_core/renderer_base.h8
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp21
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h19
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp25
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h22
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp18
-rw-r--r--src/video_core/renderer_opengl/gl_device.h12
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp21
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h11
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp84
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h29
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp242
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h37
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_context.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h9
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp22
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h89
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h6
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp107
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h21
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache_base.cpp5
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h24
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp46
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h13
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp31
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h8
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp23
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h17
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp8
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h5
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp391
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h9
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h9
-rw-r--r--src/video_core/renderer_vulkan/pipeline_statistics.cpp9
-rw-r--r--src/video_core/renderer_vulkan/pipeline_statistics.h5
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp21
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h14
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp141
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h30
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp25
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h21
-rw-r--r--src/video_core/renderer_vulkan/vk_command_pool.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_command_pool.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp80
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h14
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.h9
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp34
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h32
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.h9
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp29
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h41
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp65
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h24
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h36
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp150
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h52
-rw-r--r--src/video_core/renderer_vulkan/vk_render_pass_cache.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_render_pass_cache.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp52
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h18
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_util.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_util.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp93
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h14
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp24
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp57
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h17
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp153
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h22
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache_base.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp14
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h16
-rw-r--r--src/video_core/shader_cache.cpp52
-rw-r--r--src/video_core/shader_cache.h24
-rw-r--r--src/video_core/shader_environment.cpp18
-rw-r--r--src/video_core/shader_environment.h5
-rw-r--r--src/video_core/shader_notify.cpp6
-rw-r--r--src/video_core/shader_notify.h6
-rw-r--r--src/video_core/surface.cpp23
-rw-r--r--src/video_core/surface.h33
-rw-r--r--src/video_core/texture_cache/accelerated_swizzle.cpp5
-rw-r--r--src/video_core/texture_cache/accelerated_swizzle.h5
-rw-r--r--src/video_core/texture_cache/decode_bc4.cpp5
-rw-r--r--src/video_core/texture_cache/decode_bc4.h5
-rw-r--r--src/video_core/texture_cache/descriptor_table.h6
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp17
-rw-r--r--src/video_core/texture_cache/format_lookup_table.h5
-rw-r--r--src/video_core/texture_cache/formatter.cpp5
-rw-r--r--src/video_core/texture_cache/formatter.h15
-rw-r--r--src/video_core/texture_cache/image_base.cpp18
-rw-r--r--src/video_core/texture_cache/image_base.h15
-rw-r--r--src/video_core/texture_cache/image_info.cpp7
-rw-r--r--src/video_core/texture_cache/image_info.h5
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp5
-rw-r--r--src/video_core/texture_cache/image_view_base.h5
-rw-r--r--src/video_core/texture_cache/image_view_info.cpp7
-rw-r--r--src/video_core/texture_cache/image_view_info.h5
-rw-r--r--src/video_core/texture_cache/render_targets.h7
-rw-r--r--src/video_core/texture_cache/samples_helper.h9
-rw-r--r--src/video_core/texture_cache/slot_vector.h7
-rw-r--r--src/video_core/texture_cache/texture_cache.cpp15
-rw-r--r--src/video_core/texture_cache/texture_cache.h321
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h104
-rw-r--r--src/video_core/texture_cache/types.h5
-rw-r--r--src/video_core/texture_cache/util.cpp29
-rw-r--r--src/video_core/texture_cache/util.h7
-rw-r--r--src/video_core/textures/astc.cpp76
-rw-r--r--src/video_core/textures/astc.h8
-rw-r--r--src/video_core/textures/decoders.cpp269
-rw-r--r--src/video_core/textures/decoders.h41
-rw-r--r--src/video_core/textures/texture.cpp6
-rw-r--r--src/video_core/textures/texture.h5
-rw-r--r--src/video_core/transform_feedback.cpp5
-rw-r--r--src/video_core/transform_feedback.h5
-rw-r--r--src/video_core/video_core.cpp6
-rw-r--r--src/video_core/video_core.h5
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.cpp5
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.h19
-rw-r--r--src/video_core/vulkan_common/vulkan_debug_callback.cpp5
-rw-r--r--src/video_core/vulkan_common/vulkan_debug_callback.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp360
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h22
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.cpp7
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_library.cpp10
-rw-r--r--src/video_core/vulkan_common/vulkan_library.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp13
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h6
-rw-r--r--src/video_core/vulkan_common/vulkan_surface.cpp5
-rw-r--r--src/video_core/vulkan_common/vulkan_surface.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp22
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h30
269 files changed, 5911 insertions, 4260 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6a6325e38..40e6d1ec4 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,7 +1,10 @@
+# SPDX-FileCopyrightText: 2018 yuzu Emulator Project
+# SPDX-License-Identifier: GPL-2.0-or-later
+
add_subdirectory(host_shaders)
if(LIBVA_FOUND)
- set_source_files_properties(command_classes/codecs/codec.cpp
+ set_source_files_properties(host1x/codecs/codec.cpp
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
endif()
@@ -12,26 +15,14 @@ add_library(video_core STATIC
buffer_cache/buffer_cache.h
cdma_pusher.cpp
cdma_pusher.h
- command_classes/codecs/codec.cpp
- command_classes/codecs/codec.h
- command_classes/codecs/h264.cpp
- command_classes/codecs/h264.h
- command_classes/codecs/vp8.cpp
- command_classes/codecs/vp8.h
- command_classes/codecs/vp9.cpp
- command_classes/codecs/vp9.h
- command_classes/codecs/vp9_types.h
- command_classes/host1x.cpp
- command_classes/host1x.h
- command_classes/nvdec.cpp
- command_classes/nvdec.h
- command_classes/nvdec_common.h
- command_classes/sync_manager.cpp
- command_classes/sync_manager.h
- command_classes/vic.cpp
- command_classes/vic.h
compatible_formats.cpp
compatible_formats.h
+ control/channel_state.cpp
+ control/channel_state.h
+ control/channel_state_cache.cpp
+ control/channel_state_cache.h
+ control/scheduler.cpp
+ control/scheduler.h
delayed_destruction_ring.h
dirty_flags.cpp
dirty_flags.h
@@ -51,7 +42,31 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
+ engines/puller.cpp
+ engines/puller.h
framebuffer_config.h
+ host1x/codecs/codec.cpp
+ host1x/codecs/codec.h
+ host1x/codecs/h264.cpp
+ host1x/codecs/h264.h
+ host1x/codecs/vp8.cpp
+ host1x/codecs/vp8.h
+ host1x/codecs/vp9.cpp
+ host1x/codecs/vp9.h
+ host1x/codecs/vp9_types.h
+ host1x/control.cpp
+ host1x/control.h
+ host1x/host1x.cpp
+ host1x/host1x.h
+ host1x/nvdec.cpp
+ host1x/nvdec.h
+ host1x/nvdec_common.h
+ host1x/sync_manager.cpp
+ host1x/sync_manager.h
+ host1x/syncpoint_manager.cpp
+ host1x/syncpoint_manager.h
+ host1x/vic.cpp
+ host1x/vic.h
macro/macro.cpp
macro/macro.h
macro/macro_hle.cpp
@@ -192,6 +207,7 @@ add_library(video_core STATIC
texture_cache/render_targets.h
texture_cache/samples_helper.h
texture_cache/slot_vector.h
+ texture_cache/texture_cache.cpp
texture_cache/texture_cache.h
texture_cache/texture_cache_base.h
texture_cache/types.h
@@ -258,10 +274,6 @@ if (MSVC)
target_compile_options(video_core PRIVATE
/we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
/we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
- /we4456 # Declaration of 'identifier' hides previous local declaration
- /we4457 # Declaration of 'identifier' hides function parameter
- /we4458 # Declaration of 'identifier' hides class member
- /we4459 # Declaration of 'identifier' hides global declaration
)
else()
target_compile_options(video_core PRIVATE
@@ -269,7 +281,6 @@ else()
-Wno-error=sign-conversion
-Werror=pessimizing-move
-Werror=redundant-move
- -Werror=shadow
-Werror=type-limits
$<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
@@ -277,3 +288,7 @@ else()
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
)
endif()
+
+if (ARCHITECTURE_x86_64)
+ target_link_libraries(video_core PRIVATE dynarmic)
+endif()
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index be2113f5a..f9a6472cf 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -13,6 +12,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
+#include "common/settings.h"
#include "core/memory.h"
namespace VideoCommon {
@@ -37,7 +37,7 @@ struct NullBufferParams {};
template <class RasterizerInterface>
class BufferBase {
static constexpr u64 PAGES_PER_WORD = 64;
- static constexpr u64 BYTES_PER_PAGE = Core::Memory::PAGE_SIZE;
+ static constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE;
static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
/// Vector tracking modified pages tightly packed with small vector optimization
@@ -212,7 +212,7 @@ public:
void FlushCachedWrites() noexcept {
flags &= ~BufferFlagBits::CachedWrites;
const u64 num_words = NumWords();
- const u64* const cached_words = Array<Type::CachedCPU>();
+ u64* const cached_words = Array<Type::CachedCPU>();
u64* const untracked_words = Array<Type::Untracked>();
u64* const cpu_words = Array<Type::CPU>();
for (u64 word_index = 0; word_index < num_words; ++word_index) {
@@ -220,6 +220,9 @@ public:
NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
untracked_words[word_index] |= cached_bits;
cpu_words[word_index] |= cached_bits;
+ if (!Settings::values.use_pessimistic_flushes) {
+ cached_words[word_index] = 0;
+ }
}
}
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index ab32294c8..a16308b60 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/microprofile.h"
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index fa26eb8b0..8e26b3f95 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1,17 +1,14 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <array>
-#include <deque>
#include <memory>
#include <mutex>
#include <numeric>
#include <span>
-#include <unordered_map>
#include <vector>
#include <boost/container/small_vector.hpp>
@@ -22,10 +19,10 @@
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/microprofile.h"
-#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
@@ -59,12 +56,12 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
// Page size for caching purposes.
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
- static constexpr u32 PAGE_BITS = 16;
- static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS;
+ static constexpr u32 YUZU_PAGEBITS = 16;
+ static constexpr u64 YUZU_PAGESIZE = u64{1} << YUZU_PAGEBITS;
static constexpr bool IS_OPENGL = P::IS_OPENGL;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
@@ -78,8 +75,9 @@ class BufferCache {
static constexpr BufferId NULL_BUFFER_ID{0};
- static constexpr u64 EXPECTED_MEMORY = 512_MiB;
- static constexpr u64 CRITICAL_MEMORY = 1_GiB;
+ static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
+ static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
+ static constexpr s64 TARGET_THRESHOLD = 4_GiB;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -118,10 +116,7 @@ public:
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- Runtime& runtime_);
+ Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
void TickFrame();
@@ -131,7 +126,7 @@ public:
void DownloadMemory(VAddr cpu_addr, u64 size);
- bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
+ bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
@@ -218,8 +213,8 @@ private:
template <typename Func>
void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
- const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE);
- for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) {
+ const u64 page_end = Common::DivCeil(cpu_addr + size, YUZU_PAGESIZE);
+ for (u64 page = cpu_addr >> YUZU_PAGEBITS; page < page_end;) {
const BufferId buffer_id = page_table[page];
if (!buffer_id) {
++page;
@@ -229,7 +224,7 @@ private:
func(buffer_id, buffer);
const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
- page = Common::DivCeil(end_addr, PAGE_SIZE);
+ page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
}
}
@@ -264,8 +259,8 @@ private:
}
static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
- return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
- ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
+ return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
+ ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
}
void RunGarbageCollector();
@@ -355,7 +350,7 @@ private:
void NotifyBufferDeletion();
- [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+ [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, bool is_written = false) const;
[[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
PixelFormat format);
@@ -369,9 +364,6 @@ private:
void ClearDownload(IntervalType subtract_interval);
VideoCore::RasterizerInterface& rasterizer;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
SlotVector<Buffer> slot_buffers;
@@ -438,26 +430,43 @@ private:
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
u64 frame_tick = 0;
u64 total_used_memory = 0;
+ u64 minimum_memory = 0;
+ u64 critical_memory = 0;
- std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
+ std::array<BufferId, ((1ULL << 39) >> YUZU_PAGEBITS)> page_table;
};
template <class P>
BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- Runtime& runtime_)
- : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
- kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+ Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+ : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
common_ranges.clear();
+
+ if (!runtime.CanReportMemoryUsage()) {
+ minimum_memory = DEFAULT_EXPECTED_MEMORY;
+ critical_memory = DEFAULT_CRITICAL_MEMORY;
+ return;
+ }
+
+ const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
+ const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
+ const s64 min_spacing_critical = device_memory - 1_GiB;
+ const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
+ const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
+ const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
+ minimum_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
+ DEFAULT_EXPECTED_MEMORY));
+ critical_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
+ DEFAULT_CRITICAL_MEMORY));
}
template <class P>
void BufferCache<P>::RunGarbageCollector() {
- const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
+ const bool aggressive_gc = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
int num_iterations = aggressive_gc ? 64 : 32;
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
@@ -488,7 +497,11 @@ void BufferCache<P>::TickFrame() {
const bool skip_preferred = hits * 256 < shots * 251;
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
- if (total_used_memory >= EXPECTED_MEMORY) {
+ // If we can obtain the memory info, use it instead of the estimate.
+ if (runtime.CanReportMemoryUsage()) {
+ total_used_memory = runtime.GetDeviceMemoryUsage();
+ }
+ if (total_used_memory >= minimum_memory) {
RunGarbageCollector();
}
++frame_tick;
@@ -529,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
template <class P>
bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
- const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
- const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+ const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+ const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
if (!cpu_src_address || !cpu_dest_address) {
return false;
}
@@ -588,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
template <class P>
bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
- const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+ const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
if (!cpu_dst_address) {
return false;
}
@@ -612,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
template <class P>
void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
const Binding binding{
.cpu_addr = *cpu_addr,
.size = size,
@@ -650,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
if (is_indexed) {
BindHostIndexBuffer();
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
}
@@ -710,9 +723,9 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
enabled_storage_buffers[stage] |= 1U << ssbo_index;
written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
- const auto& cbufs = maxwell3d.state.shader_stages[stage];
+ const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
- storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
+ storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
}
template <class P>
@@ -747,12 +760,12 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
enabled_compute_storage_buffers |= 1U << ssbo_index;
written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
- const auto& launch_desc = kepler_compute.launch_description;
+ const auto& launch_desc = kepler_compute->launch_description;
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
const auto& cbufs = launch_desc.const_buffer_config;
const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
- compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
+ compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
}
template <class P>
@@ -813,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
const bool is_accuracy_normal =
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
+ auto it = committed_ranges.begin();
+ while (it != committed_ranges.end()) {
+ auto& current_intervals = *it;
+ auto next_it = std::next(it);
+ while (next_it != committed_ranges.end()) {
+ for (auto& interval : *next_it) {
+ current_intervals.subtract(interval);
+ }
+ next_it++;
+ }
+ it++;
+ }
+
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
@@ -903,8 +929,8 @@ void BufferCache<P>::PopAsyncFlushes() {}
template <class P>
bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
- const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
- for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE);
+ for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) {
const BufferId image_id = page_table[page];
if (!image_id) {
++page;
@@ -915,7 +941,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
return true;
}
const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
- page = Common::DivCeil(end_addr, PAGE_SIZE);
+ page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
}
return false;
}
@@ -923,8 +949,8 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
template <class P>
bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
const VAddr end_addr = addr + size;
- const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE);
- for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const u64 page_end = Common::DivCeil(end_addr, YUZU_PAGESIZE);
+ for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) {
const BufferId buffer_id = page_table[page];
if (!buffer_id) {
++page;
@@ -936,15 +962,15 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
if (buf_start_addr < end_addr && addr < buf_end_addr) {
return true;
}
- page = Common::DivCeil(end_addr, PAGE_SIZE);
+ page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
}
return false;
}
template <class P>
bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
- const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
- for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE);
+ for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) {
const BufferId image_id = page_table[page];
if (!image_id) {
++page;
@@ -955,7 +981,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
return true;
}
const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
- page = Common::DivCeil(end_addr, PAGE_SIZE);
+ page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
}
return false;
}
@@ -968,19 +994,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
- const u32 new_offset = offset + maxwell3d.regs.index_array.first *
- maxwell3d.regs.index_array.FormatSizeInBytes();
+ const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+ maxwell3d->regs.index_array.FormatSizeInBytes();
runtime.BindIndexBuffer(buffer, new_offset, size);
} else {
- runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
- maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
- buffer, offset, size);
+ runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+ maxwell3d->regs.index_array.first,
+ maxwell3d->regs.index_array.count, buffer, offset, size);
}
}
template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -991,7 +1017,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
}
flags[Dirty::VertexBuffer0 + index] = false;
- const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+ const u32 stride = maxwell3d->regs.vertex_array[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr);
runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
}
@@ -1131,7 +1157,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
template <class P>
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
- if (maxwell3d.regs.tfb_enabled == 0) {
+ if (maxwell3d->regs.tfb_enabled == 0) {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1216,16 +1242,19 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
template <class P>
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
- if (is_indexed) {
- UpdateIndexBuffer();
- }
- UpdateVertexBuffers();
- UpdateTransformFeedbackBuffers();
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- UpdateUniformBuffers(stage);
- UpdateStorageBuffers(stage);
- UpdateTextureBuffers(stage);
- }
+ do {
+ has_deleted_buffers = false;
+ if (is_indexed) {
+ UpdateIndexBuffer();
+ }
+ UpdateVertexBuffers();
+ UpdateTransformFeedbackBuffers();
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ UpdateUniformBuffers(stage);
+ UpdateStorageBuffers(stage);
+ UpdateTextureBuffers(stage);
+ }
+ } while (has_deleted_buffers);
}
template <class P>
@@ -1239,8 +1268,8 @@ template <class P>
void BufferCache<P>::UpdateIndexBuffer() {
// We have to check for the dirty flags and index count
// The index count is currently changed without updating the dirty flags
- const auto& index_array = maxwell3d.regs.index_array;
- auto& flags = maxwell3d.dirty.flags;
+ const auto& index_array = maxwell3d->regs.index_array;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
return;
}
@@ -1249,7 +1278,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_begin = index_array.StartAddress();
const GPUVAddr gpu_addr_end = index_array.EndAddress();
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
const u32 size = std::min(address_size, draw_size);
@@ -1266,8 +1295,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
template <class P>
void BufferCache<P>::UpdateVertexBuffers() {
- auto& flags = maxwell3d.dirty.flags;
- if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+ auto& flags = maxwell3d->dirty.flags;
+ if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
return;
}
flags[Dirty::VertexBuffers] = false;
@@ -1279,20 +1308,25 @@ void BufferCache<P>::UpdateVertexBuffers() {
template <class P>
void BufferCache<P>::UpdateVertexBuffer(u32 index) {
- if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+ if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
return;
}
- const auto& array = maxwell3d.regs.vertex_array[index];
- const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+ const auto& array = maxwell3d->regs.vertex_array[index];
+ const auto& limit = maxwell3d->regs.vertex_array_limit[index];
const GPUVAddr gpu_addr_begin = array.StartAddress();
const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
- const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
- const u32 size = address_size; // TODO: Analyze stride and number of vertices
- if (array.enable == 0 || size == 0 || !cpu_addr) {
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+ u32 address_size = static_cast<u32>(
+ std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max())));
+ if (array.enable == 0 || address_size == 0 || !cpu_addr) {
vertex_buffers[index] = NULL_BINDING;
return;
}
+ if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
+ address_size =
+ static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
+ }
+ const u32 size = address_size; // TODO: Analyze stride and number of vertices
vertex_buffers[index] = Binding{
.cpu_addr = *cpu_addr,
.size = size,
@@ -1346,7 +1380,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
- if (maxwell3d.regs.tfb_enabled == 0) {
+ if (maxwell3d->regs.tfb_enabled == 0) {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1356,10 +1390,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
- const auto& binding = maxwell3d.regs.tfb_bindings[index];
+ const auto& binding = maxwell3d->regs.tfb_bindings[index];
const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
const u32 size = binding.buffer_size;
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
transform_feedback_buffers[index] = NULL_BINDING;
return;
@@ -1378,10 +1412,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
Binding& binding = compute_uniform_buffers[index];
binding = NULL_BINDING;
- const auto& launch_desc = kepler_compute.launch_description;
+ const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
const auto& cbuf = launch_desc.const_buffer_config[index];
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
if (cpu_addr) {
binding.cpu_addr = *cpu_addr;
binding.size = cbuf.size;
@@ -1436,7 +1470,7 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
if (cpu_addr == 0) {
return NULL_BUFFER_ID;
}
- const u64 page = cpu_addr >> PAGE_BITS;
+ const u64 page = cpu_addr >> YUZU_PAGEBITS;
const BufferId buffer_id = page_table[page];
if (!buffer_id) {
return CreateBuffer(cpu_addr, size);
@@ -1457,8 +1491,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
VAddr end = cpu_addr + wanted_size;
int stream_score = 0;
bool has_stream_leap = false;
- for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
- const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
+ for (; cpu_addr >> YUZU_PAGEBITS < Common::DivCeil(end, YUZU_PAGESIZE);
+ cpu_addr += YUZU_PAGESIZE) {
+ const BufferId overlap_id = page_table[cpu_addr >> YUZU_PAGEBITS];
if (!overlap_id) {
continue;
}
@@ -1469,19 +1504,27 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
overlap_ids.push_back(overlap_id);
overlap.Pick();
const VAddr overlap_cpu_addr = overlap.CpuAddr();
- if (overlap_cpu_addr < begin) {
+ const bool expands_left = overlap_cpu_addr < begin;
+ if (expands_left) {
cpu_addr = begin = overlap_cpu_addr;
}
- end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
-
+ const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes();
+ const bool expands_right = overlap_end > end;
+ if (overlap_end > end) {
+ end = overlap_end;
+ }
stream_score += overlap.StreamScore();
if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
// When this memory region has been joined a bunch of times, we assume it's being used
// as a stream buffer. Increase the size to skip constantly recreating buffers.
has_stream_leap = true;
- begin -= PAGE_SIZE * 256;
- cpu_addr = begin;
- end += PAGE_SIZE * 256;
+ if (expands_right) {
+ begin -= YUZU_PAGESIZE * 256;
+ cpu_addr = begin;
+ }
+ if (expands_left) {
+ end += YUZU_PAGESIZE * 256;
+ }
}
}
return OverlapResult{
@@ -1522,6 +1565,8 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+ auto& new_buffer = slot_buffers[new_buffer_id];
+ runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
@@ -1554,8 +1599,8 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
}
const VAddr cpu_addr_begin = buffer.CpuAddr();
const VAddr cpu_addr_end = cpu_addr_begin + size;
- const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
- const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
+ const u64 page_begin = cpu_addr_begin / YUZU_PAGESIZE;
+ const u64 page_end = Common::DivCeil(cpu_addr_end, YUZU_PAGESIZE);
for (u64 page = page_begin; page != page_end; ++page) {
if constexpr (insert) {
page_table[page] = buffer_id;
@@ -1650,7 +1695,7 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
template <class P>
bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
- std::span<u8> inlined_buffer) {
+ std::span<const u8> inlined_buffer) {
const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
if (!is_dirty) {
return false;
@@ -1786,7 +1831,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
dirty_uniform_buffers.fill(~u32{0});
uniform_buffer_binding_sizes.fill({});
}
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
flags[Dirty::IndexBuffer] = true;
flags[Dirty::VertexBuffers] = true;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1796,16 +1841,18 @@ void BufferCache<P>::NotifyBufferDeletion() {
}
template <class P>
-typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
- const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
- const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
+ bool is_written) const {
+ const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+ const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
+ const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
const Binding binding{
.cpu_addr = *cpu_addr,
- .size = size,
+ .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
.buffer_id = BufferId{},
};
return binding;
@@ -1814,7 +1861,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
template <class P>
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
GPUVAddr gpu_addr, u32 size, PixelFormat format) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
TextureBufferBinding binding;
if (!cpu_addr || size == 0) {
binding.cpu_addr = 0;
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index a8c4b4415..28a2d2090 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -1,40 +1,23 @@
-// MIT License
-//
-// Copyright (c) Ryujinx Team and Contributors
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
-// associated documentation files (the "Software"), to deal in the Software without restriction,
-// including without limitation the rights to use, copy, modify, merge, publish, distribute,
-// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all copies or
-// substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
-// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-//
+// SPDX-FileCopyrightText: Ryujinx Team and Contributors
+// SPDX-License-Identifier: MIT
#include <bit>
-#include "command_classes/host1x.h"
-#include "command_classes/nvdec.h"
-#include "command_classes/vic.h"
#include "video_core/cdma_pusher.h"
-#include "video_core/command_classes/nvdec_common.h"
-#include "video_core/command_classes/sync_manager.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
+#include "video_core/host1x/control.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/nvdec.h"
+#include "video_core/host1x/nvdec_common.h"
+#include "video_core/host1x/sync_manager.h"
+#include "video_core/host1x/vic.h"
#include "video_core/memory_manager.h"
namespace Tegra {
-CDmaPusher::CDmaPusher(GPU& gpu_)
- : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
- vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
- host1x_processor(std::make_unique<Host1x>(gpu)),
- sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
+CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_)
+ : host1x{host1x_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(host1x)),
+ vic_processor(std::make_unique<Host1x::Vic>(host1x, nvdec_processor)),
+ host1x_processor(std::make_unique<Host1x::Control>(host1x)),
+ sync_manager(std::make_unique<Host1x::SyncptIncrManager>(host1x)) {}
CDmaPusher::~CDmaPusher() = default;
@@ -128,16 +111,17 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
static_cast<u32>(vic_thi_state.method_0), data);
- vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data);
+ vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
+ data);
break;
default:
break;
}
break;
- case ChClassId::Host1x:
+ case ChClassId::Control:
// This device is mainly for syncpoint synchronization
LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
- host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data);
+ host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
break;
default:
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 87b49d6ea..83112dfce 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -8,15 +7,18 @@
#include <vector>
#include "common/bit_field.h"
+#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
-class GPU;
+namespace Host1x {
+class Control;
class Host1x;
class Nvdec;
class SyncptIncrManager;
class Vic;
+} // namespace Host1x
enum class ChSubmissionMode : u32 {
SetClass = 0,
@@ -30,7 +32,7 @@ enum class ChSubmissionMode : u32 {
enum class ChClassId : u32 {
NoClass = 0x0,
- Host1x = 0x1,
+ Control = 0x1,
VideoEncodeMpeg = 0x20,
VideoEncodeNvEnc = 0x21,
VideoStreamingVi = 0x30,
@@ -88,7 +90,7 @@ enum class ThiMethod : u32 {
class CDmaPusher {
public:
- explicit CDmaPusher(GPU& gpu_);
+ explicit CDmaPusher(Host1x::Host1x& host1x);
~CDmaPusher();
/// Process the command entry
@@ -101,11 +103,11 @@ private:
/// Write arguments value to the ThiRegisters member at the specified offset
void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
- GPU& gpu;
- std::shared_ptr<Tegra::Nvdec> nvdec_processor;
- std::unique_ptr<Tegra::Vic> vic_processor;
- std::unique_ptr<Tegra::Host1x> host1x_processor;
- std::unique_ptr<SyncptIncrManager> sync_manager;
+ Host1x::Host1x& host1x;
+ std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
+ std::unique_ptr<Tegra::Host1x::Vic> vic_processor;
+ std::unique_ptr<Tegra::Host1x::Control> host1x_processor;
+ std::unique_ptr<Host1x::SyncptIncrManager> sync_manager;
ChClassId current_class{};
ThiRegisters vic_thi_state{};
ThiRegisters nvdec_thi_state{};
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
deleted file mode 100644
index b12494528..000000000
--- a/src/video_core/command_classes/host1x.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "video_core/command_classes/host1x.h"
-#include "video_core/gpu.h"
-
-Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
-
-Tegra::Host1x::~Host1x() = default;
-
-void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
- switch (method) {
- case Method::LoadSyncptPayload32:
- syncpoint_value = argument;
- break;
- case Method::WaitSyncpt:
- case Method::WaitSyncpt32:
- Execute(argument);
- break;
- default:
- UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
- break;
- }
-}
-
-void Tegra::Host1x::Execute(u32 data) {
- gpu.WaitFence(data, syncpoint_value);
-}
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
deleted file mode 100644
index 19dc9e0ab..000000000
--- a/src/video_core/command_classes/sync_manager.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-// MIT License
-//
-// Copyright (c) Ryujinx Team and Contributors
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
-// associated documentation files (the "Software"), to deal in the Software without restriction,
-// including without limitation the rights to use, copy, modify, merge, publish, distribute,
-// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all copies or
-// substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
-// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-//
-
-#include <algorithm>
-#include "sync_manager.h"
-#include "video_core/gpu.h"
-
-namespace Tegra {
-SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
-SyncptIncrManager::~SyncptIncrManager() = default;
-
-void SyncptIncrManager::Increment(u32 id) {
- increments.emplace_back(0, 0, id, true);
- IncrementAllDone();
-}
-
-u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
- const u32 handle = current_id++;
- increments.emplace_back(handle, class_id, id);
- return handle;
-}
-
-void SyncptIncrManager::SignalDone(u32 handle) {
- const auto done_incr =
- std::find_if(increments.begin(), increments.end(),
- [handle](const SyncptIncr& incr) { return incr.id == handle; });
- if (done_incr != increments.cend()) {
- done_incr->complete = true;
- }
- IncrementAllDone();
-}
-
-void SyncptIncrManager::IncrementAllDone() {
- std::size_t done_count = 0;
- for (; done_count < increments.size(); ++done_count) {
- if (!increments[done_count].complete) {
- break;
- }
- gpu.IncrementSyncPoint(increments[done_count].syncpt_id);
- }
- increments.erase(increments.begin(), increments.begin() + done_count);
-}
-} // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h
deleted file mode 100644
index 2c321ec58..000000000
--- a/src/video_core/command_classes/sync_manager.h
+++ /dev/null
@@ -1,64 +0,0 @@
-// MIT License
-//
-// Copyright (c) Ryujinx Team and Contributors
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
-// associated documentation files (the "Software"), to deal in the Software without restriction,
-// including without limitation the rights to use, copy, modify, merge, publish, distribute,
-// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all copies or
-// substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
-// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-//
-
-#pragma once
-
-#include <mutex>
-#include <vector>
-#include "common/common_types.h"
-
-namespace Tegra {
-class GPU;
-struct SyncptIncr {
- u32 id;
- u32 class_id;
- u32 syncpt_id;
- bool complete;
-
- SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
- : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
-};
-
-class SyncptIncrManager {
-public:
- explicit SyncptIncrManager(GPU& gpu);
- ~SyncptIncrManager();
-
- /// Add syncpoint id and increment all
- void Increment(u32 id);
-
- /// Returns a handle to increment later
- u32 IncrementWhenDone(u32 class_id, u32 id);
-
- /// IncrememntAllDone, including handle
- void SignalDone(u32 handle);
-
- /// Increment all sequential pending increments that are already done.
- void IncrementAllDone();
-
-private:
- std::vector<SyncptIncr> increments;
- std::mutex increment_lock;
- u32 current_id{};
-
- GPU& gpu;
-};
-
-} // namespace Tegra
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index 8317d0636..4e75f33ca 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <cstddef>
@@ -132,9 +131,12 @@ constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
// PixelFormat::ASTC_2D_10X5_SRGB
// Missing formats:
-// PixelFormat::ASTC_2D_10X6_UNORM
// PixelFormat::ASTC_2D_10X6_SRGB
+constexpr std::array VIEW_CLASS_ASTC_10x6_RGBA{
+ PixelFormat::ASTC_2D_10X6_UNORM,
+};
+
constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
PixelFormat::ASTC_2D_10X8_UNORM,
PixelFormat::ASTC_2D_10X8_SRGB,
@@ -227,6 +229,7 @@ constexpr Table MakeViewTable() {
EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_10x6_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index 55745e042..a7ce2c198 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
new file mode 100644
index 000000000..cdecc3a91
--- /dev/null
+++ b/src/video_core/control/channel_state.cpp
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Control {
+
+ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
+
+void ChannelState::Init(Core::System& system, GPU& gpu) {
+ ASSERT(memory_manager);
+ dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
+ maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
+ fermi_2d = std::make_unique<Engines::Fermi2D>();
+ kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
+ maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
+ kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
+ initialized = true;
+}
+
+void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+ dma_pusher->BindRasterizer(rasterizer);
+ memory_manager->BindRasterizer(rasterizer);
+ maxwell_3d->BindRasterizer(rasterizer);
+ fermi_2d->BindRasterizer(rasterizer);
+ kepler_memory->BindRasterizer(rasterizer);
+ kepler_compute->BindRasterizer(rasterizer);
+ maxwell_dma->BindRasterizer(rasterizer);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
new file mode 100644
index 000000000..3a7b9872c
--- /dev/null
+++ b/src/video_core/control/channel_state.h
@@ -0,0 +1,68 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra {
+
+class GPU;
+
+namespace Engines {
+class Puller;
+class Fermi2D;
+class Maxwell3D;
+class MaxwellDMA;
+class KeplerCompute;
+class KeplerMemory;
+} // namespace Engines
+
+class MemoryManager;
+class DmaPusher;
+
+namespace Control {
+
+struct ChannelState {
+ explicit ChannelState(s32 bind_id);
+ ChannelState(const ChannelState& state) = delete;
+ ChannelState& operator=(const ChannelState&) = delete;
+ ChannelState(ChannelState&& other) noexcept = default;
+ ChannelState& operator=(ChannelState&& other) noexcept = default;
+
+ void Init(Core::System& system, GPU& gpu);
+
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+ s32 bind_id = -1;
+ /// 3D engine
+ std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+ /// 2D engine
+ std::unique_ptr<Engines::Fermi2D> fermi_2d;
+ /// Compute engine
+ std::unique_ptr<Engines::KeplerCompute> kepler_compute;
+ /// DMA engine
+ std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+ /// Inline memory engine
+ std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+
+ std::shared_ptr<MemoryManager> memory_manager;
+
+ std::unique_ptr<DmaPusher> dma_pusher;
+
+ bool initialized{};
+};
+
+} // namespace Control
+
+} // namespace Tegra
diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp
new file mode 100644
index 000000000..4ebeb6356
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "video_core/control/channel_state_cache.inc"
+
+namespace VideoCommon {
+
+ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
+ : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
+ gpu_memory{*channel_state.memory_manager} {}
+
+template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
new file mode 100644
index 000000000..584a0c26c
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.h
@@ -0,0 +1,101 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <deque>
+#include <limits>
+#include <mutex>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+class KeplerCompute;
+} // namespace Engines
+
+class MemoryManager;
+
+namespace Control {
+struct ChannelState;
+}
+
+} // namespace Tegra
+
+namespace VideoCommon {
+
+class ChannelInfo {
+public:
+ ChannelInfo() = delete;
+ explicit ChannelInfo(Tegra::Control::ChannelState& state);
+ ChannelInfo(const ChannelInfo& state) = delete;
+ ChannelInfo& operator=(const ChannelInfo&) = delete;
+ ChannelInfo(ChannelInfo&& other) = default;
+ ChannelInfo& operator=(ChannelInfo&& other) = default;
+
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+};
+
+template <class P>
+class ChannelSetupCaches {
+public:
+ /// Operations for seting the channel of execution.
+ virtual ~ChannelSetupCaches();
+
+ /// Create channel state.
+ virtual void CreateChannel(Tegra::Control::ChannelState& channel);
+
+ /// Bind a channel for execution.
+ void BindToChannel(s32 id);
+
+ /// Erase channel's state.
+ void EraseChannel(s32 id);
+
+ Tegra::MemoryManager* GetFromID(size_t id) const {
+ std::unique_lock<std::mutex> lk(config_mutex);
+ const auto ref = address_spaces.find(id);
+ return ref->second.gpu_memory;
+ }
+
+ std::optional<size_t> getStorageID(size_t id) const {
+ std::unique_lock<std::mutex> lk(config_mutex);
+ const auto ref = address_spaces.find(id);
+ if (ref == address_spaces.end()) {
+ return std::nullopt;
+ }
+ return ref->second.storage_id;
+ }
+
+protected:
+ static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
+ P* channel_state;
+ size_t current_channel_id{UNSET_CHANNEL};
+ size_t current_address_space{};
+ Tegra::Engines::Maxwell3D* maxwell3d;
+ Tegra::Engines::KeplerCompute* kepler_compute;
+ Tegra::MemoryManager* gpu_memory;
+
+ std::deque<P> channel_storage;
+ std::deque<size_t> free_channel_ids;
+ std::unordered_map<s32, size_t> channel_map;
+ std::vector<size_t> active_channel_ids;
+ struct AddresSpaceRef {
+ size_t ref_count;
+ size_t storage_id;
+ Tegra::MemoryManager* gpu_memory;
+ };
+ std::unordered_map<size_t, AddresSpaceRef> address_spaces;
+ mutable std::mutex config_mutex;
+
+ virtual void OnGPUASRegister([[maybe_unused]] size_t map_id) {}
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc
new file mode 100644
index 000000000..460313893
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.inc
@@ -0,0 +1,86 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <algorithm>
+
+#include "video_core/control/channel_state.h"
+#include "video_core/control/channel_state_cache.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+
+namespace VideoCommon {
+
+template <class P>
+ChannelSetupCaches<P>::~ChannelSetupCaches() = default;
+
+template <class P>
+void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+ std::unique_lock<std::mutex> lk(config_mutex);
+ ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+ auto new_id = [this, &channel]() {
+ if (!free_channel_ids.empty()) {
+ auto id = free_channel_ids.front();
+ free_channel_ids.pop_front();
+ new (&channel_storage[id]) P(channel);
+ return id;
+ }
+ channel_storage.emplace_back(channel);
+ return channel_storage.size() - 1;
+ }();
+ channel_map.emplace(channel.bind_id, new_id);
+ if (current_channel_id != UNSET_CHANNEL) {
+ channel_state = &channel_storage[current_channel_id];
+ }
+ active_channel_ids.push_back(new_id);
+ auto as_it = address_spaces.find(channel.memory_manager->GetID());
+ if (as_it != address_spaces.end()) {
+ as_it->second.ref_count++;
+ return;
+ }
+ AddresSpaceRef new_gpu_mem_ref{
+ .ref_count = 1,
+ .storage_id = address_spaces.size(),
+ .gpu_memory = channel.memory_manager.get(),
+ };
+ address_spaces.emplace(channel.memory_manager->GetID(), new_gpu_mem_ref);
+ OnGPUASRegister(channel.memory_manager->GetID());
+}
+
+/// Bind a channel for execution.
+template <class P>
+void ChannelSetupCaches<P>::BindToChannel(s32 id) {
+ std::unique_lock<std::mutex> lk(config_mutex);
+ auto it = channel_map.find(id);
+ ASSERT(it != channel_map.end() && id >= 0);
+ current_channel_id = it->second;
+ channel_state = &channel_storage[current_channel_id];
+ maxwell3d = &channel_state->maxwell3d;
+ kepler_compute = &channel_state->kepler_compute;
+ gpu_memory = &channel_state->gpu_memory;
+ current_address_space = gpu_memory->GetID();
+}
+
+/// Erase channel's channel_state.
+template <class P>
+void ChannelSetupCaches<P>::EraseChannel(s32 id) {
+ std::unique_lock<std::mutex> lk(config_mutex);
+ const auto it = channel_map.find(id);
+ ASSERT(it != channel_map.end() && id >= 0);
+ const auto this_id = it->second;
+ free_channel_ids.push_back(this_id);
+ channel_map.erase(it);
+ if (this_id == current_channel_id) {
+ current_channel_id = UNSET_CHANNEL;
+ channel_state = nullptr;
+ maxwell3d = nullptr;
+ kepler_compute = nullptr;
+ gpu_memory = nullptr;
+ } else if (current_channel_id != UNSET_CHANNEL) {
+ channel_state = &channel_storage[current_channel_id];
+ }
+ active_channel_ids.erase(
+ std::find(active_channel_ids.begin(), active_channel_ids.end(), this_id));
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp
new file mode 100644
index 000000000..f7cbe204e
--- /dev/null
+++ b/src/video_core/control/scheduler.cpp
@@ -0,0 +1,32 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <memory>
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
+#include "video_core/gpu.h"
+
+namespace Tegra::Control {
+Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
+
+Scheduler::~Scheduler() = default;
+
+void Scheduler::Push(s32 channel, CommandList&& entries) {
+ std::unique_lock lk(scheduling_guard);
+ auto it = channels.find(channel);
+ ASSERT(it != channels.end());
+ auto channel_state = it->second;
+ gpu.BindChannel(channel_state->bind_id);
+ channel_state->dma_pusher->Push(std::move(entries));
+ channel_state->dma_pusher->DispatchCalls();
+}
+
+void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
+ s32 channel = new_channel->bind_id;
+ std::unique_lock lk(scheduling_guard);
+ channels.emplace(channel, new_channel);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h
new file mode 100644
index 000000000..44addf61c
--- /dev/null
+++ b/src/video_core/control/scheduler.h
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "video_core/dma_pusher.h"
+
+namespace Tegra {
+
+class GPU;
+
+namespace Control {
+
+struct ChannelState;
+
+class Scheduler {
+public:
+ explicit Scheduler(GPU& gpu_);
+ ~Scheduler();
+
+ void Push(s32 channel, CommandList&& entries);
+
+ void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
+
+private:
+ std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
+ std::mutex scheduling_guard;
+ GPU& gpu;
+};
+
+} // namespace Control
+
+} // namespace Tegra
diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h
index 4f1d29c04..d13ee622b 100644
--- a/src/video_core/delayed_destruction_ring.h
+++ b/src/video_core/delayed_destruction_ring.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index b1be065c3..9dc4341f0 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <cstddef>
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index d63ad5a35..736082d83 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 8d28bd884..9835e3ac1 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -1,12 +1,10 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/cityhash.h"
#include "common/microprofile.h"
#include "common/settings.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
@@ -14,7 +12,10 @@
namespace Tegra {
-DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
+DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+ Control::ChannelState& channel_state_)
+ : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
+ *this, channel_state_} {}
DmaPusher::~DmaPusher() = default;
@@ -23,8 +24,6 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
- gpu.SyncGuestHost();
-
dma_pushbuffer_subindex = 0;
dma_state.is_last_call = true;
@@ -35,7 +34,6 @@ void DmaPusher::DispatchCalls() {
}
}
gpu.FlushCommands();
- gpu.SyncGuestHost();
gpu.OnCommandListEnd();
}
@@ -78,11 +76,11 @@ bool DmaPusher::Step() {
// Push buffer non-empty, read a word
command_headers.resize(command_list_header.size);
if (Settings::IsGPULevelHigh()) {
- gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ memory_manager.ReadBlock(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
} else {
- gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
}
}
for (std::size_t index = 0; index < command_headers.size();) {
@@ -156,7 +154,7 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
void DmaPusher::CallMethod(u32 argument) const {
if (dma_state.method < non_puller_methods) {
- gpu.CallMethod(GPU::MethodCall{
+ puller.CallPullerMethod(Engines::Puller::MethodCall{
dma_state.method,
argument,
dma_state.subchannel,
@@ -170,12 +168,16 @@ void DmaPusher::CallMethod(u32 argument) const {
void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
if (dma_state.method < non_puller_methods) {
- gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
- dma_state.method_count);
+ puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
+ dma_state.method_count);
} else {
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
num_methods, dma_state.method_count);
}
}
+void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+ puller.BindRasterizer(rasterizer);
+}
+
} // namespace Tegra
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 19f286fa7..938f0f11c 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -11,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
+#include "video_core/engines/puller.h"
namespace Core {
class System;
@@ -18,7 +18,12 @@ class System;
namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+
class GPU;
+class MemoryManager;
enum class SubmissionMode : u32 {
IncreasingOld = 0,
@@ -32,24 +37,32 @@ enum class SubmissionMode : u32 {
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
+// Register documentation:
+// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/cla26f.h
+//
+// Register Description (approx):
+// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
enum class BufferMethods : u32 {
BindObject = 0x0,
+ Illegal = 0x1,
Nop = 0x2,
SemaphoreAddressHigh = 0x4,
SemaphoreAddressLow = 0x5,
- SemaphoreSequence = 0x6,
- SemaphoreTrigger = 0x7,
- NotifyIntr = 0x8,
+ SemaphoreSequencePayload = 0x6,
+ SemaphoreOperation = 0x7,
+ NonStallInterrupt = 0x8,
WrcacheFlush = 0x9,
- Unk28 = 0xA,
- UnkCacheFlush = 0xB,
+ MemOpA = 0xA,
+ MemOpB = 0xB,
+ MemOpC = 0xC,
+ MemOpD = 0xD,
RefCnt = 0x14,
SemaphoreAcquire = 0x1A,
SemaphoreRelease = 0x1B,
- FenceValue = 0x1C,
- FenceAction = 0x1D,
- WaitForInterrupt = 0x1E,
- Unk7c = 0x1F,
+ SyncpointPayload = 0x1C,
+ SyncpointOperation = 0x1D,
+ WaitForIdle = 0x1E,
+ CRCCheck = 0x1F,
Yield = 0x20,
NonPullerMethods = 0x40,
};
@@ -103,7 +116,8 @@ struct CommandList final {
*/
class DmaPusher final {
public:
- explicit DmaPusher(Core::System& system_, GPU& gpu_);
+ explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+ Control::ChannelState& channel_state_);
~DmaPusher();
void Push(CommandList&& entries) {
@@ -116,6 +130,8 @@ public:
subchannels[subchannel_id] = engine;
}
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
private:
static constexpr u32 non_puller_methods = 0x40;
static constexpr u32 max_subchannels = 8;
@@ -149,6 +165,8 @@ private:
GPU& gpu;
Core::System& system;
+ MemoryManager& memory_manager;
+ mutable Engines::Puller puller;
};
} // namespace Tegra
diff --git a/src/video_core/engines/const_buffer_info.h b/src/video_core/engines/const_buffer_info.h
index d8f672462..abafcf090 100644
--- a/src/video_core/engines/const_buffer_info.h
+++ b/src/video_core/engines/const_buffer_info.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
index c7ffd68c5..26cde8584 100644
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 351b110fe..a34819234 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -1,9 +1,9 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
+#include "common/algorithm.h"
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
@@ -35,21 +35,48 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
if (!is_last_call) {
return;
}
+ ProcessData(inner_buffer);
+}
+
+void State::ProcessData(const u32* data, size_t num_data) {
+ std::span<const u8> read_buffer(reinterpret_cast<const u8*>(data), num_data * sizeof(u32));
+ ProcessData(read_buffer);
+}
+
+void State::ProcessData(std::span<const u8> read_buffer) {
const GPUVAddr address{regs.dest.Address()};
if (is_linear) {
- rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer);
+ if (regs.line_count == 1) {
+ rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
+ } else {
+ for (u32 line = 0; line < regs.line_count; ++line) {
+ const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
+ memory_manager.WriteBlockUnsafe(
+ dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
+ regs.line_length_in);
+ }
+ memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
+ }
} else {
- UNIMPLEMENTED_IF(regs.dest.z != 0);
- UNIMPLEMENTED_IF(regs.dest.depth != 1);
- UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0);
- UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0);
+ u32 width = regs.dest.width;
+ u32 x_elements = regs.line_length_in;
+ u32 x_offset = regs.dest.x;
+ const u32 bpp_shift = Common::FoldRight(
+ 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+ width, x_elements, x_offset, static_cast<u32>(address));
+ width >>= bpp_shift;
+ x_elements >>= bpp_shift;
+ x_offset >>= bpp_shift;
+ const u32 bytes_per_pixel = 1U << bpp_shift;
const std::size_t dst_size = Tegra::Texture::CalculateSize(
- true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0);
+ true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
+ regs.dest.BlockHeight(), regs.dest.BlockDepth());
tmp_buffer.resize(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
- Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
- regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
- tmp_buffer.data());
+ Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
+ regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
+ x_elements, regs.line_count, regs.dest.BlockHeight(),
+ regs.dest.BlockDepth(), regs.line_length_in);
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
}
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index c9c5ec8c3..f08f6e36a 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -1,9 +1,9 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
+#include <span>
#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -34,7 +34,7 @@ struct Registers {
u32 width;
u32 height;
u32 depth;
- u32 z;
+ u32 layer;
u32 x;
u32 y;
@@ -63,11 +63,14 @@ public:
void ProcessExec(bool is_linear_);
void ProcessData(u32 data, bool is_last_call);
+ void ProcessData(const u32* data, size_t num_data);
/// Binds a rasterizer to this engine.
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
private:
+ void ProcessData(std::span<const u8> read_buffer);
+
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index f26530ede..453e0fb01 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/logging/log.h"
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index d76c5ed56..1229aa35b 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -9,7 +8,6 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "common/math_util.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/gpu.h"
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 5a1c12076..7c50bdbe0 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <bitset>
#include "common/assert.h"
@@ -10,7 +9,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
@@ -38,8 +36,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
}
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
upload_state.ProcessData(method_argument, is_last_call);
- if (is_last_call) {
- }
break;
}
case KEPLER_COMPUTE_REG_INDEX(launch):
@@ -52,8 +48,15 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) {
- for (std::size_t i = 0; i < amount; i++) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ switch (method) {
+ case KEPLER_COMPUTE_REG_INDEX(data_upload):
+ upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+ return;
+ default:
+ for (std::size_t i = 0; i < amount; i++) {
+ CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ }
+ break;
}
}
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index f8b8d06ac..aab309ecc 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -12,7 +11,6 @@
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
-#include "video_core/gpu.h"
#include "video_core/textures/texture.h"
namespace Core {
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 8aed16caa..a3fbab1e5 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/logging/log.h"
@@ -9,8 +8,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
-#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
@@ -36,8 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
}
case KEPLERMEMORY_REG_INDEX(data): {
upload_state.ProcessData(method_argument, is_last_call);
- if (is_last_call) {
- }
break;
}
}
@@ -45,8 +40,15 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) {
- for (std::size_t i = 0; i < amount; i++) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ switch (method) {
+ case KEPLERMEMORY_REG_INDEX(data):
+ upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+ return;
+ default:
+ for (std::size_t i = 0; i < amount; i++) {
+ CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ }
+ break;
}
}
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 949e2fae1..5fe7489f0 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -1,18 +1,15 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstddef>
-#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
-#include "video_core/gpu.h"
namespace Core {
class System;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5d6d217bb..3c6e44a25 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -1,12 +1,12 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include <optional>
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -173,6 +173,8 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(shadow_ram_control):
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
return;
+ case MAXWELL3D_REG_INDEX(macros.upload_address):
+ return macro_engine->ClearCode(regs.macros.upload_address);
case MAXWELL3D_REG_INDEX(macros.data):
return macro_engine->AddCode(regs.macros.upload_address, argument);
case MAXWELL3D_REG_INDEX(macros.bind):
@@ -195,7 +197,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
- return StartCBData(method);
+ return ProcessCBData(argument);
case MAXWELL3D_REG_INDEX(cb_bind[0]):
return ProcessCBBind(0);
case MAXWELL3D_REG_INDEX(cb_bind[1]):
@@ -208,6 +210,21 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
+ case MAXWELL3D_REG_INDEX(small_index):
+ regs.index_array.count = regs.small_index.count;
+ regs.index_array.first = regs.small_index.first;
+ dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(small_index_2):
+ regs.index_array.count = regs.small_index_2.count;
+ regs.index_array.first = regs.small_index_2.first;
+ dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ // a macro calls this one over and over, should it increase instancing?
+ // Used by Hades and likely other Vulkan games.
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(topology_override):
+ use_topology_override = true;
+ return;
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
@@ -222,11 +239,12 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return upload_state.ProcessExec(regs.exec_upload.linear != 0);
case MAXWELL3D_REG_INDEX(data_upload):
upload_state.ProcessData(argument, is_last_call);
- if (is_last_call) {
- }
return;
case MAXWELL3D_REG_INDEX(fragment_barrier):
return rasterizer->FragmentBarrier();
+ case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache):
+ rasterizer->InvalidateGPUCache();
+ return rasterizer->WaitForIdle();
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier();
}
@@ -248,14 +266,6 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
}
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
- if (method == cb_data_state.current) {
- regs.reg_array[method] = method_argument;
- ProcessCBData(method_argument);
- return;
- } else if (cb_data_state.current != null_cb_data) {
- FinishCBData();
- }
-
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
@@ -302,8 +312,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
- ProcessCBMultiData(method, base_start, amount);
+ ProcessCBMultiData(base_start, amount);
break;
+ case MAXWELL3D_REG_INDEX(data_upload):
+ upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+ return;
default:
for (std::size_t i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
@@ -360,6 +373,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
}
+void Maxwell3D::ProcessTopologyOverride() {
+ using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
+ using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
+
+ PrimitiveTopology topology{};
+
+ switch (regs.topology_override) {
+ case PrimitiveTopologyOverride::None:
+ topology = regs.draw.topology;
+ break;
+ case PrimitiveTopologyOverride::Points:
+ topology = PrimitiveTopology::Points;
+ break;
+ case PrimitiveTopologyOverride::Lines:
+ topology = PrimitiveTopology::Lines;
+ break;
+ case PrimitiveTopologyOverride::LineStrip:
+ topology = PrimitiveTopology::LineStrip;
+ break;
+ default:
+ topology = static_cast<PrimitiveTopology>(regs.topology_override);
+ break;
+ }
+
+ if (use_topology_override) {
+ regs.draw.topology.Assign(topology);
+ }
+}
+
void Maxwell3D::FlushMMEInlineDraw() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
@@ -370,6 +412,8 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
rasterizer->Draw(is_indexed, true);
@@ -409,18 +453,10 @@ void Maxwell3D::ProcessFirmwareCall4() {
}
void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
- struct LongQueryResult {
- u64_le value;
- u64_le timestamp;
- };
- static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
const GPUVAddr sequence_address{regs.query.QueryAddress()};
if (long_query) {
- // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
- // GPU, this command may actually take a while to complete in real hardware due to GPU
- // wait queues.
- LongQueryResult query_result{payload, system.GPU().GetTicks()};
- memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks());
+ memory_manager.Write<u64>(sequence_address, payload);
} else {
memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
}
@@ -434,10 +470,25 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
- if (regs.query.query_get.fence == 1) {
- rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+ if (regs.query.query_get.fence == 1 || regs.query.query_get.short_query != 0) {
+ const GPUVAddr sequence_address{regs.query.QueryAddress()};
+ const u32 payload = regs.query.query_sequence;
+ std::function<void()> operation([this, sequence_address, payload] {
+ memory_manager.Write<u32>(sequence_address, payload);
+ });
+ rasterizer->SignalFence(std::move(operation));
} else {
- StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
+ struct LongQueryResult {
+ u64_le value;
+ u64_le timestamp;
+ };
+ const GPUVAddr sequence_address{regs.query.QueryAddress()};
+ const u32 payload = regs.query.query_sequence;
+ std::function<void()> operation([this, sequence_address, payload] {
+ memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks());
+ memory_manager.Write<u64>(sequence_address, payload);
+ });
+ rasterizer->SyncOperation(std::move(operation));
}
break;
case Regs::QueryOperation::Acquire:
@@ -529,6 +580,8 @@ void Maxwell3D::DrawArrays() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
if (regs.draw.instance_next) {
// Increment the current instance *before* drawing.
state.current_instance += 1;
@@ -555,8 +608,8 @@ void Maxwell3D::DrawArrays() {
std::optional<u64> Maxwell3D::GetQueryResult() {
switch (regs.query.query_get.select) {
- case Regs::QuerySelect::Zero:
- return 0;
+ case Regs::QuerySelect::Payload:
+ return regs.query.query_sequence;
case Regs::QuerySelect::SamplesPassed:
// Deferred.
rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed,
@@ -587,46 +640,7 @@ void Maxwell3D::ProcessCBBind(size_t stage_index) {
rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
}
-void Maxwell3D::ProcessCBData(u32 value) {
- const u32 id = cb_data_state.id;
- cb_data_state.buffer[id][cb_data_state.counter] = value;
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
- cb_data_state.counter++;
-}
-
-void Maxwell3D::StartCBData(u32 method) {
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
- cb_data_state.start_pos = regs.const_buffer.cb_pos;
- cb_data_state.id = method - first_cb_data;
- cb_data_state.current = method;
- cb_data_state.counter = 0;
- ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
-}
-
-void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
- if (cb_data_state.current != method) {
- if (cb_data_state.current != null_cb_data) {
- FinishCBData();
- }
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
- cb_data_state.start_pos = regs.const_buffer.cb_pos;
- cb_data_state.id = method - first_cb_data;
- cb_data_state.current = method;
- cb_data_state.counter = 0;
- }
- const std::size_t id = cb_data_state.id;
- const std::size_t size = amount;
- std::size_t i = 0;
- for (; i < size; i++) {
- cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
- cb_data_state.counter++;
- }
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
-}
-
-void Maxwell3D::FinishCBData() {
+void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
// Write the input value to the current const buffer at the current position.
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
ASSERT(buffer_address != 0);
@@ -634,14 +648,16 @@ void Maxwell3D::FinishCBData() {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
- const GPUVAddr address{buffer_address + cb_data_state.start_pos};
- const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
+ const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+ const size_t copy_size = amount * sizeof(u32);
+ memory_manager.WriteBlock(address, start_base, copy_size);
- const u32 id = cb_data_state.id;
- memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+ // Increment the current buffer position.
+ regs.const_buffer.cb_pos += static_cast<u32>(copy_size);
+}
- cb_data_state.id = null_cb_data;
- cb_data_state.current = null_cb_data;
+void Maxwell3D::ProcessCBData(u32 value) {
+ ProcessCBMultiData(&value, 1);
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dc9df6c8b..5f9eb208c 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -10,7 +9,6 @@
#include <limits>
#include <optional>
#include <type_traits>
-#include <unordered_map>
#include <vector>
#include "common/assert.h"
@@ -95,7 +93,7 @@ public:
};
enum class QuerySelect : u32 {
- Zero = 0,
+ Payload = 0,
TimeElapsed = 2,
TransformFeedbackPrimitivesGenerated = 11,
PrimitivesGenerated = 18,
@@ -204,7 +202,7 @@ public:
case Size::Size_11_11_10:
return 3;
default:
- UNREACHABLE();
+ ASSERT(false);
return 1;
}
}
@@ -240,7 +238,7 @@ public:
case Size::Size_11_11_10:
return 4;
default:
- UNREACHABLE();
+ ASSERT(false);
return 1;
}
}
@@ -276,7 +274,7 @@ public:
case Size::Size_11_11_10:
return "11_11_10";
default:
- UNREACHABLE();
+ ASSERT(false);
return {};
}
}
@@ -298,7 +296,7 @@ public:
case Type::Float:
return "FLOAT";
}
- UNREACHABLE();
+ ASSERT(false);
return {};
}
@@ -338,7 +336,7 @@ public:
case 3:
return {x3, y3};
default:
- UNREACHABLE();
+ ASSERT(false);
return {0, 0};
}
}
@@ -367,6 +365,22 @@ public:
Patches = 0xe,
};
+ // Constants as from NVC0_3D_UNK1970_D3D
+ // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
+ enum class PrimitiveTopologyOverride : u32 {
+ None = 0x0,
+ Points = 0x1,
+ Lines = 0x2,
+ LineStrip = 0x3,
+ Triangles = 0x4,
+ TriangleStrip = 0x5,
+ LinesAdjacency = 0xa,
+ LineStripAdjacency = 0xb,
+ TrianglesAdjacency = 0xc,
+ TriangleStripAdjacency = 0xd,
+ Patches = 0xe,
+ };
+
enum class IndexFormat : u32 {
UnsignedByte = 0x0,
UnsignedShort = 0x1,
@@ -1179,7 +1193,7 @@ public:
case IndexFormat::UnsignedInt:
return 4;
}
- UNREACHABLE();
+ ASSERT(false);
return 1;
}
@@ -1200,7 +1214,17 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS_NOINIT(0x7);
+ union {
+ BitField<0, 16, u32> first;
+ BitField<16, 16, u32> count;
+ } small_index;
+
+ union {
+ BitField<0, 16, u32> first;
+ BitField<16, 16, u32> count;
+ } small_index_2;
+
+ INSERT_PADDING_WORDS_NOINIT(0x5);
INSERT_PADDING_WORDS_NOINIT(0x1F);
@@ -1244,7 +1268,11 @@ public:
BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
- INSERT_PADDING_WORDS_NOINIT(0x1F);
+ INSERT_PADDING_WORDS_NOINIT(0xC);
+
+ PrimitiveTopologyOverride topology_override;
+
+ INSERT_PADDING_WORDS_NOINIT(0x12);
u32 depth_bounds_enable;
@@ -1520,10 +1548,8 @@ private:
void ProcessSyncPoint();
/// Handles a write to the CB_DATA[i] register.
- void StartCBData(u32 method);
void ProcessCBData(u32 value);
- void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
- void FinishCBData();
+ void ProcessCBMultiData(const u32* start_base, u32 amount);
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
@@ -1531,6 +1557,9 @@ private:
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
+ /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
+ void ProcessTopologyOverride();
+
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
@@ -1555,20 +1584,10 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
std::unique_ptr<MacroEngine> macro_engine;
- static constexpr u32 null_cb_data = 0xFFFFFFFF;
- struct CBDataState {
- static constexpr size_t inline_size = 0x4000;
- std::array<std::array<u32, inline_size>, 16> buffer;
- u32 current{null_cb_data};
- u32 id{null_cb_data};
- u32 start_pos{};
- u32 counter{};
- };
- CBDataState cb_data_state;
-
Upload::State upload_state;
bool execute_on{true};
+ bool use_topology_override{false};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1685,6 +1704,7 @@ ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(small_index, 0x5F9);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1694,6 +1714,7 @@ ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
+ASSERT_REG_POSITION(topology_override, 0x65C);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 67388d980..3909d36c1 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -1,7 +1,7 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "common/algorithm.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -53,18 +53,15 @@ void MaxwellDMA::Launch() {
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
- ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
- ASSERT(regs.dst_params.origin.x == 0);
- ASSERT(regs.dst_params.origin.y == 0);
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
- UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");
+ UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
return;
}
@@ -79,6 +76,7 @@ void MaxwellDMA::Launch() {
CopyPitchToBlockLinear();
}
}
+ ReleaseSemaphore();
}
void MaxwellDMA::CopyPitchToPitch() {
@@ -122,22 +120,40 @@ void MaxwellDMA::CopyPitchToPitch() {
void MaxwellDMA::CopyBlockLinearToPitch() {
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
- UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
+ const bool is_remapping = regs.launch_dma.remap_enable != 0;
+
// Optimized path for micro copies.
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
- if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
+ if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
regs.src_params.height > GOB_SIZE_Y) {
FastCopyBlockLinearToPitch();
return;
}
// Deswizzle the input and copy it over.
- UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
- const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
const Parameters& src_params = regs.src_params;
- const u32 width = src_params.width;
+
+ const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
+ const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
+
+ const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
+
+ u32 width = src_params.width;
+ u32 x_elements = regs.line_length_in;
+ u32 x_offset = src_params.origin.x;
+ u32 bpp_shift = 0U;
+ if (!is_remapping) {
+ bpp_shift = Common::FoldRight(
+ 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+ width, x_elements, x_offset, static_cast<u32>(regs.offset_in));
+ width >>= bpp_shift;
+ x_elements >>= bpp_shift;
+ x_offset >>= bpp_shift;
+ }
+
+ const u32 bytes_per_pixel = base_bpp << bpp_shift;
const u32 height = src_params.height;
const u32 depth = src_params.depth;
const u32 block_height = src_params.block_size.height;
@@ -155,29 +171,45 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
- UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
- block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
- read_buffer.data());
+ UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
+ src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
+ regs.pitch_out);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyPitchToBlockLinear() {
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
- UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
+ UNIMPLEMENTED_IF(regs.dst_params.layer != 0);
+
+ const bool is_remapping = regs.launch_dma.remap_enable != 0;
+ const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
+ const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
const auto& dst_params = regs.dst_params;
- const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
- const u32 width = dst_params.width;
+
+ const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
+
+ u32 width = dst_params.width;
+ u32 x_elements = regs.line_length_in;
+ u32 x_offset = dst_params.origin.x;
+ u32 bpp_shift = 0U;
+ if (!is_remapping) {
+ bpp_shift = Common::FoldRight(
+ 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+ width, x_elements, x_offset, static_cast<u32>(regs.offset_out));
+ width >>= bpp_shift;
+ x_elements >>= bpp_shift;
+ x_offset >>= bpp_shift;
+ }
+
+ const u32 bytes_per_pixel = base_bpp << bpp_shift;
const u32 height = dst_params.height;
const u32 depth = dst_params.depth;
const u32 block_height = dst_params.block_size.height;
const u32 block_depth = dst_params.block_size.depth;
const size_t dst_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- const size_t dst_layer_size =
- CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
-
const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
if (read_buffer.size() < src_size) {
@@ -187,31 +219,23 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
write_buffer.resize(dst_size);
}
+ memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
if (Settings::IsGPULevelExtreme()) {
- memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
} else {
- memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
}
// If the input is linear and the output is tiled, swizzle the input and copy it over.
- if (regs.dst_params.block_size.depth > 0) {
- ASSERT(dst_params.layer == 0);
- SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height,
- bytes_per_pixel, block_height, block_depth, dst_params.origin.x,
- dst_params.origin.y, write_buffer.data(), read_buffer.data());
- } else {
- SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel,
- write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(),
- block_height, dst_params.origin.x, dst_params.origin.y);
- }
+ SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
+ dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
+ regs.pitch_in);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::FastCopyBlockLinearToPitch() {
- const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
+ const u32 bytes_per_pixel = 1U;
const size_t src_size = GOB_SIZE;
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
u32 pos_x = regs.src_params.origin.x;
@@ -237,11 +261,38 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
}
- UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
- bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
- write_buffer.data(), read_buffer.data());
+ UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width,
+ regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count,
+ regs.src_params.block_size.height, regs.src_params.block_size.depth,
+ regs.pitch_out);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
+void MaxwellDMA::ReleaseSemaphore() {
+ const auto type = regs.launch_dma.semaphore_type;
+ const GPUVAddr address = regs.semaphore.address;
+ const u32 payload = regs.semaphore.payload;
+ switch (type) {
+ case LaunchDMA::SemaphoreType::NONE:
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: {
+ std::function<void()> operation(
+ [this, address, payload] { memory_manager.Write<u32>(address, payload); });
+ rasterizer->SignalFence(std::move(operation));
+ break;
+ }
+ case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: {
+ std::function<void()> operation([this, address, payload] {
+ memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks());
+ memory_manager.Write<u64>(address, payload);
+ });
+ rasterizer->SignalFence(std::move(operation));
+ break;
+ }
+ default:
+ ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast<u32>(type.Value()));
+ }
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index a04514425..bc48320ce 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -8,10 +7,8 @@
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
-#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
-#include "video_core/gpu.h"
namespace Core {
class System;
@@ -192,10 +189,16 @@ public:
BitField<4, 3, Swizzle> dst_y;
BitField<8, 3, Swizzle> dst_z;
BitField<12, 3, Swizzle> dst_w;
+ BitField<0, 12, u32> dst_components_raw;
BitField<16, 2, u32> component_size_minus_one;
BitField<20, 2, u32> num_src_components_minus_one;
BitField<24, 2, u32> num_dst_components_minus_one;
};
+
+ Swizzle GetComponent(size_t i) const {
+ const u32 raw = dst_components_raw;
+ return static_cast<Swizzle>((raw >> (i * 3)) & 0x7);
+ }
};
static_assert(sizeof(RemapConst) == 12);
@@ -224,6 +227,8 @@ private:
void FastCopyBlockLinearToPitch();
+ void ReleaseSemaphore();
+
Core::System& system;
MemoryManager& memory_manager;
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
new file mode 100644
index 000000000..cca890792
--- /dev/null
+++ b/src/video_core/engines/puller.cpp
@@ -0,0 +1,306 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "core/core.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra::Engines {
+
+Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
+ Control::ChannelState& channel_state_)
+ : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
+ channel_state_} {}
+
+Puller::~Puller() = default;
+
+void Puller::ProcessBindMethod(const MethodCall& method_call) {
+ // Bind the current subchannel to the desired engine id.
+ LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
+ method_call.argument);
+ const auto engine_id = static_cast<EngineID>(method_call.argument);
+ bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
+ switch (engine_id) {
+ case EngineID::FERMI_TWOD_A:
+ dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
+ break;
+ case EngineID::MAXWELL_B:
+ dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
+ break;
+ case EngineID::KEPLER_COMPUTE_B:
+ dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+ }
+}
+
+void Puller::ProcessFenceActionMethod() {
+ switch (regs.fence_action.op) {
+ case Puller::FenceOperation::Acquire:
+ // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
+ // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+ rasterizer->ReleaseFences();
+ break;
+ case Puller::FenceOperation::Increment:
+ rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
+ }
+}
+
+void Puller::ProcessSemaphoreTriggerMethod() {
+ const auto semaphoreOperationMask = 0xF;
+ const auto op =
+ static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
+ if (op == GpuSemaphoreOperation::WriteLong) {
+ const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
+ const u32 payload = regs.semaphore_sequence;
+ std::function<void()> operation([this, sequence_address, payload] {
+ memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks());
+ memory_manager.Write<u64>(sequence_address, payload);
+ });
+ rasterizer->SignalFence(std::move(operation));
+ } else {
+ do {
+ const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
+ regs.acquire_source = true;
+ regs.acquire_value = regs.semaphore_sequence;
+ if (op == GpuSemaphoreOperation::AcquireEqual) {
+ regs.acquire_active = true;
+ regs.acquire_mode = false;
+ if (word != regs.acquire_value) {
+ rasterizer->ReleaseFences();
+ continue;
+ }
+ } else if (op == GpuSemaphoreOperation::AcquireGequal) {
+ regs.acquire_active = true;
+ regs.acquire_mode = true;
+ if (word < regs.acquire_value) {
+ rasterizer->ReleaseFences();
+ continue;
+ }
+ } else if (op == GpuSemaphoreOperation::AcquireMask) {
+ if (word && regs.semaphore_sequence == 0) {
+ rasterizer->ReleaseFences();
+ continue;
+ }
+ } else {
+ LOG_ERROR(HW_GPU, "Invalid semaphore operation");
+ }
+ } while (false);
+ }
+}
+
+void Puller::ProcessSemaphoreRelease() {
+ const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
+ const u32 payload = regs.semaphore_release;
+ std::function<void()> operation([this, sequence_address, payload] {
+ memory_manager.Write<u32>(sequence_address, payload);
+ });
+ rasterizer->SyncOperation(std::move(operation));
+}
+
+void Puller::ProcessSemaphoreAcquire() {
+ u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
+ const auto value = regs.semaphore_acquire;
+ while (word != value) {
+ regs.acquire_active = true;
+ regs.acquire_value = value;
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
+ rasterizer->ReleaseFences();
+ word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
+ // TODO(kemathe73) figure out how to do the acquire_timeout
+ regs.acquire_mode = false;
+ regs.acquire_source = false;
+ }
+}
+
+/// Calls a GPU puller method.
+void Puller::CallPullerMethod(const MethodCall& method_call) {
+ regs.reg_array[method_call.method] = method_call.argument;
+ const auto method = static_cast<BufferMethods>(method_call.method);
+
+ switch (method) {
+ case BufferMethods::BindObject: {
+ ProcessBindMethod(method_call);
+ break;
+ }
+ case BufferMethods::Nop:
+ case BufferMethods::SemaphoreAddressHigh:
+ case BufferMethods::SemaphoreAddressLow:
+ case BufferMethods::SemaphoreSequencePayload:
+ case BufferMethods::SyncpointPayload:
+ break;
+ case BufferMethods::WrcacheFlush:
+ case BufferMethods::RefCnt:
+ rasterizer->SignalReference();
+ break;
+ case BufferMethods::SyncpointOperation:
+ ProcessFenceActionMethod();
+ break;
+ case BufferMethods::WaitForIdle:
+ rasterizer->WaitForIdle();
+ break;
+ case BufferMethods::SemaphoreOperation: {
+ ProcessSemaphoreTriggerMethod();
+ break;
+ }
+ case BufferMethods::NonStallInterrupt: {
+ LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
+ break;
+ }
+ case BufferMethods::MemOpA: {
+ LOG_ERROR(HW_GPU, "Memory Operation A");
+ break;
+ }
+ case BufferMethods::MemOpB: {
+ // Implement this better.
+ rasterizer->InvalidateGPUCache();
+ break;
+ }
+ case BufferMethods::MemOpC:
+ case BufferMethods::MemOpD: {
+ LOG_ERROR(HW_GPU, "Memory Operation C,D");
+ break;
+ }
+ case BufferMethods::SemaphoreAcquire: {
+ ProcessSemaphoreAcquire();
+ break;
+ }
+ case BufferMethods::SemaphoreRelease: {
+ ProcessSemaphoreRelease();
+ break;
+ }
+ case BufferMethods::Yield: {
+ // TODO(Kmather73): Research and implement this method.
+ LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
+ break;
+ }
+ default:
+ LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
+ break;
+ }
+}
+
+/// Calls a GPU engine method.
+void Puller::CallEngineMethod(const MethodCall& method_call) {
+ const EngineID engine = bound_engines[method_call.subchannel];
+
+ switch (engine) {
+ case EngineID::FERMI_TWOD_A:
+ channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::MAXWELL_B:
+ channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::KEPLER_COMPUTE_B:
+ channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine");
+ }
+}
+
+/// Calls a GPU engine multivalue method.
+void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending) {
+ const EngineID engine = bound_engines[subchannel];
+
+ switch (engine) {
+ case EngineID::FERMI_TWOD_A:
+ channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ case EngineID::MAXWELL_B:
+ channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ case EngineID::KEPLER_COMPUTE_B:
+ channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine");
+ }
+}
+
+/// Calls a GPU method.
+void Puller::CallMethod(const MethodCall& method_call) {
+ LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
+ method_call.subchannel);
+
+ ASSERT(method_call.subchannel < bound_engines.size());
+
+ if (ExecuteMethodOnEngine(method_call.method)) {
+ CallEngineMethod(method_call);
+ } else {
+ CallPullerMethod(method_call);
+ }
+}
+
+/// Calls a GPU multivalue method.
+void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending) {
+ LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+
+ ASSERT(subchannel < bound_engines.size());
+
+ if (ExecuteMethodOnEngine(method)) {
+ CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+ } else {
+ for (std::size_t i = 0; i < amount; i++) {
+ CallPullerMethod(MethodCall{
+ method,
+ base_start[i],
+ subchannel,
+ methods_pending - static_cast<u32>(i),
+ });
+ }
+ }
+}
+
+void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
+}
+
+/// Determines where the method should be executed.
+[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
+ const auto buffer_method = static_cast<BufferMethods>(method);
+ return buffer_method >= BufferMethods::NonPullerMethods;
+}
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
new file mode 100644
index 000000000..d4175ee94
--- /dev/null
+++ b/src/video_core/engines/puller.h
@@ -0,0 +1,177 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+class DmaPusher;
+
+enum class EngineID {
+ FERMI_TWOD_A = 0x902D, // 2D Engine
+ MAXWELL_B = 0xB197, // 3D Engine
+ KEPLER_COMPUTE_B = 0xB1C0,
+ KEPLER_INLINE_TO_MEMORY_B = 0xA140,
+ MAXWELL_DMA_COPY_A = 0xB0B5,
+};
+
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+class Puller final {
+public:
+ struct MethodCall {
+ u32 method{};
+ u32 argument{};
+ u32 subchannel{};
+ u32 method_count{};
+
+ explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
+ : method(method_), argument(argument_), subchannel(subchannel_),
+ method_count(method_count_) {}
+
+ [[nodiscard]] bool IsLastCall() const {
+ return method_count <= 1;
+ }
+ };
+
+ enum class FenceOperation : u32 {
+ Acquire = 0,
+ Increment = 1,
+ };
+
+ union FenceAction {
+ u32 raw;
+ BitField<0, 1, FenceOperation> op;
+ BitField<8, 24, u32> syncpoint_id;
+ };
+
+ explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
+ Control::ChannelState& channel_state);
+ ~Puller();
+
+ void CallMethod(const MethodCall& method_call);
+
+ void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending);
+
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+ void CallPullerMethod(const MethodCall& method_call);
+
+ void CallEngineMethod(const MethodCall& method_call);
+
+ void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending);
+
+private:
+ Tegra::GPU& gpu;
+
+ MemoryManager& memory_manager;
+ DmaPusher& dma_pusher;
+ Control::ChannelState& channel_state;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+ static constexpr std::size_t NUM_REGS = 0x800;
+ struct Regs {
+ static constexpr size_t NUM_REGS = 0x40;
+
+ union {
+ struct {
+ INSERT_PADDING_WORDS_NOINIT(0x4);
+ struct {
+ u32 address_high;
+ u32 address_low;
+
+ [[nodiscard]] GPUVAddr SemaphoreAddress() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } semaphore_address;
+
+ u32 semaphore_sequence;
+ u32 semaphore_trigger;
+ INSERT_PADDING_WORDS_NOINIT(0xC);
+
+ // The pusher and the puller share the reference counter, the pusher only has read
+ // access
+ u32 reference_count;
+ INSERT_PADDING_WORDS_NOINIT(0x5);
+
+ u32 semaphore_acquire;
+ u32 semaphore_release;
+ u32 fence_value;
+ FenceAction fence_action;
+ INSERT_PADDING_WORDS_NOINIT(0xE2);
+
+ // Puller state
+ u32 acquire_mode;
+ u32 acquire_source;
+ u32 acquire_active;
+ u32 acquire_timeout;
+ u32 acquire_value;
+ };
+ std::array<u32, NUM_REGS> reg_array;
+ };
+ } regs{};
+
+ void ProcessBindMethod(const MethodCall& method_call);
+ void ProcessFenceActionMethod();
+ void ProcessSemaphoreAcquire();
+ void ProcessSemaphoreRelease();
+ void ProcessSemaphoreTriggerMethod();
+ [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
+
+ /// Mapping of command subchannels to their bound engine ids
+ std::array<EngineID, 8> bound_engines{};
+
+ enum class GpuSemaphoreOperation {
+ AcquireEqual = 0x1,
+ WriteLong = 0x2,
+ AcquireGequal = 0x4,
+ AcquireMask = 0x8,
+ };
+
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(Regs, field_name) == position * 4, \
+ "Field " #field_name " has invalid position")
+
+ ASSERT_REG_POSITION(semaphore_address, 0x4);
+ ASSERT_REG_POSITION(semaphore_sequence, 0x6);
+ ASSERT_REG_POSITION(semaphore_trigger, 0x7);
+ ASSERT_REG_POSITION(reference_count, 0x14);
+ ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
+ ASSERT_REG_POSITION(semaphore_release, 0x1B);
+ ASSERT_REG_POSITION(fence_value, 0x1C);
+ ASSERT_REG_POSITION(fence_action, 0x1D);
+
+ ASSERT_REG_POSITION(acquire_mode, 0x100);
+ ASSERT_REG_POSITION(acquire_source, 0x101);
+ ASSERT_REG_POSITION(acquire_active, 0x102);
+ ASSERT_REG_POSITION(acquire_timeout, 0x103);
+ ASSERT_REG_POSITION(acquire_value, 0x104);
+
+#undef ASSERT_REG_POSITION
+};
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 34dc6c596..c390ac91b 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -1,46 +1,27 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
+#include <cstring>
+#include <deque>
+#include <functional>
+#include <memory>
#include <queue>
#include "common/common_types.h"
-#include "common/settings.h"
-#include "core/core.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
#include "video_core/rasterizer_interface.h"
namespace VideoCommon {
class FenceBase {
public:
- explicit FenceBase(u32 payload_, bool is_stubbed_)
- : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
-
- explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
- : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
-
- GPUVAddr GetAddress() const {
- return address;
- }
-
- u32 GetPayload() const {
- return payload;
- }
-
- bool IsSemaphore() const {
- return is_semaphore;
- }
-
-private:
- GPUVAddr address;
- u32 payload;
- bool is_semaphore;
+ explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
protected:
bool is_stubbed;
@@ -60,30 +41,28 @@ public:
buffer_cache.AccumulateFlushes();
}
- void SignalSemaphore(GPUVAddr addr, u32 value) {
+ void SyncOperation(std::function<void()>&& func) {
+ uncommitted_operations.emplace_back(std::move(func));
+ }
+
+ void SignalFence(std::function<void()>&& func) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
- TFence new_fence = CreateFence(addr, value, !should_flush);
+ uncommitted_operations.emplace_back(std::move(func));
+ CommitOperations();
+ TFence new_fence = CreateFence(!should_flush);
fences.push(new_fence);
QueueFence(new_fence);
if (should_flush) {
rasterizer.FlushCommands();
}
- rasterizer.SyncGuestHost();
}
void SignalSyncPoint(u32 value) {
- TryReleasePendingFences();
- const bool should_flush = ShouldFlush();
- CommitAsyncFlushes();
- TFence new_fence = CreateFence(value, !should_flush);
- fences.push(new_fence);
- QueueFence(new_fence);
- if (should_flush) {
- rasterizer.FlushCommands();
- }
- rasterizer.SyncGuestHost();
+ syncpoint_manager.IncrementGuest(value);
+ std::function<void()> func([this, value] { syncpoint_manager.IncrementHost(value); });
+ SignalFence(std::move(func));
}
void WaitPendingFences() {
@@ -93,11 +72,10 @@ public:
WaitFence(current_fence);
}
PopAsyncFlushes();
- if (current_fence->IsSemaphore()) {
- gpu_memory.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
- } else {
- gpu.IncrementSyncPoint(current_fence->GetPayload());
+ auto operations = std::move(pending_operations.front());
+ pending_operations.pop_front();
+ for (auto& operation : operations) {
+ operation();
}
PopFence();
}
@@ -107,16 +85,14 @@ protected:
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
TQueryCache& query_cache_)
- : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
+ : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
virtual ~FenceManager() = default;
- /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
+ /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
- virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
- /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
- virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
+ virtual TFence CreateFence(bool is_stubbed) = 0;
/// Queues a fence into the backend if the fence isn't stubbed.
virtual void QueueFence(TFence& fence) = 0;
/// Notifies that the backend fence has been signaled/reached in host GPU.
@@ -126,7 +102,7 @@ protected:
VideoCore::RasterizerInterface& rasterizer;
Tegra::GPU& gpu;
- Tegra::MemoryManager& gpu_memory;
+ Tegra::Host1x::SyncpointManager& syncpoint_manager;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
@@ -139,11 +115,10 @@ private:
return;
}
PopAsyncFlushes();
- if (current_fence->IsSemaphore()) {
- gpu_memory.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
- } else {
- gpu.IncrementSyncPoint(current_fence->GetPayload());
+ auto operations = std::move(pending_operations.front());
+ pending_operations.pop_front();
+ for (auto& operation : operations) {
+ operation();
}
PopFence();
}
@@ -162,16 +137,20 @@ private:
}
void PopAsyncFlushes() {
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- texture_cache.PopAsyncFlushes();
- buffer_cache.PopAsyncFlushes();
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.PopAsyncFlushes();
+ buffer_cache.PopAsyncFlushes();
+ }
query_cache.PopAsyncFlushes();
}
void CommitAsyncFlushes() {
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- texture_cache.CommitAsyncFlushes();
- buffer_cache.CommitAsyncFlushes();
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.CommitAsyncFlushes();
+ buffer_cache.CommitAsyncFlushes();
+ }
query_cache.CommitAsyncFlushes();
}
@@ -180,7 +159,13 @@ private:
fences.pop();
}
+ void CommitOperations() {
+ pending_operations.emplace_back(std::move(uncommitted_operations));
+ }
+
std::queue<TFence> fences;
+ std::deque<std::function<void()>> uncommitted_operations;
+ std::deque<std::deque<std::function<void()>>> pending_operations;
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
index b1d455e30..d93f5a37f 100644
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@@ -1,46 +1,26 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_types.h"
#include "common/math_util.h"
+#include "core/hle/service/nvflinger/buffer_transform_flags.h"
+#include "core/hle/service/nvflinger/pixel_format.h"
namespace Tegra {
+
/**
* Struct describing framebuffer configuration
*/
struct FramebufferConfig {
- enum class PixelFormat : u32 {
- A8B8G8R8_UNORM = 1,
- RGB565_UNORM = 4,
- B8G8R8A8_UNORM = 5,
- };
-
- enum class TransformFlags : u32 {
- /// No transform flags are set
- Unset = 0x00,
- /// Flip source image horizontally (around the vertical axis)
- FlipH = 0x01,
- /// Flip source image vertically (around the horizontal axis)
- FlipV = 0x02,
- /// Rotate source image 90 degrees clockwise
- Rotate90 = 0x04,
- /// Rotate source image 180 degrees
- Rotate180 = 0x03,
- /// Rotate source image 270 degrees clockwise
- Rotate270 = 0x07,
- };
-
VAddr address{};
u32 offset{};
u32 width{};
u32 height{};
u32 stride{};
- PixelFormat pixel_format{};
-
- TransformFlags transform_flags{};
+ Service::android::PixelFormat pixel_format{};
+ Service::android::BufferTransformFlags transform_flags{};
Common::Rectangle<int> crop_rect;
};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ba9ba082f..28b38273e 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <atomic>
@@ -15,10 +14,11 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
-#include "core/hardware_interrupt_manager.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/perf_stats.h"
#include "video_core/cdma_pusher.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
@@ -27,75 +27,64 @@
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
#include "video_core/shader_notify.h"
namespace Tegra {
-MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-
struct GPU::Impl {
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
- : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
- system)},
- dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
- maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
- fermi_2d{std::make_unique<Engines::Fermi2D>()},
- kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
- maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
- kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+ : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
- gpu_thread{system_, is_async_} {}
+ gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
~Impl() = default;
- /// Binds a renderer to the GPU.
- void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
- renderer = std::move(renderer_);
- rasterizer = renderer->ReadRasterizer();
-
- memory_manager->BindRasterizer(rasterizer);
- maxwell_3d->BindRasterizer(rasterizer);
- fermi_2d->BindRasterizer(rasterizer);
- kepler_compute->BindRasterizer(rasterizer);
- kepler_memory->BindRasterizer(rasterizer);
- maxwell_dma->BindRasterizer(rasterizer);
+ std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
+ auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
+ channels.emplace(channel_id, channel_state);
+ scheduler->DeclareChannel(channel_state);
+ return channel_state;
}
- /// Calls a GPU method.
- void CallMethod(const GPU::MethodCall& method_call) {
- LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
- method_call.subchannel);
+ void BindChannel(s32 channel_id) {
+ if (bound_channel == channel_id) {
+ return;
+ }
+ auto it = channels.find(channel_id);
+ ASSERT(it != channels.end());
+ bound_channel = channel_id;
+ current_channel = it->second.get();
- ASSERT(method_call.subchannel < bound_engines.size());
+ rasterizer->BindChannel(*current_channel);
+ }
- if (ExecuteMethodOnEngine(method_call.method)) {
- CallEngineMethod(method_call);
- } else {
- CallPullerMethod(method_call);
- }
+ std::shared_ptr<Control::ChannelState> AllocateChannel() {
+ return CreateChannel(new_channel_id++);
}
- /// Calls a GPU multivalue method.
- void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
- u32 methods_pending) {
- LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+ void InitChannel(Control::ChannelState& to_init) {
+ to_init.Init(system, gpu);
+ to_init.BindRasterizer(rasterizer);
+ rasterizer->InitializeChannel(to_init);
+ }
- ASSERT(subchannel < bound_engines.size());
+ void InitAddressSpace(Tegra::MemoryManager& memory_manager) {
+ memory_manager.BindRasterizer(rasterizer);
+ }
- if (ExecuteMethodOnEngine(method)) {
- CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
- } else {
- for (std::size_t i = 0; i < amount; i++) {
- CallPullerMethod(GPU::MethodCall{
- method,
- base_start[i],
- subchannel,
- methods_pending - static_cast<u32>(i),
- });
- }
- }
+ void ReleaseChannel(Control::ChannelState& to_release) {
+ UNIMPLEMENTED();
+ }
+
+ /// Binds a renderer to the GPU.
+ void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+ renderer = std::move(renderer_);
+ rasterizer = renderer->ReadRasterizer();
+ host1x.MemoryManager().BindRasterizer(rasterizer);
}
/// Flush all current written commands into the host GPU for execution.
@@ -104,85 +93,82 @@ struct GPU::Impl {
}
/// Synchronizes CPU writes with Host GPU memory.
- void SyncGuestHost() {
- rasterizer->SyncGuestHost();
+ void InvalidateGPUCache() {
+ rasterizer->InvalidateGPUCache();
}
/// Signal the ending of command list.
void OnCommandListEnd() {
- if (is_async) {
- // This command only applies to asynchronous GPU mode
- gpu_thread.OnCommandListEnd();
- }
+ gpu_thread.OnCommandListEnd();
}
/// Request a host GPU memory flush from the CPU.
- [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
- std::unique_lock lck{flush_request_mutex};
- const u64 fence = ++last_flush_fence;
- flush_requests.emplace_back(fence, addr, size);
+ template <typename Func>
+ [[nodiscard]] u64 RequestSyncOperation(Func&& action) {
+ std::unique_lock lck{sync_request_mutex};
+ const u64 fence = ++last_sync_fence;
+ sync_requests.emplace_back(action);
return fence;
}
/// Obtains current flush request fence id.
- [[nodiscard]] u64 CurrentFlushRequestFence() const {
- return current_flush_fence.load(std::memory_order_relaxed);
+ [[nodiscard]] u64 CurrentSyncRequestFence() const {
+ return current_sync_fence.load(std::memory_order_relaxed);
+ }
+
+ void WaitForSyncOperation(const u64 fence) {
+ std::unique_lock lck{sync_request_mutex};
+ sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
}
/// Tick pending requests within the GPU.
void TickWork() {
- std::unique_lock lck{flush_request_mutex};
- while (!flush_requests.empty()) {
- auto& request = flush_requests.front();
- const u64 fence = request.fence;
- const VAddr addr = request.addr;
- const std::size_t size = request.size;
- flush_requests.pop_front();
- flush_request_mutex.unlock();
- rasterizer->FlushRegion(addr, size);
- current_flush_fence.store(fence);
- flush_request_mutex.lock();
+ std::unique_lock lck{sync_request_mutex};
+ while (!sync_requests.empty()) {
+ auto request = std::move(sync_requests.front());
+ sync_requests.pop_front();
+ sync_request_mutex.unlock();
+ request();
+ current_sync_fence.fetch_add(1, std::memory_order_release);
+ sync_request_mutex.lock();
+ sync_request_cv.notify_all();
}
}
/// Returns a reference to the Maxwell3D GPU engine.
[[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
- return *maxwell_3d;
+ ASSERT(current_channel);
+ return *current_channel->maxwell_3d;
}
/// Returns a const reference to the Maxwell3D GPU engine.
[[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
- return *maxwell_3d;
+ ASSERT(current_channel);
+ return *current_channel->maxwell_3d;
}
/// Returns a reference to the KeplerCompute GPU engine.
[[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
- return *kepler_compute;
+ ASSERT(current_channel);
+ return *current_channel->kepler_compute;
}
/// Returns a reference to the KeplerCompute GPU engine.
[[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
- return *kepler_compute;
- }
-
- /// Returns a reference to the GPU memory manager.
- [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
- return *memory_manager;
- }
-
- /// Returns a const reference to the GPU memory manager.
- [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
- return *memory_manager;
+ ASSERT(current_channel);
+ return *current_channel->kepler_compute;
}
/// Returns a reference to the GPU DMA pusher.
[[nodiscard]] Tegra::DmaPusher& DmaPusher() {
- return *dma_pusher;
+ ASSERT(current_channel);
+ return *current_channel->dma_pusher;
}
/// Returns a const reference to the GPU DMA pusher.
[[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
- return *dma_pusher;
+ ASSERT(current_channel);
+ return *current_channel->dma_pusher;
}
/// Returns a reference to the underlying renderer.
@@ -205,77 +191,6 @@ struct GPU::Impl {
return *shader_notify;
}
- /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
- void WaitFence(u32 syncpoint_id, u32 value) {
- // Synced GPU, is always in sync
- if (!is_async) {
- return;
- }
- if (syncpoint_id == UINT32_MAX) {
- // TODO: Research what this does.
- LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
- return;
- }
- MICROPROFILE_SCOPE(GPU_wait);
- std::unique_lock lock{sync_mutex};
- sync_cv.wait(lock, [=, this] {
- if (shutting_down.load(std::memory_order_relaxed)) {
- // We're shutting down, ensure no threads continue to wait for the next syncpoint
- return true;
- }
- return syncpoints.at(syncpoint_id).load() >= value;
- });
- }
-
- void IncrementSyncPoint(u32 syncpoint_id) {
- auto& syncpoint = syncpoints.at(syncpoint_id);
- syncpoint++;
- std::lock_guard lock{sync_mutex};
- sync_cv.notify_all();
- auto& interrupt = syncpt_interrupts.at(syncpoint_id);
- if (!interrupt.empty()) {
- u32 value = syncpoint.load();
- auto it = interrupt.begin();
- while (it != interrupt.end()) {
- if (value >= *it) {
- TriggerCpuInterrupt(syncpoint_id, *it);
- it = interrupt.erase(it);
- continue;
- }
- it++;
- }
- }
- }
-
- [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
- return syncpoints.at(syncpoint_id).load();
- }
-
- void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
- std::lock_guard lock{sync_mutex};
- auto& interrupt = syncpt_interrupts.at(syncpoint_id);
- bool contains = std::any_of(interrupt.begin(), interrupt.end(),
- [value](u32 in_value) { return in_value == value; });
- if (contains) {
- return;
- }
- interrupt.emplace_back(value);
- }
-
- [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
- std::lock_guard lock{sync_mutex};
- auto& interrupt = syncpt_interrupts.at(syncpoint_id);
- const auto iter =
- std::find_if(interrupt.begin(), interrupt.end(),
- [value](u32 interrupt_value) { return value == interrupt_value; });
-
- if (iter == interrupt.end()) {
- return false;
- }
- interrupt.erase(iter);
- return true;
- }
-
[[nodiscard]] u64 GetTicks() const {
// This values were reversed engineered by fincs from NVN
// The gpu clock is reported in units of 385/625 nanoseconds
@@ -307,7 +222,7 @@ struct GPU::Impl {
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
void Start() {
- gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+ gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}
@@ -329,8 +244,8 @@ struct GPU::Impl {
}
/// Push GPU command entries to be processed
- void PushGPUEntries(Tegra::CommandList&& entries) {
- gpu_thread.SubmitList(std::move(entries));
+ void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+ gpu_thread.SubmitList(channel, std::move(entries));
}
/// Push GPU command buffer entries to be processed
@@ -340,7 +255,7 @@ struct GPU::Impl {
}
if (!cdma_pushers.contains(id)) {
- cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(gpu));
+ cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(host1x));
}
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
@@ -377,308 +292,55 @@ struct GPU::Impl {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
- void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const {
- auto& interrupt_manager = system.InterruptManager();
- interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
- }
-
- void ProcessBindMethod(const GPU::MethodCall& method_call) {
- // Bind the current subchannel to the desired engine id.
- LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
- method_call.argument);
- const auto engine_id = static_cast<EngineID>(method_call.argument);
- bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
- switch (engine_id) {
- case EngineID::FERMI_TWOD_A:
- dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
- break;
- case EngineID::MAXWELL_B:
- dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
- break;
- case EngineID::KEPLER_COMPUTE_B:
- dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
- break;
- case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
- }
- }
-
- void ProcessFenceActionMethod() {
- switch (regs.fence_action.op) {
- case GPU::FenceOperation::Acquire:
- WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
- break;
- case GPU::FenceOperation::Increment:
- IncrementSyncPoint(regs.fence_action.syncpoint_id);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
- }
- }
-
- void ProcessWaitForInterruptMethod() {
- // TODO(bunnei) ImplementMe
- LOG_WARNING(HW_GPU, "(STUBBED) called");
- }
-
- void ProcessSemaphoreTriggerMethod() {
- const auto semaphoreOperationMask = 0xF;
- const auto op =
- static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
- if (op == GpuSemaphoreOperation::WriteLong) {
- struct Block {
- u32 sequence;
- u32 zeros = 0;
- u64 timestamp;
- };
-
- Block block{};
- block.sequence = regs.semaphore_sequence;
- // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
- // CoreTiming
- block.timestamp = GetTicks();
- memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
- sizeof(block));
- } else {
- const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
- if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
- (op == GpuSemaphoreOperation::AcquireGequal &&
- static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
- (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
- // Nothing to do in this case
+ void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+ std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
+ size_t current_request_counter{};
+ {
+ std::unique_lock<std::mutex> lk(request_swap_mutex);
+ if (free_swap_counters.empty()) {
+ current_request_counter = request_swap_counters.size();
+ request_swap_counters.emplace_back(num_fences);
} else {
- regs.acquire_source = true;
- regs.acquire_value = regs.semaphore_sequence;
- if (op == GpuSemaphoreOperation::AcquireEqual) {
- regs.acquire_active = true;
- regs.acquire_mode = false;
- } else if (op == GpuSemaphoreOperation::AcquireGequal) {
- regs.acquire_active = true;
- regs.acquire_mode = true;
- } else if (op == GpuSemaphoreOperation::AcquireMask) {
- // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
- // semaphore_sequence, gives a non-0 result
- LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
- } else {
- LOG_ERROR(HW_GPU, "Invalid semaphore operation");
- }
+ current_request_counter = free_swap_counters.front();
+ request_swap_counters[current_request_counter] = num_fences;
+ free_swap_counters.pop_front();
}
}
- }
-
- void ProcessSemaphoreRelease() {
- memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
- regs.semaphore_release);
- }
-
- void ProcessSemaphoreAcquire() {
- const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
- const auto value = regs.semaphore_acquire;
- if (word != value) {
- regs.acquire_active = true;
- regs.acquire_value = value;
- // TODO(kemathe73) figure out how to do the acquire_timeout
- regs.acquire_mode = false;
- regs.acquire_source = false;
- }
- }
-
- /// Calls a GPU puller method.
- void CallPullerMethod(const GPU::MethodCall& method_call) {
- regs.reg_array[method_call.method] = method_call.argument;
- const auto method = static_cast<BufferMethods>(method_call.method);
-
- switch (method) {
- case BufferMethods::BindObject: {
- ProcessBindMethod(method_call);
- break;
- }
- case BufferMethods::Nop:
- case BufferMethods::SemaphoreAddressHigh:
- case BufferMethods::SemaphoreAddressLow:
- case BufferMethods::SemaphoreSequence:
- break;
- case BufferMethods::UnkCacheFlush:
- rasterizer->SyncGuestHost();
- break;
- case BufferMethods::WrcacheFlush:
- rasterizer->SignalReference();
- break;
- case BufferMethods::FenceValue:
- break;
- case BufferMethods::RefCnt:
- rasterizer->SignalReference();
- break;
- case BufferMethods::FenceAction:
- ProcessFenceActionMethod();
- break;
- case BufferMethods::WaitForInterrupt:
- rasterizer->WaitForIdle();
- break;
- case BufferMethods::SemaphoreTrigger: {
- ProcessSemaphoreTriggerMethod();
- break;
- }
- case BufferMethods::NotifyIntr: {
- // TODO(Kmather73): Research and implement this method.
- LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
- break;
- }
- case BufferMethods::Unk28: {
- // TODO(Kmather73): Research and implement this method.
- LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
- break;
- }
- case BufferMethods::SemaphoreAcquire: {
- ProcessSemaphoreAcquire();
- break;
- }
- case BufferMethods::SemaphoreRelease: {
- ProcessSemaphoreRelease();
- break;
- }
- case BufferMethods::Yield: {
- // TODO(Kmather73): Research and implement this method.
- LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
- break;
- }
- default:
- LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
- break;
- }
- }
-
- /// Calls a GPU engine method.
- void CallEngineMethod(const GPU::MethodCall& method_call) {
- const EngineID engine = bound_engines[method_call.subchannel];
-
- switch (engine) {
- case EngineID::FERMI_TWOD_A:
- fermi_2d->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- case EngineID::MAXWELL_B:
- maxwell_3d->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- case EngineID::KEPLER_COMPUTE_B:
- kepler_compute->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- maxwell_dma->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- kepler_memory->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine");
- }
- }
-
- /// Calls a GPU engine multivalue method.
- void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
- u32 methods_pending) {
- const EngineID engine = bound_engines[subchannel];
-
- switch (engine) {
- case EngineID::FERMI_TWOD_A:
- fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- case EngineID::MAXWELL_B:
- maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- case EngineID::KEPLER_COMPUTE_B:
- kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine");
- }
- }
-
- /// Determines where the method should be executed.
- [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
- const auto buffer_method = static_cast<BufferMethods>(method);
- return buffer_method >= BufferMethods::NonPullerMethods;
- }
-
- struct Regs {
- static constexpr size_t NUM_REGS = 0x40;
-
- union {
- struct {
- INSERT_PADDING_WORDS_NOINIT(0x4);
- struct {
- u32 address_high;
- u32 address_low;
-
- [[nodiscard]] GPUVAddr SemaphoreAddress() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ const auto wait_fence =
+ RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] {
+ auto& syncpoint_manager = host1x.GetSyncpointManager();
+ if (num_fences == 0) {
+ renderer->SwapBuffers(framebuffer);
+ }
+ const auto executer = [this, current_request_counter,
+ framebuffer_copy = *framebuffer]() {
+ {
+ std::unique_lock<std::mutex> lk(request_swap_mutex);
+ if (--request_swap_counters[current_request_counter] != 0) {
+ return;
+ }
+ free_swap_counters.push_back(current_request_counter);
}
- } semaphore_address;
-
- u32 semaphore_sequence;
- u32 semaphore_trigger;
- INSERT_PADDING_WORDS_NOINIT(0xC);
-
- // The pusher and the puller share the reference counter, the pusher only has read
- // access
- u32 reference_count;
- INSERT_PADDING_WORDS_NOINIT(0x5);
-
- u32 semaphore_acquire;
- u32 semaphore_release;
- u32 fence_value;
- GPU::FenceAction fence_action;
- INSERT_PADDING_WORDS_NOINIT(0xE2);
-
- // Puller state
- u32 acquire_mode;
- u32 acquire_source;
- u32 acquire_active;
- u32 acquire_timeout;
- u32 acquire_value;
- };
- std::array<u32, NUM_REGS> reg_array;
- };
- } regs{};
+ renderer->SwapBuffers(&framebuffer_copy);
+ };
+ for (size_t i = 0; i < num_fences; i++) {
+ syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
+ }
+ });
+ gpu_thread.TickGPU();
+ WaitForSyncOperation(wait_fence);
+ }
GPU& gpu;
Core::System& system;
- std::unique_ptr<Tegra::MemoryManager> memory_manager;
- std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+ Host1x::Host1x& host1x;
+
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
std::unique_ptr<VideoCore::RendererBase> renderer;
VideoCore::RasterizerInterface* rasterizer = nullptr;
const bool use_nvdec;
- /// Mapping of command subchannels to their bound engine ids
- std::array<EngineID, 8> bound_engines{};
- /// 3D engine
- std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
- /// 2D engine
- std::unique_ptr<Engines::Fermi2D> fermi_2d;
- /// Compute engine
- std::unique_ptr<Engines::KeplerCompute> kepler_compute;
- /// DMA engine
- std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
- /// Inline memory engine
- std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+ s32 new_channel_id{1};
/// Shader build notifier
std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
/// When true, we are about to shut down emulation session, so terminate outstanding tasks
@@ -693,51 +355,25 @@ struct GPU::Impl {
std::condition_variable sync_cv;
- struct FlushRequest {
- explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
- : fence{fence_}, addr{addr_}, size{size_} {}
- u64 fence;
- VAddr addr;
- std::size_t size;
- };
-
- std::list<FlushRequest> flush_requests;
- std::atomic<u64> current_flush_fence{};
- u64 last_flush_fence{};
- std::mutex flush_request_mutex;
+ std::list<std::function<void()>> sync_requests;
+ std::atomic<u64> current_sync_fence{};
+ u64 last_sync_fence{};
+ std::mutex sync_request_mutex;
+ std::condition_variable sync_request_cv;
const bool is_async;
VideoCommon::GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
-#define ASSERT_REG_POSITION(field_name, position) \
- static_assert(offsetof(Regs, field_name) == position * 4, \
- "Field " #field_name " has invalid position")
-
- ASSERT_REG_POSITION(semaphore_address, 0x4);
- ASSERT_REG_POSITION(semaphore_sequence, 0x6);
- ASSERT_REG_POSITION(semaphore_trigger, 0x7);
- ASSERT_REG_POSITION(reference_count, 0x14);
- ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
- ASSERT_REG_POSITION(semaphore_release, 0x1B);
- ASSERT_REG_POSITION(fence_value, 0x1C);
- ASSERT_REG_POSITION(fence_action, 0x1D);
-
- ASSERT_REG_POSITION(acquire_mode, 0x100);
- ASSERT_REG_POSITION(acquire_source, 0x101);
- ASSERT_REG_POSITION(acquire_active, 0x102);
- ASSERT_REG_POSITION(acquire_timeout, 0x103);
- ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
- enum class GpuSemaphoreOperation {
- AcquireEqual = 0x1,
- WriteLong = 0x2,
- AcquireGequal = 0x4,
- AcquireMask = 0x8,
- };
+ std::unique_ptr<Tegra::Control::Scheduler> scheduler;
+ std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
+ Tegra::Control::ChannelState* current_channel;
+ s32 bound_channel{-1};
+
+ std::deque<size_t> free_swap_counters;
+ std::deque<size_t> request_swap_counters;
+ std::mutex request_swap_mutex;
};
GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -745,25 +381,36 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
GPU::~GPU() = default;
-void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
- impl->BindRenderer(std::move(renderer));
+std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
+ return impl->AllocateChannel();
+}
+
+void GPU::InitChannel(Control::ChannelState& to_init) {
+ impl->InitChannel(to_init);
+}
+
+void GPU::BindChannel(s32 channel_id) {
+ impl->BindChannel(channel_id);
}
-void GPU::CallMethod(const MethodCall& method_call) {
- impl->CallMethod(method_call);
+void GPU::ReleaseChannel(Control::ChannelState& to_release) {
+ impl->ReleaseChannel(to_release);
}
-void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
- u32 methods_pending) {
- impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
+void GPU::InitAddressSpace(Tegra::MemoryManager& memory_manager) {
+ impl->InitAddressSpace(memory_manager);
+}
+
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
+ impl->BindRenderer(std::move(renderer));
}
void GPU::FlushCommands() {
impl->FlushCommands();
}
-void GPU::SyncGuestHost() {
- impl->SyncGuestHost();
+void GPU::InvalidateGPUCache() {
+ impl->InvalidateGPUCache();
}
void GPU::OnCommandListEnd() {
@@ -771,17 +418,32 @@ void GPU::OnCommandListEnd() {
}
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
- return impl->RequestFlush(addr, size);
+ return impl->RequestSyncOperation(
+ [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
}
-u64 GPU::CurrentFlushRequestFence() const {
- return impl->CurrentFlushRequestFence();
+u64 GPU::CurrentSyncRequestFence() const {
+ return impl->CurrentSyncRequestFence();
+}
+
+void GPU::WaitForSyncOperation(u64 fence) {
+ return impl->WaitForSyncOperation(fence);
}
void GPU::TickWork() {
impl->TickWork();
}
+/// Gets a mutable reference to the Host1x interface
+Host1x::Host1x& GPU::Host1x() {
+ return impl->host1x;
+}
+
+/// Gets an immutable reference to the Host1x interface.
+const Host1x::Host1x& GPU::Host1x() const {
+ return impl->host1x;
+}
+
Engines::Maxwell3D& GPU::Maxwell3D() {
return impl->Maxwell3D();
}
@@ -798,14 +460,6 @@ const Engines::KeplerCompute& GPU::KeplerCompute() const {
return impl->KeplerCompute();
}
-Tegra::MemoryManager& GPU::MemoryManager() {
- return impl->MemoryManager();
-}
-
-const Tegra::MemoryManager& GPU::MemoryManager() const {
- return impl->MemoryManager();
-}
-
Tegra::DmaPusher& GPU::DmaPusher() {
return impl->DmaPusher();
}
@@ -830,24 +484,9 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
return impl->ShaderNotify();
}
-void GPU::WaitFence(u32 syncpoint_id, u32 value) {
- impl->WaitFence(syncpoint_id, value);
-}
-
-void GPU::IncrementSyncPoint(u32 syncpoint_id) {
- impl->IncrementSyncPoint(syncpoint_id);
-}
-
-u32 GPU::GetSyncpointValue(u32 syncpoint_id) const {
- return impl->GetSyncpointValue(syncpoint_id);
-}
-
-void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
- impl->RegisterSyncptInterrupt(syncpoint_id, value);
-}
-
-bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
- return impl->CancelSyncptInterrupt(syncpoint_id, value);
+void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+ std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
+ impl->RequestSwapBuffers(framebuffer, fences, num_fences);
}
u64 GPU::GetTicks() const {
@@ -882,8 +521,8 @@ void GPU::ReleaseContext() {
impl->ReleaseContext();
}
-void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
- impl->PushGPUEntries(std::move(entries));
+void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+ impl->PushGPUEntries(channel, std::move(entries));
}
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 26b8ea233..0a4a8b14f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -8,6 +7,7 @@
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
#include "video_core/cdma_pusher.h"
#include "video_core/framebuffer_config.h"
@@ -89,73 +89,58 @@ class Maxwell3D;
class KeplerCompute;
} // namespace Engines
-enum class EngineID {
- FERMI_TWOD_A = 0x902D, // 2D Engine
- MAXWELL_B = 0xB197, // 3D Engine
- KEPLER_COMPUTE_B = 0xB1C0,
- KEPLER_INLINE_TO_MEMORY_B = 0xA140,
- MAXWELL_DMA_COPY_A = 0xB0B5,
-};
+namespace Control {
+struct ChannelState;
+}
+
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
class MemoryManager;
class GPU final {
public:
- struct MethodCall {
- u32 method{};
- u32 argument{};
- u32 subchannel{};
- u32 method_count{};
-
- explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
- : method(method_), argument(argument_), subchannel(subchannel_),
- method_count(method_count_) {}
-
- [[nodiscard]] bool IsLastCall() const {
- return method_count <= 1;
- }
- };
-
- enum class FenceOperation : u32 {
- Acquire = 0,
- Increment = 1,
- };
-
- union FenceAction {
- u32 raw;
- BitField<0, 1, FenceOperation> op;
- BitField<8, 24, u32> syncpoint_id;
- };
-
explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
~GPU();
/// Binds a renderer to the GPU.
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
- /// Calls a GPU method.
- void CallMethod(const MethodCall& method_call);
-
- /// Calls a GPU multivalue method.
- void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
- u32 methods_pending);
-
/// Flush all current written commands into the host GPU for execution.
void FlushCommands();
/// Synchronizes CPU writes with Host GPU memory.
- void SyncGuestHost();
+ void InvalidateGPUCache();
/// Signal the ending of command list.
void OnCommandListEnd();
+ std::shared_ptr<Control::ChannelState> AllocateChannel();
+
+ void InitChannel(Control::ChannelState& to_init);
+
+ void BindChannel(s32 channel_id);
+
+ void ReleaseChannel(Control::ChannelState& to_release);
+
+ void InitAddressSpace(Tegra::MemoryManager& memory_manager);
+
/// Request a host GPU memory flush from the CPU.
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
/// Obtains current flush request fence id.
- [[nodiscard]] u64 CurrentFlushRequestFence() const;
+ [[nodiscard]] u64 CurrentSyncRequestFence() const;
+
+ void WaitForSyncOperation(u64 fence);
/// Tick pending requests within the GPU.
void TickWork();
+ /// Gets a mutable reference to the Host1x interface
+ [[nodiscard]] Host1x::Host1x& Host1x();
+
+ /// Gets an immutable reference to the Host1x interface.
+ [[nodiscard]] const Host1x::Host1x& Host1x() const;
+
/// Returns a reference to the Maxwell3D GPU engine.
[[nodiscard]] Engines::Maxwell3D& Maxwell3D();
@@ -168,12 +153,6 @@ public:
/// Returns a reference to the KeplerCompute GPU engine.
[[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const;
- /// Returns a reference to the GPU memory manager.
- [[nodiscard]] Tegra::MemoryManager& MemoryManager();
-
- /// Returns a const reference to the GPU memory manager.
- [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const;
-
/// Returns a reference to the GPU DMA pusher.
[[nodiscard]] Tegra::DmaPusher& DmaPusher();
@@ -192,17 +171,6 @@ public:
/// Returns a const reference to the shader notifier.
[[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;
- /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
- void WaitFence(u32 syncpoint_id, u32 value);
-
- void IncrementSyncPoint(u32 syncpoint_id);
-
- [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;
-
- void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
-
- [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
-
[[nodiscard]] u64 GetTicks() const;
[[nodiscard]] bool IsAsync() const;
@@ -211,6 +179,9 @@ public:
void RendererFrameEndNotify();
+ void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+ std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences);
+
/// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
@@ -226,7 +197,7 @@ public:
void ReleaseContext();
/// Push GPU command entries to be processed
- void PushGPUEntries(Tegra::CommandList&& entries);
+ void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
@@ -248,7 +219,7 @@ public:
private:
struct Impl;
- std::unique_ptr<Impl> impl;
+ mutable std::unique_ptr<Impl> impl;
};
} // namespace Tegra
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 9547f277a..1bd477011 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/microprofile.h"
@@ -9,6 +8,7 @@
#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
+#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
@@ -19,8 +19,8 @@ namespace VideoCommon::GPUThread {
/// Runs the GPU thread
static void RunThread(std::stop_token stop_token, Core::System& system,
VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher, SynchState& state) {
- std::string name = "yuzu:GPU";
+ Tegra::Control::Scheduler& scheduler, SynchState& state) {
+ std::string name = "GPU";
MicroProfileOnThreadCreate(name.c_str());
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
@@ -37,8 +37,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
break;
}
if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
- dma_pusher.Push(std::move(submit_list->entries));
- dma_pusher.DispatchCalls();
+ scheduler.Push(submit_list->channel, std::move(submit_list->entries));
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
@@ -50,13 +49,13 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
} else {
- UNREACHABLE();
+ ASSERT(false);
}
state.signaled_fence.store(next.fence);
if (next.block) {
// We have to lock the write_lock to ensure that the condition_variable wait not get a
// race between the check and the lock itself.
- std::lock_guard lk(state.write_lock);
+ std::scoped_lock lk{state.write_lock};
state.cv.notify_all();
}
}
@@ -69,14 +68,14 @@ ThreadManager::~ThreadManager() = default;
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher) {
+ Tegra::Control::Scheduler& scheduler) {
rasterizer = renderer.ReadRasterizer();
thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
- std::ref(dma_pusher), std::ref(state));
+ std::ref(scheduler), std::ref(state));
}
-void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
- PushCommand(SubmitListCommand(std::move(entries)));
+void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
+ PushCommand(SubmitListCommand(channel, std::move(entries)));
}
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -94,8 +93,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
}
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
- PushCommand(GPUTickCommand(), true);
- ASSERT(fence <= gpu.CurrentFlushRequestFence());
+ TickGPU();
+ gpu.WaitForSyncOperation(fence);
+}
+
+void ThreadManager::TickGPU() {
+ PushCommand(GPUTickCommand());
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 00984188e..64628d3e3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -16,7 +15,9 @@
namespace Tegra {
struct FramebufferConfig;
-class DmaPusher;
+namespace Control {
+class Scheduler;
+}
} // namespace Tegra
namespace Core {
@@ -35,8 +36,10 @@ namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
- explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
+ explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
+ : channel{channel_}, entries{std::move(entries_)} {}
+ s32 channel;
Tegra::CommandList entries;
};
@@ -97,7 +100,7 @@ struct CommandDataContainer {
/// Struct used to synchronize the GPU thread
struct SynchState final {
- using CommandQueue = Common::SPSCQueue<CommandDataContainer, true>;
+ using CommandQueue = Common::MPSCQueue<CommandDataContainer, true>;
std::mutex write_lock;
CommandQueue queue;
u64 last_fence{};
@@ -113,10 +116,10 @@ public:
/// Creates and starts the GPU thread.
void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher);
+ Tegra::Control::Scheduler& scheduler);
/// Push GPU command entries to be processed
- void SubmitList(Tegra::CommandList&& entries);
+ void SubmitList(s32 channel, Tegra::CommandList&& entries);
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
@@ -132,6 +135,8 @@ public:
void OnCommandListEnd();
+ void TickGPU();
+
private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data, bool block = false);
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index 04d0f3a2f..42e7d6e4f 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -1,18 +1,16 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
-#include <cstdio>
#include <fstream>
#include <vector>
#include "common/assert.h"
#include "common/settings.h"
-#include "video_core/command_classes/codecs/codec.h"
-#include "video_core/command_classes/codecs/h264.h"
-#include "video_core/command_classes/codecs/vp8.h"
-#include "video_core/command_classes/codecs/vp9.h"
-#include "video_core/gpu.h"
+#include "video_core/host1x/codecs/codec.h"
+#include "video_core/host1x/codecs/h264.h"
+#include "video_core/host1x/codecs/vp8.h"
+#include "video_core/host1x/codecs/vp9.h"
+#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
extern "C" {
@@ -57,16 +55,28 @@ AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pi
av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
return PREFERRED_CPU_FMT;
}
+
+// List all the currently available hwcontext in ffmpeg
+std::vector<AVHWDeviceType> ListSupportedContexts() {
+ std::vector<AVHWDeviceType> contexts{};
+ AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
+ do {
+ current_device_type = av_hwdevice_iterate_types(current_device_type);
+ contexts.push_back(current_device_type);
+ } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
+ return contexts;
+}
+
} // namespace
void AVFrameDeleter(AVFrame* ptr) {
av_frame_free(&ptr);
}
-Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
- : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
- vp8_decoder(std::make_unique<Decoder::VP8>(gpu)),
- vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
+Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
+ : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
+ vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
+ vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
Codec::~Codec() {
if (!initialized) {
@@ -77,17 +87,6 @@ Codec::~Codec() {
av_buffer_unref(&av_gpu_decoder);
}
-// List all the currently available hwcontext in ffmpeg
-static std::vector<AVHWDeviceType> ListSupportedContexts() {
- std::vector<AVHWDeviceType> contexts{};
- AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
- do {
- current_device_type = av_hwdevice_iterate_types(current_device_type);
- contexts.push_back(current_device_type);
- } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
- return contexts;
-}
-
bool Codec::CreateGpuAvDevice() {
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static const auto supported_contexts = ListSupportedContexts();
@@ -97,6 +96,8 @@ bool Codec::CreateGpuAvDevice() {
LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
continue;
}
+ // Avoid memory leak from not cleaning up after av_hwdevice_ctx_create
+ av_buffer_unref(&av_gpu_decoder);
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
@@ -128,15 +129,19 @@ bool Codec::CreateGpuAvDevice() {
av_codec->name, av_hwdevice_get_type_name(type));
break;
}
- if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
- av_codec_ctx->pix_fmt = config->pix_fmt;
- if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) {
+ if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
+#if defined(__unix__)
+ // Some linux decoding backends are reported to crash with this config method
+ // TODO(ameerj): Properly support this method
+ if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
// skip zero-copy decoders, we don't currently support them
LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
av_hwdevice_get_type_name(type), config->methods);
continue;
}
+#endif
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
+ av_codec_ctx->pix_fmt = config->pix_fmt;
return true;
}
}
@@ -163,11 +168,11 @@ void Codec::InitializeGpuDecoder() {
void Codec::Initialize() {
const AVCodecID codec = [&] {
switch (current_codec) {
- case NvdecCommon::VideoCodec::H264:
+ case Host1x::NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
- case NvdecCommon::VideoCodec::VP8:
+ case Host1x::NvdecCommon::VideoCodec::VP8:
return AV_CODEC_ID_VP8;
- case NvdecCommon::VideoCodec::VP9:
+ case Host1x::NvdecCommon::VideoCodec::VP9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
@@ -192,7 +197,7 @@ void Codec::Initialize() {
initialized = true;
}
-void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
+void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
if (current_codec != codec) {
current_codec = codec;
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
@@ -210,16 +215,16 @@ void Codec::Decode() {
bool vp9_hidden_frame = false;
const auto& frame_data = [&]() {
switch (current_codec) {
- case Tegra::NvdecCommon::VideoCodec::H264:
+ case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
return h264_decoder->ComposeFrame(state, is_first_frame);
- case Tegra::NvdecCommon::VideoCodec::VP8:
+ case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
return vp8_decoder->ComposeFrame(state);
- case Tegra::NvdecCommon::VideoCodec::VP9:
+ case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
vp9_decoder->ComposeFrame(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
return vp9_decoder->GetFrameBytes();
default:
- UNREACHABLE();
+ ASSERT(false);
return std::vector<u8>{};
}
}();
@@ -282,21 +287,21 @@ AVFramePtr Codec::GetCurrentFrame() {
return frame;
}
-NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
+Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
return current_codec;
}
std::string_view Codec::GetCurrentCodecName() const {
switch (current_codec) {
- case NvdecCommon::VideoCodec::None:
+ case Host1x::NvdecCommon::VideoCodec::None:
return "None";
- case NvdecCommon::VideoCodec::H264:
+ case Host1x::NvdecCommon::VideoCodec::H264:
return "H264";
- case NvdecCommon::VideoCodec::VP8:
+ case Host1x::NvdecCommon::VideoCodec::VP8:
return "VP8";
- case NvdecCommon::VideoCodec::H265:
+ case Host1x::NvdecCommon::VideoCodec::H265:
return "H265";
- case NvdecCommon::VideoCodec::VP9:
+ case Host1x::NvdecCommon::VideoCodec::VP9:
return "VP9";
default:
return "Unknown";
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/host1x/codecs/codec.h
index de5672155..0d45fb7fe 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/host1x/codecs/codec.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -8,7 +7,7 @@
#include <string_view>
#include <queue>
#include "common/common_types.h"
-#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/host1x/nvdec_common.h"
extern "C" {
#if defined(__GNUC__) || defined(__clang__)
@@ -22,7 +21,6 @@ extern "C" {
}
namespace Tegra {
-class GPU;
void AVFrameDeleter(AVFrame* ptr);
using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
@@ -33,16 +31,20 @@ class VP8;
class VP9;
} // namespace Decoder
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
+
class Codec {
public:
- explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
+ explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs);
~Codec();
/// Initialize the codec, returning success or failure
void Initialize();
/// Sets NVDEC video stream codec
- void SetTargetCodec(NvdecCommon::VideoCodec codec);
+ void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec);
/// Call decoders to construct headers, decode AVFrame with ffmpeg
void Decode();
@@ -51,7 +53,7 @@ public:
[[nodiscard]] AVFramePtr GetCurrentFrame();
/// Returns the value of current_codec
- [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+ [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
/// Return name of the current codec
[[nodiscard]] std::string_view GetCurrentCodecName() const;
@@ -64,14 +66,14 @@ private:
bool CreateGpuAvDevice();
bool initialized{};
- NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
+ Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
const AVCodec* av_codec{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
AVBufferRef* av_gpu_decoder{nullptr};
- GPU& gpu;
- const NvdecCommon::NvdecRegisters& state;
+ Host1x::Host1x& host1x;
+ const Host1x::NvdecCommon::NvdecRegisters& state;
std::unique_ptr<Decoder::H264> h264_decoder;
std::unique_ptr<Decoder::VP8> vp8_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder;
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 84f1fa938..e87bd65fa 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -1,29 +1,12 @@
-// MIT License
-//
-// Copyright (c) Ryujinx Team and Contributors
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
-// associated documentation files (the "Software"), to deal in the Software without restriction,
-// including without limitation the rights to use, copy, modify, merge, publish, distribute,
-// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all copies or
-// substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
-// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-//
+// SPDX-FileCopyrightText: Ryujinx Team and Contributors
+// SPDX-License-Identifier: MIT
#include <array>
#include <bit>
#include "common/settings.h"
-#include "video_core/command_classes/codecs/h264.h"
-#include "video_core/gpu.h"
+#include "video_core/host1x/codecs/h264.h"
+#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
@@ -41,19 +24,20 @@ constexpr std::array<u8, 16> zig_zag_scan{
};
} // Anonymous namespace
-H264::H264(GPU& gpu_) : gpu(gpu_) {}
+H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
H264::~H264() = default;
-const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state,
+const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
H264DecoderContext context;
- gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
+ host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
+ sizeof(H264DecoderContext));
const s64 frame_number = context.h264_parameter_set.frame_number.Value();
if (!is_first_frame && frame_number != 0) {
frame.resize(context.stream_len);
- gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
+ host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
return frame;
}
@@ -172,8 +156,8 @@ const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& sta
frame.resize(encoded_header.size() + context.stream_len);
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
- gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
- frame.data() + encoded_header.size(), context.stream_len);
+ host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+ frame.data() + encoded_header.size(), context.stream_len);
return frame;
}
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
index 1899d8e7f..5cc86454e 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/host1x/codecs/h264.h
@@ -1,22 +1,5 @@
-// MIT License
-//
-// Copyright (c) Ryujinx Team and Contributors
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
-// associated documentation files (the "Software"), to deal in the Software without restriction,
-// including without limitation the rights to use, copy, modify, merge, publish, distribute,
-// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all copies or
-// substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
-// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-//
+// SPDX-FileCopyrightText: Ryujinx Team and Contributors
+// SPDX-License-Identifier: MIT
#pragma once
@@ -25,10 +8,14 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
-class GPU;
+
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
+
namespace Decoder {
class H264BitWriter {
@@ -72,16 +59,16 @@ private:
class H264 {
public:
- explicit H264(GPU& gpu);
+ explicit H264(Host1x::Host1x& host1x);
~H264();
/// Compose the H264 frame for FFmpeg decoding
- [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state,
- bool is_first_frame = false);
+ [[nodiscard]] const std::vector<u8>& ComposeFrame(
+ const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
private:
std::vector<u8> frame;
- GPU& gpu;
+ Host1x::Host1x& host1x;
struct H264ParameterSet {
s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
diff --git a/src/video_core/command_classes/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
index 32ad0ec16..28fb12cb8 100644
--- a/src/video_core/command_classes/codecs/vp8.cpp
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -1,22 +1,20 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
-#include <array>
#include <vector>
-#include "video_core/command_classes/codecs/vp8.h"
-#include "video_core/gpu.h"
+#include "video_core/host1x/codecs/vp8.h"
+#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
-VP8::VP8(GPU& gpu_) : gpu(gpu_) {}
+VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {}
VP8::~VP8() = default;
-const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
+const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
VP8PictureInfo info;
- gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
+ host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
const bool is_key_frame = info.key_frame == 1u;
const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size);
@@ -47,7 +45,7 @@ const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& stat
frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f));
}
const u64 bitstream_offset = state.frame_bitstream_offset;
- gpu.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
+ host1x.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
return frame;
}
diff --git a/src/video_core/command_classes/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h
index 41fc7b403..5bf07ecab 100644
--- a/src/video_core/command_classes/codecs/vp8.h
+++ b/src/video_core/host1x/codecs/vp8.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -9,23 +8,28 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
-class GPU;
+
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
+
namespace Decoder {
class VP8 {
public:
- explicit VP8(GPU& gpu);
+ explicit VP8(Host1x::Host1x& host1x);
~VP8();
/// Compose the VP8 frame for FFmpeg decoding
- [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state);
+ [[nodiscard]] const std::vector<u8>& ComposeFrame(
+ const Host1x::NvdecCommon::NvdecRegisters& state);
private:
std::vector<u8> frame;
- GPU& gpu;
+ Host1x::Host1x& host1x;
struct VP8PictureInfo {
INSERT_PADDING_WORDS_NOINIT(14);
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
index 2c00181fa..cf40c9012 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -1,12 +1,11 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm> // for std::copy
#include <numeric>
#include "common/assert.h"
-#include "video_core/command_classes/codecs/vp9.h"
-#include "video_core/gpu.h"
+#include "video_core/host1x/codecs/vp9.h"
+#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
@@ -154,7 +153,7 @@ constexpr Vp9EntropyProbs default_probs{
.high_precision{128, 128},
};
-constexpr std::array<s32, 256> norm_lut{
+constexpr std::array<u8, 256> norm_lut{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -165,7 +164,7 @@ constexpr std::array<s32, 256> norm_lut{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
-constexpr std::array<s32, 254> map_lut{
+constexpr std::array<u8, 254> map_lut{
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54,
55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
@@ -233,11 +232,11 @@ constexpr std::array<s32, 254> map_lut{
std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
}
- return map_lut[index];
+ return static_cast<s32>(map_lut[index]);
}
} // Anonymous namespace
-VP9::VP9(GPU& gpu_) : gpu{gpu_} {}
+VP9::VP9(Host1x::Host1x& host1x_) : host1x{host1x_} {}
VP9::~VP9() = default;
@@ -356,9 +355,9 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
}
}
-Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
+Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
PictureInfo picture_info;
- gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
+ host1x.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
Vp9PictureInfo vp9_info = picture_info.Convert();
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
@@ -373,18 +372,19 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
EntropyProbs entropy;
- gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
+ host1x.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
entropy.Convert(dst);
}
-Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
+Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
Vp9FrameContainer current_frame{};
{
- gpu.SyncGuestHost();
+ // gpu.SyncGuestHost(); epic, why?
current_frame.info = GetVp9PictureInfo(state);
current_frame.bit_stream.resize(current_frame.info.bitstream_size);
- gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
- current_frame.info.bitstream_size);
+ host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+ current_frame.bit_stream.data(),
+ current_frame.info.bitstream_size);
}
if (!next_frame.bit_stream.empty()) {
Vp9FrameContainer temp{
@@ -770,7 +770,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
return uncomp_writer;
}
-void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
+void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
std::vector<u8> bitstream;
{
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
@@ -820,7 +820,7 @@ void VpxRangeEncoder::Write(bool bit, s32 probability) {
local_range = range - split;
}
- s32 shift = norm_lut[local_range];
+ s32 shift = static_cast<s32>(norm_lut[local_range]);
local_range <<= shift;
count += shift;
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h
index 2e735c792..d4083e8d3 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/host1x/codecs/vp9.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -9,11 +8,15 @@
#include "common/common_types.h"
#include "common/stream.h"
-#include "video_core/command_classes/codecs/vp9_types.h"
-#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/host1x/codecs/vp9_types.h"
+#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
-class GPU;
+
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
+
namespace Decoder {
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
@@ -107,7 +110,7 @@ private:
class VP9 {
public:
- explicit VP9(GPU& gpu_);
+ explicit VP9(Host1x::Host1x& host1x);
~VP9();
VP9(const VP9&) = delete;
@@ -118,7 +121,7 @@ public:
/// Composes the VP9 frame from the GPU state information.
/// Based on the official VP9 spec documentation
- void ComposeFrame(const NvdecCommon::NvdecRegisters& state);
+ void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
/// Returns true if the most recent frame was a hidden frame.
[[nodiscard]] bool WasFrameHidden() const {
@@ -163,19 +166,21 @@ private:
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
/// Returns VP9 information from NVDEC provided offset and size
- [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
+ [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(
+ const Host1x::NvdecCommon::NvdecRegisters& state);
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
/// Returns frame to be decoded after buffering
- [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
+ [[nodiscard]] Vp9FrameContainer GetCurrentFrame(
+ const Host1x::NvdecCommon::NvdecRegisters& state);
/// Use NVDEC providied information to compose the headers for the current frame
[[nodiscard]] std::vector<u8> ComposeCompressedHeader();
[[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
- GPU& gpu;
+ Host1x::Host1x& host1x;
std::vector<u8> frame;
std::array<s8, 4> loop_filter_ref_deltas{};
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h
index 3b1ed4b3a..adad8ed7e 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/host1x/codecs/vp9_types.h
@@ -1,17 +1,14 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
-#include <cstring>
#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
-class GPU;
namespace Decoder {
struct Vp9FrameDimensions {
diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp
new file mode 100644
index 000000000..dceefdb7f
--- /dev/null
+++ b/src/video_core/host1x/control.cpp
@@ -0,0 +1,33 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/assert.h"
+#include "video_core/host1x/control.h"
+#include "video_core/host1x/host1x.h"
+
+namespace Tegra::Host1x {
+
+Control::Control(Host1x& host1x_) : host1x(host1x_) {}
+
+Control::~Control() = default;
+
+void Control::ProcessMethod(Method method, u32 argument) {
+ switch (method) {
+ case Method::LoadSyncptPayload32:
+ syncpoint_value = argument;
+ break;
+ case Method::WaitSyncpt:
+ case Method::WaitSyncpt32:
+ Execute(argument);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method));
+ break;
+ }
+}
+
+void Control::Execute(u32 data) {
+ host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
+}
+
+} // namespace Tegra::Host1x
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/host1x/control.h
index 7e94799dd..e117888a3 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/host1x/control.h
@@ -1,18 +1,19 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
-#include <vector>
-#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
-class GPU;
+
+namespace Host1x {
+
+class Host1x;
class Nvdec;
-class Host1x {
+class Control {
public:
enum class Method : u32 {
WaitSyncpt = 0x8,
@@ -20,8 +21,8 @@ public:
WaitSyncpt32 = 0x50,
};
- explicit Host1x(GPU& gpu);
- ~Host1x();
+ explicit Control(Host1x& host1x);
+ ~Control();
/// Writes the method into the state, Invoke Execute() if encountered
void ProcessMethod(Method method, u32 argument);
@@ -31,7 +32,9 @@ private:
void Execute(u32 data);
u32 syncpoint_value{};
- GPU& gpu;
+ Host1x& host1x;
};
+} // namespace Host1x
+
} // namespace Tegra
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp
new file mode 100644
index 000000000..7c317a85d
--- /dev/null
+++ b/src/video_core/host1x/host1x.cpp
@@ -0,0 +1,17 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "core/core.h"
+#include "video_core/host1x/host1x.h"
+
+namespace Tegra {
+
+namespace Host1x {
+
+Host1x::Host1x(Core::System& system_)
+ : system{system_}, syncpoint_manager{}, memory_manager{system, 32, 12},
+ allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
+
+} // namespace Host1x
+
+} // namespace Tegra
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
new file mode 100644
index 000000000..57082ae54
--- /dev/null
+++ b/src/video_core/host1x/host1x.h
@@ -0,0 +1,57 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+
+#include "common/address_space.h"
+#include "video_core/host1x/syncpoint_manager.h"
+#include "video_core/memory_manager.h"
+
+namespace Core {
+class System;
+} // namespace Core
+
+namespace Tegra {
+
+namespace Host1x {
+
+class Host1x {
+public:
+ explicit Host1x(Core::System& system);
+
+ SyncpointManager& GetSyncpointManager() {
+ return syncpoint_manager;
+ }
+
+ const SyncpointManager& GetSyncpointManager() const {
+ return syncpoint_manager;
+ }
+
+ Tegra::MemoryManager& MemoryManager() {
+ return memory_manager;
+ }
+
+ const Tegra::MemoryManager& MemoryManager() const {
+ return memory_manager;
+ }
+
+ Common::FlatAllocator<u32, 0, 32>& Allocator() {
+ return *allocator;
+ }
+
+ const Common::FlatAllocator<u32, 0, 32>& Allocator() const {
+ return *allocator;
+ }
+
+private:
+ Core::System& system;
+ SyncpointManager syncpoint_manager;
+ Tegra::MemoryManager memory_manager;
+ std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator;
+};
+
+} // namespace Host1x
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/host1x/nvdec.cpp
index 9aaf5247e..a4bd5b79f 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/host1x/nvdec.cpp
@@ -1,17 +1,17 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
-#include "video_core/command_classes/nvdec.h"
-#include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/nvdec.h"
-namespace Tegra {
+namespace Tegra::Host1x {
#define NVDEC_REG_INDEX(field_name) \
(offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
-Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
+Nvdec::Nvdec(Host1x& host1x_)
+ : host1x(host1x_), state{}, codec(std::make_unique<Codec>(host1x, state)) {}
Nvdec::~Nvdec() = default;
@@ -45,4 +45,4 @@ void Nvdec::Execute() {
}
}
-} // namespace Tegra
+} // namespace Tegra::Host1x
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/host1x/nvdec.h
index 6e1da0b04..3949d5181 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/host1x/nvdec.h
@@ -1,20 +1,22 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <vector>
#include "common/common_types.h"
-#include "video_core/command_classes/codecs/codec.h"
+#include "video_core/host1x/codecs/codec.h"
namespace Tegra {
-class GPU;
+
+namespace Host1x {
+
+class Host1x;
class Nvdec {
public:
- explicit Nvdec(GPU& gpu);
+ explicit Nvdec(Host1x& host1x);
~Nvdec();
/// Writes the method into the state, Invoke Execute() if encountered
@@ -27,8 +29,11 @@ private:
/// Invoke codec to decode a frame
void Execute();
- GPU& gpu;
+ Host1x& host1x;
NvdecCommon::NvdecRegisters state;
std::unique_ptr<Codec> codec;
};
+
+} // namespace Host1x
+
} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/host1x/nvdec_common.h
index 8a35c44a1..49d67ebbe 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/host1x/nvdec_common.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -8,7 +7,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
-namespace Tegra::NvdecCommon {
+namespace Tegra::Host1x::NvdecCommon {
enum class VideoCodec : u64 {
None = 0x0,
@@ -95,4 +94,4 @@ ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
#undef ASSERT_REG_POSITION
-} // namespace Tegra::NvdecCommon
+} // namespace Tegra::Host1x::NvdecCommon
diff --git a/src/video_core/host1x/sync_manager.cpp b/src/video_core/host1x/sync_manager.cpp
new file mode 100644
index 000000000..5ef9ea217
--- /dev/null
+++ b/src/video_core/host1x/sync_manager.cpp
@@ -0,0 +1,50 @@
+// SPDX-FileCopyrightText: Ryujinx Team and Contributors
+// SPDX-License-Identifier: MIT
+
+#include <algorithm>
+#include "sync_manager.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
+
+namespace Tegra {
+namespace Host1x {
+
+SyncptIncrManager::SyncptIncrManager(Host1x& host1x_) : host1x(host1x_) {}
+SyncptIncrManager::~SyncptIncrManager() = default;
+
+void SyncptIncrManager::Increment(u32 id) {
+ increments.emplace_back(0, 0, id, true);
+ IncrementAllDone();
+}
+
+u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
+ const u32 handle = current_id++;
+ increments.emplace_back(handle, class_id, id);
+ return handle;
+}
+
+void SyncptIncrManager::SignalDone(u32 handle) {
+ const auto done_incr =
+ std::find_if(increments.begin(), increments.end(),
+ [handle](const SyncptIncr& incr) { return incr.id == handle; });
+ if (done_incr != increments.cend()) {
+ done_incr->complete = true;
+ }
+ IncrementAllDone();
+}
+
+void SyncptIncrManager::IncrementAllDone() {
+ std::size_t done_count = 0;
+ for (; done_count < increments.size(); ++done_count) {
+ if (!increments[done_count].complete) {
+ break;
+ }
+ auto& syncpoint_manager = host1x.GetSyncpointManager();
+ syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id);
+ syncpoint_manager.IncrementHost(increments[done_count].syncpt_id);
+ }
+ increments.erase(increments.begin(), increments.begin() + done_count);
+}
+
+} // namespace Host1x
+} // namespace Tegra
diff --git a/src/video_core/host1x/sync_manager.h b/src/video_core/host1x/sync_manager.h
new file mode 100644
index 000000000..7bb77fa27
--- /dev/null
+++ b/src/video_core/host1x/sync_manager.h
@@ -0,0 +1,53 @@
+// SPDX-FileCopyrightText: Ryujinx Team and Contributors
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <mutex>
+#include <vector>
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Host1x {
+
+class Host1x;
+
+struct SyncptIncr {
+ u32 id;
+ u32 class_id;
+ u32 syncpt_id;
+ bool complete;
+
+ SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
+ : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
+};
+
+class SyncptIncrManager {
+public:
+ explicit SyncptIncrManager(Host1x& host1x);
+ ~SyncptIncrManager();
+
+ /// Add syncpoint id and increment all
+ void Increment(u32 id);
+
+ /// Returns a handle to increment later
+ u32 IncrementWhenDone(u32 class_id, u32 id);
+
+ /// IncrememntAllDone, including handle
+ void SignalDone(u32 handle);
+
+ /// Increment all sequential pending increments that are already done.
+ void IncrementAllDone();
+
+private:
+ std::vector<SyncptIncr> increments;
+ std::mutex increment_lock;
+ u32 current_id{};
+
+ Host1x& host1x;
+};
+
+} // namespace Host1x
+
+} // namespace Tegra
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp
new file mode 100644
index 000000000..326e8355a
--- /dev/null
+++ b/src/video_core/host1x/syncpoint_manager.cpp
@@ -0,0 +1,96 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/microprofile.h"
+#include "video_core/host1x/syncpoint_manager.h"
+
+namespace Tegra {
+
+namespace Host1x {
+
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+
+SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
+ std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value,
+ std::function<void()>&& action) {
+ if (syncpoint.load(std::memory_order_acquire) >= expected_value) {
+ action();
+ return {};
+ }
+
+ std::unique_lock lk(guard);
+ if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
+ action();
+ return {};
+ }
+ auto it = action_storage.begin();
+ while (it != action_storage.end()) {
+ if (it->expected_value >= expected_value) {
+ break;
+ }
+ ++it;
+ }
+ return action_storage.emplace(it, expected_value, std::move(action));
+}
+
+void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
+ ActionHandle& handle) {
+ std::unique_lock lk(guard);
+ action_storage.erase(handle);
+}
+
+void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) {
+ DeregisterAction(guest_action_storage[syncpoint_id], handle);
+}
+
+void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) {
+ DeregisterAction(host_action_storage[syncpoint_id], handle);
+}
+
+void SyncpointManager::IncrementGuest(u32 syncpoint_id) {
+ Increment(syncpoints_guest[syncpoint_id], wait_guest_cv, guest_action_storage[syncpoint_id]);
+}
+
+void SyncpointManager::IncrementHost(u32 syncpoint_id) {
+ Increment(syncpoints_host[syncpoint_id], wait_host_cv, host_action_storage[syncpoint_id]);
+}
+
+void SyncpointManager::WaitGuest(u32 syncpoint_id, u32 expected_value) {
+ Wait(syncpoints_guest[syncpoint_id], wait_guest_cv, expected_value);
+}
+
+void SyncpointManager::WaitHost(u32 syncpoint_id, u32 expected_value) {
+ MICROPROFILE_SCOPE(GPU_wait);
+ Wait(syncpoints_host[syncpoint_id], wait_host_cv, expected_value);
+}
+
+void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
+ std::list<RegisteredAction>& action_storage) {
+ auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
+
+ std::unique_lock lk(guard);
+ auto it = action_storage.begin();
+ while (it != action_storage.end()) {
+ if (it->expected_value > new_value) {
+ break;
+ }
+ it->action();
+ it = action_storage.erase(it);
+ }
+ wait_cv.notify_all();
+}
+
+void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
+ u32 expected_value) {
+ const auto pred = [&]() { return syncpoint.load(std::memory_order_acquire) >= expected_value; };
+ if (pred()) {
+ return;
+ }
+
+ std::unique_lock lk(guard);
+ wait_cv.wait(lk, pred);
+}
+
+} // namespace Host1x
+
+} // namespace Tegra
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h
new file mode 100644
index 000000000..50a264e23
--- /dev/null
+++ b/src/video_core/host1x/syncpoint_manager.h
@@ -0,0 +1,98 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <condition_variable>
+#include <functional>
+#include <list>
+#include <mutex>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Host1x {
+
+class SyncpointManager {
+public:
+ u32 GetGuestSyncpointValue(u32 id) const {
+ return syncpoints_guest[id].load(std::memory_order_acquire);
+ }
+
+ u32 GetHostSyncpointValue(u32 id) const {
+ return syncpoints_host[id].load(std::memory_order_acquire);
+ }
+
+ struct RegisteredAction {
+ explicit RegisteredAction(u32 expected_value_, std::function<void()>&& action_)
+ : expected_value{expected_value_}, action{std::move(action_)} {}
+ u32 expected_value;
+ std::function<void()> action;
+ };
+ using ActionHandle = std::list<RegisteredAction>::iterator;
+
+ template <typename Func>
+ ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
+ std::function<void()> func(action);
+ return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id],
+ expected_value, std::move(func));
+ }
+
+ template <typename Func>
+ ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
+ std::function<void()> func(action);
+ return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id],
+ expected_value, std::move(func));
+ }
+
+ void DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle);
+
+ void DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle);
+
+ void IncrementGuest(u32 syncpoint_id);
+
+ void IncrementHost(u32 syncpoint_id);
+
+ void WaitGuest(u32 syncpoint_id, u32 expected_value);
+
+ void WaitHost(u32 syncpoint_id, u32 expected_value);
+
+ bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) const {
+ return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
+ }
+
+ bool IsReadyHost(u32 syncpoint_id, u32 expected_value) const {
+ return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
+ }
+
+private:
+ void Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
+ std::list<RegisteredAction>& action_storage);
+
+ ActionHandle RegisterAction(std::atomic<u32>& syncpoint,
+ std::list<RegisteredAction>& action_storage, u32 expected_value,
+ std::function<void()>&& action);
+
+ void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle);
+
+ void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value);
+
+ static constexpr size_t NUM_MAX_SYNCPOINTS = 192;
+
+ std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_guest{};
+ std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_host{};
+
+ std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> guest_action_storage;
+ std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> host_action_storage;
+
+ std::mutex guard;
+ std::condition_variable wait_guest_cv;
+ std::condition_variable wait_host_cv;
+};
+
+} // namespace Host1x
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/host1x/vic.cpp
index 051616124..ac0b7d20e 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
@@ -19,14 +18,17 @@ extern "C" {
#include "common/bit_field.h"
#include "common/logging/log.h"
-#include "video_core/command_classes/nvdec.h"
-#include "video_core/command_classes/vic.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/nvdec.h"
+#include "video_core/host1x/vic.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
namespace Tegra {
+
+namespace Host1x {
+
namespace {
enum class VideoPixelFormat : u64_le {
RGBA8 = 0x1f,
@@ -47,8 +49,8 @@ union VicConfig {
BitField<46, 14, u64_le> surface_height_minus1;
};
-Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
- : gpu(gpu_),
+Vic::Vic(Host1x& host1x_, std::shared_ptr<Nvdec> nvdec_processor_)
+ : host1x(host1x_),
nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} {}
Vic::~Vic() = default;
@@ -79,7 +81,7 @@ void Vic::Execute() {
LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
return;
}
- const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
+ const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)};
const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
const auto* frame = frame_ptr.get();
if (!frame) {
@@ -154,15 +156,16 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
luma_buffer.resize(size);
- Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
- converted_frame_buf_addr, block_height, 0, 0);
+ std::span<const u8> frame_buff(converted_frame_buf_addr, 4 * width * height);
+ Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height,
+ block_height, 0, width * 4);
- gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
+ host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
} else {
// send pitch linear frame
const size_t linear_size = width * height * 4;
- gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
- linear_size);
+ host1x.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
+ linear_size);
}
}
@@ -190,8 +193,8 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
luma_buffer[dst + x] = luma_src[src + x];
}
}
- gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
- luma_buffer.size());
+ host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
+ luma_buffer.size());
// Chroma
const std::size_t half_height = frame_height / 2;
@@ -229,11 +232,13 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
break;
}
default:
- UNREACHABLE();
+ ASSERT(false);
break;
}
- gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
- chroma_buffer.size());
+ host1x.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
+ chroma_buffer.size());
}
+} // namespace Host1x
+
} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/host1x/vic.h
index 6d4cdfd57..2b78786e8 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/host1x/vic.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -11,7 +10,10 @@
struct SwsContext;
namespace Tegra {
-class GPU;
+
+namespace Host1x {
+
+class Host1x;
class Nvdec;
union VicConfig;
@@ -26,7 +28,7 @@ public:
SetOutputSurfaceChromaUnusedOffset = 0x1ca
};
- explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
+ explicit Vic(Host1x& host1x, std::shared_ptr<Nvdec> nvdec_processor);
~Vic();
@@ -40,8 +42,8 @@ private:
void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
- GPU& gpu;
- std::shared_ptr<Tegra::Nvdec> nvdec_processor;
+ Host1x& host1x;
+ std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
/// Avoid reallocation of the following buffers every frame, as their
/// size does not change during a stream
@@ -59,4 +61,6 @@ private:
s32 scaler_height{};
};
+} // namespace Host1x
+
} // namespace Tegra
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index fd3e41434..2149ab93e 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: 2018 yuzu Emulator Project
+# SPDX-License-Identifier: GPL-2.0-or-later
+
set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr)
set(GLSL_INCLUDES
@@ -14,9 +17,11 @@ set(SHADER_FILES
convert_d24s8_to_abgr8.frag
convert_depth_to_float.frag
convert_float_to_depth.frag
+ convert_s8d24_to_abgr8.frag
full_screen_triangle.vert
fxaa.frag
fxaa.vert
+ opengl_convert_s8d24.comp
opengl_copy_bc4.comp
opengl_present.frag
opengl_present.vert
diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake
index 1b4bc6103..9f7525535 100644
--- a/src/video_core/host_shaders/StringShaderHeader.cmake
+++ b/src/video_core/host_shaders/StringShaderHeader.cmake
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: 2020 yuzu Emulator Project
+# SPDX-License-Identifier: GPL-2.0-or-later
+
set(SOURCE_FILE ${CMAKE_ARGV3})
set(HEADER_FILE ${CMAKE_ARGV4})
set(INPUT_FILE ${CMAKE_ARGV5})
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 3a10578cb..d608678a3 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
@@ -1066,7 +1065,7 @@ TexelWeightParams DecodeBlockInfo() {
void FillError(ivec3 coord) {
for (uint j = 0; j < block_dims.y; j++) {
for (uint i = 0; i < block_dims.x; i++) {
- imageStore(dest_image, coord + ivec3(i, j, 0), vec4(1.0, 1.0, 0.0, 1.0));
+ imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0));
}
}
}
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
index a131be79e..121f60cbd 100644
--- a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
+++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 430
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
index bb6872e6b..7fc98accb 100644
--- a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
+++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 430
diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
index ea055ddad..ff102e582 100644
--- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
+++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
#extension GL_ARB_shader_stencil_export : require
diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
index 94368fb59..d33131d7c 100644
--- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag
index 624c58509..57f0ce5a6 100644
--- a/src/video_core/host_shaders/convert_depth_to_float.frag
+++ b/src/video_core/host_shaders/convert_depth_to_float.frag
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag
index d86c795f4..5d403b1ec 100644
--- a/src/video_core/host_shaders/convert_float_to_depth.frag
+++ b/src/video_core/host_shaders/convert_float_to_depth.frag
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
new file mode 100644
index 000000000..31db7d426
--- /dev/null
+++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
@@ -0,0 +1,22 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 450
+
+layout(binding = 0) uniform sampler2D depth_tex;
+layout(binding = 1) uniform isampler2D stencil_tex;
+
+layout(location = 0) out vec4 color;
+
+void main() {
+ ivec2 coord = ivec2(gl_FragCoord.xy);
+ uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
+ uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
+
+ highp uint depth_val =
+ uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
+ lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
+ highp uvec4 components =
+ uvec4((uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu, stencil_val);
+ color.rgba = vec4(components) / (exp2(8.0) - 1.0);
+}
diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp
index 6b97f789d..f91b1aa9f 100644
--- a/src/video_core/host_shaders/fidelityfx_fsr.comp
+++ b/src/video_core/host_shaders/fidelityfx_fsr.comp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
//!#version 460 core
#extension GL_ARB_separate_shader_objects : enable
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert
index 452ad6502..2c976b19f 100644
--- a/src/video_core/host_shaders/full_screen_triangle.vert
+++ b/src/video_core/host_shaders/full_screen_triangle.vert
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag
index 02f4068d1..9bffc20d5 100644
--- a/src/video_core/host_shaders/fxaa.frag
+++ b/src/video_core/host_shaders/fxaa.frag
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
// Source code is adapted from
// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/
diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert
index ac20c04e9..c2717d90d 100644
--- a/src/video_core/host_shaders/fxaa.vert
+++ b/src/video_core/host_shaders/fxaa.vert
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460
diff --git a/src/video_core/host_shaders/opengl_convert_s8d24.comp b/src/video_core/host_shaders/opengl_convert_s8d24.comp
new file mode 100644
index 000000000..970203214
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_convert_s8d24.comp
@@ -0,0 +1,17 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 430 core
+
+layout(local_size_x = 16, local_size_y = 8) in;
+
+layout(binding = 0, rgba8ui) restrict uniform uimage2D destination;
+layout(location = 0) uniform uvec3 size;
+
+void main() {
+ if (any(greaterThanEqual(gl_GlobalInvocationID, size))) {
+ return;
+ }
+ uvec4 components = imageLoad(destination, ivec2(gl_GlobalInvocationID.xy));
+ imageStore(destination, ivec2(gl_GlobalInvocationID.xy), components.wxyz);
+}
diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp
index 7b8e20fbe..8d3ae5a97 100644
--- a/src/video_core/host_shaders/opengl_copy_bc4.comp
+++ b/src/video_core/host_shaders/opengl_copy_bc4.comp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 430 core
#extension GL_ARB_gpu_shader_int64 : require
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
index 84b818227..5fd7ad297 100644
--- a/src/video_core/host_shaders/opengl_present.frag
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 430 core
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
index c3b5adbba..cc2f40163 100644
--- a/src/video_core/host_shaders/opengl_present.vert
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 430 core
diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag
index 71ff9e1e3..a780373e3 100644
--- a/src/video_core/host_shaders/opengl_present_scaleforce.frag
+++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag
@@ -1,24 +1,5 @@
-// MIT License
-//
-// Copyright (c) 2020 BreadFish64
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
+// SPDX-FileCopyrightText: 2020 BreadFish64
+// SPDX-License-Identifier: MIT
// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp
index cb48ec170..7d23f594c 100644
--- a/src/video_core/host_shaders/pitch_unswizzle.comp
+++ b/src/video_core/host_shaders/pitch_unswizzle.comp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 430
diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag
index 902b70c2b..c57dd2851 100644
--- a/src/video_core/host_shaders/present_bicubic.frag
+++ b/src/video_core/host_shaders/present_bicubic.frag
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag
index 66fed3238..5f54b71b6 100644
--- a/src/video_core/host_shaders/present_gaussian.frag
+++ b/src/video_core/host_shaders/present_gaussian.frag
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
// Code adapted from the following sources:
// - https://learnopengl.com/Advanced-Lighting/Bloom
diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in
index 929dec39b..f189ee06b 100644
--- a/src/video_core/host_shaders/source_shader.h.in
+++ b/src/video_core/host_shaders/source_shader.h.in
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
#pragma once
#include <string_view>
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag
index 4a6aae410..c0c832296 100644
--- a/src/video_core/host_shaders/vulkan_blit_color_float.frag
+++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
index 19bb23a5a..484b99773 100644
--- a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
+++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
#extension GL_ARB_shader_stencil_export : require
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
index 1c96a7905..00af13726 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
#extension GL_GOOGLE_include_directive : enable
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
index f4daff739..13d783fa8 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
#extension GL_GOOGLE_include_directive : enable
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
index 6b6796dd1..331549d96 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
#extension GL_GOOGLE_include_directive : enable
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
index f785eebf3..013ca0014 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
#extension GL_GOOGLE_include_directive : enable
diff --git a/src/video_core/host_shaders/vulkan_present.frag b/src/video_core/host_shaders/vulkan_present.frag
index 0979ff3e6..97e098ced 100644
--- a/src/video_core/host_shaders/vulkan_present.frag
+++ b/src/video_core/host_shaders/vulkan_present.frag
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
diff --git a/src/video_core/host_shaders/vulkan_present.vert b/src/video_core/host_shaders/vulkan_present.vert
index 00b868958..89dc80468 100644
--- a/src/video_core/host_shaders/vulkan_present.vert
+++ b/src/video_core/host_shaders/vulkan_present.vert
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
index 924c03060..3dc9c0df5 100644
--- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
#version 460
#extension GL_GOOGLE_include_directive : enable
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
index a594b83ca..77ed07552 100644
--- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
#version 460
#extension GL_GOOGLE_include_directive : enable
diff --git a/src/video_core/host_shaders/vulkan_quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp
index 8655591d0..a412f30ff 100644
--- a/src/video_core/host_shaders/vulkan_quad_indexed.comp
+++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp
index 872291670..5aa6abdc8 100644
--- a/src/video_core/host_shaders/vulkan_uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
#extension GL_EXT_shader_16bit_storage : require
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 0aeda4ce8..f61d5998e 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -1,13 +1,17 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
+#include <fstream>
#include <optional>
+#include <span>
#include <boost/container_hash/hash.hpp>
+#include <fstream>
#include "common/assert.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/settings.h"
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
@@ -16,6 +20,23 @@
namespace Tegra {
+static void Dump(u64 hash, std::span<const u32> code) {
+ const auto base_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
+ const auto macro_dir{base_dir / "macros"};
+ if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories");
+ return;
+ }
+ const auto name{macro_dir / fmt::format("{:016x}.macro", hash)};
+ std::fstream macro_file(name, std::ios::out | std::ios::binary);
+ if (!macro_file) {
+ LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}",
+ Common::FS::PathToUTF8String(name));
+ return;
+ }
+ macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
+}
+
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
@@ -25,6 +46,11 @@ void MacroEngine::AddCode(u32 method, u32 data) {
uploaded_macro_code[method].push_back(data);
}
+void MacroEngine::ClearCode(u32 method) {
+ macro_cache.erase(method);
+ uploaded_macro_code.erase(method);
+}
+
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
auto compiled_macro = macro_cache.find(method);
if (compiled_macro != macro_cache.end()) {
@@ -46,7 +72,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
}
}
if (!mid_method.has_value()) {
- UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
+ ASSERT_MSG(false, "Macro 0x{0:x} was not uploaded", method);
return;
}
}
@@ -55,6 +81,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
if (!mid_method.has_value()) {
cache_info.lle_program = Compile(macro_code->second);
cache_info.hash = boost::hash_value(macro_code->second);
+ if (Settings::values.dump_macros) {
+ Dump(cache_info.hash, macro_code->second);
+ }
} else {
const auto& macro_cached = uploaded_macro_code[mid_method.value()];
const auto rebased_method = method - mid_method.value();
@@ -64,6 +93,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
code.size() * sizeof(u32));
cache_info.hash = boost::hash_value(code);
cache_info.lle_program = Compile(code);
+ if (Settings::values.dump_macros) {
+ Dump(cache_info.hash, code);
+ }
}
if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index 7aaa49286..07d97ba39 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -118,6 +117,9 @@ public:
// Store the uploaded macro code to compile them when they're called.
void AddCode(u32 method, u32 data);
+ // Clear the code associated with a method.
+ void ClearCode(u32 method);
+
// Compiles the macro if its not in the cache, and executes the compiled macro
void Execute(u32 method, const std::vector<u32>& parameters);
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 900ad23c9..cabe8dcbf 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -1,9 +1,10 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <vector>
+#include "common/scope_exit.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
@@ -59,6 +60,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.index_array.first = parameters[3];
maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
maxwell3d.regs.index_array.count = parameters[1];
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.regs.vb_element_base = element_base;
maxwell3d.regs.vb_base_instance = base_instance;
maxwell3d.mme_draw.instance_count = instance_count;
@@ -81,10 +83,67 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
-constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
+// Multidraw Indirect
+void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
+ SCOPE_EXIT({
+ // Clean everything.
+ maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
+ maxwell3d.regs.index_array.count = 0;
+ maxwell3d.regs.vb_element_base = 0x0;
+ maxwell3d.regs.vb_base_instance = 0x0;
+ maxwell3d.mme_draw.instance_count = 0;
+ maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+ maxwell3d.CallMethodFromMME(0x8e4, 0x0);
+ maxwell3d.CallMethodFromMME(0x8e5, 0x0);
+ maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ });
+ const u32 start_indirect = parameters[0];
+ const u32 end_indirect = parameters[1];
+ if (start_indirect >= end_indirect) {
+ // Nothing to do.
+ return;
+ }
+ const auto topology =
+ static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
+ maxwell3d.regs.draw.topology.Assign(topology);
+ const u32 padding = parameters[3];
+ const std::size_t max_draws = parameters[4];
+
+ const u32 indirect_words = 5 + padding;
+ const std::size_t first_draw = start_indirect;
+ const std::size_t effective_draws = end_indirect - start_indirect;
+ const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws);
+
+ for (std::size_t index = first_draw; index < last_draw; index++) {
+ const std::size_t base = index * indirect_words + 5;
+ const u32 num_vertices = parameters[base];
+ const u32 instance_count = parameters[base + 1];
+ const u32 first_index = parameters[base + 2];
+ const u32 base_vertex = parameters[base + 3];
+ const u32 base_instance = parameters[base + 4];
+ maxwell3d.regs.index_array.first = first_index;
+ maxwell3d.regs.reg_array[0x446] = base_vertex;
+ maxwell3d.regs.index_array.count = num_vertices;
+ maxwell3d.regs.vb_element_base = base_vertex;
+ maxwell3d.regs.vb_base_instance = base_instance;
+ maxwell3d.mme_draw.instance_count = instance_count;
+ maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+ maxwell3d.CallMethodFromMME(0x8e4, base_vertex);
+ maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ if (maxwell3d.ShouldExecute()) {
+ maxwell3d.Rasterizer().Draw(true, true);
+ }
+ maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+ }
+}
+
+constexpr std::array<std::pair<u64, HLEFunction>, 4> hle_funcs{{
{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
{0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
{0x0217920100488FF7, &HLE_0217920100488FF7},
+ {0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164},
}};
class HLEMacroImpl final : public CachedMacro {
@@ -100,6 +159,7 @@ private:
Engines::Maxwell3D& maxwell3d;
HLEFunction func;
};
+
} // Anonymous namespace
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
index b86ba84a1..625332c9d 100644
--- a/src/video_core/macro/macro_hle.h
+++ b/src/video_core/macro/macro_hle.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index fba755448..f670b1bca 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <optional>
@@ -309,7 +308,6 @@ bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond,
return value != 0;
}
UNREACHABLE();
- return true;
}
Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h
index 8a9648e46..f5eeb0b76 100644
--- a/src/video_core/macro/macro_interpreter.h
+++ b/src/video_core/macro/macro_interpreter.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 47b28ad16..a302a9603 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <bitset>
@@ -24,7 +23,8 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255
namespace Tegra {
namespace {
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
-constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d;
+constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11;
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
@@ -32,6 +32,7 @@ constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
STATE,
RESULT,
+ MAX_PARAMETER,
PARAMETERS,
METHOD_ADDRESS,
BRANCH_HOLDER,
@@ -81,7 +82,7 @@ private:
u32 carry_flag{};
};
static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
- using ProgramType = void (*)(JITState*, const u32*);
+ using ProgramType = void (*)(JITState*, const u32*, const u32*);
struct OptimizerState {
bool can_skip_carry{};
@@ -113,7 +114,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
JITState state{};
state.maxwell3d = &maxwell3d;
state.registers = {};
- program(&state, parameters.data());
+ program(&state, parameters.data(), parameters.data() + parameters.size());
}
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
@@ -278,28 +279,13 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
auto dst = Compile_GetRegister(opcode.src_a, RESULT);
auto src = Compile_GetRegister(opcode.src_b, eax);
- if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) {
- shr(src, opcode.bf_src_bit);
- } else if (opcode.bf_src_bit == 31) {
- xor_(src, src);
- }
- // Don't bother masking the whole register since we're using a 32 bit register
- if (opcode.bf_size != 31 && opcode.bf_size != 0) {
- and_(src, opcode.GetBitfieldMask());
- } else if (opcode.bf_size == 0) {
- xor_(src, src);
- }
- if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) {
- shl(src, opcode.bf_dst_bit);
- } else if (opcode.bf_dst_bit == 31) {
- xor_(src, src);
- }
-
const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
- if (mask != 0xffffffff) {
- and_(dst, mask);
- }
+ and_(dst, mask);
+ shr(src, opcode.bf_src_bit);
+ and_(src, opcode.GetBitfieldMask());
+ shl(src, opcode.bf_dst_bit);
or_(dst, src);
+
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
@@ -308,17 +294,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
const auto src = Compile_GetRegister(opcode.src_b, RESULT);
shr(src, dst.cvt8());
- if (opcode.bf_size != 0 && opcode.bf_size != 31) {
- and_(src, opcode.GetBitfieldMask());
- } else if (opcode.bf_size == 0) {
- xor_(src, src);
- }
+ and_(src, opcode.GetBitfieldMask());
+ shl(src, opcode.bf_dst_bit);
- if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) {
- shl(src, opcode.bf_dst_bit);
- } else if (opcode.bf_dst_bit == 31) {
- xor_(src, src);
- }
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
@@ -326,13 +304,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
const auto dst = Compile_GetRegister(opcode.src_a, ecx);
const auto src = Compile_GetRegister(opcode.src_b, RESULT);
- if (opcode.bf_src_bit != 0) {
- shr(src, opcode.bf_src_bit);
- }
-
- if (opcode.bf_size != 31) {
- and_(src, opcode.GetBitfieldMask());
- }
+ shr(src, opcode.bf_src_bit);
+ and_(src, opcode.GetBitfieldMask());
shl(src, dst.cvt8());
Compile_ProcessResult(opcode.result_operation, opcode.dst);
@@ -410,7 +383,7 @@ void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
Xbyak::Label end;
auto value = Compile_GetRegister(opcode.src_a, eax);
- test(value, value);
+ cmp(value, 0); // test(value, value);
if (optimizer.has_delayed_pc) {
switch (opcode.branch_condition) {
case Macro::BranchCondition::Zero:
@@ -428,17 +401,11 @@ void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
Xbyak::Label handle_post_exit{};
Xbyak::Label skip{};
jmp(skip, T_NEAR);
- if (opcode.is_exit) {
- L(handle_post_exit);
- // Execute 1 instruction
- mov(BRANCH_HOLDER, end_of_code);
- // Jump to next instruction to skip delay slot check
- jmp(labels[jump_address], T_NEAR);
- } else {
- L(handle_post_exit);
- xor_(BRANCH_HOLDER, BRANCH_HOLDER);
- jmp(labels[jump_address], T_NEAR);
- }
+
+ L(handle_post_exit);
+ xor_(BRANCH_HOLDER, BRANCH_HOLDER);
+ jmp(labels[jump_address], T_NEAR);
+
L(skip);
mov(BRANCH_HOLDER, handle_post_exit);
jmp(delay_skip[pc], T_NEAR);
@@ -489,6 +456,7 @@ void MacroJITx64Impl::Compile() {
// JIT state
mov(STATE, Common::X64::ABI_PARAM1);
mov(PARAMETERS, Common::X64::ABI_PARAM2);
+ mov(MAX_PARAMETER, Common::X64::ABI_PARAM3);
xor_(RESULT, RESULT);
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
@@ -599,7 +567,22 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
return true;
}
+static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) {
+ LOG_CRITICAL(HW_GPU,
+ "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)",
+ parameter, max_parameter - sizeof(u32));
+}
+
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
+ Xbyak::Label parameter_ok{};
+ cmp(PARAMETERS, MAX_PARAMETER);
+ jb(parameter_ok, T_NEAR);
+ Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ mov(Common::X64::ABI_PARAM1, PARAMETERS);
+ mov(Common::X64::ABI_PARAM2, MAX_PARAMETER);
+ Common::X64::CallFarFunction(*this, &WarnInvalidParameter);
+ Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ L(parameter_ok);
mov(eax, dword[PARAMETERS]);
add(PARAMETERS, sizeof(u32));
return eax;
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 773b037ae..99ee1b9e6 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 4ff3fa268..cca401c74 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
@@ -8,182 +7,208 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
+#include "core/device_memory.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
#include "core/memory.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
namespace Tegra {
-MemoryManager::MemoryManager(Core::System& system_)
- : system{system_}, page_table(page_table_size) {}
+std::atomic<size_t> MemoryManager::unique_identifier_generator{};
+
+MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
+ u64 page_bits_)
+ : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()},
+ address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
+ entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
+ page_bits != big_page_bits ? page_bits : 0},
+ unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} {
+ address_space_size = 1ULL << address_space_bits;
+ page_size = 1ULL << page_bits;
+ page_mask = page_size - 1ULL;
+ big_page_size = 1ULL << big_page_bits;
+ big_page_mask = big_page_size - 1ULL;
+ const u64 page_table_bits = address_space_bits - page_bits;
+ const u64 big_page_table_bits = address_space_bits - big_page_bits;
+ const u64 page_table_size = 1ULL << page_table_bits;
+ const u64 big_page_table_size = 1ULL << big_page_table_bits;
+ page_table_mask = page_table_size - 1;
+ big_page_table_mask = big_page_table_size - 1;
+
+ big_entries.resize(big_page_table_size / 32, 0);
+ big_page_table_cpu.resize(big_page_table_size);
+ big_page_continous.resize(big_page_table_size / continous_bits, 0);
+ entries.resize(page_table_size / 32, 0);
+}
MemoryManager::~MemoryManager() = default;
-void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
- rasterizer = rasterizer_;
-}
-
-GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
- u64 remaining_size{size};
- for (u64 offset{}; offset < size; offset += page_size) {
- if (remaining_size < page_size) {
- SetPageEntry(gpu_addr + offset, page_entry + offset, remaining_size);
- } else {
- SetPageEntry(gpu_addr + offset, page_entry + offset);
- }
- remaining_size -= page_size;
+template <bool is_big_page>
+MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const {
+ if constexpr (is_big_page) {
+ position = position >> big_page_bits;
+ const u64 entry_mask = big_entries[position / 32];
+ const size_t sub_index = position % 32;
+ return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+ } else {
+ position = position >> page_bits;
+ const u64 entry_mask = entries[position / 32];
+ const size_t sub_index = position % 32;
+ return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
}
- return gpu_addr;
}
-GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
- const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
- if (it != map_ranges.end() && it->first == gpu_addr) {
- it->second = size;
+template <bool is_big_page>
+void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
+ if constexpr (is_big_page) {
+ position = position >> big_page_bits;
+ const u64 entry_mask = big_entries[position / 32];
+ const size_t sub_index = position % 32;
+ big_entries[position / 32] =
+ (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
} else {
- map_ranges.insert(it, MapRange{gpu_addr, size});
+ position = position >> page_bits;
+ const u64 entry_mask = entries[position / 32];
+ const size_t sub_index = position % 32;
+ entries[position / 32] =
+ (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
}
- return UpdateRange(gpu_addr, cpu_addr, size);
}
-GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
- return Map(cpu_addr, *FindFreeRange(size, align), size);
+inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
+ const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
+ const size_t sub_index = big_page_index % continous_bits;
+ return ((entry_mask >> sub_index) & 0x1ULL) != 0;
}
-GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
- const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
- ASSERT(gpu_addr);
- return Map(cpu_addr, *gpu_addr, size);
+inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value) {
+ const u64 continous_mask = big_page_continous[big_page_index / continous_bits];
+ const size_t sub_index = big_page_index % continous_bits;
+ big_page_continous[big_page_index / continous_bits] =
+ (~(1ULL << sub_index) & continous_mask) | (value ? 1ULL << sub_index : 0);
}
-void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
- if (size == 0) {
- return;
- }
- const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
- if (it != map_ranges.end()) {
- ASSERT(it->first == gpu_addr);
- map_ranges.erase(it);
- } else {
- UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
- }
- const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
-
- for (const auto& [map_addr, map_size] : submapped_ranges) {
- // Flush and invalidate through the GPU interface, to be asynchronous if possible.
- const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
- ASSERT(cpu_addr);
-
- rasterizer->UnmapMemory(*cpu_addr, map_size);
+template <MemoryManager::EntryType entry_type>
+GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
+ size_t size) {
+ u64 remaining_size{size};
+ if constexpr (entry_type == EntryType::Mapped) {
+ page_table.ReserveRange(gpu_addr, size);
}
-
- UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
-}
-
-std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
for (u64 offset{}; offset < size; offset += page_size) {
- if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) {
- return std::nullopt;
+ const GPUVAddr current_gpu_addr = gpu_addr + offset;
+ [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
+ SetEntry<false>(current_gpu_addr, entry_type);
+ if (current_entry_type != entry_type) {
+ rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
+ }
+ if constexpr (entry_type == EntryType::Mapped) {
+ const VAddr current_cpu_addr = cpu_addr + offset;
+ const auto index = PageEntryIndex<false>(current_gpu_addr);
+ const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
+ page_table[index] = sub_value;
}
+ remaining_size -= page_size;
}
-
- return UpdateRange(gpu_addr, PageEntry::State::Allocated, size);
-}
-
-GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
- return *AllocateFixed(*FindFreeRange(size, align), size);
+ return gpu_addr;
}
-void MemoryManager::TryLockPage(PageEntry page_entry, std::size_t size) {
- if (!page_entry.IsValid()) {
- return;
+template <MemoryManager::EntryType entry_type>
+GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
+ size_t size) {
+ u64 remaining_size{size};
+ for (u64 offset{}; offset < size; offset += big_page_size) {
+ const GPUVAddr current_gpu_addr = gpu_addr + offset;
+ [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
+ SetEntry<true>(current_gpu_addr, entry_type);
+ if (current_entry_type != entry_type) {
+ rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
+ }
+ if constexpr (entry_type == EntryType::Mapped) {
+ const VAddr current_cpu_addr = cpu_addr + offset;
+ const auto index = PageEntryIndex<true>(current_gpu_addr);
+ const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
+ big_page_table_cpu[index] = sub_value;
+ const bool is_continous = ([&] {
+ uintptr_t base_ptr{
+ reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))};
+ if (base_ptr == 0) {
+ return false;
+ }
+ for (VAddr start_cpu = current_cpu_addr + page_size;
+ start_cpu < current_cpu_addr + big_page_size; start_cpu += page_size) {
+ base_ptr += page_size;
+ auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointerSilent(start_cpu));
+ if (next_ptr == 0 || base_ptr != next_ptr) {
+ return false;
+ }
+ }
+ return true;
+ })();
+ SetBigPageContinous(index, is_continous);
+ }
+ remaining_size -= big_page_size;
}
-
- ASSERT(system.CurrentProcess()
- ->PageTable()
- .LockForDeviceAddressSpace(page_entry.ToAddress(), size)
- .IsSuccess());
+ return gpu_addr;
}
-void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) {
- if (!page_entry.IsValid()) {
- return;
- }
-
- ASSERT(system.CurrentProcess()
- ->PageTable()
- .UnlockForDeviceAddressSpace(page_entry.ToAddress(), size)
- .IsSuccess());
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
-PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const {
- return page_table[PageEntryIndex(gpu_addr)];
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+ bool is_big_pages) {
+ if (is_big_pages) [[likely]] {
+ return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+ }
+ return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
}
-void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
- // TODO(bunnei): We should lock/unlock device regions. This currently causes issues due to
- // improper tracking, but should be fixed in the future.
-
- //// Unlock the old page
- // TryUnlockPage(page_table[PageEntryIndex(gpu_addr)], size);
-
- //// Lock the new page
- // TryLockPage(page_entry, size);
- auto& current_page = page_table[PageEntryIndex(gpu_addr)];
-
- if ((!current_page.IsValid() && page_entry.IsValid()) ||
- current_page.ToAddress() != page_entry.ToAddress()) {
- rasterizer->ModifyGPUMemory(gpu_addr, size);
+GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
+ if (is_big_pages) [[likely]] {
+ return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
}
-
- current_page = page_entry;
+ return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
}
-std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
- bool start_32bit_address) const {
- if (!align) {
- align = page_size;
- } else {
- align = Common::AlignUp(align, page_size);
+void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
+ if (size == 0) {
+ return;
}
+ const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
- u64 available_size{};
- GPUVAddr gpu_addr{start_32bit_address ? address_space_start_low : address_space_start};
- while (gpu_addr + available_size < address_space_size) {
- if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
- available_size += page_size;
-
- if (available_size >= size) {
- return gpu_addr;
- }
- } else {
- gpu_addr += available_size + page_size;
- available_size = 0;
+ for (const auto& [map_addr, map_size] : submapped_ranges) {
+ // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+ const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
+ ASSERT(cpu_addr);
- const auto remainder{gpu_addr % align};
- if (remainder) {
- gpu_addr = (gpu_addr - remainder) + align;
- }
- }
+ rasterizer->UnmapMemory(*cpu_addr, map_size);
}
- return std::nullopt;
+ BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
+ PageTableOp<EntryType::Free>(gpu_addr, 0, size);
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
- if (gpu_addr == 0) {
+ if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] {
return std::nullopt;
}
- const auto page_entry{GetPageEntry(gpu_addr)};
- if (!page_entry.IsValid()) {
- return std::nullopt;
+ if (GetEntry<true>(gpu_addr) != EntryType::Mapped) [[unlikely]] {
+ if (GetEntry<false>(gpu_addr) != EntryType::Mapped) {
+ return std::nullopt;
+ }
+
+ const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)])
+ << cpu_page_bits;
+ return cpu_addr_base + (gpu_addr & page_mask);
}
- return page_entry.ToAddress() + (gpu_addr & page_mask);
+ const VAddr cpu_addr_base =
+ static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits;
+ return cpu_addr_base + (gpu_addr & big_page_mask);
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
@@ -191,7 +216,7 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t s
const size_t page_last{(addr + size + page_size - 1) >> page_bits};
while (page_index < page_last) {
const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
- if (page_addr && *page_addr != 0) {
+ if (page_addr) {
return page_addr;
}
++page_index;
@@ -208,7 +233,7 @@ T MemoryManager::Read(GPUVAddr addr) const {
return value;
}
- UNREACHABLE();
+ ASSERT(false);
return {};
}
@@ -221,7 +246,7 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
return;
}
- UNREACHABLE();
+ ASSERT(false);
}
template u8 MemoryManager::Read<u8>(GPUVAddr addr) const;
@@ -234,126 +259,298 @@ template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
- if (!GetPageEntry(gpu_addr).IsValid()) {
- return {};
- }
-
const auto address{GpuToCpuAddress(gpu_addr)};
if (!address) {
return {};
}
- return system.Memory().GetPointer(*address);
+ return memory.GetPointer(*address);
}
const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
- if (!GetPageEntry(gpu_addr).IsValid()) {
- return {};
- }
-
const auto address{GpuToCpuAddress(gpu_addr)};
if (!address) {
return {};
}
- return system.Memory().GetPointer(*address);
-}
-
-size_t MemoryManager::BytesToMapEnd(GPUVAddr gpu_addr) const noexcept {
- auto it = std::ranges::upper_bound(map_ranges, gpu_addr, {}, &MapRange::first);
- --it;
- return it->second - (gpu_addr - it->first);
-}
-
-void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
- bool is_safe) const {
+ return memory.GetPointer(*address);
+}
+
+#ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining.
+#pragma inline_recursion(on)
+#endif
+
+template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
+inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size,
+ FuncMapped&& func_mapped, FuncReserved&& func_reserved,
+ FuncUnmapped&& func_unmapped) const {
+ static constexpr bool BOOL_BREAK_MAPPED = std::is_same_v<FuncMapped, bool>;
+ static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v<FuncReserved, bool>;
+ static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v<FuncUnmapped, bool>;
+ u64 used_page_size;
+ u64 used_page_mask;
+ u64 used_page_bits;
+ if constexpr (is_big_pages) {
+ used_page_size = big_page_size;
+ used_page_mask = big_page_mask;
+ used_page_bits = big_page_bits;
+ } else {
+ used_page_size = page_size;
+ used_page_mask = page_mask;
+ used_page_bits = page_bits;
+ }
std::size_t remaining_size{size};
- std::size_t page_index{gpu_src_addr >> page_bits};
- std::size_t page_offset{gpu_src_addr & page_mask};
+ std::size_t page_index{gpu_src_addr >> used_page_bits};
+ std::size_t page_offset{gpu_src_addr & used_page_mask};
+ GPUVAddr current_address = gpu_src_addr;
while (remaining_size > 0) {
const std::size_t copy_amount{
- std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
- if (page_addr && *page_addr != 0) {
- const auto src_addr{*page_addr + page_offset};
- if (is_safe) {
- // Flush must happen on the rasterizer interface, such that memory is always
- // synchronous when it is read (even when in asynchronous GPU mode).
- // Fixes Dead Cells title menu.
- rasterizer->FlushRegion(src_addr, copy_amount);
+ std::min(static_cast<std::size_t>(used_page_size) - page_offset, remaining_size)};
+ auto entry = GetEntry<is_big_pages>(current_address);
+ if (entry == EntryType::Mapped) [[likely]] {
+ if constexpr (BOOL_BREAK_MAPPED) {
+ if (func_mapped(page_index, page_offset, copy_amount)) {
+ return;
+ }
+ } else {
+ func_mapped(page_index, page_offset, copy_amount);
}
- system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
- } else {
- std::memset(dest_buffer, 0, copy_amount);
- }
+ } else if (entry == EntryType::Reserved) {
+ if constexpr (BOOL_BREAK_RESERVED) {
+ if (func_reserved(page_index, page_offset, copy_amount)) {
+ return;
+ }
+ } else {
+ func_reserved(page_index, page_offset, copy_amount);
+ }
+
+ } else [[unlikely]] {
+ if constexpr (BOOL_BREAK_UNMAPPED) {
+ if (func_unmapped(page_index, page_offset, copy_amount)) {
+ return;
+ }
+ } else {
+ func_unmapped(page_index, page_offset, copy_amount);
+ }
+ }
page_index++;
page_offset = 0;
- dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
remaining_size -= copy_amount;
+ current_address += copy_amount;
}
}
+template <bool is_safe>
+void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
+ std::size_t size) const {
+ auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
+ [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
+ std::memset(dest_buffer, 0, copy_amount);
+ dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+ };
+ auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+ if constexpr (is_safe) {
+ rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+ }
+ u8* physical = memory.GetPointer(cpu_addr_base);
+ std::memcpy(dest_buffer, physical, copy_amount);
+ dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+ };
+ auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+ if constexpr (is_safe) {
+ rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+ }
+ if (!IsBigPageContinous(page_index)) [[unlikely]] {
+ memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+ } else {
+ u8* physical = memory.GetPointer(cpu_addr_base);
+ std::memcpy(dest_buffer, physical, copy_amount);
+ }
+ dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+ };
+ auto read_short_pages = [&](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, mapped_normal, set_to_zero, set_to_zero);
+ };
+ MemoryOperation<true>(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages);
+}
+
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
- ReadBlockImpl(gpu_src_addr, dest_buffer, size, true);
+ ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size);
}
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
const std::size_t size) const {
- ReadBlockImpl(gpu_src_addr, dest_buffer, size, false);
+ ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size);
}
-void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
- bool is_safe) {
- std::size_t remaining_size{size};
- std::size_t page_index{gpu_dest_addr >> page_bits};
- std::size_t page_offset{gpu_dest_addr & page_mask};
-
- while (remaining_size > 0) {
- const std::size_t copy_amount{
- std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
- if (page_addr && *page_addr != 0) {
- const auto dest_addr{*page_addr + page_offset};
-
- if (is_safe) {
- // Invalidate must happen on the rasterizer interface, such that memory is always
- // synchronous when it is written (even when in asynchronous GPU mode).
- rasterizer->InvalidateRegion(dest_addr, copy_amount);
- }
- system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+template <bool is_safe>
+void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer,
+ std::size_t size) {
+ auto just_advance = [&]([[maybe_unused]] std::size_t page_index,
+ [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
+ src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+ };
+ auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+ if constexpr (is_safe) {
+ rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
}
-
- page_index++;
- page_offset = 0;
+ u8* physical = memory.GetPointer(cpu_addr_base);
+ std::memcpy(physical, src_buffer, copy_amount);
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
- remaining_size -= copy_amount;
- }
+ };
+ auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+ if constexpr (is_safe) {
+ rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+ }
+ if (!IsBigPageContinous(page_index)) [[unlikely]] {
+ memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
+ } else {
+ u8* physical = memory.GetPointer(cpu_addr_base);
+ std::memcpy(physical, src_buffer, copy_amount);
+ }
+ src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+ };
+ auto write_short_pages = [&](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, mapped_normal, just_advance, just_advance);
+ };
+ MemoryOperation<true>(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages);
}
void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
- WriteBlockImpl(gpu_dest_addr, src_buffer, size, true);
+ WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size);
}
void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
std::size_t size) {
- WriteBlockImpl(gpu_dest_addr, src_buffer, size, false);
+ WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size);
}
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
- size_t remaining_size{size};
- size_t page_index{gpu_addr >> page_bits};
- size_t page_offset{gpu_addr & page_mask};
- while (remaining_size > 0) {
- const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
- if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
- rasterizer->FlushRegion(*page_addr + page_offset, num_bytes);
+ auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
+ [[maybe_unused]] std::size_t offset,
+ [[maybe_unused]] std::size_t copy_amount) {};
+
+ auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+ rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+ };
+ auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+ rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+ };
+ auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
+ };
+ MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages);
+}
+
+bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const {
+ bool result = false;
+ auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
+ [[maybe_unused]] std::size_t offset,
+ [[maybe_unused]] std::size_t copy_amount) { return false; };
+
+ auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+ result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount);
+ return result;
+ };
+ auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+ result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount);
+ return result;
+ };
+ auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
+ return result;
+ };
+ MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, check_short_pages);
+ return result;
+}
+
+size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
+ std::optional<VAddr> old_page_addr{};
+ size_t range_so_far = 0;
+ bool result{false};
+ auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
+ std::size_t copy_amount) {
+ result = true;
+ return true;
+ };
+ auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+ if (old_page_addr && *old_page_addr != cpu_addr_base) {
+ result = true;
+ return true;
}
- ++page_index;
- page_offset = 0;
- remaining_size -= num_bytes;
- }
+ range_so_far += copy_amount;
+ old_page_addr = {cpu_addr_base + copy_amount};
+ return false;
+ };
+ auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+ if (old_page_addr && *old_page_addr != cpu_addr_base) {
+ return true;
+ }
+ range_so_far += copy_amount;
+ old_page_addr = {cpu_addr_base + copy_amount};
+ return false;
+ };
+ auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, short_check, fail, fail);
+ return result;
+ };
+ MemoryOperation<true>(gpu_addr, size, big_check, fail, check_short_pages);
+ return range_so_far;
+}
+
+void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
+ auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
+ [[maybe_unused]] std::size_t offset,
+ [[maybe_unused]] std::size_t copy_amount) {};
+
+ auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+ rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+ };
+ auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+ rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+ };
+ auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
+ };
+ MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages);
}
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
@@ -367,87 +564,134 @@ void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std
}
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
- const auto cpu_addr{GpuToCpuAddress(gpu_addr)};
- if (!cpu_addr) {
+ if (GetEntry<true>(gpu_addr) == EntryType::Mapped) [[likely]] {
+ size_t page_index = gpu_addr >> big_page_bits;
+ if (IsBigPageContinous(page_index)) [[likely]] {
+ const std::size_t page{(page_index & big_page_mask) + size};
+ return page <= big_page_size;
+ }
+ const std::size_t page{(gpu_addr & Core::Memory::YUZU_PAGEMASK) + size};
+ return page <= Core::Memory::YUZU_PAGESIZE;
+ }
+ if (GetEntry<false>(gpu_addr) != EntryType::Mapped) {
return false;
}
- const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size};
- return page <= Core::Memory::PAGE_SIZE;
+ const std::size_t page{(gpu_addr & Core::Memory::YUZU_PAGEMASK) + size};
+ return page <= Core::Memory::YUZU_PAGESIZE;
}
bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
- size_t page_index{gpu_addr >> page_bits};
- const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
std::optional<VAddr> old_page_addr{};
- while (page_index != page_last) {
- const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
- if (!page_addr || *page_addr == 0) {
- return false;
+ bool result{true};
+ auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
+ std::size_t copy_amount) {
+ result = false;
+ return true;
+ };
+ auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+ if (old_page_addr && *old_page_addr != cpu_addr_base) {
+ result = false;
+ return true;
}
- if (old_page_addr) {
- if (*old_page_addr + page_size != *page_addr) {
- return false;
- }
+ old_page_addr = {cpu_addr_base + copy_amount};
+ return false;
+ };
+ auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+ if (old_page_addr && *old_page_addr != cpu_addr_base) {
+ result = false;
+ return true;
}
- old_page_addr = page_addr;
- ++page_index;
- }
- return true;
+ old_page_addr = {cpu_addr_base + copy_amount};
+ return false;
+ };
+ auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, short_check, fail, fail);
+ return !result;
+ };
+ MemoryOperation<true>(gpu_addr, size, big_check, fail, check_short_pages);
+ return result;
}
bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
- size_t page_index{gpu_addr >> page_bits};
- const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
- while (page_index < page_last) {
- if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
- return false;
- }
- ++page_index;
- }
- return true;
+ bool result{true};
+ auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
+ [[maybe_unused]] std::size_t copy_amount) {
+ result = false;
+ return true;
+ };
+ auto pass = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
+ [[maybe_unused]] std::size_t copy_amount) { return false; };
+ auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, pass, pass, fail);
+ return !result;
+ };
+ MemoryOperation<true>(gpu_addr, size, pass, fail, check_short_pages);
+ return result;
}
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
GPUVAddr gpu_addr, std::size_t size) const {
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
- size_t page_index{gpu_addr >> page_bits};
- size_t remaining_size{size};
- size_t page_offset{gpu_addr & page_mask};
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
std::optional<VAddr> old_page_addr{};
- const auto extend_size = [&last_segment, &page_index, &page_offset](std::size_t bytes) {
- if (!last_segment) {
- const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset;
- last_segment = {new_base_addr, bytes};
- } else {
- last_segment->second += bytes;
- }
- };
- const auto split = [&last_segment, &result] {
+ const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
+ [[maybe_unused]] std::size_t offset,
+ [[maybe_unused]] std::size_t copy_amount) {
if (last_segment) {
result.push_back(*last_segment);
last_segment = std::nullopt;
}
};
- while (remaining_size > 0) {
- const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
- const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
- if (!page_addr || *page_addr == 0) {
- split();
- } else if (old_page_addr) {
- if (*old_page_addr + page_size != *page_addr) {
- split();
+ const auto extend_size_big = [this, &split, &old_page_addr,
+ &last_segment](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+ if (old_page_addr) {
+ if (*old_page_addr != cpu_addr_base) {
+ split(0, 0, 0);
+ }
+ }
+ old_page_addr = {cpu_addr_base + copy_amount};
+ if (!last_segment) {
+ const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
+ last_segment = {new_base_addr, copy_amount};
+ } else {
+ last_segment->second += copy_amount;
+ }
+ };
+ const auto extend_size_short = [this, &split, &old_page_addr,
+ &last_segment](std::size_t page_index, std::size_t offset,
+ std::size_t copy_amount) {
+ const VAddr cpu_addr_base =
+ (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+ if (old_page_addr) {
+ if (*old_page_addr != cpu_addr_base) {
+ split(0, 0, 0);
}
- extend_size(num_bytes);
+ }
+ old_page_addr = {cpu_addr_base + copy_amount};
+ if (!last_segment) {
+ const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
+ last_segment = {new_base_addr, copy_amount};
} else {
- extend_size(num_bytes);
+ last_segment->second += copy_amount;
}
- ++page_index;
- page_offset = 0;
- remaining_size -= num_bytes;
- old_page_addr = page_addr;
- }
- split();
+ };
+ auto do_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+ GPUVAddr base = (page_index << big_page_bits) + offset;
+ MemoryOperation<false>(base, copy_amount, extend_size_short, split, split);
+ };
+ MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
+ split(0, 0, 0);
return result;
}
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 61bfe47c7..f992e29f3 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -1,76 +1,41 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
+#include <atomic>
#include <map>
#include <optional>
#include <vector>
#include "common/common_types.h"
+#include "common/multi_level_page_table.h"
+#include "common/virtual_buffer.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Core {
+class DeviceMemory;
+namespace Memory {
+class Memory;
+} // namespace Memory
class System;
-}
+} // namespace Core
namespace Tegra {
-class PageEntry final {
-public:
- enum class State : u32 {
- Unmapped = static_cast<u32>(-1),
- Allocated = static_cast<u32>(-2),
- };
-
- constexpr PageEntry() = default;
- constexpr PageEntry(State state_) : state{state_} {}
- constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
-
- [[nodiscard]] constexpr bool IsUnmapped() const {
- return state == State::Unmapped;
- }
-
- [[nodiscard]] constexpr bool IsAllocated() const {
- return state == State::Allocated;
- }
-
- [[nodiscard]] constexpr bool IsValid() const {
- return !IsUnmapped() && !IsAllocated();
- }
-
- [[nodiscard]] constexpr VAddr ToAddress() const {
- if (!IsValid()) {
- return {};
- }
-
- return static_cast<VAddr>(state) << ShiftBits;
- }
-
- [[nodiscard]] constexpr PageEntry operator+(u64 offset) const {
- // If this is a reserved value, offsets do not apply
- if (!IsValid()) {
- return *this;
- }
- return PageEntry{(static_cast<VAddr>(state) << ShiftBits) + offset};
- }
-
-private:
- static constexpr std::size_t ShiftBits{12};
-
- State state{State::Unmapped};
-};
-static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");
-
class MemoryManager final {
public:
- explicit MemoryManager(Core::System& system_);
+ explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
+ u64 big_page_bits_ = 16, u64 page_bits_ = 12);
~MemoryManager();
+ size_t GetID() const {
+ return unique_identifier;
+ }
+
/// Binds a renderer to the memory manager.
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
@@ -87,9 +52,6 @@ public:
[[nodiscard]] u8* GetPointer(GPUVAddr addr);
[[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
- /// Returns the number of bytes until the end of the memory map containing the given GPU address
- [[nodiscard]] size_t BytesToMapEnd(GPUVAddr gpu_addr) const noexcept;
-
/**
* ReadBlock and WriteBlock are full read and write operations over virtual
* GPU Memory. It's important to use these when GPU memory may not be continuous
@@ -136,54 +98,95 @@ public:
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
std::size_t size) const;
- [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
- [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
- [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
- [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
- [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
+ GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
+ GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
+ void InvalidateRegion(GPUVAddr gpu_addr, size_t size) const;
+
+ bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const;
+
+ size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const;
+
+ bool IsWithinGPUAddressRange(GPUVAddr gpu_addr) const {
+ return gpu_addr < address_space_size;
+ }
+
private:
- [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
- void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
- GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
- [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
- bool start_32bit_address = false) const;
-
- void TryLockPage(PageEntry page_entry, std::size_t size);
- void TryUnlockPage(PageEntry page_entry, std::size_t size);
-
- void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
- bool is_safe) const;
- void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
- bool is_safe);
-
- [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
- return (gpu_addr >> page_bits) & page_table_mask;
+ template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
+ inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
+ FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
+
+ template <bool is_safe>
+ void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+
+ template <bool is_safe>
+ void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+
+ template <bool is_big_page>
+ [[nodiscard]] std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
+ if constexpr (is_big_page) {
+ return (gpu_addr >> big_page_bits) & big_page_table_mask;
+ } else {
+ return (gpu_addr >> page_bits) & page_table_mask;
+ }
}
- static constexpr u64 address_space_size = 1ULL << 40;
- static constexpr u64 address_space_start = 1ULL << 32;
- static constexpr u64 address_space_start_low = 1ULL << 16;
- static constexpr u64 page_bits{16};
- static constexpr u64 page_size{1 << page_bits};
- static constexpr u64 page_mask{page_size - 1};
- static constexpr u64 page_table_bits{24};
- static constexpr u64 page_table_size{1 << page_table_bits};
- static constexpr u64 page_table_mask{page_table_size - 1};
+ inline bool IsBigPageContinous(size_t big_page_index) const;
+ inline void SetBigPageContinous(size_t big_page_index, bool value);
Core::System& system;
+ Core::Memory::Memory& memory;
+ Core::DeviceMemory& device_memory;
+
+ const u64 address_space_bits;
+ const u64 page_bits;
+ u64 address_space_size;
+ u64 page_size;
+ u64 page_mask;
+ u64 page_table_mask;
+ static constexpr u64 cpu_page_bits{12};
+
+ const u64 big_page_bits;
+ u64 big_page_size;
+ u64 big_page_mask;
+ u64 big_page_table_mask;
VideoCore::RasterizerInterface* rasterizer = nullptr;
- std::vector<PageEntry> page_table;
+ enum class EntryType : u64 {
+ Free = 0,
+ Reserved = 1,
+ Mapped = 2,
+ };
+
+ std::vector<u64> entries;
+ std::vector<u64> big_entries;
+
+ template <EntryType entry_type>
+ GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+
+ template <EntryType entry_type>
+ GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+
+ template <bool is_big_page>
+ inline EntryType GetEntry(size_t position) const;
+
+ template <bool is_big_page>
+ inline void SetEntry(size_t position, EntryType entry);
+
+ Common::MultiLevelPageTable<u32> page_table;
+ Common::VirtualBuffer<u32> big_page_table_cpu;
+
+ std::vector<u64> big_page_continous;
+
+ constexpr static size_t continous_bits = 64;
- using MapRange = std::pair<GPUVAddr, size_t>;
- std::vector<MapRange> map_ranges;
+ const size_t unique_identifier;
- std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
+ static std::atomic<size_t> unique_identifier_generator;
};
} // namespace Tegra
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 392f82eb7..b0ebe71b7 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -18,9 +17,8 @@
#include "common/assert.h"
#include "common/settings.h"
-#include "core/core.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -93,13 +91,10 @@ private:
};
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
-class QueryCacheBase {
+class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
- explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_)
- : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
- gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+ explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
+ : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -120,13 +115,13 @@ public:
*/
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
CachedQuery* query = TryGet(*cpu_addr);
if (!query) {
ASSERT_OR_EXECUTE(cpu_addr, return;);
- u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+ u8* const host_ptr = gpu_memory->GetPointer(gpu_addr);
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
@@ -140,8 +135,10 @@ public:
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() {
std::unique_lock lock{mutex};
- const auto& regs = maxwell3d.regs;
- Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
+ if (maxwell3d) {
+ const auto& regs = maxwell3d->regs;
+ Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
+ }
}
/// Resets a counter to zero. It doesn't disable the query after resetting.
@@ -217,8 +214,8 @@ private:
return cache_begin < addr_end && addr_begin < cache_end;
};
- const u64 page_end = addr_end >> PAGE_BITS;
- for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) {
+ const u64 page_end = addr_end >> YUZU_PAGEBITS;
+ for (u64 page = addr_begin >> YUZU_PAGEBITS; page <= page_end; ++page) {
const auto& it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
continue;
@@ -238,14 +235,14 @@ private:
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
- const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS;
+ const u64 page = static_cast<u64>(cpu_addr) >> YUZU_PAGEBITS;
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
host_ptr);
}
/// Tries to a get a cached query. Returns nullptr on failure.
CachedQuery* TryGet(VAddr addr) {
- const u64 page = static_cast<u64>(addr) >> PAGE_BITS;
+ const u64 page = static_cast<u64>(addr) >> YUZU_PAGEBITS;
const auto it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
return nullptr;
@@ -263,12 +260,10 @@ private:
uncommitted_flushes->push_back(addr);
}
- static constexpr std::uintptr_t PAGE_SIZE = 4096;
- static constexpr unsigned PAGE_BITS = 12;
+ static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
+ static constexpr unsigned YUZU_PAGEBITS = 12;
VideoCore::RasterizerInterface& rasterizer;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex;
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
index 4c9524702..4a197d65d 100644
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
@@ -25,8 +24,8 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del
u64 cache_bytes = 0;
std::atomic_thread_fence(std::memory_order_acquire);
- const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
- for (u64 page = addr >> PAGE_BITS; page != page_end; ++page) {
+ const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE);
+ for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) {
std::atomic_uint16_t& count = cached_pages.at(page >> 2).Count(page);
if (delta > 0) {
@@ -45,26 +44,27 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del
if (uncache_bytes == 0) {
uncache_begin = page;
}
- uncache_bytes += PAGE_SIZE;
+ uncache_bytes += YUZU_PAGESIZE;
} else if (uncache_bytes > 0) {
- cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false);
+ cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes,
+ false);
uncache_bytes = 0;
}
if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
if (cache_bytes == 0) {
cache_begin = page;
}
- cache_bytes += PAGE_SIZE;
+ cache_bytes += YUZU_PAGESIZE;
} else if (cache_bytes > 0) {
- cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true);
+ cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
cache_bytes = 0;
}
}
if (uncache_bytes > 0) {
- cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false);
+ cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false);
}
if (cache_bytes > 0) {
- cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true);
+ cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
}
}
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
index 249644e50..7118b8aff 100644
--- a/src/video_core/rasterizer_accelerated.h
+++ b/src/video_core/rasterizer_accelerated.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1f1f12291..d2d40884c 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -17,6 +16,9 @@ class MemoryManager;
namespace Engines {
class AccelerateDMAInterface;
}
+namespace Control {
+struct ChannelState;
+}
} // namespace Tegra
namespace VideoCore {
@@ -60,7 +62,10 @@ public:
virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0;
/// Signal a GPU based semaphore as a fence
- virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
+ virtual void SignalFence(std::function<void()>&& func) = 0;
+
+ /// Send an operation to be done after a certain amount of flushes.
+ virtual void SyncOperation(std::function<void()>&& func) = 0;
/// Signal a GPU based syncpoint as a fence
virtual void SignalSyncPoint(u32 value) = 0;
@@ -87,13 +92,13 @@ public:
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host.
- virtual void SyncGuestHost() = 0;
+ virtual void InvalidateGPUCache() = 0;
/// Unmap memory range
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
/// Remap GPU memory range. This means underneath backing memory changed
- virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+ virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
@@ -124,7 +129,7 @@ public:
[[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0;
virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
- std::span<u8> memory) = 0;
+ std::span<const u8> memory) = 0;
/// Attempt to use a faster method to display the framebuffer to screen
[[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
@@ -138,5 +143,11 @@ public:
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const DiskResourceLoadCallback& callback) {}
+
+ virtual void InitializeChannel(Tegra::Control::ChannelState& channel) {}
+
+ virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
+
+ virtual void ReleaseChannel(s32 channel_id) {}
};
} // namespace VideoCore
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index a99c33c37..45791aa75 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -1,9 +1,7 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2015 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/logging/log.h"
-#include "common/settings.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_base.h"
@@ -27,6 +25,10 @@ void RendererBase::UpdateCurrentFramebufferLayout() {
render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
}
+bool RendererBase::IsScreenshotPending() const {
+ return renderer_settings.screenshot_requested;
+}
+
void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout) {
if (renderer_settings.screenshot_requested) {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index c5f974080..8d20cbece 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -1,6 +1,5 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -83,6 +82,9 @@ public:
/// Refreshes the settings common to all renderers
void RefreshBaseSettings();
+ /// Returns true if a screenshot is being processed
+ bool IsScreenshotPending() const;
+
/// Request a screenshot of the next frame
void RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index d4dd10bb6..08f4d69ab 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <span>
@@ -135,6 +134,20 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
buffer.Create();
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
}
+
+ device_access_memory = [this]() -> u64 {
+ if (device.CanReportMemoryUsage()) {
+ return device.GetCurrentDedicatedVideoMemory() + 512_MiB;
+ }
+ return 2_GiB; // Return minimum requirements
+ }();
+}
+
+u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
+ if (device.CanReportMemoryUsage()) {
+ return device_access_memory - device.GetCurrentDedicatedVideoMemory();
+ }
+ return 2_GiB;
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
@@ -155,7 +168,7 @@ void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
- static_cast<GLsizeiptr>(size));
+ static_cast<GLsizeiptr>(Common::AlignUp(size, 4)));
} else {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
index_buffer_offset = offset;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 060d36427..a8c3f8b67 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -1,15 +1,12 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <span>
-#include "common/alignment.h"
#include "common/common_types.h"
-#include "common/dynamic_library.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
@@ -91,6 +88,8 @@ public:
void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
+ u64 GetDeviceMemoryUsage() const;
+
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
@@ -153,6 +152,14 @@ public:
use_storage_buffers = use_storage_buffers_;
}
+ u64 GetDeviceLocalMemory() const {
+ return device_access_memory;
+ }
+
+ bool CanReportMemoryUsage() const {
+ return device.CanReportMemoryUsage();
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -186,6 +193,8 @@ private:
std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
u32 index_buffer_offset = 0;
+
+ u64 device_access_memory;
};
struct BufferCacheParams {
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 5c1f21c65..26d066004 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
@@ -29,12 +28,11 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
}
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
- BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- ProgramManager& program_manager_, const Shader::Info& info_,
- std::string code, std::vector<u32> code_v)
- : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
- kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
+ BufferCache& buffer_cache_, ProgramManager& program_manager_,
+ const Shader::Info& info_, std::string code,
+ std::vector<u32> code_v)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
+ program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL:
source_program = CreateProgram(code, GL_COMPUTE_SHADER);
@@ -87,7 +85,7 @@ void ComputePipeline::Configure() {
GLsizei texture_binding{};
GLsizei image_binding{};
- const auto& qmd{kepler_compute.launch_description};
+ const auto& qmd{kepler_compute->launch_description};
const auto& cbufs{qmd.const_buffer_config};
const bool via_header_index{qmd.linked_tsc != 0};
const auto read_handle{[&](const auto& desc, u32 index) {
@@ -102,12 +100,13 @@ void ComputePipeline::Configure() {
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
secondary_offset};
- const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
- const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 lhs_raw{gpu_memory->Read<u32>(addr) << desc.shift_left};
+ const u32 rhs_raw{gpu_memory->Read<u32>(separate_addr)
+ << desc.secondary_shift_left};
return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
- return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ return TexturePair(gpu_memory->Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc, bool blacklist) {
for (u32 index = 0; index < desc.count; ++index) {
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index 50c676365..6534dec32 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -1,12 +1,10 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <type_traits>
-#include <utility>
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
@@ -51,10 +49,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
- BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- ProgramManager& program_manager_, const Shader::Info& info_,
- std::string code, std::vector<u32> code_v);
+ BufferCache& buffer_cache_, ProgramManager& program_manager_,
+ const Shader::Info& info_, std::string code, std::vector<u32> code_v);
void Configure();
@@ -62,11 +58,17 @@ public:
return writes_global_memory;
}
+ void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_,
+ Tegra::MemoryManager* gpu_memory_) {
+ kepler_compute = kepler_compute_;
+ gpu_memory = gpu_memory_;
+ }
+
private:
TextureCache& texture_cache;
BufferCache& buffer_cache;
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager* gpu_memory;
+ Tegra::Engines::KeplerCompute* kepler_compute;
ProgramManager& program_manager;
Shader::Info info;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index e62912a22..1663e277d 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -1,13 +1,10 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
-#include <cstring>
-#include <limits>
#include <optional>
#include <span>
#include <stdexcept>
@@ -15,13 +12,15 @@
#include <glad/glad.h>
+#include "common/literals.h"
#include "common/logging/log.h"
-#include "common/scope_exit.h"
#include "common/settings.h"
#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+using namespace Common::Literals;
+
namespace OpenGL {
namespace {
constexpr std::array LIMIT_UBOS = {
@@ -168,6 +167,7 @@ Device::Device() {
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
+ can_report_memory = GLAD_GL_NVX_gpu_memory_info;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@@ -279,4 +279,10 @@ void main() {
})");
}
+u64 Device::GetCurrentDedicatedVideoMemory() const {
+ GLint cur_avail_mem_kb = 0;
+ glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, &cur_avail_mem_kb);
+ return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 95c2e8d38..5ef51ebcf 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -20,6 +19,8 @@ public:
[[nodiscard]] std::string GetVendorName() const;
+ u64 GetCurrentDedicatedVideoMemory() const;
+
u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
return max_uniform_buffers[static_cast<size_t>(stage)];
}
@@ -168,6 +169,10 @@ public:
return vendor_name == "ATI Technologies Inc.";
}
+ bool CanReportMemoryUsage() const {
+ return can_report_memory;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -210,6 +215,7 @@ private:
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};
bool has_bool_ref_bug{};
+ bool can_report_memory{};
std::string vendor_name;
};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 151290101..91463f854 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
@@ -11,10 +10,7 @@
namespace OpenGL {
-GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
-
-GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
- : FenceBase{address_, payload_, is_stubbed_} {}
+GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
@@ -31,9 +27,8 @@ bool GLInnerFence::IsSignaled() const {
return true;
}
ASSERT(sync_object.handle != 0);
- GLsizei length;
GLint sync_status;
- glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
+ glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
return sync_status == GL_SIGNALED;
}
@@ -50,12 +45,8 @@ FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterize
BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
-Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
- return std::make_shared<GLInnerFence>(value, is_stubbed);
-}
-
-Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
- return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
+Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) {
+ return std::make_shared<GLInnerFence>(is_stubbed);
}
void FenceManagerOpenGL::QueueFence(Fence& fence) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index e714aa115..f1446e732 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -17,8 +16,7 @@ namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
- explicit GLInnerFence(u32 payload_, bool is_stubbed_);
- explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
+ explicit GLInnerFence(bool is_stubbed_);
~GLInnerFence();
void Queue();
@@ -41,8 +39,7 @@ public:
QueryCache& query_cache);
protected:
- Fence CreateFence(u32 value, bool is_stubbed) override;
- Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+ Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index f8495896c..41493a7da 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
@@ -49,7 +48,7 @@ GLenum Stage(size_t stage_index) {
case 4:
return GL_FRAGMENT_SHADER;
}
- UNREACHABLE_MSG("{}", stage_index);
+ ASSERT_MSG(false, "{}", stage_index);
return GL_NONE;
}
@@ -66,7 +65,7 @@ GLenum AssemblyStage(size_t stage_index) {
case 4:
return GL_FRAGMENT_PROGRAM_NV;
}
- UNREACHABLE_MSG("{}", stage_index);
+ ASSERT_MSG(false, "{}", stage_index);
return GL_NONE;
}
@@ -170,15 +169,15 @@ ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 ena
}
} // Anonymous namespace
-GraphicsPipeline::GraphicsPipeline(
- const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_,
- Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
- ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker,
- VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources,
- std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos,
- const GraphicsPipelineKey& key_)
- : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
- gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
+GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_, ShaderWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify,
+ std::array<std::string, 5> sources,
+ std::array<std::vector<u32>, 5> sources_spirv,
+ const std::array<const Shader::Info*, 5>& infos,
+ const GraphicsPipelineKey& key_)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
state_tracker{state_tracker_}, key{key_} {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
@@ -243,10 +242,6 @@ GraphicsPipeline::GraphicsPipeline(
case Settings::ShaderBackend::GLASM:
if (!sources[stage].empty()) {
assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
- if (in_parallel) {
- // Make sure program is built before continuing when building in parallel
- glGetString(GL_PROGRAM_ERROR_STRING_NV);
- }
}
break;
case Settings::ShaderBackend::SPIRV:
@@ -256,20 +251,18 @@ GraphicsPipeline::GraphicsPipeline(
break;
}
}
- if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
- // Make sure programs have built if we are building shaders in parallel
- for (OGLProgram& program : source_programs) {
- if (program.handle != 0) {
- GLint status{};
- glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
- }
- }
+ if (in_parallel) {
+ std::scoped_lock lock{built_mutex};
+ built_fence.Create();
+ // Flush this context to ensure compilation commands and fence are in the GPU pipe.
+ glFlush();
+ built_condvar.notify_one();
+ } else {
+ is_built = true;
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
- is_built = true;
- built_condvar.notify_one();
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
@@ -292,7 +285,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
- const auto& regs{maxwell3d.regs};
+ const auto& regs{maxwell3d->regs};
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
const Shader::Info& info{stage_infos[stage]};
@@ -306,7 +299,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
++ssbo_index;
}
}
- const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
+ const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(cbufs[desc.cbuf_index].enabled);
const u32 index_offset{index << desc.size_shift};
@@ -319,13 +312,14 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
second_offset};
- const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
- const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 lhs_raw{gpu_memory->Read<u32>(addr) << desc.shift_left};
+ const u32 rhs_raw{gpu_memory->Read<u32>(separate_addr)
+ << desc.secondary_shift_left};
const u32 raw{lhs_raw | rhs_raw};
return TexturePair(raw, via_header_index);
}
}
- return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ return TexturePair(gpu_memory->Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
for (u32 index = 0; index < desc.count; ++index) {
@@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
- if (!is_built.load(std::memory_order::relaxed)) {
+ if (!IsBuilt()) {
WaitForBuild();
}
const bool use_assembly{assembly_programs[0].handle != 0};
@@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
}
void GraphicsPipeline::WaitForBuild() {
- std::unique_lock lock{built_mutex};
- built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+ if (built_fence.handle == 0) {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
+ }
+ ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
+ is_built = true;
+}
+
+bool GraphicsPipeline::IsBuilt() noexcept {
+ if (is_built) {
+ return true;
+ }
+ if (built_fence.handle == 0) {
+ return false;
+ }
+ // Timeout of zero means this is non-blocking
+ const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
+ ASSERT(sync_status != GL_WAIT_FAILED);
+ is_built = sync_status != GL_TIMEOUT_EXPIRED;
+ return is_built;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 4e28d9a42..a0f0e63cb 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -14,7 +13,6 @@
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
@@ -73,10 +71,9 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
- BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- ProgramManager& program_manager_, StateTracker& state_tracker_,
- ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
+ BufferCache& buffer_cache_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_, ShaderWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify,
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
@@ -100,9 +97,7 @@ public:
return writes_global_memory;
}
- [[nodiscard]] bool IsBuilt() const noexcept {
- return is_built.load(std::memory_order::relaxed);
- }
+ [[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {
@@ -111,6 +106,11 @@ public:
};
}
+ void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) {
+ maxwell3d = maxwell3d_;
+ gpu_memory = gpu_memory_;
+ }
+
private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
@@ -123,8 +123,8 @@ private:
TextureCache& texture_cache;
BufferCache& buffer_cache;
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager* gpu_memory;
+ Tegra::Engines::Maxwell3D* maxwell3d;
ProgramManager& program_manager;
StateTracker& state_tracker;
const GraphicsPipelineKey key;
@@ -154,7 +154,8 @@ private:
std::mutex built_mutex;
std::condition_variable built_condvar;
- std::atomic_bool is_built{false};
+ OGLSync built_fence{};
+ bool is_built{false};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index acebbf5f4..5070db441 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -1,17 +1,13 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
-#include <cstring>
#include <memory>
-#include <unordered_map>
#include <utility>
#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@@ -30,9 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
-QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_)
- : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
+ : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 7bbe5cfe9..14ce59990 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -29,8 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_);
+ explicit QueryCache(RasterizerOpenGL& rasterizer_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 142412a8e..c2d80605d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1,14 +1,11 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2015 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <bitset>
#include <memory>
-#include <string>
#include <string_view>
-#include <tuple>
#include <utility>
#include <glad/glad.h>
@@ -17,8 +14,9 @@
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "common/settings.h"
-#include "core/memory.h"
+#include "video_core/control/channel_state.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@@ -58,22 +56,20 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
Core::Memory::Memory& cpu_memory_, const Device& device_,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_)
- : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
- kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
- screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
+ : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
+ program_manager(program_manager_), state_tracker(state_tracker_),
texture_cache_runtime(device, program_manager, state_tracker),
- texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
- buffer_cache_runtime(device),
- buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
- buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
- query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
+ texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
+ buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
+ shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
+ state_tracker, gpu.ShaderNotify()),
+ query_cache(*this), accelerate_dma(buffer_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
void RasterizerOpenGL::SyncVertexFormats() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
@@ -91,7 +87,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
}
flags[Dirty::VertexFormat0 + index] = false;
- const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
+ const auto attrib = maxwell3d->regs.vertex_attrib_format[index];
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
@@ -115,13 +111,13 @@ void RasterizerOpenGL::SyncVertexFormats() {
}
void RasterizerOpenGL::SyncVertexInstances() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
}
flags[Dirty::VertexInstances] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) {
continue;
@@ -142,11 +138,11 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load
void RasterizerOpenGL::Clear() {
MICROPROFILE_SCOPE(OpenGL_Clears);
- if (!maxwell3d.ShouldExecute()) {
+ if (!maxwell3d->ShouldExecute()) {
return;
}
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
@@ -212,28 +208,33 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
+ SCOPE_EXIT({ gpu.TickWork(); });
query_cache.UpdateCounters();
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
}
+
+ gpu.TickWork();
+
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
SyncState();
- const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
+ const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology);
BeginTransformFeedback(pipeline, primitive_mode);
- const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
+ const GLuint base_instance = static_cast<GLuint>(maxwell3d->regs.vb_base_instance);
const GLsizei num_instances =
- static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
+ static_cast<GLsizei>(is_instanced ? maxwell3d->mme_draw.instance_count : 1);
if (is_indexed) {
- const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
- const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d->regs.vb_element_base);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d->regs.index_array.count);
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
- const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
+ const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) {
@@ -252,8 +253,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
base_instance);
}
} else {
- const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
- const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d->regs.vertex_buffer.first);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d->regs.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) {
@@ -267,8 +268,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
-
- gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute() {
@@ -276,8 +275,9 @@ void RasterizerOpenGL::DispatchCompute() {
if (!pipeline) {
return;
}
+ pipeline->SetEngine(kepler_compute, gpu_memory);
pipeline->Configure();
- const auto& qmd{kepler_compute.launch_description};
+ const auto& qmd{kepler_compute->launch_description};
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
@@ -362,7 +362,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
}
}
-void RasterizerOpenGL::SyncGuestHost() {
+void RasterizerOpenGL::InvalidateGPUCache() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
shader_cache.SyncGuestHost();
{
@@ -383,40 +383,30 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
shader_cache.OnCPUWrite(addr, size);
}
-void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
{
std::scoped_lock lock{texture_cache.mutex};
- texture_cache.UnmapGPUMemory(addr, size);
+ texture_cache.UnmapGPUMemory(as_id, addr, size);
}
}
-void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
- if (!gpu.IsAsync()) {
- gpu_memory.Write<u32>(addr, value);
- return;
- }
- fence_manager.SignalSemaphore(addr, value);
+void RasterizerOpenGL::SignalFence(std::function<void()>&& func) {
+ fence_manager.SignalFence(std::move(func));
+}
+
+void RasterizerOpenGL::SyncOperation(std::function<void()>&& func) {
+ fence_manager.SyncOperation(std::move(func));
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
- if (!gpu.IsAsync()) {
- gpu.IncrementSyncPoint(value);
- return;
- }
fence_manager.SignalSyncPoint(value);
}
void RasterizerOpenGL::SignalReference() {
- if (!gpu.IsAsync()) {
- return;
- }
fence_manager.SignalOrdering();
}
void RasterizerOpenGL::ReleaseFences() {
- if (!gpu.IsAsync()) {
- return;
- }
fence_manager.WaitPendingFences();
}
@@ -433,6 +423,7 @@ void RasterizerOpenGL::WaitForIdle() {
}
void RasterizerOpenGL::FragmentBarrier() {
+ glTextureBarrier();
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
}
@@ -485,13 +476,13 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA()
}
void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
- std::span<u8> memory) {
- auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+ std::span<const u8> memory) {
+ auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
if (!cpu_addr) [[unlikely]] {
- gpu_memory.WriteBlock(address, memory.data(), copy_size);
+ gpu_memory->WriteBlock(address, memory.data(), copy_size);
return;
}
- gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+ gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
@@ -522,6 +513,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
+ screen_info.texture.width = image_view->size.width;
+ screen_info.texture.height = image_view->size.height;
screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
@@ -552,19 +545,25 @@ void RasterizerOpenGL::SyncState() {
}
void RasterizerOpenGL::SyncViewport() {
- auto& flags = maxwell3d.dirty.flags;
- const auto& regs = maxwell3d.regs;
+ auto& flags = maxwell3d->dirty.flags;
+ const auto& regs = maxwell3d->regs;
const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports];
const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports;
const bool dirty_clip_control = flags[Dirty::ClipControl];
- if (dirty_clip_control || flags[Dirty::FrontFace]) {
+ if (dirty_viewport || dirty_clip_control || flags[Dirty::FrontFace]) {
flags[Dirty::FrontFace] = false;
GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
- if (regs.screen_y_control.triangle_rast_flip != 0 &&
- regs.viewport_transform[0].scale_y < 0.0f) {
+ bool flip_faces = true;
+ if (regs.screen_y_control.triangle_rast_flip != 0) {
+ flip_faces = !flip_faces;
+ }
+ if (regs.viewport_transform[0].scale_y < 0.0f) {
+ flip_faces = !flip_faces;
+ }
+ if (flip_faces) {
switch (mode) {
case GL_CW:
mode = GL_CCW;
@@ -652,23 +651,23 @@ void RasterizerOpenGL::SyncViewport() {
}
void RasterizerOpenGL::SyncDepthClamp() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::DepthClampEnabled]) {
return;
}
flags[Dirty::DepthClampEnabled] = false;
- oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
+ oglEnable(GL_DEPTH_CLAMP, maxwell3d->regs.view_volume_clip_control.depth_clamp_disabled == 0);
}
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
- clip_mask &= maxwell3d.regs.clip_distance_enabled;
+ clip_mask &= maxwell3d->regs.clip_distance_enabled;
if (clip_mask == last_clip_distance_mask) {
return;
}
@@ -684,8 +683,8 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- auto& flags = maxwell3d.dirty.flags;
- const auto& regs = maxwell3d.regs;
+ auto& flags = maxwell3d->dirty.flags;
+ const auto& regs = maxwell3d->regs;
if (flags[Dirty::CullTest]) {
flags[Dirty::CullTest] = false;
@@ -700,23 +699,23 @@ void RasterizerOpenGL::SyncCullMode() {
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::PrimitiveRestart]) {
return;
}
flags[Dirty::PrimitiveRestart] = false;
- if (maxwell3d.regs.primitive_restart.enabled) {
+ if (maxwell3d->regs.primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
- glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
+ glPrimitiveRestartIndex(maxwell3d->regs.primitive_restart.index);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
void RasterizerOpenGL::SyncDepthTestState() {
- auto& flags = maxwell3d.dirty.flags;
- const auto& regs = maxwell3d.regs;
+ auto& flags = maxwell3d->dirty.flags;
+ const auto& regs = maxwell3d->regs;
if (flags[Dirty::DepthMask]) {
flags[Dirty::DepthMask] = false;
@@ -735,13 +734,13 @@ void RasterizerOpenGL::SyncDepthTestState() {
}
void RasterizerOpenGL::SyncStencilTestState() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::StencilTest]) {
return;
}
flags[Dirty::StencilTest] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
@@ -766,23 +765,23 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
void RasterizerOpenGL::SyncRasterizeEnable() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::RasterizeEnable]) {
return;
}
flags[Dirty::RasterizeEnable] = false;
- oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
+ oglEnable(GL_RASTERIZER_DISCARD, maxwell3d->regs.rasterize_enable == 0);
}
void RasterizerOpenGL::SyncPolygonModes() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::PolygonModes]) {
return;
}
flags[Dirty::PolygonModes] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.fill_rectangle) {
if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
@@ -815,7 +814,7 @@ void RasterizerOpenGL::SyncPolygonModes() {
}
void RasterizerOpenGL::SyncColorMask() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::ColorMasks]) {
return;
}
@@ -824,7 +823,7 @@ void RasterizerOpenGL::SyncColorMask() {
const bool force = flags[Dirty::ColorMaskCommon];
flags[Dirty::ColorMaskCommon] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.color_mask_common) {
if (!force && !flags[Dirty::ColorMask0]) {
return;
@@ -849,30 +848,30 @@ void RasterizerOpenGL::SyncColorMask() {
}
void RasterizerOpenGL::SyncMultiSampleState() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::MultisampleControl]) {
return;
}
flags[Dirty::MultisampleControl] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::FragmentClampColor]) {
return;
}
flags[Dirty::FragmentClampColor] = false;
- glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d->regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
}
void RasterizerOpenGL::SyncBlendState() {
- auto& flags = maxwell3d.dirty.flags;
- const auto& regs = maxwell3d.regs;
+ auto& flags = maxwell3d->dirty.flags;
+ const auto& regs = maxwell3d->regs;
if (flags[Dirty::BlendColor]) {
flags[Dirty::BlendColor] = false;
@@ -929,13 +928,13 @@ void RasterizerOpenGL::SyncBlendState() {
}
void RasterizerOpenGL::SyncLogicOpState() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::LogicOp]) {
return;
}
flags[Dirty::LogicOp] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.logic_op.enable) {
glEnable(GL_COLOR_LOGIC_OP);
glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
@@ -945,7 +944,7 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) {
return;
}
@@ -954,7 +953,7 @@ void RasterizerOpenGL::SyncScissorTest() {
const bool force = flags[VideoCommon::Dirty::RescaleScissors];
flags[VideoCommon::Dirty::RescaleScissors] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
const auto& resolution = Settings::values.resolution_info;
const bool is_rescaling{texture_cache.IsRescaling()};
@@ -990,39 +989,39 @@ void RasterizerOpenGL::SyncScissorTest() {
}
void RasterizerOpenGL::SyncPointState() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::PointSize]) {
return;
}
flags[Dirty::PointSize] = false;
- oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
- oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
+ oglEnable(GL_POINT_SPRITE, maxwell3d->regs.point_sprite_enable);
+ oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d->regs.vp_point_size.enable);
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
- glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale));
+ glPointSize(std::max(1.0f, maxwell3d->regs.point_size * scale));
}
void RasterizerOpenGL::SyncLineState() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::LineWidth]) {
return;
}
flags[Dirty::LineWidth] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
}
void RasterizerOpenGL::SyncPolygonOffset() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::PolygonOffset]) {
return;
}
flags[Dirty::PolygonOffset] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
@@ -1036,13 +1035,13 @@ void RasterizerOpenGL::SyncPolygonOffset() {
}
void RasterizerOpenGL::SyncAlphaTest() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::AlphaTest]) {
return;
}
flags[Dirty::AlphaTest] = false;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.alpha_test_enabled) {
glEnable(GL_ALPHA_TEST);
glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
@@ -1052,17 +1051,17 @@ void RasterizerOpenGL::SyncAlphaTest() {
}
void RasterizerOpenGL::SyncFramebufferSRGB() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::FramebufferSRGB]) {
return;
}
flags[Dirty::FramebufferSRGB] = false;
- oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
+ oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d->regs.framebuffer_srgb);
}
void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -1081,11 +1080,48 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum
}
void RasterizerOpenGL::EndTransformFeedback() {
- if (maxwell3d.regs.tfb_enabled != 0) {
+ if (maxwell3d->regs.tfb_enabled != 0) {
glEndTransformFeedback();
}
}
+void RasterizerOpenGL::InitializeChannel(Tegra::Control::ChannelState& channel) {
+ CreateChannel(channel);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.CreateChannel(channel);
+ buffer_cache.CreateChannel(channel);
+ }
+ shader_cache.CreateChannel(channel);
+ query_cache.CreateChannel(channel);
+ state_tracker.SetupTables(channel);
+}
+
+void RasterizerOpenGL::BindChannel(Tegra::Control::ChannelState& channel) {
+ const s32 channel_id = channel.bind_id;
+ BindToChannel(channel_id);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.BindToChannel(channel_id);
+ buffer_cache.BindToChannel(channel_id);
+ }
+ shader_cache.BindToChannel(channel_id);
+ query_cache.BindToChannel(channel_id);
+ state_tracker.ChangeChannel(channel);
+ state_tracker.InvalidateState();
+}
+
+void RasterizerOpenGL::ReleaseChannel(s32 channel_id) {
+ EraseChannel(channel_id);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.EraseChannel(channel_id);
+ buffer_cache.EraseChannel(channel_id);
+ }
+ shader_cache.EraseChannel(channel_id);
+ query_cache.EraseChannel(channel_id);
+}
+
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 98f6fd342..45131b785 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -1,24 +1,18 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2015 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
-#include <atomic>
#include <cstddef>
-#include <memory>
#include <optional>
-#include <tuple>
-#include <utility>
#include <boost/container/static_vector.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/engines/const_buffer_info.h"
-#include "video_core/engines/maxwell_3d.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
@@ -26,12 +20,8 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
-#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/textures/texture.h"
namespace Core::Memory {
class Memory;
@@ -69,7 +59,8 @@ private:
BufferCache& buffer_cache;
};
-class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
+class RasterizerOpenGL : public VideoCore::RasterizerAccelerated,
+ protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
@@ -89,10 +80,11 @@ public:
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
- void SyncGuestHost() override;
+ void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
- void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
- void SignalSemaphore(GPUVAddr addr, u32 value) override;
+ void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
+ void SignalFence(std::function<void()>&& func) override;
+ void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
@@ -107,7 +99,7 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
- std::span<u8> memory) override;
+ std::span<const u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
@@ -118,6 +110,12 @@ public:
return num_queued_commands > 0;
}
+ void InitializeChannel(Tegra::Control::ChannelState& channel) override;
+
+ void BindChannel(Tegra::Control::ChannelState& channel) override;
+
+ void ReleaseChannel(s32 channel_id) override;
+
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
@@ -202,9 +200,6 @@ private:
void EndTransformFeedback();
Tegra::GPU& gpu;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- Tegra::MemoryManager& gpu_memory;
const Device& device;
ScreenInfo& screen_info;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 5e7101d28..3a664fdec 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -1,11 +1,8 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2015 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
-#include <utility>
#include <glad/glad.h>
-#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 84e07f8bd..bc05ba4bd 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -1,6 +1,5 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2015 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f71e01a34..5a29a41d2 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
#include <fstream>
@@ -14,10 +13,8 @@
#include "common/fs/fs.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
-#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/thread_worker.h"
-#include "core/core.h"
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "shader_recompiler/backend/glsl/emit_glsl.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
@@ -29,7 +26,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -53,7 +49,7 @@ using VideoCommon::LoadPipelines;
using VideoCommon::SerializePipeline;
using Context = ShaderContext::Context;
-constexpr u32 CACHE_VERSION = 5;
+constexpr u32 CACHE_VERSION = 6;
template <typename Container>
auto MakeSpan(Container& container) {
@@ -89,7 +85,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
case Maxwell::TessellationPrimitive::Quads:
return Shader::TessPrimitive::Quads;
}
- UNREACHABLE();
+ ASSERT(false);
return Shader::TessPrimitive::Triangles;
}();
info.tess_spacing = [&] {
@@ -101,7 +97,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
case Maxwell::TessellationSpacing::FractionalEven:
return Shader::TessSpacing::FractionalEven;
}
- UNREACHABLE();
+ ASSERT(false);
return Shader::TessSpacing::Equal;
}();
break;
@@ -155,16 +151,13 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
} // Anonymous namespace
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- TextureCache& texture_cache_, BufferCache& buffer_cache_,
- ProgramManager& program_manager_, StateTracker& state_tracker_,
- VideoCore::ShaderNotify& shader_notify_)
- : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
- emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
- buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
- shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
+ const Device& device_, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_)
+ : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_},
+ texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
+ state_tracker{state_tracker_}, shader_notify{shader_notify_},
+ use_asynchronous_shaders{device.UseAsynchronousShaders()},
profile{
.supported_spirv = 0x00010000,
@@ -261,7 +254,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
[this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
ctx->pools.ReleaseContents();
auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
- std::lock_guard lock{state.mutex};
+ std::scoped_lock lock{state.mutex};
if (pipeline) {
compute_cache.emplace(key, std::move(pipeline));
}
@@ -283,7 +276,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
}
ctx->pools.ReleaseContents();
auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
- std::lock_guard lock{state.mutex};
+ std::scoped_lock lock{state.mutex};
if (pipeline) {
graphics_cache.emplace(key, std::move(pipeline));
}
@@ -303,7 +296,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
state.has_loaded = true;
lock.unlock();
- workers->WaitForRequests();
+ workers->WaitForRequests(stop_loading);
if (!use_asynchronous_shaders) {
workers.reset();
}
@@ -314,7 +307,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
current_pipeline = nullptr;
return nullptr;
}
- const auto& regs{maxwell3d.regs};
+ const auto& regs{maxwell3d->regs};
graphics_key.raw = 0;
graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
@@ -355,13 +348,13 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
}
// If something is using depth, we can assume that games are not rendering anything which
// will be used one time.
- if (maxwell3d.regs.zeta_enable) {
+ if (maxwell3d->regs.zeta_enable) {
return nullptr;
}
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
- if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
return pipeline;
}
return nullptr;
@@ -372,7 +365,7 @@ ComputePipeline* ShaderCache::CurrentComputePipeline() {
if (!shader) {
return nullptr;
}
- const auto& qmd{kepler_compute.launch_description};
+ const auto& qmd{kepler_compute->launch_description};
const ComputePipelineKey key{
.unique_hash = shader->unique_hash,
.shared_memory_size = qmd.shared_alloc,
@@ -484,9 +477,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
previous_program = &program;
}
auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
- return std::make_unique<GraphicsPipeline>(
- device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
- thread_worker, &shader_notify, sources, sources_spirv, infos, key);
+ return std::make_unique<GraphicsPipeline>(device, texture_cache, buffer_cache, program_manager,
+ state_tracker, thread_worker, &shader_notify, sources,
+ sources_spirv, infos, key);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
@@ -495,9 +488,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
- const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
- const auto& qmd{kepler_compute.launch_description};
- ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+ const auto& qmd{kepler_compute->launch_description};
+ ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents();
@@ -540,9 +533,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
break;
}
- return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
- kepler_compute, program_manager, program.info, code,
- code_spirv);
+ return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, program_manager,
+ program.info, code, code_spirv);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
return nullptr;
@@ -550,7 +542,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
- "yuzu:ShaderBuilder",
+ "GlShaderBuilder",
[this] { return Context{emu_window}; });
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a34110b37..89f181fe3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -1,21 +1,15 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
-#include <array>
#include <filesystem>
#include <stop_token>
#include <unordered_map>
-#include <glad/glad.h>
-
#include "common/common_types.h"
#include "common/thread_worker.h"
-#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/host_translate_info.h"
-#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
@@ -36,12 +30,9 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
class ShaderCache : public VideoCommon::ShaderCache {
public:
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- TextureCache& texture_cache_, BufferCache& buffer_cache_,
- ProgramManager& program_manager_, StateTracker& state_tracker_,
- VideoCore::ShaderNotify& shader_notify_);
+ const Device& device_, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
~ShaderCache();
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h
index 6ff34e5d6..ca2bd8e8e 100644
--- a/src/video_core/renderer_opengl/gl_shader_context.h
+++ b/src/video_core/renderer_opengl/gl_shader_context.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 399959afb..d9c29d8b7 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,3 +1,2 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index d7ef0775d..a84f5aeb3 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index d432072ad..a0d9d10ef 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -1,12 +1,10 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -19,6 +17,7 @@ static OGLProgram LinkSeparableProgram(GLuint shader) {
glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glAttachShader(program.handle, shader);
glLinkProgram(program.handle);
+ glDetachShader(program.handle, shader);
if (!Settings::values.renderer_debug) {
return program;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 4e1a2a8e1..43ebcdeba 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -1,18 +1,13 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
-#include <string>
#include <string_view>
-#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
-#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 586da84e3..a8f3a0f57 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
@@ -8,8 +7,8 @@
#include "common/common_types.h"
#include "core/core.h"
+#include "video_core/control/channel_state.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
@@ -203,9 +202,8 @@ void SetupDirtyMisc(Tables& tables) {
} // Anonymous namespace
-StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
- auto& dirty = gpu.Maxwell3D().dirty;
- auto& tables = dirty.tables;
+void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
+ auto& tables{channel_state.maxwell_3d->dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
@@ -231,4 +229,14 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyMisc(tables);
}
+void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
+ flags = &channel_state.maxwell_3d->dirty.flags;
+}
+
+void StateTracker::InvalidateState() {
+ flags->set();
+}
+
+StateTracker::StateTracker() : flags{&default_flags} {}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 5864c7c07..19bcf3f35 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -9,13 +8,14 @@
#include <glad/glad.h>
#include "common/common_types.h"
-#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Tegra {
-class GPU;
+namespace Control {
+struct ChannelState;
}
+} // namespace Tegra
namespace OpenGL {
@@ -85,7 +85,7 @@ static_assert(Last <= std::numeric_limits<u8>::max());
class StateTracker {
public:
- explicit StateTracker(Tegra::GPU& gpu);
+ explicit StateTracker();
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
@@ -123,94 +123,107 @@ public:
}
void NotifyScreenDrawVertexArray() {
- flags[OpenGL::Dirty::VertexFormats] = true;
- flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
- flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
+ (*flags)[OpenGL::Dirty::VertexFormats] = true;
+ (*flags)[OpenGL::Dirty::VertexFormat0 + 0] = true;
+ (*flags)[OpenGL::Dirty::VertexFormat0 + 1] = true;
- flags[VideoCommon::Dirty::VertexBuffers] = true;
- flags[VideoCommon::Dirty::VertexBuffer0] = true;
+ (*flags)[VideoCommon::Dirty::VertexBuffers] = true;
+ (*flags)[VideoCommon::Dirty::VertexBuffer0] = true;
- flags[OpenGL::Dirty::VertexInstances] = true;
- flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
- flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
+ (*flags)[OpenGL::Dirty::VertexInstances] = true;
+ (*flags)[OpenGL::Dirty::VertexInstance0 + 0] = true;
+ (*flags)[OpenGL::Dirty::VertexInstance0 + 1] = true;
}
void NotifyPolygonModes() {
- flags[OpenGL::Dirty::PolygonModes] = true;
- flags[OpenGL::Dirty::PolygonModeFront] = true;
- flags[OpenGL::Dirty::PolygonModeBack] = true;
+ (*flags)[OpenGL::Dirty::PolygonModes] = true;
+ (*flags)[OpenGL::Dirty::PolygonModeFront] = true;
+ (*flags)[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
- flags[OpenGL::Dirty::Viewports] = true;
- flags[OpenGL::Dirty::Viewport0] = true;
+ (*flags)[OpenGL::Dirty::Viewports] = true;
+ (*flags)[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
- flags[OpenGL::Dirty::Scissors] = true;
- flags[OpenGL::Dirty::Scissor0] = true;
+ (*flags)[OpenGL::Dirty::Scissors] = true;
+ (*flags)[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask(size_t index) {
- flags[OpenGL::Dirty::ColorMasks] = true;
- flags[OpenGL::Dirty::ColorMask0 + index] = true;
+ (*flags)[OpenGL::Dirty::ColorMasks] = true;
+ (*flags)[OpenGL::Dirty::ColorMask0 + index] = true;
}
void NotifyBlend0() {
- flags[OpenGL::Dirty::BlendStates] = true;
- flags[OpenGL::Dirty::BlendState0] = true;
+ (*flags)[OpenGL::Dirty::BlendStates] = true;
+ (*flags)[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
- flags[VideoCommon::Dirty::RenderTargets] = true;
+ (*flags)[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
- flags[OpenGL::Dirty::FrontFace] = true;
+ (*flags)[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
- flags[OpenGL::Dirty::CullTest] = true;
+ (*flags)[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
- flags[OpenGL::Dirty::DepthMask] = true;
+ (*flags)[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
- flags[OpenGL::Dirty::DepthTest] = true;
+ (*flags)[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
- flags[OpenGL::Dirty::StencilTest] = true;
+ (*flags)[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
- flags[OpenGL::Dirty::PolygonOffset] = true;
+ (*flags)[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
- flags[OpenGL::Dirty::RasterizeEnable] = true;
+ (*flags)[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
- flags[OpenGL::Dirty::FramebufferSRGB] = true;
+ (*flags)[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
- flags[OpenGL::Dirty::LogicOp] = true;
+ (*flags)[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
- flags[OpenGL::Dirty::ClipControl] = true;
+ (*flags)[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
- flags[OpenGL::Dirty::AlphaTest] = true;
+ (*flags)[OpenGL::Dirty::AlphaTest] = true;
}
+ void NotifyRange(u8 start, u8 end) {
+ for (auto flag = start; flag <= end; flag++) {
+ (*flags)[flag] = true;
+ }
+ }
+
+ void SetupTables(Tegra::Control::ChannelState& channel_state);
+
+ void ChangeChannel(Tegra::Control::ChannelState& channel_state);
+
+ void InvalidateState();
+
private:
- Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags{};
GLuint framebuffer = 0;
GLuint index_buffer = 0;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 77b3ee0fe..2005c8993 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <memory>
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 2e67922a6..8fe927aaf 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -1,11 +1,9 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
-#include <memory>
#include <span>
#include <utility>
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 3c1f79a27..99cd11d1e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
@@ -84,7 +83,7 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
case ImageType::Buffer:
return GL_TEXTURE_BUFFER;
}
- UNREACHABLE_MSG("Invalid image type={}", info.type);
+ ASSERT_MSG(false, "Invalid image type={}", info.type);
return GL_NONE;
}
@@ -94,6 +93,7 @@ GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
case Shader::TextureType::Color1D:
return GL_TEXTURE_1D;
case Shader::TextureType::Color2D:
+ case Shader::TextureType::Color2DRect:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
case Shader::TextureType::ColorCube:
return GL_TEXTURE_CUBE_MAP;
@@ -108,7 +108,7 @@ GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
case Shader::TextureType::Buffer:
return GL_TEXTURE_BUFFER;
}
- UNREACHABLE_MSG("Invalid image view type={}", type);
+ ASSERT_MSG(false, "Invalid image view type={}", type);
return GL_NONE;
}
@@ -120,7 +120,7 @@ GLenum TextureMode(PixelFormat format, bool is_first) {
case PixelFormat::S8_UINT_D24_UNORM:
return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
default:
- UNREACHABLE();
+ ASSERT(false);
return GL_DEPTH_COMPONENT;
}
}
@@ -141,7 +141,7 @@ GLint Swizzle(SwizzleSource source) {
case SwizzleSource::OneFloat:
return GL_ONE;
}
- UNREACHABLE_MSG("Invalid swizzle source={}", source);
+ ASSERT_MSG(false, "Invalid swizzle source={}", source);
return GL_NONE;
}
@@ -182,6 +182,26 @@ GLenum AttachmentType(PixelFormat format) {
}
}
+GLint ConvertA5B5G5R1_UNORM(SwizzleSource source) {
+ switch (source) {
+ case SwizzleSource::Zero:
+ return GL_ZERO;
+ case SwizzleSource::R:
+ return GL_ALPHA;
+ case SwizzleSource::G:
+ return GL_BLUE;
+ case SwizzleSource::B:
+ return GL_GREEN;
+ case SwizzleSource::A:
+ return GL_RED;
+ case SwizzleSource::OneInt:
+ case SwizzleSource::OneFloat:
+ return GL_ONE;
+ }
+ ASSERT_MSG(false, "Invalid swizzle source={}", source);
+ return GL_NONE;
+}
+
void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
switch (format) {
case PixelFormat::D24_UNORM_S8_UINT:
@@ -192,6 +212,12 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
TextureMode(format, swizzle[0] == SwizzleSource::R));
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
break;
+ case PixelFormat::A5B5G5R1_UNORM: {
+ std::array<GLint, 4> gl_swizzle;
+ std::ranges::transform(swizzle, gl_swizzle.begin(), ConvertA5B5G5R1_UNORM);
+ glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+ return;
+ }
default:
break;
}
@@ -356,10 +382,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, depth);
break;
case GL_TEXTURE_BUFFER:
- UNREACHABLE();
+ ASSERT(false);
break;
default:
- UNREACHABLE_MSG("Invalid target=0x{:x}", target);
+ ASSERT_MSG(false, "Invalid target=0x{:x}", target);
break;
}
return texture;
@@ -395,7 +421,7 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
case Shader::ImageFormat::R32G32B32A32_UINT:
return GL_RGBA32UI;
}
- UNREACHABLE_MSG("Invalid image format={}", format);
+ ASSERT_MSG(false, "Invalid image format={}", format);
return GL_R32UI;
}
@@ -409,8 +435,8 @@ ImageBufferMap::~ImageBufferMap() {
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
StateTracker& state_tracker_)
- : device{device_}, state_tracker{state_tracker_},
- util_shaders(program_manager), resolution{Settings::values.resolution_info} {
+ : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
+ format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@@ -477,6 +503,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
+ set_view(Shader::TextureType::Color2DRect, null_image_view_2d.handle);
if (resolution.active) {
for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) {
@@ -484,6 +511,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
rescale_read_fbos[i].Create();
}
}
+
+ device_access_memory = [this]() -> u64 {
+ if (device.CanReportMemoryUsage()) {
+ return device.GetCurrentDedicatedVideoMemory() + 512_MiB;
+ }
+ return 2_GiB; // Return minimum requirements
+ }();
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -500,13 +534,11 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
-u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
- if (GLAD_GL_NVX_gpu_memory_info) {
- GLint cur_avail_mem_kb = 0;
- glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb);
- return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
+u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
+ if (device.CanReportMemoryUsage()) {
+ return device_access_memory - device.GetCurrentDedicatedVideoMemory();
}
- return 2_GiB; // Return minimum requirements
+ return 2_GiB;
}
void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
@@ -549,7 +581,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
} else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
format_conversion_pass.ConvertImage(dst, src, copies);
} else {
- UNREACHABLE();
+ ASSERT(false);
}
}
@@ -590,7 +622,7 @@ void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferM
case ImageType::Linear:
return util_shaders.PitchUpload(image, map, swizzles);
default:
- UNREACHABLE();
+ ASSERT(false);
break;
}
}
@@ -609,7 +641,7 @@ FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal
case ImageType::e3D:
return format_properties[2].at(internal_format);
default:
- UNREACHABLE();
+ ASSERT(false);
return FormatProperties{};
}
}
@@ -686,6 +718,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
}
if (IsConverted(runtime->device, info.format, info.type)) {
flags |= ImageFlagBits::Converted;
+ flags |= ImageFlagBits::CostlyLoad;
gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
@@ -857,7 +890,7 @@ void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t b
}
break;
default:
- UNREACHABLE();
+ ASSERT(false);
}
}
@@ -893,7 +926,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
depth = copy.image_extent.depth;
break;
default:
- UNREACHABLE();
+ ASSERT(false);
}
// Compressed formats don't have a pixel format or type
const bool is_compressed = gl_format == GL_NONE;
@@ -919,7 +952,7 @@ void Image::Scale(bool up_scale) {
case SurfaceType::DepthStencil:
return GL_DEPTH_STENCIL_ATTACHMENT;
default:
- UNREACHABLE();
+ ASSERT(false);
return GL_COLOR_ATTACHMENT0;
}
}();
@@ -934,7 +967,7 @@ void Image::Scale(bool up_scale) {
case SurfaceType::DepthStencil:
return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
default:
- UNREACHABLE();
+ ASSERT(false);
return GL_COLOR_BUFFER_BIT;
}
}();
@@ -949,7 +982,7 @@ void Image::Scale(bool up_scale) {
case SurfaceType::DepthStencil:
return 3;
default:
- UNREACHABLE();
+ ASSERT(false);
return 0;
}
}();
@@ -1014,7 +1047,7 @@ bool Image::ScaleUp(bool ignore) {
return false;
}
if (info.type == ImageType::Linear) {
- UNREACHABLE();
+ ASSERT(false);
return false;
}
flags |= ImageFlagBits::Rescaled;
@@ -1079,6 +1112,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e2D:
+ case ImageViewType::Rect:
if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
// 2D and 2D array views on a 3D textures are used exclusively for render targets
ASSERT(info.range.extent.levels == 1);
@@ -1104,11 +1138,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
SetupView(Shader::TextureType::ColorCube);
SetupView(Shader::TextureType::ColorArrayCube);
break;
- case ImageViewType::Rect:
- UNIMPLEMENTED();
- break;
case ImageViewType::Buffer:
- UNREACHABLE();
+ ASSERT(false);
break;
}
switch (info.type) {
@@ -1119,6 +1150,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
default_handle = Handle(Shader::TextureType::ColorArray1D);
break;
case ImageViewType::e2D:
+ case ImageViewType::Rect:
default_handle = Handle(Shader::TextureType::Color2D);
break;
case ImageViewType::e2DArray:
@@ -1179,6 +1211,7 @@ GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
case Shader::TextureType::Color1D:
case Shader::TextureType::Color2D:
case Shader::TextureType::ColorCube:
+ case Shader::TextureType::Color2DRect:
view_range = flat_range;
break;
case Shader::TextureType::ColorArray1D:
@@ -1219,7 +1252,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE;
UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
- UNIMPLEMENTED_IF(config.float_coord_normalization != 0);
sampler.Create();
const GLuint handle = sampler.handle;
@@ -1288,7 +1320,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
break;
default:
- UNREACHABLE();
+ ASSERT(false);
buffer_bits |= GL_DEPTH_BUFFER_BIT;
break;
}
@@ -1319,6 +1351,9 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
Framebuffer::~Framebuffer() = default;
+FormatConversionPass::FormatConversionPass(UtilShaders& util_shaders_)
+ : util_shaders{util_shaders_} {}
+
void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies) {
const GLenum dst_target = ImageTarget(dst_image.info);
@@ -1351,6 +1386,12 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
dst_origin.z, region.width, region.height, region.depth,
dst_image.GlFormat(), dst_image.GlType(), nullptr);
}
+
+ // Swap component order of S8D24 to ABGR8 reinterprets
+ if (src_image.info.format == PixelFormat::D24_UNORM_S8_UINT &&
+ dst_image.info.format == PixelFormat::A8B8G8R8_UNORM) {
+ util_shaders.ConvertS8D24(dst_image, copies);
+ }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 7f425631f..113528e9b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -10,6 +9,7 @@
#include <glad/glad.h>
#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
@@ -21,7 +21,6 @@ struct ResolutionScalingInfo;
namespace OpenGL {
-class Device;
class ProgramManager;
class StateTracker;
@@ -55,13 +54,14 @@ struct FormatProperties {
class FormatConversionPass {
public:
- FormatConversionPass() = default;
+ explicit FormatConversionPass(UtilShaders& util_shaders);
~FormatConversionPass() = default;
void ConvertImage(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
private:
+ UtilShaders& util_shaders;
OGLBuffer intermediate_pbo;
size_t pbo_size{};
};
@@ -83,7 +83,15 @@ public:
ImageBufferMap DownloadStagingBuffer(size_t size);
- u64 GetDeviceLocalMemory() const;
+ u64 GetDeviceLocalMemory() const {
+ return device_access_memory;
+ }
+
+ u64 GetDeviceMemoryUsage() const;
+
+ bool CanReportMemoryUsage() const {
+ return device.CanReportMemoryUsage();
+ }
bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
return true;
@@ -172,6 +180,7 @@ private:
std::array<OGLFramebuffer, 4> rescale_draw_fbos;
std::array<OGLFramebuffer, 4> rescale_read_fbos;
const Settings::ResolutionScalingInfo& resolution;
+ u64 device_access_memory;
};
class Image : public VideoCommon::ImageBase {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache_base.cpp b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
index 385358fea..64416fd51 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index daba42ed9..004421236 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -30,6 +29,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // A5B5G5R1_UNORM
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
@@ -87,6 +87,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // G4R4_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
@@ -97,6 +98,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x6_KHR}, // ASTC_2D_10X6_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_10x5_KHR}, // ASTC_2D_10X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR}, // ASTC_2D_10X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
@@ -184,6 +188,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
+ case Maxwell::VertexAttribute::Size::Size_11_11_10:
+ return GL_UNSIGNED_INT_10F_11F_11F_REV;
default:
break;
}
@@ -203,7 +209,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
case Maxwell::IndexFormat::UnsignedInt:
return GL_UNSIGNED_INT;
}
- UNREACHABLE_MSG("Invalid index_format={}", index_format);
+ ASSERT_MSG(false, "Invalid index_format={}", index_format);
return {};
}
@@ -240,7 +246,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
case Maxwell::PrimitiveTopology::Patches:
return GL_PATCHES;
}
- UNREACHABLE_MSG("Invalid topology={}", topology);
+ ASSERT_MSG(false, "Invalid topology={}", topology);
return GL_POINTS;
}
@@ -268,8 +274,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
}
break;
}
- UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", filter_mode,
- mipmap_filter_mode);
+ ASSERT_MSG(false, "Invalid texture filter mode={} and mipmap filter mode={}", filter_mode,
+ mipmap_filter_mode);
return GL_NEAREST;
}
@@ -547,7 +553,7 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
case Maxwell::PolygonMode::Fill:
return GL_FILL;
}
- UNREACHABLE_MSG("Invalid polygon mode={}", polygon_mode);
+ ASSERT_MSG(false, "Invalid polygon mode={}", polygon_mode);
return GL_FILL;
}
@@ -560,7 +566,7 @@ inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
case Tegra::Texture::SamplerReduction::Max:
return GL_MAX;
}
- UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
+ ASSERT_MSG(false, "Invalid reduction filter={}", static_cast<int>(filter));
return GL_WEIGHTED_AVERAGE_ARB;
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f81c1b233..8bd5eba7e 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -1,11 +1,9 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <cstddef>
#include <cstdlib>
-#include <cstring>
#include <memory>
#include <glad/glad.h>
@@ -15,11 +13,9 @@
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/telemetry.h"
-#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
-#include "core/perf_stats.h"
#include "core/telemetry_session.h"
#include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h"
@@ -82,7 +78,7 @@ const char* GetSource(GLenum source) {
case GL_DEBUG_SOURCE_OTHER:
return "OTHER";
default:
- UNREACHABLE();
+ ASSERT(false);
return "Unknown source";
}
}
@@ -104,7 +100,7 @@ const char* GetType(GLenum type) {
case GL_DEBUG_TYPE_MARKER:
return "MARKER";
default:
- UNREACHABLE();
+ ASSERT(false);
return "Unknown type";
}
}
@@ -135,7 +131,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
- emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
+ emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{},
program_manager{device},
rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
@@ -211,6 +207,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
framebuffer_crop_rect = framebuffer.crop_rect;
+ framebuffer_width = framebuffer.width;
+ framebuffer_height = framebuffer.height;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
screen_info.was_accelerated =
@@ -326,12 +324,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
GLint internal_format;
switch (framebuffer.pixel_format) {
- case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
+ case Service::android::PixelFormat::Rgba8888:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
break;
- case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+ case Service::android::PixelFormat::Rgb565:
internal_format = GL_RGB565;
texture.gl_format = GL_RGB;
texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
@@ -467,8 +465,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
auto right = texcoords.right;
- if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset) {
- if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) {
+ if (framebuffer_transform_flags != Service::android::BufferTransformFlags::Unset) {
+ if (framebuffer_transform_flags == Service::android::BufferTransformFlags::FlipV) {
// Flip the framebuffer vertically
left = texcoords.right;
right = texcoords.left;
@@ -480,12 +478,18 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
}
}
- ASSERT_MSG(framebuffer_crop_rect.top == 0, "Unimplemented");
ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented");
+ f32 left_start{};
+ if (framebuffer_crop_rect.Top() > 0) {
+ left_start = static_cast<f32>(framebuffer_crop_rect.Top()) /
+ static_cast<f32>(framebuffer_crop_rect.Bottom());
+ }
+ f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
+ f32 scale_v =
+ static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
- f32 scale_u = 1.f, scale_v = 1.f;
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.texture.width);
@@ -502,10 +506,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
const auto& screen = layout.screen;
const std::array vertices = {
- ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v),
- ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v),
- ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v),
- ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v),
+ ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u,
+ left_start + left * scale_v),
+ ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u,
+ left_start + left * scale_v),
+ ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u,
+ left_start + right * scale_v),
+ ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u,
+ left_start + right * scale_v),
};
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cda333cad..1a32e739d 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -1,6 +1,5 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -8,10 +7,12 @@
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/math_util.h"
+
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
@@ -44,7 +45,7 @@ struct TextureInfo {
GLsizei height;
GLenum gl_format;
GLenum gl_type;
- Tegra::FramebufferConfig::PixelFormat pixel_format;
+ Service::android::PixelFormat pixel_format;
};
/// Structure used for storing information about the display target for the Switch screen
@@ -133,8 +134,10 @@ private:
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
- Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
+ Service::android::BufferTransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
+ u32 framebuffer_width;
+ u32 framebuffer_height;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 897c380b3..404def62e 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <span>
#include <string_view>
@@ -13,6 +12,7 @@
#include "video_core/host_shaders/astc_decoder_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
+#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -50,7 +50,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
- copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
+ copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
+ convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
@@ -248,6 +249,26 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
program_manager.RestoreGuestCompute();
}
+void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copies) {
+ static constexpr GLuint BINDING_DESTINATION = 0;
+ static constexpr GLuint LOC_SIZE = 0;
+
+ program_manager.BindComputeProgram(convert_s8d24_program.handle);
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_subresource.base_layer == 0);
+ ASSERT(copy.src_subresource.num_layers == 1);
+ ASSERT(copy.dst_subresource.base_layer == 0);
+ ASSERT(copy.dst_subresource.num_layers == 1);
+
+ glUniform3ui(LOC_SIZE, copy.extent.width, copy.extent.height, copy.extent.depth);
+ glBindImageTexture(BINDING_DESTINATION, dst_image.StorageHandle(),
+ copy.dst_subresource.base_level, GL_TRUE, 0, GL_READ_WRITE, GL_RGBA8UI);
+ glDispatchCompute(Common::DivCeil(copy.extent.width, 16u),
+ Common::DivCeil(copy.extent.height, 8u), copy.extent.depth);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
@@ -261,7 +282,7 @@ GLenum StoreFormat(u32 bytes_per_block) {
case 16:
return GL_RGBA32UI;
}
- UNREACHABLE();
+ ASSERT(false);
return GL_R8UI;
}
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 5de95ea7a..44efb6ecf 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -39,6 +38,8 @@ public:
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
+ void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
+
private:
ProgramManager& program_manager;
@@ -49,6 +50,7 @@ private:
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
OGLProgram copy_bc4_program;
+ OGLProgram convert_s8d24_program;
};
GLenum StoreFormat(u32 bytes_per_block);
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 2c3914459..3f2b139e0 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
@@ -9,6 +8,7 @@
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
+#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
@@ -349,7 +349,7 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) {
}
} // Anonymous namespace
-BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
+BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
StateTracker& state_tracker_, DescriptorPool& descriptor_pool)
: device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
@@ -366,16 +366,14 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
+ blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
+ convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
- nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
- if (device.IsExtShaderStencilExportSupported()) {
- blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
- }
-}
+ nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
BlitImageHelper::~BlitImageHelper() = default;
@@ -474,6 +472,13 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
+void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
+ ImageView& src_image_view) {
+ ConvertPipelineColorTargetEx(convert_s8d24_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
+ convert_s8d24_to_abgr8_frag);
+ ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view);
+}
+
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 85e7dca5b..5df679fb4 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -1,11 +1,8 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
-#include <compare>
-
#include "video_core/engines/fermi_2d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/texture_cache/types.h"
@@ -19,7 +16,7 @@ class Device;
class Framebuffer;
class ImageView;
class StateTracker;
-class VKScheduler;
+class Scheduler;
struct BlitImagePipelineKey {
constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
@@ -30,7 +27,7 @@ struct BlitImagePipelineKey {
class BlitImageHelper {
public:
- explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
+ explicit BlitImageHelper(const Device& device, Scheduler& scheduler,
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
@@ -56,6 +53,8 @@ public:
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
+ void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
+
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@@ -83,7 +82,7 @@ private:
vk::ShaderModule& module);
const Device& device;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
StateTracker& state_tracker;
vk::DescriptorSetLayout one_texture_set_layout;
@@ -99,6 +98,7 @@ private:
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
vk::ShaderModule convert_d24s8_to_abgr8_frag;
+ vk::ShaderModule convert_s8d24_to_abgr8_frag;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
@@ -112,6 +112,7 @@ private:
vk::Pipeline convert_r16_to_d16_pipeline;
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
+ vk::Pipeline convert_s8d24_to_abgr8_pipeline;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index d70153df3..733b454de 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -1,12 +1,8 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <cstring>
-#include <tuple>
-
-#include <boost/functional/hash.hpp>
#include "common/bit_cast.h"
#include "common/cityhash.h"
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index c9be37935..9d60756e5 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 751e4792b..e7104d377 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <iterator>
@@ -26,7 +25,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter) {
case Tegra::Texture::TextureFilter::Linear:
return VK_FILTER_LINEAR;
}
- UNREACHABLE_MSG("Invalid sampler filter={}", filter);
+ ASSERT_MSG(false, "Invalid sampler filter={}", filter);
return {};
}
@@ -43,7 +42,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter
case Tegra::Texture::TextureMipmapFilter::Linear:
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
}
- UNREACHABLE_MSG("Invalid sampler mipmap mode={}", mipmap_filter);
+ ASSERT_MSG(false, "Invalid sampler mipmap mode={}", mipmap_filter);
return {};
}
@@ -71,7 +70,7 @@ VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wra
case Tegra::Texture::TextureFilter::Linear:
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
}
- UNREACHABLE();
+ ASSERT(false);
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
@@ -127,6 +126,7 @@ struct FormatTuple {
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle)
+ {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled)
{VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM
{VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM
{VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT
@@ -172,7 +172,7 @@ struct FormatTuple {
{VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT
{VK_FORMAT_R8G8_UINT, Attachable | Storage}, // R8G8_UINT
{VK_FORMAT_R32G32_UINT, Attachable | Storage}, // R32G32_UINT
- {VK_FORMAT_UNDEFINED}, // R16G16B16X16_FLOAT
+ {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16X16_FLOAT
{VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT
{VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
@@ -184,6 +184,7 @@ struct FormatTuple {
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
{VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB
{VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM
+ {VK_FORMAT_R4G4_UNORM_PACK8}, // G4R4_UNORM
{VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
{VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
{VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB
@@ -194,6 +195,9 @@ struct FormatTuple {
{VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB
{VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM
{VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
+ {VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM
+ {VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM
+ {VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB
{VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM
{VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
{VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM
@@ -315,193 +319,204 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
}
}
-VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
- switch (type) {
- case Maxwell::VertexAttribute::Type::UnsignedNorm:
- switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_UNORM;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_UNORM;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_UNORM;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_UNORM;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_UNORM;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_UNORM;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_UNORM;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_UNORM;
- case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
- default:
+VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
+ Maxwell::VertexAttribute::Size size) {
+ const VkFormat format{([&]() {
+ switch (type) {
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ default:
+ break;
+ }
break;
- }
- break;
- case Maxwell::VertexAttribute::Type::SignedNorm:
- switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SNORM;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SNORM;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SNORM;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SNORM;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SNORM;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SNORM;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SNORM;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SNORM;
- case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
- default:
+ case Maxwell::VertexAttribute::Type::SignedNorm:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_SNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_SNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_SNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_SNORM;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_SNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_SNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_SNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_SNORM;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
+ default:
+ break;
+ }
break;
- }
- break;
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
- switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_USCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_USCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_USCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_USCALED;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_USCALED;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_USCALED;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_USCALED;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_USCALED;
- case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
- default:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_USCALED;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_USCALED;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_USCALED;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_USCALED;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_USCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_USCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_USCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_USCALED;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
+ default:
+ break;
+ }
break;
- }
- break;
- case Maxwell::VertexAttribute::Type::SignedScaled:
- switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
- default:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
+ default:
+ break;
+ }
break;
- }
- break;
- case Maxwell::VertexAttribute::Type::UnsignedInt:
- switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_UINT;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_UINT;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_UINT;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_UINT;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_UINT;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_UINT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_UINT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_UINT;
- case Maxwell::VertexAttribute::Size::Size_32:
- return VK_FORMAT_R32_UINT;
- case Maxwell::VertexAttribute::Size::Size_32_32:
- return VK_FORMAT_R32G32_UINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32:
- return VK_FORMAT_R32G32B32_UINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
- return VK_FORMAT_R32G32B32A32_UINT;
- case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_UINT_PACK32;
- default:
+ case Maxwell::VertexAttribute::Type::UnsignedInt:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_UINT;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_UINT;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_UINT;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_UINT;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_UINT;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_UINT;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_UINT;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_UINT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return VK_FORMAT_R32_UINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return VK_FORMAT_R32G32_UINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return VK_FORMAT_R32G32B32_UINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return VK_FORMAT_R32G32B32A32_UINT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UINT_PACK32;
+ default:
+ break;
+ }
break;
- }
- break;
- case Maxwell::VertexAttribute::Type::SignedInt:
- switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SINT;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SINT;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SINT;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SINT;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SINT;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SINT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SINT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SINT;
- case Maxwell::VertexAttribute::Size::Size_32:
- return VK_FORMAT_R32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32:
- return VK_FORMAT_R32G32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32:
- return VK_FORMAT_R32G32B32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
- return VK_FORMAT_R32G32B32A32_SINT;
- case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_SINT_PACK32;
- default:
+ case Maxwell::VertexAttribute::Type::SignedInt:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_SINT;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_SINT;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_SINT;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_SINT;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_SINT;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_SINT;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_SINT;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return VK_FORMAT_R32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return VK_FORMAT_R32G32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return VK_FORMAT_R32G32B32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return VK_FORMAT_R32G32B32A32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SINT_PACK32;
+ default:
+ break;
+ }
break;
- }
- break;
- case Maxwell::VertexAttribute::Type::Float:
- switch (size) {
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_32:
- return VK_FORMAT_R32_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_32_32:
- return VK_FORMAT_R32G32_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32:
- return VK_FORMAT_R32G32B32_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
- return VK_FORMAT_R32G32B32A32_SFLOAT;
- default:
+ case Maxwell::VertexAttribute::Type::Float:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return VK_FORMAT_R32_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return VK_FORMAT_R32G32_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return VK_FORMAT_R32G32B32_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return VK_FORMAT_R32G32B32A32_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_11_11_10:
+ return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
+ default:
+ break;
+ }
break;
}
- break;
+ return VK_FORMAT_UNDEFINED;
+ })()};
+
+ if (format == VK_FORMAT_UNDEFINED) {
+ UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", type, size);
}
- UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", type, size);
- return {};
+
+ return device.GetSupportedFormat(format, VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT,
+ FormatType::Buffer);
}
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
@@ -741,7 +756,7 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle)
case Maxwell::ViewportSwizzle::NegativeW:
return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV;
}
- UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
+ ASSERT_MSG(false, "Invalid swizzle={}", swizzle);
return {};
}
@@ -754,7 +769,7 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti
case Tegra::Texture::SamplerReduction::Max:
return VK_SAMPLER_REDUCTION_MODE_MAX_EXT;
}
- UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction));
+ ASSERT_MSG(false, "Invalid sampler mode={}", static_cast<int>(reduction));
return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
}
@@ -777,7 +792,7 @@ VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
case Tegra::Texture::MsaaMode::Msaa4x4:
return VK_SAMPLE_COUNT_16_BIT;
default:
- UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
+ ASSERT_MSG(false, "Invalid msaa_mode={}", static_cast<int>(msaa_mode));
return VK_SAMPLE_COUNT_1_BIT;
}
}
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 8a9616039..356d46292 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -1,10 +1,8 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
-#include "common/common_types.h"
#include "shader_recompiler/stage.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
@@ -50,7 +48,8 @@ VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
-VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
+VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
+ Maxwell::VertexAttribute::Size size);
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 11c160570..b24f3424a 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -8,7 +7,6 @@
#include <boost/container/small_vector.hpp>
-#include "common/assert.h"
#include "common/common_types.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/shader_info.h"
@@ -16,7 +14,6 @@
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/types.h"
-#include "video_core/textures/texture.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
@@ -171,7 +168,7 @@ private:
};
inline void PushImageDescriptors(TextureCache& texture_cache,
- VKUpdateDescriptorQueue& update_descriptor_queue,
+ UpdateDescriptorQueue& update_descriptor_queue,
const Shader::Info& info, RescalingPushConstant& rescaling,
const VkSampler*& samplers,
const VideoCommon::ImageViewInOut*& views) {
diff --git a/src/video_core/renderer_vulkan/pipeline_statistics.cpp b/src/video_core/renderer_vulkan/pipeline_statistics.cpp
index bfec931a6..fcd160a32 100644
--- a/src/video_core/renderer_vulkan/pipeline_statistics.cpp
+++ b/src/video_core/renderer_vulkan/pipeline_statistics.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
@@ -57,7 +56,7 @@ void PipelineStatistics::Collect(VkPipeline pipeline) {
stage_stats.basic_block_count = GetUint64(statistic);
}
}
- std::lock_guard lock{mutex};
+ std::scoped_lock lock{mutex};
collected_stats.push_back(stage_stats);
}
}
@@ -66,7 +65,7 @@ void PipelineStatistics::Report() const {
double num{};
Stats total;
{
- std::lock_guard lock{mutex};
+ std::scoped_lock lock{mutex};
for (const Stats& stats : collected_stats) {
total.code_size += stats.code_size;
total.register_count += stats.register_count;
diff --git a/src/video_core/renderer_vulkan/pipeline_statistics.h b/src/video_core/renderer_vulkan/pipeline_statistics.h
index b61840107..197bb0936 100644
--- a/src/video_core/renderer_vulkan/pipeline_statistics.h
+++ b/src/video_core/renderer_vulkan/pipeline_statistics.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 74822814d..d8131232a 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
@@ -13,16 +12,15 @@
#include <fmt/format.h>
#include "common/logging/log.h"
+#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/telemetry.h"
-#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/telemetry_session.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
-#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -104,13 +102,13 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
- state_tracker(gpu), scheduler(device, state_tracker),
+ state_tracker(), scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
- rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
- memory_allocator, state_tracker, scheduler) {
+ rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
+ state_tracker, scheduler) {
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
@@ -129,6 +127,11 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!render_window.IsShown()) {
return;
}
+ // Update screen info if the framebuffer size has changed.
+ if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) {
+ screen_info.width = framebuffer->width;
+ screen_info.height = framebuffer->height;
+ }
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
@@ -139,7 +142,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
const auto recreate_swapchain = [&] {
if (!has_been_recreated) {
has_been_recreated = true;
- scheduler.WaitWorker();
+ scheduler.Finish();
}
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
swapchain.Create(layout.width, layout.height, is_srgb);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 6dc985109..e7bfecb20 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -1,12 +1,10 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <string>
-#include <vector>
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
@@ -67,14 +65,14 @@ private:
vk::DebugUtilsMessenger debug_callback;
vk::SurfaceKHR surface;
- VKScreenInfo screen_info;
+ ScreenInfo screen_info;
Device device;
MemoryAllocator memory_allocator;
StateTracker state_tracker;
- VKScheduler scheduler;
- VKSwapchain swapchain;
- VKBlitScreen blit_screen;
+ Scheduler scheduler;
+ Swapchain swapchain;
+ BlitScreen blit_screen;
RasterizerVulkan rasterizer;
};
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index c71a1f44d..cb7fa2078 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -1,12 +1,10 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
-#include <tuple>
#include <vector>
#include "common/assert.h"
@@ -28,7 +26,6 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_fsr.h"
-#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -96,10 +93,12 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
switch (framebuffer.pixel_format) {
- case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
+ case Service::android::PixelFormat::Rgba8888:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+ case Service::android::PixelFormat::Rgb565:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
+ case Service::android::PixelFormat::Bgra8888:
+ return VK_FORMAT_B8G8R8A8_UNORM;
default:
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
static_cast<u32>(framebuffer.pixel_format));
@@ -109,7 +108,7 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
} // Anonymous namespace
-struct VKBlitScreen::BufferData {
+struct BlitScreen::BufferData {
struct {
std::array<f32, 4 * 4> modelview_matrix;
} uniform;
@@ -119,10 +118,9 @@ struct VKBlitScreen::BufferData {
// Unaligned image data goes here
};
-VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
- Core::Frontend::EmuWindow& render_window_, const Device& device_,
- MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
- VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
+BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_,
+ const Device& device_, MemoryAllocator& memory_allocator_,
+ Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_)
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
@@ -132,21 +130,26 @@ VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
CreateDynamicResources();
}
-VKBlitScreen::~VKBlitScreen() = default;
+BlitScreen::~BlitScreen() = default;
-void VKBlitScreen::Recreate() {
+void BlitScreen::Recreate() {
CreateDynamicResources();
}
-VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
- const VkFramebuffer& host_framebuffer,
- const Layout::FramebufferLayout layout, VkExtent2D render_area,
- bool use_accelerated) {
+VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
+ const VkFramebuffer& host_framebuffer,
+ const Layout::FramebufferLayout layout, VkExtent2D render_area,
+ bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
+ if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) {
+ image_count = swapchain_images;
+ Recreate();
+ }
+
const std::size_t image_index = swapchain.GetImageIndex();
scheduler.Wait(resource_ticks[image_index]);
@@ -171,11 +174,12 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
- const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel,
+ const u64 linear_size{GetSizeInBytes(framebuffer)};
+ const u64 tiled_size{Tegra::Texture::CalculateSize(true, bytes_per_pixel,
framebuffer.stride, framebuffer.height,
1, block_height_log2, 0)};
Tegra::Texture::UnswizzleTexture(
- mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
+ mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
const VkBufferImageCopy copy{
@@ -420,20 +424,20 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
return *semaphores[image_index];
}
-VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated) {
+VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
+ bool use_accelerated) {
const std::size_t image_index = swapchain.GetImageIndex();
const VkExtent2D render_area = swapchain.GetSize();
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated);
}
-vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
+vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
return CreateFramebuffer(image_view, extent, renderpass);
}
-vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent,
- vk::RenderPass& rd) {
+vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent,
+ vk::RenderPass& rd) {
return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -447,17 +451,17 @@ vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, V
});
}
-void VKBlitScreen::CreateStaticResources() {
+void BlitScreen::CreateStaticResources() {
CreateShaders();
+ CreateSampler();
+}
+
+void BlitScreen::CreateDynamicResources() {
CreateSemaphores();
CreateDescriptorPool();
CreateDescriptorSetLayout();
CreateDescriptorSets();
CreatePipelineLayout();
- CreateSampler();
-}
-
-void VKBlitScreen::CreateDynamicResources() {
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
@@ -467,7 +471,7 @@ void VKBlitScreen::CreateDynamicResources() {
}
}
-void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
+void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
if (!fsr) {
CreateFSR();
@@ -487,7 +491,7 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
CreateRawImages(framebuffer);
}
-void VKBlitScreen::CreateShaders() {
+void BlitScreen::CreateShaders() {
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV);
fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV);
@@ -501,12 +505,12 @@ void VKBlitScreen::CreateShaders() {
}
}
-void VKBlitScreen::CreateSemaphores() {
+void BlitScreen::CreateSemaphores() {
semaphores.resize(image_count);
std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
}
-void VKBlitScreen::CreateDescriptorPool() {
+void BlitScreen::CreateDescriptorPool() {
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
{
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
@@ -546,11 +550,11 @@ void VKBlitScreen::CreateDescriptorPool() {
aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa);
}
-void VKBlitScreen::CreateRenderPass() {
+void BlitScreen::CreateRenderPass() {
renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
}
-vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
+vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = format,
@@ -606,7 +610,7 @@ vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_prese
return device.GetLogical().CreateRenderPass(renderpass_ci);
}
-void VKBlitScreen::CreateDescriptorSetLayout() {
+void BlitScreen::CreateDescriptorSetLayout() {
const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
{
.binding = 0,
@@ -661,7 +665,7 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa);
}
-void VKBlitScreen::CreateDescriptorSets() {
+void BlitScreen::CreateDescriptorSets() {
const std::vector layouts(image_count, *descriptor_set_layout);
const std::vector layouts_aa(image_count, *aa_descriptor_set_layout);
@@ -685,7 +689,7 @@ void VKBlitScreen::CreateDescriptorSets() {
aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa);
}
-void VKBlitScreen::CreatePipelineLayout() {
+void BlitScreen::CreatePipelineLayout() {
const VkPipelineLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -708,7 +712,7 @@ void VKBlitScreen::CreatePipelineLayout() {
aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa);
}
-void VKBlitScreen::CreateGraphicsPipeline() {
+void BlitScreen::CreateGraphicsPipeline() {
const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@@ -981,7 +985,7 @@ void VKBlitScreen::CreateGraphicsPipeline() {
scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci);
}
-void VKBlitScreen::CreateSampler() {
+void BlitScreen::CreateSampler() {
const VkSamplerCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr,
@@ -1028,7 +1032,7 @@ void VKBlitScreen::CreateSampler() {
nn_sampler = device.GetLogical().CreateSampler(ci_nn);
}
-void VKBlitScreen::CreateFramebuffers() {
+void BlitScreen::CreateFramebuffers() {
const VkExtent2D size{swapchain.GetSize()};
framebuffers.resize(image_count);
@@ -1038,7 +1042,7 @@ void VKBlitScreen::CreateFramebuffers() {
}
}
-void VKBlitScreen::ReleaseRawImages() {
+void BlitScreen::ReleaseRawImages() {
for (const u64 tick : resource_ticks) {
scheduler.Wait(tick);
}
@@ -1053,7 +1057,7 @@ void VKBlitScreen::ReleaseRawImages() {
buffer_commit = MemoryCommit{};
}
-void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
+void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
const VkBufferCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -1070,7 +1074,7 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
}
-void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
+void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
raw_image_views.resize(image_count);
raw_buffer_commits.resize(image_count);
@@ -1295,8 +1299,8 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
}
-void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
- bool nn) const {
+void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
+ bool nn) const {
const VkDescriptorImageInfo image_info{
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
@@ -1332,8 +1336,8 @@ void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView im
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
}
-void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
- bool nn) const {
+void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
+ bool nn) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
@@ -1375,13 +1379,13 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
}
-void VKBlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const {
+void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const {
data.uniform.modelview_matrix =
MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height));
}
-void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
- const Layout::FramebufferLayout layout) const {
+void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
+ const Layout::FramebufferLayout layout) const {
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
@@ -1390,9 +1394,9 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
auto right = texcoords.right;
switch (framebuffer_transform_flags) {
- case Tegra::FramebufferConfig::TransformFlags::Unset:
+ case Service::android::BufferTransformFlags::Unset:
break;
- case Tegra::FramebufferConfig::TransformFlags::FlipV:
+ case Service::android::BufferTransformFlags::FlipV:
// Flip the framebuffer vertically
left = texcoords.right;
right = texcoords.left;
@@ -1403,11 +1407,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
break;
}
- UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
- f32 scale_u = 1.0f;
- f32 scale_v = 1.0f;
+ f32 left_start{};
+ if (framebuffer_crop_rect.Top() > 0) {
+ left_start = static_cast<f32>(framebuffer_crop_rect.Top()) /
+ static_cast<f32>(framebuffer_crop_rect.Bottom());
+ }
+ f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width);
+ f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height);
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
if (!fsr) {
@@ -1426,13 +1434,16 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
const auto y = static_cast<f32>(screen.top);
const auto w = static_cast<f32>(screen.GetWidth());
const auto h = static_cast<f32>(screen.GetHeight());
- data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v);
- data.vertices[1] = ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v);
- data.vertices[2] = ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v);
- data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
+ data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left_start + left * scale_v);
+ data.vertices[1] =
+ ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left_start + left * scale_v);
+ data.vertices[2] =
+ ScreenRectVertex(x, y + h, texcoords.top * scale_u, left_start + right * scale_v);
+ data.vertices[3] =
+ ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, left_start + right * scale_v);
}
-void VKBlitScreen::CreateFSR() {
+void BlitScreen::CreateFSR() {
const auto& layout = render_window.GetFramebufferLayout();
const VkExtent2D fsr_size{
.width = layout.screen.GetWidth(),
@@ -1441,12 +1452,12 @@ void VKBlitScreen::CreateFSR() {
fsr = std::make_unique<FSR>(device, memory_allocator, image_count, fsr_size);
}
-u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
+u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}
-u64 VKBlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
- std::size_t image_index) const {
+u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
+ std::size_t image_index) const {
constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index bbca71af3..29e2ea925 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -36,23 +35,22 @@ struct ScreenInfo;
class Device;
class FSR;
class RasterizerVulkan;
-class VKScheduler;
-class VKSwapchain;
+class Scheduler;
+class Swapchain;
-struct VKScreenInfo {
+struct ScreenInfo {
VkImageView image_view{};
u32 width{};
u32 height{};
bool is_srgb{};
};
-class VKBlitScreen {
+class BlitScreen {
public:
- explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
- Core::Frontend::EmuWindow& render_window, const Device& device,
- MemoryAllocator& memory_manager, VKSwapchain& swapchain,
- VKScheduler& scheduler, const VKScreenInfo& screen_info);
- ~VKBlitScreen();
+ explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
+ const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
+ Scheduler& scheduler, const ScreenInfo& screen_info);
+ ~BlitScreen();
void Recreate();
@@ -109,10 +107,10 @@ private:
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
- VKSwapchain& swapchain;
- VKScheduler& scheduler;
- const std::size_t image_count;
- const VKScreenInfo& screen_info;
+ Swapchain& swapchain;
+ Scheduler& scheduler;
+ std::size_t image_count;
+ const ScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
vk::ShaderModule fxaa_vertex_shader;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5ffd93499..558b8db56 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
@@ -47,7 +46,7 @@ size_t BytesPerIndex(VkIndexType index_type) {
case VK_INDEX_TYPE_UINT32:
return 4;
default:
- UNREACHABLE_MSG("Invalid index type={}", index_type);
+ ASSERT_MSG(false, "Invalid index type={}", index_type);
return 1;
}
}
@@ -125,8 +124,8 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
- VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
+ Scheduler& scheduler_, StagingBufferPool& staging_pool_,
+ UpdateDescriptorQueue& update_descriptor_queue_,
DescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
@@ -141,6 +140,18 @@ StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Download);
}
+u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
+ return device.GetDeviceLocalMemory();
+}
+
+u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
+ return device.GetDeviceMemoryUsage();
+}
+
+bool BufferCacheRuntime::CanReportMemoryUsage() const {
+ return device.CanReportMemoryUsage();
+}
+
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
@@ -355,7 +366,7 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle
std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
break;
default:
- UNREACHABLE();
+ ASSERT(false);
break;
}
staging_data += quad_size;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 1ee0d8420..a15c8b39b 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -17,7 +16,7 @@ namespace Vulkan {
class Device;
class DescriptorPool;
-class VKScheduler;
+class Scheduler;
class BufferCacheRuntime;
@@ -59,12 +58,18 @@ class BufferCacheRuntime {
public:
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
- VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
+ Scheduler& scheduler_, StagingBufferPool& staging_pool_,
+ UpdateDescriptorQueue& update_descriptor_queue_,
DescriptorPool& descriptor_pool);
void Finish();
+ u64 GetDeviceLocalMemory() const;
+
+ u64 GetDeviceMemoryUsage() const;
+
+ bool CanReportMemoryUsage() const;
+
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
@@ -119,9 +124,9 @@ private:
const Device& device;
MemoryAllocator& memory_allocator;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
StagingBufferPool& staging_pool;
- VKUpdateDescriptorQueue& update_descriptor_queue;
+ UpdateDescriptorQueue& update_descriptor_queue;
vk::Buffer quad_array_lut;
MemoryCommit quad_array_lut_commit;
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
index d8e92ac0e..2f09de1c1 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstddef>
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h
index 61c26a22a..ec1647f01 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.h
+++ b/src/video_core/renderer_vulkan/vk_command_pool.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 3e96c0f60..241d7573e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -1,13 +1,11 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
-#include <cstring>
+#include <array>
#include <memory>
#include <optional>
#include <utility>
-#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
@@ -22,15 +20,12 @@
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/types.h"
-#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-using Tegra::Texture::SWIZZLE_TABLE;
-
namespace {
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
@@ -203,9 +198,9 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
ComputePass::~ComputePass() = default;
-Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_,
- DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_)
+Uint8Pass::Uint8Pass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool,
+ StagingBufferPool& staging_buffer_pool_,
+ UpdateDescriptorQueue& update_descriptor_queue_)
: ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
VULKAN_UINT8_COMP_SPV),
@@ -244,10 +239,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
return {staging.buffer, staging.offset};
}
-QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
+QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_)
+ UpdateDescriptorQueue& update_descriptor_queue_)
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
@@ -268,7 +263,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
return 2;
}
- UNREACHABLE();
+ ASSERT(false);
return 2;
}();
const u32 input_size = num_vertices << index_shift;
@@ -292,7 +287,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
- const std::array push_constants{base_vertex, index_shift};
+ const std::array<u32, 2> push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
@@ -306,10 +301,10 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
return {staging.buffer, staging.offset};
}
-ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
+ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
+ UpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_)
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
@@ -331,31 +326,32 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const VkImageAspectFlags aspect_mask = image.AspectMask();
const VkImage vk_image = image.Handle();
const bool is_initialized = image.ExchangeInitialization();
- scheduler.Record(
- [vk_pipeline, vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) {
- const VkImageMemoryBarrier image_barrier{
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = is_initialized ? VK_ACCESS_SHADER_WRITE_BIT : VkAccessFlags{},
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
- .newLayout = VK_IMAGE_LAYOUT_GENERAL,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = vk_image,
- .subresourceRange{
- .aspectMask = aspect_mask,
- .baseMipLevel = 0,
- .levelCount = VK_REMAINING_MIP_LEVELS,
- .baseArrayLayer = 0,
- .layerCount = VK_REMAINING_ARRAY_LAYERS,
- },
- };
- cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
- : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
- });
+ scheduler.Record([vk_pipeline, vk_image, aspect_mask,
+ is_initialized](vk::CommandBuffer cmdbuf) {
+ const VkImageMemoryBarrier image_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = static_cast<VkAccessFlags>(is_initialized ? VK_ACCESS_SHADER_WRITE_BIT
+ : VK_ACCESS_NONE),
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+ .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = vk_image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
+ : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
+ });
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index c7b92cce0..dcc691a8e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -21,8 +20,8 @@ namespace Vulkan {
class Device;
class StagingBufferPool;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
+class Scheduler;
+class UpdateDescriptorQueue;
class Image;
struct StagingBufferRef;
@@ -49,9 +48,9 @@ private:
class Uint8Pass final : public ComputePass {
public:
- explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
+ explicit Uint8Pass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_);
+ UpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
/// Assemble uint8 indices into an uint16 index buffer
@@ -60,17 +59,17 @@ public:
u32 src_offset);
private:
- VKScheduler& scheduler;
+ Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
- VKUpdateDescriptorQueue& update_descriptor_queue;
+ UpdateDescriptorQueue& update_descriptor_queue;
};
class QuadIndexedPass final : public ComputePass {
public:
- explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
+ explicit QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_);
+ UpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
std::pair<VkBuffer, VkDeviceSize> Assemble(
@@ -78,17 +77,17 @@ public:
u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
private:
- VKScheduler& scheduler;
+ Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
- VKUpdateDescriptorQueue& update_descriptor_queue;
+ UpdateDescriptorQueue& update_descriptor_queue;
};
class ASTCDecoderPass final : public ComputePass {
public:
- explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
+ explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
+ UpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_);
~ASTCDecoderPass();
@@ -96,9 +95,9 @@ public:
std::span<const VideoCommon::SwizzleParameters> swizzles);
private:
- VKScheduler& scheduler;
+ Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
- VKUpdateDescriptorQueue& update_descriptor_queue;
+ UpdateDescriptorQueue& update_descriptor_queue;
MemoryAllocator& memory_allocator;
};
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index de36bcdb7..7906e11a8 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <vector>
@@ -26,7 +25,7 @@ using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
+ UpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
@@ -77,7 +76,7 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
- std::lock_guard lock{build_mutex};
+ std::scoped_lock lock{build_mutex};
is_built = true;
build_condvar.notify_one();
if (shader_notify) {
@@ -92,7 +91,7 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
}
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
- Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
+ Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
BufferCache& buffer_cache, TextureCache& texture_cache) {
update_descriptor_queue.Acquire();
@@ -127,8 +126,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
secondary_offset};
- const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
- const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr) << desc.shift_left};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr) << desc.secondary_shift_left};
return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 8c4b0a301..9879735fe 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -11,7 +10,6 @@
#include "common/common_types.h"
#include "common/thread_worker.h"
#include "shader_recompiler/shader_info.h"
-#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -26,12 +24,12 @@ namespace Vulkan {
class Device;
class PipelineStatistics;
-class VKScheduler;
+class Scheduler;
class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
+ UpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
@@ -44,11 +42,11 @@ public:
ComputePipeline(const ComputePipeline&) = delete;
void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
- VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
+ Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
private:
const Device& device;
- VKUpdateDescriptorQueue& update_descriptor_queue;
+ UpdateDescriptorQueue& update_descriptor_queue;
Shader::Info info;
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index d87da2a34..c7196b64e 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <mutex>
@@ -122,7 +121,7 @@ vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
}
-DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler)
+DescriptorPool::DescriptorPool(const Device& device_, Scheduler& scheduler)
: device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
DescriptorPool::~DescriptorPool() = default;
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index 59466aac5..bd6696b07 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -15,7 +14,7 @@
namespace Vulkan {
class Device;
-class VKScheduler;
+class Scheduler;
struct DescriptorBank;
@@ -63,7 +62,7 @@ private:
class DescriptorPool {
public:
- explicit DescriptorPool(const Device& device, VKScheduler& scheduler);
+ explicit DescriptorPool(const Device& device, Scheduler& scheduler);
~DescriptorPool();
DescriptorPool& operator=(const DescriptorPool&) = delete;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 3bec48d14..0214b103a 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <memory>
@@ -9,15 +8,11 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-InnerFence::InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_)
- : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
-
-InnerFence::InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
- : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
+InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_)
+ : FenceBase{is_stubbed_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default;
@@ -44,30 +39,25 @@ void InnerFence::Wait() {
scheduler.Wait(wait_tick);
}
-VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
- TextureCache& texture_cache_, BufferCache& buffer_cache_,
- VKQueryCache& query_cache_, const Device& device_,
- VKScheduler& scheduler_)
+FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ QueryCache& query_cache_, const Device& device_, Scheduler& scheduler_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {}
-Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
- return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
-}
-
-Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
- return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
+Fence FenceManager::CreateFence(bool is_stubbed) {
+ return std::make_shared<InnerFence>(scheduler, is_stubbed);
}
-void VKFenceManager::QueueFence(Fence& fence) {
+void FenceManager::QueueFence(Fence& fence) {
fence->Queue();
}
-bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
+bool FenceManager::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
-void VKFenceManager::WaitFence(Fence& fence) {
+void FenceManager::WaitFence(Fence& fence) {
fence->Wait();
}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 2f8322d29..7fe2afcd9 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -9,7 +8,6 @@
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
@@ -22,13 +20,12 @@ class RasterizerInterface;
namespace Vulkan {
class Device;
-class VKQueryCache;
-class VKScheduler;
+class QueryCache;
+class Scheduler;
class InnerFence : public VideoCommon::FenceBase {
public:
- explicit InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_);
- explicit InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
+ explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_);
~InnerFence();
void Queue();
@@ -38,30 +35,27 @@ public:
void Wait();
private:
- VKScheduler& scheduler;
+ Scheduler& scheduler;
u64 wait_tick = 0;
};
using Fence = std::shared_ptr<InnerFence>;
-using GenericFenceManager =
- VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
+using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
-class VKFenceManager final : public GenericFenceManager {
+class FenceManager final : public GenericFenceManager {
public:
- explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
- TextureCache& texture_cache, BufferCache& buffer_cache,
- VKQueryCache& query_cache, const Device& device,
- VKScheduler& scheduler);
+ explicit FenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCache& texture_cache, BufferCache& buffer_cache,
+ QueryCache& query_cache, const Device& device, Scheduler& scheduler);
protected:
- Fence CreateFence(u32 value, bool is_stubbed) override;
- Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+ Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
private:
- VKScheduler& scheduler;
+ Scheduler& scheduler;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index b630090e8..dd450169e 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <cmath>
#include "common/bit_cast.h"
@@ -173,7 +172,7 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image
CreatePipeline();
}
-VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
+VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
UpdateDescriptorSet(image_index, image_view);
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 6bbec3d36..5d872861f 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -11,13 +10,13 @@
namespace Vulkan {
class Device;
-class VKScheduler;
+class Scheduler;
class FSR {
public:
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
VkExtent2D output_size);
- VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
+ VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
private:
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index d514b71d0..f47786f48 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <span>
@@ -216,15 +215,14 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
} // Anonymous namespace
GraphicsPipeline::GraphicsPipeline(
- Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
- VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
+ Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
+ UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos)
- : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
- texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
+ : key{key_}, device{device_}, texture_cache{texture_cache_},
+ buffer_cache{buffer_cache_}, scheduler{scheduler_},
update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
@@ -258,7 +256,7 @@ GraphicsPipeline::GraphicsPipeline(
pipeline_statistics->Collect(*pipeline);
}
- std::lock_guard lock{build_mutex};
+ std::scoped_lock lock{build_mutex};
is_built = true;
build_condvar.notify_one();
if (shader_notify) {
@@ -289,7 +287,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
- const auto& regs{maxwell3d.regs};
+ const auto& regs{maxwell3d->regs};
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
const Shader::Info& info{stage_infos[stage]};
@@ -303,7 +301,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
++ssbo_index;
}
}
- const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
+ const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(cbufs[desc.cbuf_index].enabled);
const u32 index_offset{index << desc.size_shift};
@@ -316,13 +314,14 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
second_offset};
- const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
- const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 lhs_raw{gpu_memory->Read<u32>(addr) << desc.shift_left};
+ const u32 rhs_raw{gpu_memory->Read<u32>(separate_addr)
+ << desc.secondary_shift_left};
const u32 raw{lhs_raw | rhs_raw};
return TexturePair(raw, via_header_index);
}
}
- return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ return TexturePair(gpu_memory->Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
for (u32 index = 0; index < desc.count; ++index) {
@@ -560,7 +559,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
vertex_attributes.push_back({
.location = static_cast<u32>(index),
.binding = attribute.buffer,
- .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
+ .format = MaxwellToVK::VertexFormat(device, attribute.Type(), attribute.Size()),
.offset = attribute.offset,
});
}
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index a0c1d8f07..85602592b 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -63,22 +62,23 @@ class Device;
class PipelineStatistics;
class RenderPassCache;
class RescalingPushConstant;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
+class Scheduler;
+class UpdateDescriptorQueue;
class GraphicsPipeline {
static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
public:
- explicit GraphicsPipeline(
- Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
- VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
- VideoCore::ShaderNotify* shader_notify, const Device& device,
- DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue,
- Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics,
- RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key,
- std::array<vk::ShaderModule, NUM_STAGES> stages,
- const std::array<const Shader::Info*, NUM_STAGES>& infos);
+ explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache,
+ TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify,
+ const Device& device, DescriptorPool& descriptor_pool,
+ UpdateDescriptorQueue& update_descriptor_queue,
+ Common::ThreadWorker* worker_thread,
+ PipelineStatistics* pipeline_statistics,
+ RenderPassCache& render_pass_cache,
+ const GraphicsPipelineCacheKey& key,
+ std::array<vk::ShaderModule, NUM_STAGES> stages,
+ const std::array<const Shader::Info*, NUM_STAGES>& infos);
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
@@ -110,6 +110,11 @@ public:
return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
}
+ void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) {
+ maxwell3d = maxwell3d_;
+ gpu_memory = gpu_memory_;
+ }
+
private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
@@ -121,13 +126,13 @@ private:
void Validate();
const GraphicsPipelineCacheKey key;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D* maxwell3d;
+ Tegra::MemoryManager* gpu_memory;
const Device& device;
TextureCache& texture_cache;
BufferCache& buffer_cache;
- VKScheduler& scheduler;
- VKUpdateDescriptorQueue& update_descriptor_queue;
+ Scheduler& scheduler;
+ UpdateDescriptorQueue& update_descriptor_queue;
void (*configure_func)(GraphicsPipeline*, bool){};
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 6852c11b0..4e81d3d28 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <thread>
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 9be9c9bed..362ed579a 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a633b73e5..732e7b6f2 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <cstddef>
@@ -16,13 +15,11 @@
#include "common/microprofile.h"
#include "common/thread_worker.h"
#include "core/core.h"
-#include "core/memory.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate_program.h"
#include "shader_recompiler/program_header.h"
-#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@@ -56,7 +53,7 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
-constexpr u32 CACHE_VERSION = 5;
+constexpr u32 CACHE_VERSION = 6;
template <typename Container>
auto MakeSpan(Container& container) {
@@ -177,7 +174,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
case Maxwell::TessellationPrimitive::Quads:
return Shader::TessPrimitive::Quads;
}
- UNREACHABLE();
+ ASSERT(false);
return Shader::TessPrimitive::Triangles;
}();
info.tess_spacing = [&] {
@@ -190,7 +187,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
case Maxwell::TessellationSpacing::FractionalEven:
return Shader::TessSpacing::FractionalEven;
}
- UNREACHABLE();
+ ASSERT(false);
return Shader::TessSpacing::Equal;
}();
break;
@@ -262,20 +259,18 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
return std::memcmp(&rhs, this, Size()) == 0;
}
-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
+ Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
+ UpdateDescriptorQueue& update_descriptor_queue_,
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
- : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
- device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
- update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
- buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
+ : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
+ descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
+ render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
+ texture_cache{texture_cache_}, shader_notify{shader_notify_},
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
- workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
- serialization_thread(1, "yuzu:PipelineSerialization") {
+ workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
+ serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
const VkDriverIdKHR driver_id{device.GetDriverID()};
profile = Shader::Profile{
@@ -340,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
current_pipeline = nullptr;
return nullptr;
}
- graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
+ graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
device.IsExtVertexInputDynamicStateSupported());
if (current_pipeline) {
@@ -360,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
if (!shader) {
return nullptr;
}
- const auto& qmd{kepler_compute.launch_description};
+ const auto& qmd{kepler_compute->launch_description};
const ComputePipelineCacheKey key{
.unique_hash = shader->unique_hash,
.shared_memory_size = qmd.shared_alloc,
@@ -406,7 +401,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
ShaderPools pools;
auto pipeline{CreateComputePipeline(pools, key, env, state.statistics.get(), false)};
- std::lock_guard lock{state.mutex};
+ std::scoped_lock lock{state.mutex};
if (pipeline) {
compute_cache.emplace(key, std::move(pipeline));
}
@@ -436,8 +431,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs),
state.statistics.get(), false)};
- std::lock_guard lock{state.mutex};
- graphics_cache.emplace(key, std::move(pipeline));
+ std::scoped_lock lock{state.mutex};
+ if (pipeline) {
+ graphics_cache.emplace(key, std::move(pipeline));
+ }
++state.built;
if (state.has_loaded) {
callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
@@ -455,7 +452,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
state.has_loaded = true;
lock.unlock();
- workers.WaitForRequests();
+ workers.WaitForRequests(stop_loading);
if (state.statistics) {
state.statistics->Report();
@@ -487,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
}
// If something is using depth, we can assume that games are not rendering anything which
// will be used one time.
- if (maxwell3d.regs.zeta_enable) {
+ if (maxwell3d->regs.zeta_enable) {
return nullptr;
}
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
- if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
return pipeline;
}
return nullptr;
@@ -558,10 +555,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
previous_stage = &program;
}
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
- return std::make_unique<GraphicsPipeline>(
- maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
- descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
- std::move(modules), infos);
+ return std::make_unique<GraphicsPipeline>(scheduler, buffer_cache, texture_cache,
+ &shader_notify, device, descriptor_pool,
+ update_descriptor_queue, thread_worker, statistics,
+ render_pass_cache, key, std::move(modules), infos);
} catch (const Shader::Exception& exception) {
LOG_ERROR(Render_Vulkan, "{}", exception.what());
@@ -593,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
- const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
- const auto& qmd{kepler_compute.launch_description};
- ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+ const auto& qmd{kepler_compute->launch_description};
+ ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 4c135b5dd..61f9e9366 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -1,17 +1,14 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstddef>
#include <filesystem>
-#include <iosfwd>
#include <memory>
#include <type_traits>
#include <unordered_map>
-#include <utility>
#include <vector>
#include "common/common_types.h"
@@ -29,7 +26,6 @@
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/shader_cache.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
@@ -85,8 +81,8 @@ class Device;
class PipelineStatistics;
class RasterizerVulkan;
class RenderPassCache;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
+class Scheduler;
+class UpdateDescriptorQueue;
using VideoCommon::ShaderInfo;
@@ -104,11 +100,9 @@ struct ShaderPools {
class PipelineCache : public VideoCommon::ShaderCache {
public:
- explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::KeplerCompute& kepler_compute,
- Tegra::MemoryManager& gpu_memory, const Device& device,
- VKScheduler& scheduler, DescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
+ explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
+ DescriptorPool& descriptor_pool,
+ UpdateDescriptorQueue& update_descriptor_queue,
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
~PipelineCache();
@@ -142,9 +136,9 @@ private:
bool build_in_parallel);
const Device& device;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
DescriptorPool& descriptor_pool;
- VKUpdateDescriptorQueue& update_descriptor_queue;
+ UpdateDescriptorQueue& update_descriptor_queue;
RenderPassCache& render_pass_cache;
BufferCache& buffer_cache;
TextureCache& texture_cache;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 259cba156..7cb02631c 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <cstddef>
@@ -27,7 +26,7 @@ constexpr VkQueryType GetTarget(QueryType type) {
} // Anonymous namespace
-QueryPool::QueryPool(const Device& device_, VKScheduler& scheduler, QueryType type_)
+QueryPool::QueryPool(const Device& device_, Scheduler& scheduler, QueryType type_)
: ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
QueryPool::~QueryPool() = default;
@@ -66,15 +65,14 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
-VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
- const Device& device_, VKScheduler& scheduler_)
- : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
+QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+ Scheduler& scheduler_)
+ : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}
-VKQueryCache::~VKQueryCache() {
+QueryCache::~QueryCache() {
// TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
// destructor is called. The query cache should be redesigned to have a proper ownership model
// instead of using shared pointers.
@@ -85,15 +83,15 @@ VKQueryCache::~VKQueryCache() {
}
}
-std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(QueryType type) {
+std::pair<VkQueryPool, u32> QueryCache::AllocateQuery(QueryType type) {
return query_pools[static_cast<std::size_t>(type)].Commit();
}
-void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
+void QueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
query_pools[static_cast<std::size_t>(type)].Reserve(query);
}
-HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
+HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
QueryType type_)
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 7190946b9..26762ee09 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -23,14 +22,14 @@ namespace Vulkan {
class CachedQuery;
class Device;
class HostCounter;
-class VKQueryCache;
-class VKScheduler;
+class QueryCache;
+class Scheduler;
-using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
+using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryPool final : public ResourcePool {
public:
- explicit QueryPool(const Device& device, VKScheduler& scheduler, VideoCore::QueryType type);
+ explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type);
~QueryPool() override;
std::pair<VkQueryPool, u32> Commit();
@@ -50,13 +49,12 @@ private:
std::vector<bool> usage;
};
-class VKQueryCache final
- : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> {
+class QueryCache final
+ : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
- const Device& device_, VKScheduler& scheduler_);
- ~VKQueryCache();
+ explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+ Scheduler& scheduler_);
+ ~QueryCache();
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
@@ -66,19 +64,19 @@ public:
return device;
}
- VKScheduler& GetScheduler() const noexcept {
+ Scheduler& GetScheduler() const noexcept {
return scheduler;
}
private:
const Device& device;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
};
-class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
+class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
- explicit HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
+ explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_);
~HostCounter();
@@ -87,7 +85,7 @@ public:
private:
u64 BlockingQuery() const override;
- VKQueryCache& cache;
+ QueryCache& cache;
const VideoCore::QueryType type;
const std::pair<VkQueryPool, u32> query;
const u64 tick;
@@ -95,7 +93,7 @@ private:
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
- explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
+ explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
: CachedQueryBase{cpu_addr_, host_ptr_} {}
};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2227d9197..acfd5da7d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -1,20 +1,17 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
-#include <vector>
-#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
-#include "core/core.h"
+#include "video_core/control/channel_state.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/blit_image.h"
@@ -73,10 +70,17 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
const float width = conv(src.scale_x * 2.0f);
float y = conv(src.translate_y - src.scale_y);
float height = conv(src.scale_y * 2.0f);
- if (regs.screen_y_control.y_negate) {
+ bool y_negate = regs.screen_y_control.y_negate;
+
+ if (!device.IsNvViewportSwizzleSupported()) {
+ y_negate = y_negate != (src.swizzle.y == Maxwell::ViewportSwizzle::NegativeY);
+ }
+
+ if (y_negate) {
y += height;
height = -height;
}
+
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
VkViewport viewport{
.x = x,
@@ -145,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
} // Anonymous namespace
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
- Tegra::MemoryManager& gpu_memory_,
- Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,
+ Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
const Device& device_, MemoryAllocator& memory_allocator_,
- StateTracker& state_tracker_, VKScheduler& scheduler_)
- : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
- gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
- screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
- state_tracker{state_tracker_}, scheduler{scheduler_},
+ StateTracker& state_tracker_, Scheduler& scheduler_)
+ : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
+ memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
update_descriptor_queue(device, scheduler),
blit_image(device, scheduler, state_tracker, descriptor_pool),
@@ -162,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
memory_allocator, staging_pool,
blit_image, astc_decoder_pass,
render_pass_cache},
- texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ texture_cache(texture_cache_runtime, *this),
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
update_descriptor_queue, descriptor_pool),
- buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
- descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
- texture_cache, gpu.ShaderNotify()),
- query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
+ buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
+ pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
+ render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
+ query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache},
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
@@ -190,14 +190,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
return;
}
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ // update engine as channel may be different.
+ pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
BeginTransformFeedback();
UpdateDynamicStates();
- const auto& regs{maxwell3d.regs};
- const u32 num_instances{maxwell3d.mme_draw.instance_count};
+ const auto& regs{maxwell3d->regs};
+ const u32 num_instances{maxwell3d->mme_draw.instance_count};
const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
@@ -215,14 +217,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- if (!maxwell3d.ShouldExecute()) {
+ if (!maxwell3d->ShouldExecute()) {
return;
}
FlushWork();
query_cache.UpdateCounters();
- auto& regs = maxwell3d.regs;
+ auto& regs = maxwell3d->regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@@ -245,8 +247,15 @@ void RasterizerVulkan::Clear() {
}
UpdateViewportsState(regs);
+ VkRect2D default_scissor;
+ default_scissor.offset.x = 0;
+ default_scissor.offset.y = 0;
+ default_scissor.extent.width = std::numeric_limits<s32>::max();
+ default_scissor.extent.height = std::numeric_limits<s32>::max();
+
VkClearRect clear_rect{
- .rect = GetScissorState(regs, 0, up_scale, down_shift),
+ .rect = regs.clear_flags.scissor ? GetScissorState(regs, 0, up_scale, down_shift)
+ : default_scissor,
.baseArrayLayer = regs.clear_buffers.layer,
.layerCount = 1,
};
@@ -336,9 +345,9 @@ void RasterizerVulkan::DispatchCompute() {
return;
}
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
- pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
+ pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
- const auto& qmd{kepler_compute.launch_description};
+ const auto& qmd{kepler_compute->launch_description};
const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
@@ -419,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
}
}
-void RasterizerVulkan::SyncGuestHost() {
+void RasterizerVulkan::InvalidateGPUCache() {
pipeline_cache.SyncGuestHost();
{
std::scoped_lock lock{buffer_cache.mutex};
@@ -439,40 +448,30 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
pipeline_cache.OnCPUWrite(addr, size);
}
-void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
{
std::scoped_lock lock{texture_cache.mutex};
- texture_cache.UnmapGPUMemory(addr, size);
+ texture_cache.UnmapGPUMemory(as_id, addr, size);
}
}
-void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
- if (!gpu.IsAsync()) {
- gpu_memory.Write<u32>(addr, value);
- return;
- }
- fence_manager.SignalSemaphore(addr, value);
+void RasterizerVulkan::SignalFence(std::function<void()>&& func) {
+ fence_manager.SignalFence(std::move(func));
+}
+
+void RasterizerVulkan::SyncOperation(std::function<void()>&& func) {
+ fence_manager.SyncOperation(std::move(func));
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
- if (!gpu.IsAsync()) {
- gpu.IncrementSyncPoint(value);
- return;
- }
fence_manager.SignalSyncPoint(value);
}
void RasterizerVulkan::SignalReference() {
- if (!gpu.IsAsync()) {
- return;
- }
fence_manager.SignalOrdering();
}
void RasterizerVulkan::ReleaseFences() {
- if (!gpu.IsAsync()) {
- return;
- }
fence_manager.WaitPendingFences();
}
@@ -549,13 +548,13 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
}
void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
- std::span<u8> memory) {
- auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+ std::span<const u8> memory) {
+ auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
if (!cpu_addr) [[unlikely]] {
- gpu_memory.WriteBlock(address, memory.data(), copy_size);
+ gpu_memory->WriteBlock(address, memory.data(), copy_size);
return;
}
- gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+ gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
@@ -624,7 +623,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
}
void RasterizerVulkan::UpdateDynamicStates() {
- auto& regs = maxwell3d.regs;
+ auto& regs = maxwell3d->regs;
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@@ -648,7 +647,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
}
void RasterizerVulkan::BeginTransformFeedback() {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -664,7 +663,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
}
void RasterizerVulkan::EndTransformFeedback() {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -788,8 +787,8 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
});
} else {
// Front face defines both faces
- scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask,
- test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([ref = regs.stencil_front_func_ref, write_mask = regs.stencil_front_mask,
+ test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref);
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask);
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask);
@@ -914,7 +913,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
}
void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
- auto& dirty{maxwell3d.dirty.flags};
+ auto& dirty{maxwell3d->dirty.flags};
if (!dirty[Dirty::VertexInput]) {
return;
}
@@ -943,7 +942,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
.pNext = nullptr,
.location = static_cast<u32>(index),
.binding = binding,
- .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size),
+ .format = MaxwellToVK::VertexFormat(device, attribute.type, attribute.size),
.offset = attribute.offset,
});
}
@@ -971,4 +970,41 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
});
}
+void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
+ CreateChannel(channel);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.CreateChannel(channel);
+ buffer_cache.CreateChannel(channel);
+ }
+ pipeline_cache.CreateChannel(channel);
+ query_cache.CreateChannel(channel);
+ state_tracker.SetupTables(channel);
+}
+
+void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
+ const s32 channel_id = channel.bind_id;
+ BindToChannel(channel_id);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.BindToChannel(channel_id);
+ buffer_cache.BindToChannel(channel_id);
+ }
+ pipeline_cache.BindToChannel(channel_id);
+ query_cache.BindToChannel(channel_id);
+ state_tracker.ChangeChannel(channel);
+ state_tracker.InvalidateState();
+}
+
+void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
+ EraseChannel(channel_id);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.EraseChannel(channel_id);
+ buffer_cache.EraseChannel(channel_id);
+ }
+ pipeline_cache.EraseChannel(channel_id);
+ query_cache.EraseChannel(channel_id);
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 5af2e275b..4cde3c983 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -1,30 +1,24 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
-#include <bitset>
-#include <memory>
-#include <utility>
-#include <vector>
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/blit_image.h"
-#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -45,7 +39,7 @@ class Maxwell3D;
namespace Vulkan {
-struct VKScreenInfo;
+struct ScreenInfo;
class StateTracker;
@@ -61,13 +55,13 @@ private:
BufferCache& buffer_cache;
};
-class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
+class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
+ protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- VKScreenInfo& screen_info_, const Device& device_,
- MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
- VKScheduler& scheduler_);
+ Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
+ const Device& device_, MemoryAllocator& memory_allocator_,
+ StateTracker& state_tracker_, Scheduler& scheduler_);
~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -82,10 +76,11 @@ public:
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
- void SyncGuestHost() override;
+ void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
- void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
- void SignalSemaphore(GPUVAddr addr, u32 value) override;
+ void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
+ void SignalFence(std::function<void()>&& func) override;
+ void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
@@ -100,12 +95,18 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
- std::span<u8> memory) override;
+ std::span<const u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
+ void InitializeChannel(Tegra::Control::ChannelState& channel) override;
+
+ void BindChannel(Tegra::Control::ChannelState& channel) override;
+
+ void ReleaseChannel(s32 channel_id) override;
+
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
@@ -141,19 +142,16 @@ private:
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
Tegra::GPU& gpu;
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- VKScreenInfo& screen_info;
+ ScreenInfo& screen_info;
const Device& device;
MemoryAllocator& memory_allocator;
StateTracker& state_tracker;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
StagingBufferPool staging_pool;
DescriptorPool descriptor_pool;
- VKUpdateDescriptorQueue update_descriptor_queue;
+ UpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
ASTCDecoderPass astc_decoder_pass;
RenderPassCache render_pass_cache;
@@ -163,9 +161,9 @@ private:
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
PipelineCache pipeline_cache;
- VKQueryCache query_cache;
+ QueryCache query_cache;
AccelerateDMA accelerate_dma;
- VKFenceManager fence_manager;
+ FenceManager fence_manager;
vk::Event wfi_event;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
index 451ffe019..ae9f1de64 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <unordered_map>
@@ -36,7 +35,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat
RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
- std::lock_guard lock{mutex};
+ std::scoped_lock lock{mutex};
const auto [pair, is_new] = cache.try_emplace(key);
if (!is_new) {
return *pair->second;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
index eaa0ed775..dc21b7e69 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index 2dd514968..6c8ac22f4 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <optional>
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index f0b80ad59..a39a3b881 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 7d9d4f7ba..d96720b80 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -1,10 +1,8 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <memory>
#include <mutex>
-#include <optional>
#include <thread>
#include <utility>
@@ -23,7 +21,7 @@ namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
-void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
+void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
@@ -37,7 +35,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
last = nullptr;
}
-VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
+Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
: device{device_}, state_tracker{state_tracker_},
master_semaphore{std::make_unique<MasterSemaphore>(device)},
command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
@@ -46,14 +44,14 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
}
-VKScheduler::~VKScheduler() = default;
+Scheduler::~Scheduler() = default;
-void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
}
-void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
const u64 presubmit_tick = CurrentTick();
SubmitExecution(signal_semaphore, wait_semaphore);
WaitWorker();
@@ -61,7 +59,7 @@ void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphor
AllocateNewContext();
}
-void VKScheduler::WaitWorker() {
+void Scheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
@@ -69,19 +67,19 @@ void VKScheduler::WaitWorker() {
wait_cv.wait(lock, [this] { return work_queue.empty(); });
}
-void VKScheduler::DispatchWork() {
+void Scheduler::DispatchWork() {
if (chunk->Empty()) {
return;
}
{
- std::lock_guard lock{work_mutex};
+ std::scoped_lock lock{work_mutex};
work_queue.push(std::move(chunk));
}
work_cv.notify_one();
AcquireNewChunk();
}
-void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) {
+void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {
const VkRenderPass renderpass = framebuffer->RenderPass();
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
const VkExtent2D render_area = framebuffer->RenderArea();
@@ -116,11 +114,11 @@ void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) {
renderpass_image_ranges = framebuffer->ImageRanges();
}
-void VKScheduler::RequestOutsideRenderPassOperationContext() {
+void Scheduler::RequestOutsideRenderPassOperationContext() {
EndRenderPass();
}
-bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
+bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
if (state.graphics_pipeline == pipeline) {
return false;
}
@@ -128,7 +126,7 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
return true;
}
-bool VKScheduler::UpdateRescaling(bool is_rescaling) {
+bool Scheduler::UpdateRescaling(bool is_rescaling) {
if (state.rescaling_defined && is_rescaling == state.is_rescaling) {
return false;
}
@@ -137,8 +135,8 @@ bool VKScheduler::UpdateRescaling(bool is_rescaling) {
return true;
}
-void VKScheduler::WorkerThread(std::stop_token stop_token) {
- Common::SetCurrentThreadName("yuzu:VulkanWorker");
+void Scheduler::WorkerThread(std::stop_token stop_token) {
+ Common::SetCurrentThreadName("VulkanWorker");
do {
std::unique_ptr<CommandChunk> work;
{
@@ -158,12 +156,12 @@ void VKScheduler::WorkerThread(std::stop_token stop_token) {
if (has_submit) {
AllocateWorkerCommandBuffer();
}
- std::lock_guard reserve_lock{reserve_mutex};
+ std::scoped_lock reserve_lock{reserve_mutex};
chunk_reserve.push_back(std::move(work));
} while (!stop_token.stop_requested());
}
-void VKScheduler::AllocateWorkerCommandBuffer() {
+void Scheduler::AllocateWorkerCommandBuffer() {
current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
current_cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
@@ -173,7 +171,7 @@ void VKScheduler::AllocateWorkerCommandBuffer() {
});
}
-void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
@@ -227,25 +225,25 @@ void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait
DispatchWork();
}
-void VKScheduler::AllocateNewContext() {
+void Scheduler::AllocateNewContext() {
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
query_cache->UpdateCounters();
}
}
-void VKScheduler::InvalidateState() {
+void Scheduler::InvalidateState() {
state.graphics_pipeline = nullptr;
state.rescaling_defined = false;
state_tracker.InvalidateCommandBufferState();
}
-void VKScheduler::EndPendingOperations() {
+void Scheduler::EndPendingOperations() {
query_cache->DisableStreams();
EndRenderPass();
}
-void VKScheduler::EndRenderPass() {
+void Scheduler::EndRenderPass() {
if (!state.renderpass) {
return;
}
@@ -282,8 +280,8 @@ void VKScheduler::EndRenderPass() {
num_renderpass_images = 0;
}
-void VKScheduler::AcquireNewChunk() {
- std::lock_guard lock{reserve_mutex};
+void Scheduler::AcquireNewChunk() {
+ std::scoped_lock lock{reserve_mutex};
if (chunk_reserve.empty()) {
chunk = std::make_unique<CommandChunk>();
return;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index e69aa136b..c04aad08f 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -1,10 +1,8 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
-#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <memory>
@@ -24,14 +22,14 @@ class Device;
class Framebuffer;
class GraphicsPipeline;
class StateTracker;
-class VKQueryCache;
+class QueryCache;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
-class VKScheduler {
+class Scheduler {
public:
- explicit VKScheduler(const Device& device, StateTracker& state_tracker);
- ~VKScheduler();
+ explicit Scheduler(const Device& device, StateTracker& state_tracker);
+ ~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
@@ -63,7 +61,7 @@ public:
void InvalidateState();
/// Assigns the query cache.
- void SetQueryCache(VKQueryCache& query_cache_) {
+ void SetQueryCache(QueryCache& query_cache_) {
query_cache = &query_cache_;
}
@@ -214,7 +212,7 @@ private:
std::unique_ptr<MasterSemaphore> master_semaphore;
std::unique_ptr<CommandPool> command_pool;
- VKQueryCache* query_cache = nullptr;
+ QueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index aaad4f292..7a0a2b154 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -1,11 +1,8 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
-#include <memory>
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
index 9517cbe84..2f7c9f25c 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.h
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 5d5329abf..7fb256953 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <utility>
@@ -27,20 +26,39 @@ using namespace Common::Literals;
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Maximum size to put elements in the stream buffer
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
-// Stream buffer size in bytes
-constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
-constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
-bool IsStreamHeap(VkMemoryHeap heap) noexcept {
- return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
+static bool IsStreamHeap(VkMemoryHeap heap, size_t staging_buffer_size) noexcept {
+ return staging_buffer_size < (heap.size * 2) / 3;
+}
+
+static bool HasLargeDeviceLocalHostVisibleMemory(const VkPhysicalDeviceMemoryProperties& props) {
+ const auto flags{VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT};
+
+ for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+ const auto& memory_type{props.memoryTypes[type_index]};
+
+ if ((memory_type.propertyFlags & flags) != flags) {
+ // Memory must be device local and host visible
+ continue;
+ }
+
+ const auto& heap{props.memoryHeaps[memory_type.heapIndex]};
+ if (heap.size >= 7168_MiB) {
+ // This is the right type of memory
+ return true;
+ }
+ }
+
+ return false;
}
std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- VkMemoryPropertyFlags flags) noexcept {
+ VkMemoryPropertyFlags flags,
+ size_t staging_buffer_size) noexcept {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
if (((type_mask >> type_index) & 1) == 0) {
// Memory type is incompatible
@@ -51,7 +69,7 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
// Memory type doesn't have the flags we want
continue;
}
- if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
+ if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex], staging_buffer_size)) {
// Memory heap is not suitable for streaming
continue;
}
@@ -62,17 +80,17 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
}
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- bool try_device_local) {
+ bool try_device_local, size_t staging_buffer_size) {
std::optional<u32> type;
if (try_device_local) {
// Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
- type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
+ type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS, staging_buffer_size);
if (type) {
return *type;
}
}
// Otherwise try without the DEVICE_LOCAL_BIT
- type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
+ type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS, staging_buffer_size);
if (type) {
return *type;
}
@@ -80,20 +98,32 @@ u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
-size_t Region(size_t iterator) noexcept {
- return iterator / REGION_SIZE;
+size_t Region(size_t iterator, size_t region_size) noexcept {
+ return iterator / region_size;
}
} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
- VKScheduler& scheduler_)
+ Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+
+ const auto memory_properties{device.GetPhysical().GetMemoryProperties().memoryProperties};
+ if (HasLargeDeviceLocalHostVisibleMemory(memory_properties)) {
+ // Possible on many integrated and newer discrete cards
+ staging_buffer_size = 1_GiB;
+ } else {
+ // Well-supported default size used by most Vulkan PC games
+ staging_buffer_size = 256_MiB;
+ }
+
+ region_size = staging_buffer_size / StagingBufferPool::NUM_SYNCS;
+
const vk::Device& dev = device.GetLogical();
stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = STREAM_BUFFER_SIZE,
+ .size = staging_buffer_size,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
@@ -118,19 +148,18 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.image = nullptr,
.buffer = *stream_buffer,
};
- const auto memory_properties = device.GetPhysical().GetMemoryProperties();
VkMemoryAllocateInfo stream_memory_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = make_dedicated ? &dedicated_info : nullptr,
.allocationSize = requirements.size,
- .memoryTypeIndex =
- FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
+ .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true,
+ staging_buffer_size),
};
stream_memory = dev.TryAllocateMemory(stream_memory_info);
if (!stream_memory) {
LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
- stream_memory_info.memoryTypeIndex =
- FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
+ stream_memory_info.memoryTypeIndex = FindMemoryTypeIndex(
+ memory_properties, requirements.memoryTypeBits, false, staging_buffer_size);
stream_memory = dev.AllocateMemory(stream_memory_info);
}
@@ -138,7 +167,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
}
stream_buffer.BindMemory(*stream_memory, 0);
- stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+ stream_pointer = stream_memory.Map(0, staging_buffer_size);
}
StagingBufferPool::~StagingBufferPool() = default;
@@ -159,25 +188,25 @@ void StagingBufferPool::TickFrame() {
}
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
- if (AreRegionsActive(Region(free_iterator) + 1,
- std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
+ if (AreRegionsActive(Region(free_iterator, region_size) + 1,
+ std::min(Region(iterator + size, region_size) + 1, NUM_SYNCS))) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
const u64 current_tick = scheduler.CurrentTick();
- std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
- current_tick);
+ std::fill(sync_ticks.begin() + Region(used_iterator, region_size),
+ sync_ticks.begin() + Region(iterator, region_size), current_tick);
used_iterator = iterator;
free_iterator = std::max(free_iterator, iterator + size);
- if (iterator + size >= STREAM_BUFFER_SIZE) {
- std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
- current_tick);
+ if (iterator + size >= staging_buffer_size) {
+ std::fill(sync_ticks.begin() + Region(used_iterator, region_size),
+ sync_ticks.begin() + NUM_SYNCS, current_tick);
used_iterator = 0;
iterator = 0;
free_iterator = size;
- if (AreRegionsActive(0, Region(size) + 1)) {
+ if (AreRegionsActive(0, Region(size, region_size) + 1)) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
@@ -264,7 +293,7 @@ StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage
case MemoryUsage::Download:
return download_cache;
default:
- UNREACHABLE_MSG("Invalid memory usage={}", usage);
+ ASSERT_MSG(false, "Invalid memory usage={}", usage);
return upload_cache;
}
}
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 69f7618de..90c67177f 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -15,7 +14,7 @@
namespace Vulkan {
class Device;
-class VKScheduler;
+class Scheduler;
struct StagingBufferRef {
VkBuffer buffer;
@@ -28,7 +27,7 @@ public:
static constexpr size_t NUM_SYNCS = 16;
explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
- VKScheduler& scheduler);
+ Scheduler& scheduler);
~StagingBufferPool();
StagingBufferRef Request(size_t size, MemoryUsage usage);
@@ -83,7 +82,7 @@ private:
const Device& device;
MemoryAllocator& memory_allocator;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
vk::Buffer stream_buffer;
vk::DeviceMemory stream_memory;
@@ -94,6 +93,9 @@ private:
size_t free_iterator = 0;
std::array<u64, NUM_SYNCS> sync_ticks{};
+ size_t staging_buffer_size = 0;
+ size_t region_size = 0;
+
StagingBuffersCache device_local_cache;
StagingBuffersCache upload_cache;
StagingBuffersCache download_cache;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index c00913f55..f234e1a31 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -1,17 +1,15 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstddef>
-#include <iterator>
#include "common/common_types.h"
#include "core/core.h"
+#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
@@ -176,9 +174,8 @@ void SetupDirtyVertexBindings(Tables& tables) {
}
} // Anonymous namespace
-StateTracker::StateTracker(Tegra::GPU& gpu)
- : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
- auto& tables{gpu.Maxwell3D().dirty.tables};
+void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
+ auto& tables{channel_state.maxwell_3d->dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
@@ -201,4 +198,15 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyVertexBindings(tables);
}
+void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
+ flags = &channel_state.maxwell_3d->dirty.flags;
+}
+
+void StateTracker::InvalidateState() {
+ flags->set();
+}
+
+StateTracker::StateTracker()
+ : flags{&default_flags}, default_flags{}, invalidation_flags{MakeInvalidationFlags()} {}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 40a149832..2296dea60 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -8,10 +7,15 @@
#include <limits>
#include "common/common_types.h"
-#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
namespace Vulkan {
namespace Dirty {
@@ -55,19 +59,19 @@ class StateTracker {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
- explicit StateTracker(Tegra::GPU& gpu);
+ explicit StateTracker();
void InvalidateCommandBufferState() {
- flags |= invalidation_flags;
+ (*flags) |= invalidation_flags;
current_topology = INVALID_TOPOLOGY;
}
void InvalidateViewports() {
- flags[Dirty::Viewports] = true;
+ (*flags)[Dirty::Viewports] = true;
}
void InvalidateScissors() {
- flags[Dirty::Scissors] = true;
+ (*flags)[Dirty::Scissors] = true;
}
bool TouchViewports() {
@@ -141,16 +145,23 @@ public:
return has_changed;
}
+ void SetupTables(Tegra::Control::ChannelState& channel_state);
+
+ void ChangeChannel(Tegra::Control::ChannelState& channel_state);
+
+ void InvalidateState();
+
private:
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
bool Exchange(std::size_t id, bool new_value) const noexcept {
- const bool is_dirty = flags[id];
- flags[id] = new_value;
+ const bool is_dirty = (*flags)[id];
+ (*flags)[id] = new_value;
return is_dirty;
}
- Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 8972a6921..706d9ba74 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -1,17 +1,14 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <limits>
#include <vector>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
-#include "core/frontend/framebuffer_layout.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
@@ -36,12 +33,14 @@ VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats)
}
VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) {
- // Mailbox doesn't lock the application like fifo (vsync), prefer it
+ // Mailbox (triple buffering) doesn't lock the application like fifo (vsync),
+ // prefer it if vsync option is not selected
const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR);
- if (found_mailbox != modes.end()) {
+ if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless &&
+ found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) {
return VK_PRESENT_MODE_MAILBOX_KHR;
}
- if (Settings::values.disable_fps_limit.GetValue()) {
+ if (!Settings::values.use_speed_limit.GetValue()) {
// FIFO present mode locks the framerate to the monitor's refresh rate,
// Find an alternative to surpass this limitation if FPS is unlocked.
const auto found_imm = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_IMMEDIATE_KHR);
@@ -67,15 +66,15 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
} // Anonymous namespace
-VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
- u32 width, u32 height, bool srgb)
+Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width,
+ u32 height, bool srgb)
: surface{surface_}, device{device_}, scheduler{scheduler_} {
Create(width, height, srgb);
}
-VKSwapchain::~VKSwapchain() = default;
+Swapchain::~Swapchain() = default;
-void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
+void Swapchain::Create(u32 width, u32 height, bool srgb) {
is_outdated = false;
is_suboptimal = false;
@@ -96,7 +95,7 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
resource_ticks.resize(image_count);
}
-void VKSwapchain::AcquireNextImage() {
+void Swapchain::AcquireNextImage() {
const VkResult result = device.GetLogical().AcquireNextImageKHR(
*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
VK_NULL_HANDLE, &image_index);
@@ -117,7 +116,7 @@ void VKSwapchain::AcquireNextImage() {
resource_ticks[image_index] = scheduler.CurrentTick();
}
-void VKSwapchain::Present(VkSemaphore render_semaphore) {
+void Swapchain::Present(VkSemaphore render_semaphore) {
const auto present_queue{device.GetPresentQueue()};
const VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
@@ -148,8 +147,8 @@ void VKSwapchain::Present(VkSemaphore render_semaphore) {
}
}
-void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width,
- u32 height, bool srgb) {
+void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
+ bool srgb) {
const auto physical_device{device.GetPhysical()};
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
@@ -158,8 +157,16 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
present_mode = ChooseSwapPresentMode(present_modes);
u32 requested_image_count{capabilities.minImageCount + 1};
- if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
- requested_image_count = capabilities.maxImageCount;
+ // Ensure Tripple buffering if possible.
+ if (capabilities.maxImageCount > 0) {
+ if (requested_image_count > capabilities.maxImageCount) {
+ requested_image_count = capabilities.maxImageCount;
+ } else {
+ requested_image_count =
+ std::max(requested_image_count, std::min(3U, capabilities.maxImageCount));
+ }
+ } else {
+ requested_image_count = std::max(requested_image_count, 3U);
}
VkSwapchainCreateInfoKHR swapchain_ci{
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
@@ -208,20 +215,20 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
extent = swapchain_ci.imageExtent;
current_srgb = srgb;
- current_fps_unlocked = Settings::values.disable_fps_limit.GetValue();
+ current_fps_unlocked = !Settings::values.use_speed_limit.GetValue();
images = swapchain.GetImages();
image_count = static_cast<u32>(images.size());
image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
}
-void VKSwapchain::CreateSemaphores() {
+void Swapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
std::ranges::generate(present_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
}
-void VKSwapchain::CreateImageViews() {
+void Swapchain::CreateImageViews() {
VkImageViewCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
@@ -253,7 +260,7 @@ void VKSwapchain::CreateImageViews() {
}
}
-void VKSwapchain::Destroy() {
+void Swapchain::Destroy() {
frame_index = 0;
present_semaphores.clear();
framebuffers.clear();
@@ -261,11 +268,11 @@ void VKSwapchain::Destroy() {
swapchain.reset();
}
-bool VKSwapchain::HasFpsUnlockChanged() const {
- return current_fps_unlocked != Settings::values.disable_fps_limit.GetValue();
+bool Swapchain::HasFpsUnlockChanged() const {
+ return current_fps_unlocked != !Settings::values.use_speed_limit.GetValue();
}
-bool VKSwapchain::NeedsPresentModeUpdate() const {
+bool Swapchain::NeedsPresentModeUpdate() const {
// Mailbox present mode is the ideal for all scenarios. If it is not available,
// A different present mode is needed to support unlocked FPS above the monitor's refresh rate.
return present_mode != VK_PRESENT_MODE_MAILBOX_KHR && HasFpsUnlockChanged();
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 61a6d959e..111b3902d 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -16,13 +15,13 @@ struct FramebufferLayout;
namespace Vulkan {
class Device;
-class VKScheduler;
+class Scheduler;
-class VKSwapchain {
+class Swapchain {
public:
- explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
- u32 width, u32 height, bool srgb);
- ~VKSwapchain();
+ explicit Swapchain(VkSurfaceKHR surface, const Device& device, Scheduler& scheduler, u32 width,
+ u32 height, bool srgb);
+ ~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height, bool srgb);
@@ -95,7 +94,7 @@ private:
const VkSurfaceKHR surface;
const Device& device;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
vk::SwapchainKHR swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 0ba56ff1e..305ad8aee 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
@@ -71,7 +70,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case ImageType::Buffer:
break;
}
- UNREACHABLE_MSG("Invalid image type={}", type);
+ ASSERT_MSG(false, "Invalid image type={}", type);
return {};
}
@@ -88,7 +87,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case 16:
return VK_SAMPLE_COUNT_16_BIT;
default:
- UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
+ ASSERT_MSG(false, "Invalid number of samples={}", num_samples);
return VK_SAMPLE_COUNT_1_BIT;
}
}
@@ -108,7 +107,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
break;
default:
- UNREACHABLE_MSG("Invalid surface type");
+ ASSERT_MSG(false, "Invalid surface type");
}
}
if (info.storage) {
@@ -180,7 +179,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case VideoCore::Surface::SurfaceType::DepthStencil:
return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
default:
- UNREACHABLE_MSG("Invalid surface type");
+ ASSERT_MSG(false, "Invalid surface type");
return VkImageAspectFlags{};
}
}
@@ -222,7 +221,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case SwizzleSource::OneInt:
return VK_COMPONENT_SWIZZLE_ONE;
}
- UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
+ ASSERT_MSG(false, "Invalid swizzle={}", swizzle);
return VK_COMPONENT_SWIZZLE_ZERO;
}
@@ -231,6 +230,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case Shader::TextureType::Color1D:
return VK_IMAGE_VIEW_TYPE_1D;
case Shader::TextureType::Color2D:
+ case Shader::TextureType::Color2DRect:
return VK_IMAGE_VIEW_TYPE_2D;
case Shader::TextureType::ColorCube:
return VK_IMAGE_VIEW_TYPE_CUBE;
@@ -243,10 +243,10 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case Shader::TextureType::ColorArrayCube:
return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
case Shader::TextureType::Buffer:
- UNREACHABLE_MSG("Texture buffers can't be image views");
+ ASSERT_MSG(false, "Texture buffers can't be image views");
return VK_IMAGE_VIEW_TYPE_1D;
}
- UNREACHABLE_MSG("Invalid image view type={}", type);
+ ASSERT_MSG(false, "Invalid image view type={}", type);
return VK_IMAGE_VIEW_TYPE_2D;
}
@@ -255,6 +255,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case VideoCommon::ImageViewType::e1D:
return VK_IMAGE_VIEW_TYPE_1D;
case VideoCommon::ImageViewType::e2D:
+ case VideoCommon::ImageViewType::Rect:
return VK_IMAGE_VIEW_TYPE_2D;
case VideoCommon::ImageViewType::Cube:
return VK_IMAGE_VIEW_TYPE_CUBE;
@@ -266,14 +267,11 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
case VideoCommon::ImageViewType::CubeArray:
return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
- case VideoCommon::ImageViewType::Rect:
- UNIMPLEMENTED_MSG("Rect image view");
- return VK_IMAGE_VIEW_TYPE_2D;
case VideoCommon::ImageViewType::Buffer:
- UNREACHABLE_MSG("Texture buffers can't be image views");
+ ASSERT_MSG(false, "Texture buffers can't be image views");
return VK_IMAGE_VIEW_TYPE_1D;
}
- UNREACHABLE_MSG("Invalid image view type={}", type);
+ ASSERT_MSG(false, "Invalid image view type={}", type);
return VK_IMAGE_VIEW_TYPE_2D;
}
@@ -438,6 +436,32 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}
}
+[[nodiscard]] SwizzleSource SwapGreenRed(SwizzleSource value) {
+ switch (value) {
+ case SwizzleSource::R:
+ return SwizzleSource::G;
+ case SwizzleSource::G:
+ return SwizzleSource::R;
+ default:
+ return value;
+ }
+}
+
+[[nodiscard]] SwizzleSource SwapSpecial(SwizzleSource value) {
+ switch (value) {
+ case SwizzleSource::A:
+ return SwizzleSource::R;
+ case SwizzleSource::R:
+ return SwizzleSource::A;
+ case SwizzleSource::G:
+ return SwizzleSource::B;
+ case SwizzleSource::B:
+ return SwizzleSource::G;
+ default:
+ return value;
+ }
+}
+
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
VkImageAspectFlags aspect_mask, bool is_initialized,
std::span<const VkBufferImageCopy> copies) {
@@ -554,12 +578,25 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
};
}
-[[nodiscard]] bool IsFormatFlipped(PixelFormat format) {
+void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4>& swizzle,
+ bool emulate_bgr565) {
switch (format) {
case PixelFormat::A1B5G5R5_UNORM:
- return true;
+ std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
+ break;
+ case PixelFormat::B5G6R5_UNORM:
+ if (emulate_bgr565) {
+ std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
+ }
+ break;
+ case PixelFormat::A5B5G5R1_UNORM:
+ std::ranges::transform(swizzle, swizzle.begin(), SwapSpecial);
+ break;
+ case PixelFormat::G4R4_UNORM:
+ std::ranges::transform(swizzle, swizzle.begin(), SwapGreenRed);
+ break;
default:
- return false;
+ break;
}
}
@@ -606,11 +643,11 @@ struct RangedBarrierRange {
case Shader::ImageFormat::R32G32B32A32_UINT:
return VK_FORMAT_R32G32B32A32_UINT;
}
- UNREACHABLE_MSG("Invalid image format={}", format);
+ ASSERT_MSG(false, "Invalid image format={}", format);
return VK_FORMAT_R32_UINT;
}
-void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
+void BlitScale(Scheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
bool up_scaling = true) {
const bool is_2d = info.type == ImageType::e2D;
@@ -750,7 +787,7 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con
}
} // Anonymous namespace
-TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
+TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& scheduler_,
MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_buffer_pool_,
BlitImageHelper& blit_image_helper_,
@@ -779,11 +816,6 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
!device.IsExtShaderStencilExportSupported()) {
return true;
}
- if (VideoCore::Surface::GetFormatType(src.info.format) ==
- VideoCore::Surface::SurfaceType::DepthStencil &&
- !device.IsExtShaderStencilExportSupported()) {
- return true;
- }
if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT ||
src.info.format == PixelFormat::D32_FLOAT_S8_UINT) {
return true;
@@ -1068,6 +1100,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
+ if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
+ return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view);
+ }
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
@@ -1189,6 +1224,14 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
return device.GetDeviceLocalMemory();
}
+u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
+ return device.GetDeviceMemoryUsage();
+}
+
+bool TextureCacheRuntime::CanReportMemoryUsage() const {
+ return device.CanReportMemoryUsage();
+}
+
void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
@@ -1203,6 +1246,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
} else {
flags |= VideoCommon::ImageFlagBits::Converted;
}
+ flags |= VideoCommon::ImageFlagBits::CostlyLoad;
}
if (runtime->device.HasDebuggingToolAttached()) {
original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
@@ -1430,22 +1474,23 @@ bool Image::BlitScaleHelper(bool scale_up) {
};
const VkExtent2D extent{
.width = std::max(scaled_width, info.size.width),
- .height = std::max(scaled_height, info.size.width),
+ .height = std::max(scaled_height, info.size.height),
};
auto* view_ptr = blit_view.get();
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
if (!blit_framebuffer) {
- blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
+ blit_framebuffer =
+ std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent, scale_up);
}
const auto color_view = blit_view->Handle(Shader::TextureType::Color2D);
runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
src_region, operation, BLIT_OPERATION);
- } else if (!runtime->device.IsBlitDepthStencilSupported() &&
- aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
if (!blit_framebuffer) {
- blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
+ blit_framebuffer =
+ std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent, scale_up);
}
runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(),
blit_view->StencilView(), dst_region,
@@ -1488,9 +1533,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
};
if (!info.IsRenderTarget()) {
swizzle = info.Swizzle();
- if (IsFormatFlipped(format)) {
- std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
- }
+ TryTransformSwizzleIfNeeded(format, swizzle, device->MustEmulateBGR565());
if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) {
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
}
@@ -1537,6 +1580,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
break;
case VideoCommon::ImageViewType::e2D:
case VideoCommon::ImageViewType::e2DArray:
+ case VideoCommon::ImageViewType::Rect:
create(TextureType::Color2D, 1);
create(TextureType::ColorArray2D, std::nullopt);
render_target = Handle(Shader::TextureType::ColorArray2D);
@@ -1550,11 +1594,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
create(TextureType::ColorCube, 6);
create(TextureType::ColorArrayCube, std::nullopt);
break;
- case VideoCommon::ImageViewType::Rect:
- UNIMPLEMENTED();
- break;
case VideoCommon::ImageViewType::Buffer:
- UNREACHABLE();
+ ASSERT(false);
break;
}
}
@@ -1576,6 +1617,9 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParam
ImageView::~ImageView() = default;
VkImageView ImageView::DepthView() {
+ if (!image_handle) {
+ return VK_NULL_HANDLE;
+ }
if (depth_view) {
return *depth_view;
}
@@ -1585,6 +1629,9 @@ VkImageView ImageView::DepthView() {
}
VkImageView ImageView::StencilView() {
+ if (!image_handle) {
+ return VK_NULL_HANDLE;
+ }
if (stencil_view) {
return *stencil_view;
}
@@ -1594,6 +1641,9 @@ VkImageView ImageView::StencilView() {
}
VkImageView ImageView::ColorView() {
+ if (!image_handle) {
+ return VK_NULL_HANDLE;
+ }
if (color_view) {
return *color_view;
}
@@ -1603,6 +1653,9 @@ VkImageView ImageView::ColorView() {
VkImageView ImageView::StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format) {
+ if (!image_handle) {
+ return VK_NULL_HANDLE;
+ }
if (image_format == Shader::ImageFormat::Typeless) {
return Handle(texture_type);
}
@@ -1705,34 +1758,42 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
.width = key.size.width,
.height = key.size.height,
}} {
- CreateFramebuffer(runtime, color_buffers, depth_buffer);
+ CreateFramebuffer(runtime, color_buffers, depth_buffer, key.is_rescaled);
if (runtime.device.HasDebuggingToolAttached()) {
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
}
}
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
- ImageView* depth_buffer, VkExtent2D extent)
+ ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled)
: render_area{extent} {
std::array<ImageView*, NUM_RT> color_buffers{color_buffer};
- CreateFramebuffer(runtime, color_buffers, depth_buffer);
+ CreateFramebuffer(runtime, color_buffers, depth_buffer, is_rescaled);
}
Framebuffer::~Framebuffer() = default;
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers,
- ImageView* depth_buffer) {
+ ImageView* depth_buffer, bool is_rescaled) {
std::vector<VkImageView> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
+ const auto& resolution = runtime.resolution;
+
+ u32 width = 0;
+ u32 height = 0;
for (size_t index = 0; index < NUM_RT; ++index) {
const ImageView* const color_buffer = color_buffers[index];
if (!color_buffer) {
renderpass_key.color_formats[index] = PixelFormat::Invalid;
continue;
}
+ width = std::max(width, is_rescaled ? resolution.ScaleUp(color_buffer->size.width)
+ : color_buffer->size.width);
+ height = std::max(height, is_rescaled ? resolution.ScaleUp(color_buffer->size.height)
+ : color_buffer->size.height);
attachments.push_back(color_buffer->RenderTarget());
renderpass_key.color_formats[index] = color_buffer->format;
num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1743,6 +1804,10 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
}
const size_t num_colors = attachments.size();
if (depth_buffer) {
+ width = std::max(width, is_rescaled ? resolution.ScaleUp(depth_buffer->size.width)
+ : depth_buffer->size.width);
+ height = std::max(height, is_rescaled ? resolution.ScaleUp(depth_buffer->size.height)
+ : depth_buffer->size.height);
attachments.push_back(depth_buffer->RenderTarget());
renderpass_key.depth_format = depth_buffer->format;
num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1759,6 +1824,8 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
renderpass_key.samples = samples;
renderpass = runtime.render_pass_cache.Get(renderpass_key);
+ render_area.width = std::min(render_area.width, width);
+ render_area.height = std::min(render_area.height, height);
num_color_buffers = static_cast<u32>(num_colors);
framebuffer = runtime.device.GetLogical().CreateFramebuffer({
@@ -1780,7 +1847,7 @@ void TextureCacheRuntime::AccelerateImageUpload(
if (IsPixelFormatASTC(image.info.format)) {
return astc_decoder_pass.Assemble(image, map, swizzles);
}
- UNREACHABLE();
+ ASSERT(false);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index c81130dd2..0b7ac0df1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -34,11 +33,11 @@ class ImageView;
class Framebuffer;
class RenderPassCache;
class StagingBufferPool;
-class VKScheduler;
+class Scheduler;
class TextureCacheRuntime {
public:
- explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
+ explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_,
MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_buffer_pool_,
BlitImageHelper& blit_image_helper_,
@@ -55,6 +54,10 @@ public:
u64 GetDeviceLocalMemory() const;
+ u64 GetDeviceMemoryUsage() const;
+
+ bool CanReportMemoryUsage() const;
+
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
@@ -90,7 +93,7 @@ public:
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
const Device& device;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
MemoryAllocator& memory_allocator;
StagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
@@ -151,7 +154,7 @@ private:
bool NeedsScaleHelper() const;
- VKScheduler* scheduler{};
+ Scheduler* scheduler{};
TextureCacheRuntime* runtime{};
vk::Image original_image;
@@ -265,7 +268,7 @@ public:
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
- ImageView* depth_buffer, VkExtent2D extent);
+ ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled);
~Framebuffer();
@@ -276,7 +279,8 @@ public:
Framebuffer& operator=(Framebuffer&&) = default;
void CreateFramebuffer(TextureCacheRuntime& runtime,
- std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer);
+ std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
+ bool is_rescaled = false);
[[nodiscard]] VkFramebuffer Handle() const noexcept {
return *framebuffer;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
index 44e688342..7e1c3a863 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 0df3a7fe9..4d4a6753b 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -1,11 +1,9 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <variant>
#include <boost/container/static_vector.hpp>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -14,18 +12,18 @@
namespace Vulkan {
-VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
+UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
payload_cursor = payload.data();
}
-VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
+UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
-void VKUpdateDescriptorQueue::TickFrame() {
+void UpdateDescriptorQueue::TickFrame() {
payload_cursor = payload.data();
}
-void VKUpdateDescriptorQueue::Acquire() {
+void UpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call migth use.
static constexpr size_t MIN_ENTRIES = 0x400;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index d7de4c490..625bcc809 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -1,18 +1,16 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
-#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
-class VKScheduler;
+class Scheduler;
struct DescriptorUpdateEntry {
struct Empty {};
@@ -30,10 +28,10 @@ struct DescriptorUpdateEntry {
};
};
-class VKUpdateDescriptorQueue final {
+class UpdateDescriptorQueue final {
public:
- explicit VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_);
- ~VKUpdateDescriptorQueue();
+ explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
+ ~UpdateDescriptorQueue();
void TickFrame();
@@ -73,7 +71,7 @@ public:
private:
const Device& device;
- VKScheduler& scheduler;
+ Scheduler& scheduler;
DescriptorUpdateEntry* payload_cursor = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 87636857d..f53066579 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
@@ -9,6 +8,7 @@
#include "common/assert.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
+#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
@@ -25,7 +25,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
}
void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
- std::lock_guard lock{invalidation_mutex};
+ std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size);
}
@@ -34,29 +34,25 @@ void ShaderCache::SyncGuestHost() {
RemovePendingShaders();
}
-ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_)
- : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
- rasterizer{rasterizer_} {}
+ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
- auto& dirty{maxwell3d.dirty.flags};
+ auto& dirty{maxwell3d->dirty.flags};
if (!dirty[VideoCommon::Dirty::Shaders]) {
return last_shaders_valid;
}
dirty[VideoCommon::Dirty::Shaders] = false;
- const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
unique_hashes[index] = 0;
continue;
}
- const auto& shader_config{maxwell3d.regs.shader_config[index]};
+ const auto& shader_config{maxwell3d->regs.shader_config[index]};
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
const GPUVAddr shader_addr{base_addr + shader_config.offset};
- const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+ const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
if (!cpu_shader_addr) {
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
last_shaders_valid = false;
@@ -65,7 +61,7 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
if (!shader_info) {
const u32 start_address{shader_config.offset};
- GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+ GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
shader_info = MakeShaderInfo(env, *cpu_shader_addr);
}
shader_infos[index] = shader_info;
@@ -76,10 +72,10 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
}
const ShaderInfo* ShaderCache::ComputeShader() {
- const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
- const auto& qmd{kepler_compute.launch_description};
+ const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+ const auto& qmd{kepler_compute->launch_description};
const GPUVAddr shader_addr{program_base + qmd.program_start};
- const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+ const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
if (!cpu_shader_addr) {
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
return nullptr;
@@ -87,22 +83,22 @@ const ShaderInfo* ShaderCache::ComputeShader() {
if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
return shader;
}
- ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
return MakeShaderInfo(env, *cpu_shader_addr);
}
void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
size_t env_index{};
- const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
if (unique_hashes[index] == 0) {
continue;
}
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
auto& env{result.envs[index]};
- const u32 start_address{maxwell3d.regs.shader_config[index].offset};
- env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+ const u32 start_address{maxwell3d->regs.shader_config[index].offset};
+ env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
env.SetCachedSize(shader_infos[index]->size_bytes);
result.env_ptrs[env_index++] = &env;
}
@@ -124,8 +120,8 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t
const VAddr addr_end = addr + size;
Entry* const entry = NewEntry(addr, addr_end, data.get());
- const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+ const u64 page_end = (addr_end + YUZU_PAGESIZE - 1) >> YUZU_PAGEBITS;
+ for (u64 page = addr >> YUZU_PAGEBITS; page < page_end; ++page) {
invalidation_cache[page].push_back(entry);
}
@@ -136,8 +132,8 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t
void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
const VAddr addr_end = addr + size;
- const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+ const u64 page_end = (addr_end + YUZU_PAGESIZE - 1) >> YUZU_PAGEBITS;
+ for (u64 page = addr >> YUZU_PAGEBITS; page < page_end; ++page) {
auto it = invalidation_cache.find(page);
if (it == invalidation_cache.end()) {
continue;
@@ -190,8 +186,8 @@ void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr
}
void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
- const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
+ const u64 page_end = (entry->addr_end + YUZU_PAGESIZE - 1) >> YUZU_PAGEBITS;
+ for (u64 page = entry->addr_start >> YUZU_PAGEBITS; page < page_end; ++page) {
const auto entries_it = invalidation_cache.find(page);
ASSERT(entries_it != invalidation_cache.end());
std::vector<Entry*>& entries = entries_it->second;
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 8836bc8c6..a4391202d 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -13,6 +12,7 @@
#include <vector>
#include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/shader_environment.h"
@@ -20,6 +20,10 @@ namespace Tegra {
class MemoryManager;
}
+namespace Tegra::Control {
+struct ChannelState;
+}
+
namespace VideoCommon {
class GenericEnvironment;
@@ -29,9 +33,9 @@ struct ShaderInfo {
size_t size_bytes{};
};
-class ShaderCache {
- static constexpr u64 PAGE_BITS = 14;
- static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
+class ShaderCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
+ static constexpr u64 YUZU_PAGEBITS = 14;
+ static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS;
static constexpr size_t NUM_PROGRAMS = 6;
@@ -72,9 +76,7 @@ protected:
}
};
- explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_);
+ explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_);
/// @brief Update the hashes and information of shader stages
/// @param unique_hashes Shader hashes to store into when a stage is enabled
@@ -89,10 +91,6 @@ protected:
void GetGraphicsEnvironments(GraphicsEnvironments& result,
const std::array<u64, NUM_PROGRAMS>& unique_hashes);
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
-
std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
bool last_shaders_valid = false;
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 3e673c437..5f7625947 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -1,9 +1,7 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
-#include <bit>
#include <filesystem>
#include <fstream>
#include <memory>
@@ -41,7 +39,8 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
return Shader::TextureType::Color1D;
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
- return Shader::TextureType::Color2D;
+ return entry.normalized_coords ? Shader::TextureType::Color2D
+ : Shader::TextureType::Color2DRect;
case Tegra::Texture::TextureType::Texture3D:
return Shader::TextureType::Color3D;
case Tegra::Texture::TextureType::TextureCubemap:
@@ -55,7 +54,8 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
case Tegra::Texture::TextureType::TextureCubeArray:
return Shader::TextureType::ColorArrayCube;
default:
- throw Shader::NotImplementedException("Unknown texture type");
+ UNIMPLEMENTED();
+ return Shader::TextureType::Color2D;
}
}
@@ -190,11 +190,11 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
.write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
.write(reinterpret_cast<const char*>(&stage), sizeof(stage))
.write(reinterpret_cast<const char*>(code.data()), code_size);
- for (const auto [key, type] : texture_types) {
+ for (const auto& [key, type] : texture_types) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&type), sizeof(type));
}
- for (const auto [key, type] : cbuf_values) {
+ for (const auto& [key, type] : cbuf_values) {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&type), sizeof(type));
}
@@ -282,7 +282,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
stage_index = 4;
break;
default:
- UNREACHABLE_MSG("Invalid program={}", program);
+ ASSERT_MSG(false, "Invalid program={}", program);
break;
}
const u64 local_size{sph.LocalMemorySize()};
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index aae762b27..5a145f33a 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
index bcaf5f575..631891ec2 100644
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -1,10 +1,8 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
#include <chrono>
-#include <optional>
#include "video_core/shader_notify.h"
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
index 4d8d52071..696a51384 100644
--- a/src/video_core/shader_notify.h
+++ b/src/video_core/shader_notify.h
@@ -1,12 +1,10 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <chrono>
-#include <optional>
namespace VideoCore {
class ShaderNotify {
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index a36015c8c..a2bf08294 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -1,6 +1,5 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/common_types.h"
#include "common/math_util.h"
@@ -29,7 +28,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
return SurfaceTarget::Texture2DArray;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", texture_type);
- UNREACHABLE();
+ ASSERT(false);
return SurfaceTarget::Texture2D;
}
}
@@ -48,7 +47,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
return true;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
- UNREACHABLE();
+ ASSERT(false);
return false;
}
}
@@ -67,7 +66,7 @@ bool SurfaceTargetIsArray(SurfaceTarget target) {
return true;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
- UNREACHABLE();
+ ASSERT(false);
return false;
}
}
@@ -190,13 +189,13 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
}
}
-PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
+PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format) {
switch (format) {
- case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
+ case Service::android::PixelFormat::Rgba8888:
return PixelFormat::A8B8G8R8_UNORM;
- case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+ case Service::android::PixelFormat::Rgb565:
return PixelFormat::R5G6B5_UNORM;
- case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
+ case Service::android::PixelFormat::Bgra8888:
return PixelFormat::B8G8R8A8_UNORM;
default:
UNIMPLEMENTED_MSG("Unimplemented format={}", format);
@@ -247,6 +246,9 @@ bool IsPixelFormatASTC(PixelFormat format) {
case PixelFormat::ASTC_2D_10X8_SRGB:
case PixelFormat::ASTC_2D_6X6_UNORM:
case PixelFormat::ASTC_2D_6X6_SRGB:
+ case PixelFormat::ASTC_2D_10X6_UNORM:
+ case PixelFormat::ASTC_2D_10X5_UNORM:
+ case PixelFormat::ASTC_2D_10X5_SRGB:
case PixelFormat::ASTC_2D_10X10_UNORM:
case PixelFormat::ASTC_2D_10X10_SRGB:
case PixelFormat::ASTC_2D_12X12_UNORM:
@@ -276,6 +278,7 @@ bool IsPixelFormatSRGB(PixelFormat format) {
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_10X8_SRGB:
case PixelFormat::ASTC_2D_6X6_SRGB:
+ case PixelFormat::ASTC_2D_10X5_SRGB:
case PixelFormat::ASTC_2D_10X10_SRGB:
case PixelFormat::ASTC_2D_12X12_SRGB:
case PixelFormat::ASTC_2D_8X6_SRGB:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 33e8d24ab..57ca7f597 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -1,6 +1,5 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -25,6 +24,7 @@ enum class PixelFormat {
A2B10G10R10_UNORM,
A2B10G10R10_UINT,
A1B5G5R5_UNORM,
+ A5B5G5R1_UNORM,
R8_UNORM,
R8_SNORM,
R8_SINT,
@@ -82,6 +82,7 @@ enum class PixelFormat {
BC3_SRGB,
BC7_SRGB,
A4B4G4R4_UNORM,
+ G4R4_UNORM,
ASTC_2D_4X4_SRGB,
ASTC_2D_8X8_SRGB,
ASTC_2D_8X5_SRGB,
@@ -92,6 +93,9 @@ enum class PixelFormat {
ASTC_2D_10X8_SRGB,
ASTC_2D_6X6_UNORM,
ASTC_2D_6X6_SRGB,
+ ASTC_2D_10X6_UNORM,
+ ASTC_2D_10X5_UNORM,
+ ASTC_2D_10X5_SRGB,
ASTC_2D_10X10_UNORM,
ASTC_2D_10X10_SRGB,
ASTC_2D_12X12_UNORM,
@@ -145,7 +149,7 @@ enum class SurfaceTarget {
TextureCubeArray,
};
-constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
+constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
1, // A8B8G8R8_UNORM
1, // A8B8G8R8_SNORM
1, // A8B8G8R8_SINT
@@ -156,6 +160,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
1, // A2B10G10R10_UNORM
1, // A2B10G10R10_UINT
1, // A1B5G5R5_UNORM
+ 1, // A5B5G5R1_UNORM
1, // R8_UNORM
1, // R8_SNORM
1, // R8_SINT
@@ -213,6 +218,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
4, // BC3_SRGB
4, // BC7_SRGB
1, // A4B4G4R4_UNORM
+ 1, // G4R4_UNORM
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
8, // ASTC_2D_8X5_SRGB
@@ -223,6 +229,9 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
10, // ASTC_2D_10X8_SRGB
6, // ASTC_2D_6X6_UNORM
6, // ASTC_2D_6X6_SRGB
+ 10, // ASTC_2D_10X6_UNORM
+ 10, // ASTC_2D_10X5_UNORM
+ 10, // ASTC_2D_10X5_SRGB
10, // ASTC_2D_10X10_UNORM
10, // ASTC_2D_10X10_SRGB
12, // ASTC_2D_12X12_UNORM
@@ -245,7 +254,7 @@ constexpr u32 DefaultBlockWidth(PixelFormat format) {
return BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)];
}
-constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
+constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
1, // A8B8G8R8_UNORM
1, // A8B8G8R8_SNORM
1, // A8B8G8R8_SINT
@@ -256,6 +265,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
1, // A2B10G10R10_UNORM
1, // A2B10G10R10_UINT
1, // A1B5G5R5_UNORM
+ 1, // A5B5G5R1_UNORM
1, // R8_UNORM
1, // R8_SNORM
1, // R8_SINT
@@ -313,6 +323,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
4, // BC3_SRGB
4, // BC7_SRGB
1, // A4B4G4R4_UNORM
+ 1, // G4R4_UNORM
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
5, // ASTC_2D_8X5_SRGB
@@ -323,6 +334,9 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
8, // ASTC_2D_10X8_SRGB
6, // ASTC_2D_6X6_UNORM
6, // ASTC_2D_6X6_SRGB
+ 6, // ASTC_2D_10X6_UNORM
+ 5, // ASTC_2D_10X5_UNORM
+ 5, // ASTC_2D_10X5_SRGB
10, // ASTC_2D_10X10_UNORM
10, // ASTC_2D_10X10_SRGB
12, // ASTC_2D_12X12_UNORM
@@ -345,7 +359,7 @@ constexpr u32 DefaultBlockHeight(PixelFormat format) {
return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)];
}
-constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
+constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
32, // A8B8G8R8_UNORM
32, // A8B8G8R8_SNORM
32, // A8B8G8R8_SINT
@@ -356,6 +370,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
32, // A2B10G10R10_UNORM
32, // A2B10G10R10_UINT
16, // A1B5G5R5_UNORM
+ 16, // A5B5G5R1_UNORM
8, // R8_UNORM
8, // R8_SNORM
8, // R8_SINT
@@ -413,6 +428,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
128, // BC3_SRGB
128, // BC7_UNORM
16, // A4B4G4R4_UNORM
+ 8, // G4R4_UNORM
128, // ASTC_2D_4X4_SRGB
128, // ASTC_2D_8X8_SRGB
128, // ASTC_2D_8X5_SRGB
@@ -423,6 +439,9 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
128, // ASTC_2D_10X8_SRGB
128, // ASTC_2D_6X6_UNORM
128, // ASTC_2D_6X6_SRGB
+ 128, // ASTC_2D_10X6_UNORM
+ 128, // ASTC_2D_10X5_UNORM
+ 128, // ASTC_2D_10X5_SRGB
128, // ASTC_2D_10X10_UNORM
128, // ASTC_2D_10X10_SRGB
128, // ASTC_2D_12X12_UNORM
@@ -460,7 +479,7 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);
-PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format);
+PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format);
SurfaceType GetFormatType(PixelFormat pixel_format);
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
index 15585caeb..70be1657e 100644
--- a/src/video_core/texture_cache/accelerated_swizzle.cpp
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <bit>
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h
index a11c924e1..d4250da68 100644
--- a/src/video_core/texture_cache/accelerated_swizzle.h
+++ b/src/video_core/texture_cache/accelerated_swizzle.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
index 017327975..ef98afdca 100644
--- a/src/video_core/texture_cache/decode_bc4.cpp
+++ b/src/video_core/texture_cache/decode_bc4.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
index 63fb23508..ab2f735be 100644
--- a/src/video_core/texture_cache/decode_bc4.h
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
index 3a03b786f..b18e3838f 100644
--- a/src/video_core/texture_cache/descriptor_table.h
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -9,7 +8,6 @@
#include "common/common_types.h"
#include "common/div_ceil.h"
-#include "common/logging/log.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index afa807d5d..ad935d386 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -63,6 +62,10 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::A1B5G5R5_UNORM;
case Hash(TextureFormat::A4B4G4R4, UNORM):
return PixelFormat::A4B4G4R4_UNORM;
+ case Hash(TextureFormat::G4R4, UNORM):
+ return PixelFormat::G4R4_UNORM;
+ case Hash(TextureFormat::A5B5G5R1, UNORM):
+ return PixelFormat::A5B5G5R1_UNORM;
case Hash(TextureFormat::R8, UNORM):
return PixelFormat::R8_UNORM;
case Hash(TextureFormat::R8, SNORM):
@@ -143,6 +146,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::S8_UINT_D24_UNORM;
case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
return PixelFormat::S8_UINT_D24_UNORM;
+ case Hash(TextureFormat::D24S8, UNORM, UINT, UINT, UINT, LINEAR):
+ return PixelFormat::D24_UNORM_S8_UINT;
case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
return PixelFormat::D32_FLOAT_S8_UINT;
case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
@@ -201,6 +206,12 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::ASTC_2D_6X6_UNORM;
case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB):
return PixelFormat::ASTC_2D_6X6_SRGB;
+ case Hash(TextureFormat::ASTC_2D_10X6, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_10X6_UNORM;
+ case Hash(TextureFormat::ASTC_2D_10X5, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_10X5_UNORM;
+ case Hash(TextureFormat::ASTC_2D_10X5, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_10X5_SRGB;
case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR):
return PixelFormat::ASTC_2D_10X10_UNORM;
case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
index 729533999..f504a6cd0 100644
--- a/src/video_core/texture_cache/format_lookup_table.h
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index 249cc4d0f..ee4f2d406 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <string>
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index b2c81057b..acc854715 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -38,6 +37,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "A2B10G10R10_UINT";
case PixelFormat::A1B5G5R5_UNORM:
return "A1B5G5R5_UNORM";
+ case PixelFormat::A5B5G5R1_UNORM:
+ return "A5B5G5R1_UNORM";
case PixelFormat::R8_UNORM:
return "R8_UNORM";
case PixelFormat::R8_SNORM:
@@ -152,6 +153,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "BC7_SRGB";
case PixelFormat::A4B4G4R4_UNORM:
return "A4B4G4R4_UNORM";
+ case PixelFormat::G4R4_UNORM:
+ return "G4R4_UNORM";
case PixelFormat::ASTC_2D_4X4_SRGB:
return "ASTC_2D_4X4_SRGB";
case PixelFormat::ASTC_2D_8X8_SRGB:
@@ -172,6 +175,12 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "ASTC_2D_6X6_UNORM";
case PixelFormat::ASTC_2D_6X6_SRGB:
return "ASTC_2D_6X6_SRGB";
+ case PixelFormat::ASTC_2D_10X6_UNORM:
+ return "ASTC_2D_10X6_UNORM";
+ case PixelFormat::ASTC_2D_10X5_UNORM:
+ return "ASTC_2D_10X5_UNORM";
+ case PixelFormat::ASTC_2D_10X5_SRGB:
+ return "ASTC_2D_10X5_SRGB";
case PixelFormat::ASTC_2D_10X10_UNORM:
return "ASTC_2D_10X10_UNORM";
case PixelFormat::ASTC_2D_10X10_SRGB:
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 3db2fdf34..91512022f 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <optional>
@@ -8,6 +7,7 @@
#include <vector>
#include "common/common_types.h"
+#include "common/div_ceil.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/image_base.h"
@@ -183,10 +183,6 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
};
const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
- if (is_lhs_compressed && is_rhs_compressed) {
- LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
- return;
- }
const s32 lhs_mips = lhs.info.resources.levels;
const s32 rhs_mips = rhs.info.resources.levels;
const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
@@ -200,12 +196,12 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
if (is_lhs_compressed) {
- lhs_size.width /= lhs_block.width;
- lhs_size.height /= lhs_block.height;
+ lhs_size.width = Common::DivCeil(lhs_size.width, lhs_block.width);
+ lhs_size.height = Common::DivCeil(lhs_size.height, lhs_block.height);
}
if (is_rhs_compressed) {
- rhs_size.width /= rhs_block.width;
- rhs_size.height /= rhs_block.height;
+ rhs_size.width = Common::DivCeil(rhs_size.width, rhs_block.width);
+ rhs_size.height = Common::DivCeil(rhs_size.height, rhs_block.height);
}
const Extent3D copy_size{
.width = std::min(lhs_size.width, rhs_size.width),
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 89c111c00..620565684 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -33,11 +32,12 @@ enum class ImageFlagBits : u32 {
///< garbage collection priority
Alias = 1 << 11, ///< This image has aliases and has priority on garbage
///< collection
+ CostlyLoad = 1 << 12, ///< Protected from low-tier GC as it is costly to load back.
// Rescaler
- Rescaled = 1 << 12,
- CheckingRescalable = 1 << 13,
- IsRescalable = 1 << 14,
+ Rescaled = 1 << 13,
+ CheckingRescalable = 1 << 14,
+ IsRescalable = 1 << 15,
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@@ -88,6 +88,9 @@ struct ImageBase {
u32 scale_rating = 0;
u64 scale_tick = 0;
bool has_scaled = false;
+
+ size_t channel = 0;
+
ImageFlagBits flags = ImageFlagBits::CpuModified;
GPUVAddr gpu_addr = 0;
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
index afb94082b..6c073ee57 100644
--- a/src/video_core/texture_cache/image_info.cpp
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "video_core/surface.h"
@@ -95,7 +94,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
resources.layers = 1;
break;
default:
- UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
+ ASSERT_MSG(false, "Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
break;
}
if (type != ImageType::Linear) {
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
index 5932dcaba..93755e15e 100644
--- a/src/video_core/texture_cache/image_info.h
+++ b/src/video_core/texture_cache/image_info.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index c7b4fc231..04fb84bfa 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index 9c24c5359..69c9776e7 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
index e751f26c7..f47885147 100644
--- a/src/video_core/texture_cache/image_view_info.cpp
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <limits>
@@ -72,7 +71,7 @@ ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept
range.extent.layers = config.Depth() * 6;
break;
default:
- UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
+ ASSERT_MSG(false, "Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
break;
}
}
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h
index 0c1f99117..921f88988 100644
--- a/src/video_core/texture_cache/image_view_info.h
+++ b/src/video_core/texture_cache/image_view_info.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
index 0cb227d69..1efbd6507 100644
--- a/src/video_core/texture_cache/render_targets.h
+++ b/src/video_core/texture_cache/render_targets.h
@@ -1,12 +1,10 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <span>
-#include <utility>
#include "common/bit_cast.h"
#include "video_core/texture_cache/types.h"
@@ -28,6 +26,7 @@ struct RenderTargets {
ImageViewId depth_buffer_id{};
std::array<u8, NUM_RT> draw_buffers{};
Extent2D size{};
+ bool is_rescaled{};
};
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
index 04539a43c..d552bccf0 100644
--- a/src/video_core/texture_cache/samples_helper.h
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -24,7 +23,7 @@ namespace VideoCommon {
case 16:
return {2, 2};
}
- UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
+ ASSERT_MSG(false, "Invalid number of samples={}", num_samples);
return {1, 1};
}
@@ -48,7 +47,7 @@ namespace VideoCommon {
case MsaaMode::Msaa4x4:
return 16;
}
- UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
+ ASSERT_MSG(false, "Invalid MSAA mode={}", static_cast<int>(msaa_mode));
return 1;
}
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 50df06409..46e8a86e6 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -1,13 +1,10 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
-#include <array>
#include <bit>
-#include <concepts>
#include <numeric>
#include <type_traits>
#include <utility>
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
new file mode 100644
index 000000000..8a9a32f44
--- /dev/null
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -0,0 +1,15 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "video_core/control/channel_state_cache.inc"
+#include "video_core/texture_cache/texture_cache_base.h"
+
+namespace VideoCommon {
+
+TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept
+ : ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory},
+ compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {}
+
+template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>;
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 198bb0cfb..eaf4a1c95 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
@@ -8,6 +7,7 @@
#include "common/alignment.h"
#include "common/settings.h"
+#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/texture_cache/image_view_base.h"
@@ -30,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;
template <class P>
-TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_)
- : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
- kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
+ : runtime{runtime_}, rasterizer{rasterizer_} {
// Configure null sampler
TSCEntry sampler_descriptor{};
sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -50,14 +46,20 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
void(slot_samplers.insert(runtime, sampler_descriptor));
if constexpr (HAS_DEVICE_MEMORY_INFO) {
- const auto device_memory = runtime.GetDeviceLocalMemory();
- const u64 possible_expected_memory = (device_memory * 4) / 10;
- const u64 possible_critical_memory = (device_memory * 7) / 10;
- expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB);
- critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB);
- minimum_memory = 0;
+ const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
+ const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
+ const s64 min_spacing_critical = device_memory - 1_GiB;
+ const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
+ const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
+ const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
+ expected_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
+ DEFAULT_EXPECTED_MEMORY));
+ critical_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
+ DEFAULT_CRITICAL_MEMORY));
+ minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2);
} else {
- // On OpenGL we can be more conservatives as the driver takes care.
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
minimum_memory = 0;
@@ -66,18 +68,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
template <class P>
void TextureCache<P>::RunGarbageCollector() {
- const bool high_priority_mode = total_used_memory >= expected_memory;
- const bool aggressive_mode = total_used_memory >= critical_memory;
- const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
- size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10);
- const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
+ bool high_priority_mode = total_used_memory >= expected_memory;
+ bool aggressive_mode = total_used_memory >= critical_memory;
+ const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
+ size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
+ const auto clean_up = [this, &num_iterations, &high_priority_mode,
+ &aggressive_mode](ImageId image_id) {
if (num_iterations == 0) {
return true;
}
--num_iterations;
auto& image = slot_images[image_id];
- const bool must_download = image.IsSafeDownload();
- if (!high_priority_mode && must_download) {
+ const bool must_download =
+ image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
+ if (!high_priority_mode &&
+ (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) {
return false;
}
if (must_download) {
@@ -85,13 +90,25 @@ void TextureCache<P>::RunGarbageCollector() {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
- SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
+ if (total_used_memory < critical_memory) {
+ if (aggressive_mode) {
+ // Sink the aggresiveness.
+ num_iterations >>= 2;
+ aggressive_mode = false;
+ return false;
+ }
+ if (high_priority_mode && total_used_memory < expected_memory) {
+ num_iterations >>= 1;
+ high_priority_mode = false;
+ }
+ }
return false;
};
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
@@ -99,6 +116,10 @@ void TextureCache<P>::RunGarbageCollector() {
template <class P>
void TextureCache<P>::TickFrame() {
+ // If we can obtain the memory info, use it instead of the estimate.
+ if (runtime.CanReportMemoryUsage()) {
+ total_used_memory = runtime.GetDeviceMemoryUsage();
+ }
if (total_used_memory > minimum_memory) {
RunGarbageCollector();
}
@@ -106,6 +127,7 @@ void TextureCache<P>::TickFrame() {
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
runtime.TickFrame();
+ critical_gc = 0;
++frame_tick;
}
@@ -127,22 +149,24 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
template <class P>
template <bool has_blacklists>
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
- FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
+ FillImageViews<has_blacklists>(channel_state->graphics_image_table,
+ channel_state->graphics_image_view_ids, views);
}
template <class P>
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
- FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
+ FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids,
+ views);
}
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
- if (index > graphics_sampler_table.Limit()) {
+ if (index > channel_state->graphics_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
- const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
- SamplerId& id = graphics_sampler_ids[index];
+ const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
+ SamplerId& id = channel_state->graphics_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
@@ -151,12 +175,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
template <class P>
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
- if (index > compute_sampler_table.Limit()) {
+ if (index > channel_state->compute_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
- const auto [descriptor, is_new] = compute_sampler_table.Read(index);
- SamplerId& id = compute_sampler_ids[index];
+ const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
+ SamplerId& id = channel_state->compute_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
@@ -166,34 +190,36 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
template <class P>
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
- const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
- const u32 tic_limit = maxwell3d.regs.tic.limit;
- const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
- if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
- graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+ const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+ const u32 tic_limit = maxwell3d->regs.tic.limit;
+ const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
+ if (channel_state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(),
+ tsc_limit)) {
+ channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
- if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
- graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+ if (channel_state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
+ channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
}
template <class P>
void TextureCache<P>::SynchronizeComputeDescriptors() {
- const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
- const u32 tic_limit = kepler_compute.regs.tic.limit;
- const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
- const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
- if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
- compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+ const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
+ const u32 tic_limit = kepler_compute->regs.tic.limit;
+ const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
+ const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
+ if (channel_state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+ channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
- if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
- compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+ if (channel_state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(),
+ tic_limit)) {
+ channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
}
template <class P>
bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
u32 scale_rating = 0;
bool rescaled = false;
std::array<ImageId, NUM_RT> tmp_color_images{};
@@ -290,7 +316,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
template <class P>
void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
using namespace VideoCommon::Dirty;
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::RenderTargets]) {
for (size_t index = 0; index < NUM_RT; ++index) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@@ -317,7 +343,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
for (size_t index = 0; index < NUM_RT; ++index) {
- render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+ render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
}
u32 up_scale = 1;
u32 down_shift = 0;
@@ -326,9 +352,10 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
down_shift = Settings::values.resolution_info.down_shift;
}
render_targets.size = Extent2D{
- (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
- (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
+ (maxwell3d->regs.render_area.width * up_scale) >> down_shift,
+ (maxwell3d->regs.render_area.height * up_scale) >> down_shift,
};
+ render_targets.is_rescaled = is_rescaling;
flags[Dirty::DepthBiasGlobal] = true;
}
@@ -343,7 +370,7 @@ template <bool has_blacklists>
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
std::span<ImageViewInOut> views) {
- bool has_blacklisted;
+ bool has_blacklisted = false;
do {
has_deleted_images = false;
if constexpr (has_blacklists) {
@@ -433,7 +460,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
- SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
}
@@ -452,12 +479,20 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
}
template <class P>
-void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
+void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
std::vector<ImageId> deleted_images;
- ForEachImageInRegionGPU(gpu_addr, size,
+ ForEachImageInRegionGPU(as_id, gpu_addr, size,
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
+ if (True(image.flags & ImageFlagBits::CpuModified)) {
+ return;
+ }
+ image.flags |= ImageFlagBits::CpuModified;
+ if (True(image.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(image, id);
+ }
+ /*
if (True(image.flags & ImageFlagBits::Remapped)) {
continue;
}
@@ -465,6 +500,7 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, id);
}
+ */
}
}
@@ -564,7 +600,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
template <class P>
typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
// TODO: Properly implement this
- const auto it = page_table.find(cpu_addr >> PAGE_BITS);
+ const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS);
if (it == page_table.end()) {
return nullptr;
}
@@ -630,7 +666,7 @@ void TextureCache<P>::PopAsyncFlushes() {
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
- SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
@@ -689,26 +725,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
const GPUVAddr gpu_addr = image.gpu_addr;
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
- gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+ gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
const auto uploads = FullUploadSwizzles(image.info);
runtime.AccelerateImageUpload(image, staging, uploads);
} else if (True(image.flags & ImageFlagBits::Converted)) {
std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
- auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+ auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
image.UploadMemory(staging, copies);
} else {
- const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+ const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
image.UploadMemory(staging, copies);
}
}
template <class P>
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
- if (!IsValidEntry(gpu_memory, config)) {
+ if (!IsValidEntry(*gpu_memory, config)) {
return NULL_IMAGE_VIEW_ID;
}
- const auto [pair, is_new] = image_views.try_emplace(config);
+ const auto [pair, is_new] = channel_state->image_views.try_emplace(config);
ImageViewId& image_view_id = pair->second;
if (is_new) {
image_view_id = CreateImageView(config);
@@ -752,9 +788,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
template <class P>
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
- cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+ cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
if (!cpu_addr) {
return ImageId{};
}
@@ -835,7 +871,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
image.scale_tick = frame_tick + 1;
}
const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
- auto& dirty = maxwell3d.dirty.flags;
+ auto& dirty = maxwell3d->dirty.flags;
dirty[Dirty::RenderTargets] = true;
dirty[Dirty::ZetaBuffer] = true;
for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -855,12 +891,15 @@ void TextureCache<P>::InvalidateScale(Image& image) {
}
image.image_view_ids.clear();
image.image_view_infos.clear();
- if constexpr (ENABLE_VALIDATION) {
- std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
- std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+ for (size_t c : active_channel_ids) {
+ auto& channel_info = channel_storage[c];
+ if constexpr (ENABLE_VALIDATION) {
+ std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
+ }
+ channel_info.graphics_image_table.Invalidate();
+ channel_info.compute_image_table.Invalidate();
}
- graphics_image_table.Invalidate();
- compute_image_table.Invalidate();
has_deleted_images = true;
}
@@ -904,10 +943,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
const auto size = CalculateGuestSizeInBytes(info);
- cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+ cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
if (!cpu_addr) {
const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
virtual_invalid_space += Common::AlignUp(size, 32);
@@ -1025,7 +1064,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
- if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+ if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
new_image.flags |= ImageFlagBits::Sparse;
}
@@ -1052,6 +1091,9 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
+ if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+ new_image.flags |= ImageFlagBits::GpuModified;
+ }
if (overlap.info.num_samples != new_image.info.num_samples) {
LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
} else {
@@ -1164,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
return NULL_SAMPLER_ID;
}
- const auto [pair, is_new] = samplers.try_emplace(config);
+ const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
if (is_new) {
pair->second = slot_samplers.insert(runtime, config);
}
@@ -1173,7 +1215,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
template <class P>
ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (index >= regs.rt_control.count) {
return ImageViewId{};
}
@@ -1191,7 +1233,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
template <class P>
ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (!regs.zeta_enable) {
return ImageViewId{};
}
@@ -1288,11 +1330,17 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
template <class P>
template <typename Func>
-void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size,
+ Func&& func) {
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 8> images;
- ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+ auto storage_id = getStorageID(as_id);
+ if (!storage_id) {
+ return;
+ }
+ auto& gpu_page_table = gpu_page_table_storage[*storage_id];
+ ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) {
const auto it = gpu_page_table.find(page);
if (it == gpu_page_table.end()) {
if constexpr (BOOL_BREAK) {
@@ -1375,9 +1423,9 @@ template <typename Func>
void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
- const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+ const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
for (const auto& [gpu_addr, size] : segments) {
- std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
if constexpr (RETURNS_BOOL) {
if (func(gpu_addr, *cpu_addr, size)) {
@@ -1414,10 +1462,15 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory += Common::AlignUp(tentative_size, 1024);
+ if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) {
+ RunGarbageCollector();
+ critical_gc++;
+ }
image.lru_index = lru_cache.Insert(image_id, frame_tick);
- ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
- [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+ (*channel_state->gpu_page_table)[page].push_back(image_id);
+ });
if (False(image.flags & ImageFlagBits::Sparse)) {
auto map_id =
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1448,38 +1501,39 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
image.flags &= ~ImageFlagBits::BadOverlap;
lru_cache.Free(image.lru_index);
const auto& clear_page_table =
- [this, image_id](
- u64 page,
- std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
+ [this, image_id](u64 page,
+ std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>&
+ selected_page_table) {
const auto page_it = selected_page_table.find(page);
if (page_it == selected_page_table.end()) {
- UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS);
return;
}
std::vector<ImageId>& image_ids = page_it->second;
const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) {
- UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
- page << PAGE_BITS);
+ ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}",
+ page << YUZU_PAGEBITS);
return;
}
image_ids.erase(vector_it);
};
- ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
- [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+ clear_page_table(page, (*channel_state->gpu_page_table));
+ });
if (False(image.flags & ImageFlagBits::Sparse)) {
const auto map_id = image.map_view_id;
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
- UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS);
return;
}
std::vector<ImageMapId>& image_map_ids = page_it->second;
const auto vector_it = std::ranges::find(image_map_ids, map_id);
if (vector_it == image_map_ids.end()) {
- UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
- page << PAGE_BITS);
+ ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}",
+ page << YUZU_PAGEBITS);
return;
}
image_map_ids.erase(vector_it);
@@ -1500,7 +1554,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
- UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS);
return;
}
std::vector<ImageMapId>& image_map_ids = page_it->second;
@@ -1584,22 +1638,22 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
const GPUVAddr gpu_addr = image.gpu_addr;
const auto alloc_it = image_allocs_table.find(gpu_addr);
if (alloc_it == image_allocs_table.end()) {
- UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
- gpu_addr);
+ ASSERT_MSG(false, "Trying to delete an image alloc that does not exist in address 0x{:x}",
+ gpu_addr);
return;
}
const ImageAllocId alloc_id = alloc_it->second;
std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
if (alloc_image_it == alloc_images.end()) {
- UNREACHABLE_MSG("Trying to delete an image that does not exist");
+ ASSERT_MSG(false, "Trying to delete an image that does not exist");
return;
}
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
// Mark render targets as dirty
- auto& dirty = maxwell3d.dirty.flags;
+ auto& dirty = maxwell3d->dirty.flags;
dirty[Dirty::RenderTargets] = true;
dirty[Dirty::ZetaBuffer] = true;
for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -1649,24 +1703,30 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
if (alloc_images.empty()) {
image_allocs_table.erase(alloc_it);
}
- if constexpr (ENABLE_VALIDATION) {
- std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
- std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+ for (size_t c : active_channel_ids) {
+ auto& channel_info = channel_storage[c];
+ if constexpr (ENABLE_VALIDATION) {
+ std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
+ }
+ channel_info.graphics_image_table.Invalidate();
+ channel_info.compute_image_table.Invalidate();
}
- graphics_image_table.Invalidate();
- compute_image_table.Invalidate();
has_deleted_images = true;
}
template <class P>
void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
- auto it = image_views.begin();
- while (it != image_views.end()) {
- const auto found = std::ranges::find(removed_views, it->second);
- if (found != removed_views.end()) {
- it = image_views.erase(it);
- } else {
- ++it;
+ for (size_t c : active_channel_ids) {
+ auto& channel_info = channel_storage[c];
+ auto it = channel_info.image_views.begin();
+ while (it != channel_info.image_views.end()) {
+ const auto found = std::ranges::find(removed_views, it->second);
+ if (found != removed_views.end()) {
+ it = channel_info.image_views.erase(it);
+ } else {
+ ++it;
+ }
}
}
}
@@ -1697,6 +1757,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
Image& image = slot_images[image_id];
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
+ bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
u64 most_recent_tick = image.modification_tick;
for (const AliasedImage& aliased : image.aliased_images) {
ImageBase& aliased_image = slot_images[aliased.id];
@@ -1704,6 +1765,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
aliased_images.push_back(&aliased);
any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
+ any_modified |= True(aliased_image.flags & ImageFlagBits::GpuModified);
}
}
if (aliased_images.empty()) {
@@ -1718,6 +1780,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
}
}
image.modification_tick = most_recent_tick;
+ if (any_modified) {
+ image.flags |= ImageFlagBits::GpuModified;
+ }
std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
const ImageBase& lhs_image = slot_images[lhs->id];
const ImageBase& rhs_image = slot_images[rhs->id];
@@ -1725,7 +1790,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
});
const auto& resolution = Settings::values.resolution_info;
for (const AliasedImage* const aliased : aliased_images) {
- if (!resolution.active | !any_rescaled) {
+ if (!resolution.active || !any_rescaled) {
CopyImage(image_id, aliased->id, aliased->copies);
continue;
}
@@ -1736,19 +1801,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
continue;
}
ScaleUp(aliased_image);
-
- const bool both_2d{image.info.type == ImageType::e2D &&
- aliased_image.info.type == ImageType::e2D};
- auto copies = aliased->copies;
- for (auto copy : copies) {
- copy.extent.width = std::max<u32>(
- (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1);
- if (both_2d) {
- copy.extent.height = std::max<u32>(
- (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1);
- }
- }
- CopyImage(image_id, aliased->id, copies);
+ CopyImage(image_id, aliased->id, aliased->copies);
}
}
@@ -1908,6 +1961,7 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
.color_buffer_ids = {color_view_id},
.depth_buffer_id = depth_view_id,
.size = {extent.width >> samples_x, extent.height >> samples_y},
+ .is_rescaled = is_rescaled,
});
return {framebuffer_id, view_id};
}
@@ -1920,7 +1974,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
const ImageViewBase& image_view = slot_image_views[id];
const ImageBase& image = slot_images[image_view.image_id];
const Extent3D size = image_view.size;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
const auto& scissor = regs.scissor_test[0];
if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
// Images with multiple resources can't be cleared in a single call
@@ -1935,4 +1989,19 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
scissor.max_y >= size.height;
}
+template <class P>
+void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+ VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo>::CreateChannel(channel);
+ const auto it = channel_map.find(channel.bind_id);
+ auto* this_state = &channel_storage[it->second];
+ const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
+ this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
+}
+
+/// Bind a channel for execution.
+template <class P>
+void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
+ gpu_page_table_storage.emplace_back();
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 7107887a6..2fa8445eb 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -1,9 +1,10 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
+#include <deque>
+#include <limits>
#include <mutex>
#include <span>
#include <type_traits>
@@ -12,9 +13,11 @@
#include <queue>
#include "common/common_types.h"
+#include "common/hash.h"
#include "common/literals.h"
#include "common/lru_cache.h"
#include "video_core/compatible_formats.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/surface.h"
@@ -22,12 +25,15 @@
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
-#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
+namespace Tegra::Control {
+struct ChannelState;
+}
+
namespace VideoCommon {
using Tegra::Texture::SwizzleSource;
@@ -46,10 +52,37 @@ struct ImageViewInOut {
ImageViewId id{};
};
+using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
+
+class TextureCacheChannelInfo : public ChannelInfo {
+public:
+ TextureCacheChannelInfo() = delete;
+ TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept;
+ TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
+ TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
+ TextureCacheChannelInfo(TextureCacheChannelInfo&& other) noexcept = default;
+ TextureCacheChannelInfo& operator=(TextureCacheChannelInfo&& other) noexcept = default;
+
+ DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+ DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+ std::vector<SamplerId> graphics_sampler_ids;
+ std::vector<ImageViewId> graphics_image_view_ids;
+
+ DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+ DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+ std::vector<SamplerId> compute_sampler_ids;
+ std::vector<ImageViewId> compute_image_view_ids;
+
+ std::unordered_map<TICEntry, ImageViewId> image_views;
+ std::unordered_map<TSCEntry, SamplerId> samplers;
+
+ TextureCacheGPUMap* gpu_page_table;
+};
+
template <class P>
-class TextureCache {
+class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo> {
/// Address shift for caching images into a hash table
- static constexpr u64 PAGE_BITS = 20;
+ static constexpr u64 YUZU_PAGEBITS = 20;
/// Enables debugging features to the texture cache
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
@@ -60,8 +93,12 @@ class TextureCache {
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
- static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
- static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
+ static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
+ static constexpr s64 TARGET_THRESHOLD = 4_GiB;
+ static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
+ static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
+ static constexpr size_t GC_EMERGENCY_COUNTS = 2;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
@@ -77,16 +114,8 @@ class TextureCache {
PixelFormat src_format;
};
- template <typename T>
- struct IdentityHash {
- [[nodiscard]] size_t operator()(T value) const noexcept {
- return static_cast<size_t>(value);
- }
- };
-
public:
- explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
- Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
+ explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
/// Notify the cache that a new frame has been queued
void TickFrame();
@@ -142,7 +171,7 @@ public:
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
- void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+ void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
@@ -171,6 +200,9 @@ public:
[[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
+ /// Create channel state.
+ void CreateChannel(Tegra::Control::ChannelState& channel) final override;
+
std::mutex mutex;
private:
@@ -178,8 +210,8 @@ private:
template <typename Func>
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
- const u64 page_end = (addr + size - 1) >> PAGE_BITS;
- for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+ const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS;
+ for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
@@ -193,8 +225,8 @@ private:
template <typename Func>
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
- const u64 page_end = (addr + size - 1) >> PAGE_BITS;
- for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+ const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS;
+ for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
@@ -205,6 +237,8 @@ private:
}
}
+ void OnGPUASRegister(size_t map_id) final override;
+
/// Runs the Garbage Collector.
void RunGarbageCollector();
@@ -273,7 +307,7 @@ private:
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
template <typename Func>
- void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+ void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
@@ -338,31 +372,16 @@ private:
u64 GetScaledImageSizeBytes(ImageBase& image);
Runtime& runtime;
- VideoCore::RasterizerInterface& rasterizer;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- Tegra::MemoryManager& gpu_memory;
- DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
- DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
- std::vector<SamplerId> graphics_sampler_ids;
- std::vector<ImageViewId> graphics_image_view_ids;
-
- DescriptorTable<TICEntry> compute_image_table{gpu_memory};
- DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
- std::vector<SamplerId> compute_sampler_ids;
- std::vector<ImageViewId> compute_image_view_ids;
+ VideoCore::RasterizerInterface& rasterizer;
+ std::deque<TextureCacheGPUMap> gpu_page_table_storage;
RenderTargets render_targets;
- std::unordered_map<TICEntry, ImageViewId> image_views;
- std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
- std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
- std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
- std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
-
+ std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
+ std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};
@@ -373,6 +392,7 @@ private:
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
+ size_t critical_gc;
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 5ac27b3a7..0453456b4 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -1,6 +1,5 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index d8e19cb2f..1223df5a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1,31 +1,11 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-FileCopyrightText: Ryujinx Team and Contributors
+// SPDX-License-Identifier: GPL-2.0-or-later AND MIT
// This files contains code from Ryujinx
// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
// The sections using code from Ryujinx are marked with a link to the original version
-// MIT License
-//
-// Copyright (c) Ryujinx Team and Contributors
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
-// associated documentation files (the "Software"), to deal in the Software without restriction,
-// including without limitation the rights to use, copy, modify, merge, publish, distribute,
-// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all copies or
-// substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
-// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-//
-
#include <algorithm>
#include <array>
#include <numeric>
@@ -537,7 +517,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
-
UNIMPLEMENTED_IF(copy.image_offset.x != 0);
UNIMPLEMENTED_IF(copy.image_offset.y != 0);
UNIMPLEMENTED_IF(copy.image_offset.z != 0);
@@ -775,7 +754,7 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
if (address == 0) {
return false;
}
- if (address > (1ULL << 48)) {
+ if (address >= (1ULL << 40)) {
return false;
}
if (gpu_memory.GpuToCpuAddress(address).has_value()) {
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 7af52de2e..5e28f4ab3 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -9,10 +8,8 @@
#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/image_base.h"
-#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 25161df1f..15b9d4182 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1,21 +1,11 @@
-// Copyright 2016 The University of North Carolina at Chapel Hill
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
+// SPDX-FileCopyrightText: 2016 The University of North Carolina at Chapel Hill
+// SPDX-License-Identifier: Apache-2.0
+
// Please send all BUG REPORTS to <pavel@cs.unc.edu>.
// <http://gamma.cs.unc.edu/FasTC/>
#include <algorithm>
+#include <bit>
#include <cassert>
#include <cstring>
#include <span>
@@ -23,7 +13,9 @@
#include <boost/container/static_vector.hpp>
+#include "common/alignment.h"
#include "common/common_types.h"
+#include "common/thread_worker.h"
#include "video_core/textures/astc.h"
class InputBitStream {
@@ -1421,7 +1413,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 b
static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
- outBuf[j * blockWidth + i] = 0xFFFF00FF;
+ outBuf[j * blockWidth + i] = 0x00000000;
}
}
}
@@ -1660,29 +1652,41 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
- u32 block_index = 0;
- std::size_t depth_offset = 0;
- for (u32 z = 0; z < depth; z++) {
- for (u32 y = 0; y < height; y += block_height) {
- for (u32 x = 0; x < width; x += block_width) {
- const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
-
- // Blocks can be at most 12x12
- std::array<u32, 12 * 12> uncompData;
- DecompressBlock(blockPtr, block_width, block_height, uncompData);
-
- u32 decompWidth = std::min(block_width, width - x);
- u32 decompHeight = std::min(block_height, height - y);
-
- const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
- for (u32 jj = 0; jj < decompHeight; jj++) {
- std::memcpy(outRow.data() + jj * width * 4,
- uncompData.data() + jj * block_width, decompWidth * 4);
+ const u32 rows = Common::DivideUp(height, block_height);
+ const u32 cols = Common::DivideUp(width, block_width);
+
+ Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
+ "ASTCDecompress"};
+
+ for (u32 z = 0; z < depth; ++z) {
+ const u32 depth_offset = z * height * width * 4;
+ for (u32 y_index = 0; y_index < rows; ++y_index) {
+ auto decompress_stride = [data, width, height, depth, block_width, block_height, output,
+ rows, cols, z, depth_offset, y_index] {
+ const u32 y = y_index * block_height;
+ for (u32 x_index = 0; x_index < cols; ++x_index) {
+ const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index;
+ const u32 x = x_index * block_width;
+
+ const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
+
+ // Blocks can be at most 12x12
+ std::array<u32, 12 * 12> uncompData;
+ DecompressBlock(blockPtr, block_width, block_height, uncompData);
+
+ u32 decompWidth = std::min(block_width, width - x);
+ u32 decompHeight = std::min(block_height, height - y);
+
+ const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
+ for (u32 h = 0; h < decompHeight; ++h) {
+ std::memcpy(outRow.data() + h * width * 4,
+ uncompData.data() + h * block_width, decompWidth * 4);
+ }
}
- ++block_index;
- }
+ };
+ workers.QueueWork(std::move(decompress_stride));
}
- depth_offset += height * width * 4;
+ workers.WaitForRequests();
}
}
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 14d2beec0..afd3933c3 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -1,12 +1,8 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
-#include <bit>
-#include "common/common_types.h"
-
namespace Tegra::Texture::ASTC {
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 24e943e4c..52d067a2d 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -1,12 +1,10 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <cmath>
#include <cstring>
#include <span>
-#include <utility>
#include "common/alignment.h"
#include "common/assert.h"
@@ -14,13 +12,30 @@
#include "common/div_ceil.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
-#include "video_core/textures/texture.h"
namespace Tegra::Texture {
namespace {
+template <u32 mask>
+constexpr u32 pdep(u32 value) {
+ u32 result = 0;
+ u32 m = mask;
+ for (u32 bit = 1; m; bit += bit) {
+ if (value & bit)
+ result |= m & -m;
+ m &= m - 1;
+ }
+ return result;
+}
+
+template <u32 mask, u32 incr_amount>
+void incrpdep(u32& value) {
+ constexpr u32 swizzled_incr = pdep<mask>(incr_amount);
+ value = ((value | ~mask) + swizzled_incr) & mask;
+}
+
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
- u32 block_height, u32 block_depth, u32 stride_alignment) {
+ u32 block_height, u32 block_depth, u32 stride) {
// The origin of the transformation can be configured here, leave it as zero as the current API
// doesn't expose it.
static constexpr u32 origin_x = 0;
@@ -30,7 +45,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
// We can configure here a custom pitch
// As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
const u32 pitch = width * BYTES_PER_PIXEL;
- const u32 stride = Common::AlignUpLog2(width, stride_alignment) * BYTES_PER_PIXEL;
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
@@ -47,18 +61,20 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
for (u32 line = 0; line < height; ++line) {
const u32 y = line + origin_y;
- const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
+ const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
const u32 offset_y = (block_y >> block_height) * block_size +
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
- for (u32 column = 0; column < width; ++column) {
+ u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
+ for (u32 column = 0; column < width;
+ ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
- const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
+ const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
const u32 unswizzled_offset =
slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
@@ -72,6 +88,69 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
}
}
+template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
+void SwizzleSubrectImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height,
+ u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines,
+ u32 block_height, u32 block_depth, u32 pitch_linear) {
+ // The origin of the transformation can be configured here, leave it as zero as the current API
+ // doesn't expose it.
+ static constexpr u32 origin_z = 0;
+
+ // We can configure here a custom pitch
+ // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
+ const u32 pitch = pitch_linear;
+ const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT);
+
+ const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
+ const u32 slice_size =
+ Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
+
+ const u32 block_height_mask = (1U << block_height) - 1;
+ const u32 block_depth_mask = (1U << block_depth) - 1;
+ const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
+
+ u32 unprocessed_lines = num_lines;
+ u32 extent_y = std::min(num_lines, height - origin_y);
+
+ for (u32 slice = 0; slice < depth; ++slice) {
+ const u32 z = slice + origin_z;
+ const u32 offset_z = (z >> block_depth) * slice_size +
+ ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
+ const u32 lines_in_y = std::min(unprocessed_lines, extent_y);
+ for (u32 line = 0; line < lines_in_y; ++line) {
+ const u32 y = line + origin_y;
+ const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
+
+ const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
+ const u32 offset_y = (block_y >> block_height) * block_size +
+ ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+
+ u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
+ for (u32 column = 0; column < extent_x;
+ ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
+ const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
+ const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+ const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
+ const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
+
+ const u32 unswizzled_offset =
+ slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
+
+ u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
+ const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
+
+ std::memcpy(dst, src, BYTES_PER_PIXEL);
+ }
+ }
+ unprocessed_lines -= lines_in_y;
+ if (unprocessed_lines == 0) {
+ return;
+ }
+ }
+}
+
template <bool TO_LINEAR>
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
@@ -90,118 +169,43 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
BPP_CASE(16)
#undef BPP_CASE
default:
- UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
- }
-}
-
-template <u32 BYTES_PER_PIXEL>
-void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit,
- u32 offset_x, u32 offset_y) {
- const u32 block_height = 1U << block_height_bit;
- const u32 image_width_in_gobs =
- (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
- for (u32 line = 0; line < subrect_height; ++line) {
- const u32 dst_y = line + offset_y;
- const u32 gob_address_y =
- (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
- ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
- const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
- for (u32 x = 0; x < subrect_width; ++x) {
- const u32 dst_x = x + offset_x;
- const u32 gob_address =
- gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height;
- const u32 swizzled_offset = gob_address + table[(dst_x * BYTES_PER_PIXEL) % GOB_SIZE_X];
- const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL;
-
- const u8* const source_line = unswizzled_data + unswizzled_offset;
- u8* const dest_addr = swizzled_data + swizzled_offset;
- std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL);
- }
+ ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
-template <u32 BYTES_PER_PIXEL>
-void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height,
- u32 origin_x, u32 origin_y, u8* output, const u8* input) {
- const u32 stride = width * BYTES_PER_PIXEL;
- const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
- const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
-
- const u32 block_height_mask = (1U << block_height) - 1;
- const u32 x_shift = GOB_SIZE_SHIFT + block_height;
-
- for (u32 line = 0; line < line_count; ++line) {
- const u32 src_y = line + origin_y;
- const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
-
- const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
- const u32 src_offset_y = (block_y >> block_height) * block_size +
- ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
- for (u32 column = 0; column < line_length_in; ++column) {
- const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL;
- const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
-
- const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
- const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL;
-
- std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL);
- }
- }
-}
-
-template <u32 BYTES_PER_PIXEL>
-void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
- u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output,
- const u8* input) {
- UNIMPLEMENTED_IF(origin_x > 0);
- UNIMPLEMENTED_IF(origin_y > 0);
-
- const u32 stride = width * BYTES_PER_PIXEL;
- const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
- const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
-
- const u32 block_height_mask = (1U << block_height) - 1;
- const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
-
- for (u32 line = 0; line < line_count; ++line) {
- const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
- const u32 block_y = line / GOB_SIZE_Y;
- const u32 dst_offset_y =
- (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
- for (u32 x = 0; x < line_length_in; ++x) {
- const u32 dst_offset =
- ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X];
- const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch;
- std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL);
- }
- }
-}
} // Anonymous namespace
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
u32 stride_alignment) {
+ const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
+ const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
+ width = (width * bytes_per_pixel) >> new_bpp;
+ bytes_per_pixel = 1U << new_bpp;
Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
- stride_alignment);
+ stride);
}
void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth,
u32 stride_alignment) {
+ const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
+ const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
+ width = (width * bytes_per_pixel) >> new_bpp;
+ bytes_per_pixel = 1U << new_bpp;
Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
- stride_alignment);
+ stride);
}
-void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
- u32 block_height_bit, u32 offset_x, u32 offset_y) {
+void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
+ u32 block_height, u32 block_depth, u32 pitch_linear) {
switch (bytes_per_pixel) {
#define BPP_CASE(x) \
case x: \
- return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \
- swizzled_data, unswizzled_data, block_height_bit, offset_x, \
- offset_y);
+ return SwizzleSubrectImpl<true, x>(output, input, width, height, depth, origin_x, \
+ origin_y, extent_x, extent_y, block_height, \
+ block_depth, pitch_linear);
BPP_CASE(1)
BPP_CASE(2)
BPP_CASE(3)
@@ -212,17 +216,19 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
BPP_CASE(16)
#undef BPP_CASE
default:
- UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
+ ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
-void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
- u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
+ u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) {
switch (bytes_per_pixel) {
#define BPP_CASE(x) \
case x: \
- return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \
- origin_x, origin_y, output, input);
+ return SwizzleSubrectImpl<false, x>(output, input, width, height, depth, origin_x, \
+ origin_y, extent_x, extent_y, block_height, \
+ block_depth, pitch_linear);
BPP_CASE(1)
BPP_CASE(2)
BPP_CASE(3)
@@ -233,54 +239,7 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
BPP_CASE(16)
#undef BPP_CASE
default:
- UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
- }
-}
-
-void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
- u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
- u32 origin_y, u8* output, const u8* input) {
- switch (bytes_per_pixel) {
-#define BPP_CASE(x) \
- case x: \
- return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \
- block_height, block_depth, origin_x, origin_y, output, \
- input);
- BPP_CASE(1)
- BPP_CASE(2)
- BPP_CASE(3)
- BPP_CASE(4)
- BPP_CASE(6)
- BPP_CASE(8)
- BPP_CASE(12)
- BPP_CASE(16)
-#undef BPP_CASE
- default:
- UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
- }
-}
-
-void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
- const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
- u8* swizzle_data) {
- const u32 block_height = 1U << block_height_bit;
- const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X};
- std::size_t count = 0;
- for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
- const std::size_t gob_address_y =
- (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
- ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
- const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
- for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
- const std::size_t gob_address =
- gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
- const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X];
- const u8* source_line = source_data + count;
- u8* dest_addr = swizzle_data + swizzled_offset;
- count++;
-
- *dest_addr = *source_line;
- }
+ ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
}
}
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 4c14cefbf..e70407692 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -21,6 +20,9 @@ constexpr u32 GOB_SIZE_Y_SHIFT = 3;
constexpr u32 GOB_SIZE_Z_SHIFT = 0;
constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
+constexpr u32 SWIZZLE_X_BITS = 0b100101111;
+constexpr u32 SWIZZLE_Y_BITS = 0b011010000;
+
using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
/**
@@ -38,7 +40,6 @@ constexpr SwizzleTable MakeSwizzleTable() {
}
return table;
}
-constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTable();
/// Unswizzles a block linear texture into linear memory.
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
@@ -55,34 +56,14 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
u32 block_height, u32 block_depth);
/// Copies an untiled subrectangle into a tiled surface.
-void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
- u32 block_height_bit, u32 offset_x, u32 offset_y);
+void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
+ u32 block_height, u32 block_depth, u32 pitch_linear);
/// Copies a tiled subrectangle into a linear surface.
-void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
- u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input);
-
-/// @brief Swizzles a 2D array of pixels into a 3D texture
-/// @param line_length_in Number of pixels per line
-/// @param line_count Number of lines
-/// @param pitch Number of bytes per line
-/// @param width Width of the swizzled texture
-/// @param height Height of the swizzled texture
-/// @param bytes_per_pixel Number of bytes used per pixel
-/// @param block_height Block height shift
-/// @param block_depth Block depth shift
-/// @param origin_x Column offset in pixels of the swizzled texture
-/// @param origin_y Row offset in pixels of the swizzled texture
-/// @param output Pointer to the pixels of the swizzled texture
-/// @param input Pointer to the 2D array of pixels used as input
-/// @pre input and output points to an array large enough to hold the number of bytes used
-void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
- u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
- u32 origin_y, u8* output, const u8* input);
-
-void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
- std::size_t copy_size, const u8* source_data, u8* swizzle_data);
+void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
+ u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear);
/// Obtains the offset of the gob for positions 'dst_x' & 'dst_y'
u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 06954963d..b8327c88a 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -1,8 +1,6 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
-#include <algorithm>
#include <array>
#include "common/cityhash.h"
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7994cb859..7c4553a53 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
index ba26ac3f1..7e605981c 100644
--- a/src/video_core/transform_feedback.cpp
+++ b/src/video_core/transform_feedback.cpp
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
index 8f6946d65..a519adb59 100644
--- a/src/video_core/transform_feedback.h
+++ b/src/video_core/transform_feedback.h
@@ -1,6 +1,5 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 329bf4def..04ac4af11 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -1,6 +1,5 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <memory>
@@ -50,6 +49,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
gpu->BindRenderer(std::move(renderer));
return gpu;
} catch (const std::runtime_error& exception) {
+ scope.Cancel();
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
}
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 084df641f..f8e2444f3 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -1,6 +1,5 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: 2014 Citra Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index fdd1a5081..85f1d13e0 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#ifdef HAS_NSIGHT_AFTERMATH
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index eae1891dd..9c1fb7720 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -1,23 +1,24 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
-#include <filesystem>
-#include <mutex>
#include <span>
-#include <string>
-#include <vector>
#include "common/common_types.h"
-#include "common/dynamic_library.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
#ifdef HAS_NSIGHT_AFTERMATH
+#include <filesystem>
+#include <mutex>
+
+// Vulkan headers must be included before Aftermath
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
#include <GFSDK_Aftermath_Defines.h>
#include <GFSDK_Aftermath_GpuCrashDump.h>
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
+
+#include "common/dynamic_library.h"
#endif
namespace Vulkan {
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
index cf94e1d39..736474009 100644
--- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
#include "common/logging/log.h"
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h
index b0519f132..71b1f69ec 100644
--- a/src/video_core/vulkan_common/vulkan_debug_callback.h
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 153702c0b..ddecfca13 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1,24 +1,24 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <bitset>
#include <chrono>
#include <optional>
-#include <string_view>
#include <thread>
#include <unordered_set>
#include <utility>
#include <vector>
#include "common/assert.h"
+#include "common/literals.h"
#include "common/settings.h"
#include "video_core/vulkan_common/nsight_aftermath_tracker.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+using namespace Common::Literals;
namespace {
namespace Alternatives {
constexpr std::array STENCIL8_UINT{
@@ -39,6 +39,32 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_UNDEFINED,
};
+
+constexpr std::array B5G6R5_UNORM_PACK16{
+ VK_FORMAT_R5G6B5_UNORM_PACK16,
+ VK_FORMAT_UNDEFINED,
+};
+
+constexpr std::array R4G4_UNORM_PACK8{
+ VK_FORMAT_R8_UNORM,
+ VK_FORMAT_UNDEFINED,
+};
+
+constexpr std::array R16G16B16_SFLOAT{
+ VK_FORMAT_R16G16B16A16_SFLOAT,
+ VK_FORMAT_UNDEFINED,
+};
+
+constexpr std::array R16G16B16_SSCALED{
+ VK_FORMAT_R16G16B16A16_SSCALED,
+ VK_FORMAT_UNDEFINED,
+};
+
+constexpr std::array R8G8B8_SSCALED{
+ VK_FORMAT_R8G8B8A8_SSCALED,
+ VK_FORMAT_UNDEFINED,
+};
+
} // namespace Alternatives
enum class NvidiaArchitecture {
@@ -87,6 +113,16 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
case VK_FORMAT_D16_UNORM_S8_UINT:
return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ return Alternatives::B5G6R5_UNORM_PACK16.data();
+ case VK_FORMAT_R4G4_UNORM_PACK8:
+ return Alternatives::R4G4_UNORM_PACK8.data();
+ case VK_FORMAT_R16G16B16_SFLOAT:
+ return Alternatives::R16G16B16_SFLOAT.data();
+ case VK_FORMAT_R16G16B16_SSCALED:
+ return Alternatives::R16G16B16_SSCALED.data();
+ case VK_FORMAT_R8G8B8_SSCALED:
+ return Alternatives::R8G8B8_SSCALED.data();
default:
return nullptr;
}
@@ -107,105 +143,142 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::PhysicalDevice physical) {
static constexpr std::array formats{
- VK_FORMAT_A8B8G8R8_UNORM_PACK32,
- VK_FORMAT_A8B8G8R8_UINT_PACK32,
- VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ VK_FORMAT_A2B10G10R10_SINT_PACK32,
+ VK_FORMAT_A2B10G10R10_SNORM_PACK32,
+ VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
+ VK_FORMAT_A2B10G10R10_UINT_PACK32,
+ VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+ VK_FORMAT_A2B10G10R10_USCALED_PACK32,
VK_FORMAT_A8B8G8R8_SINT_PACK32,
+ VK_FORMAT_A8B8G8R8_SNORM_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
- VK_FORMAT_R5G6B5_UNORM_PACK16,
+ VK_FORMAT_A8B8G8R8_UINT_PACK32,
+ VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+ VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
+ VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
+ VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
+ VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
+ VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
+ VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
+ VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
+ VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
+ VK_FORMAT_B10G11R11_UFLOAT_PACK32,
+ VK_FORMAT_B4G4R4A4_UNORM_PACK16,
+ VK_FORMAT_B5G5R5A1_UNORM_PACK16,
VK_FORMAT_B5G6R5_UNORM_PACK16,
- VK_FORMAT_A2B10G10R10_UNORM_PACK32,
- VK_FORMAT_A2B10G10R10_UINT_PACK32,
- VK_FORMAT_A1R5G5B5_UNORM_PACK16,
- VK_FORMAT_R32G32B32A32_SFLOAT,
- VK_FORMAT_R32G32B32A32_SINT,
- VK_FORMAT_R32G32B32A32_UINT,
- VK_FORMAT_R32G32_SFLOAT,
- VK_FORMAT_R32G32_SINT,
- VK_FORMAT_R32G32_UINT,
+ VK_FORMAT_B8G8R8A8_SRGB,
+ VK_FORMAT_B8G8R8A8_UNORM,
+ VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
+ VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
+ VK_FORMAT_BC2_SRGB_BLOCK,
+ VK_FORMAT_BC2_UNORM_BLOCK,
+ VK_FORMAT_BC3_SRGB_BLOCK,
+ VK_FORMAT_BC3_UNORM_BLOCK,
+ VK_FORMAT_BC4_SNORM_BLOCK,
+ VK_FORMAT_BC4_UNORM_BLOCK,
+ VK_FORMAT_BC5_SNORM_BLOCK,
+ VK_FORMAT_BC5_UNORM_BLOCK,
+ VK_FORMAT_BC6H_SFLOAT_BLOCK,
+ VK_FORMAT_BC6H_UFLOAT_BLOCK,
+ VK_FORMAT_BC7_SRGB_BLOCK,
+ VK_FORMAT_BC7_UNORM_BLOCK,
+ VK_FORMAT_D16_UNORM,
+ VK_FORMAT_D16_UNORM_S8_UINT,
+ VK_FORMAT_D24_UNORM_S8_UINT,
+ VK_FORMAT_D32_SFLOAT,
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
+ VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R16G16B16A16_SINT,
- VK_FORMAT_R16G16B16A16_UINT,
VK_FORMAT_R16G16B16A16_SNORM,
+ VK_FORMAT_R16G16B16A16_SSCALED,
+ VK_FORMAT_R16G16B16A16_UINT,
VK_FORMAT_R16G16B16A16_UNORM,
- VK_FORMAT_R16G16_UNORM,
- VK_FORMAT_R16G16_SNORM,
+ VK_FORMAT_R16G16B16A16_USCALED,
+ VK_FORMAT_R16G16B16_SFLOAT,
+ VK_FORMAT_R16G16B16_SINT,
+ VK_FORMAT_R16G16B16_SNORM,
+ VK_FORMAT_R16G16B16_SSCALED,
+ VK_FORMAT_R16G16B16_UINT,
+ VK_FORMAT_R16G16B16_UNORM,
+ VK_FORMAT_R16G16B16_USCALED,
VK_FORMAT_R16G16_SFLOAT,
- VK_FORMAT_R16G16_UINT,
VK_FORMAT_R16G16_SINT,
- VK_FORMAT_R16_UNORM,
+ VK_FORMAT_R16G16_SNORM,
+ VK_FORMAT_R16G16_SSCALED,
+ VK_FORMAT_R16G16_UINT,
+ VK_FORMAT_R16G16_UNORM,
+ VK_FORMAT_R16G16_USCALED,
+ VK_FORMAT_R16_SFLOAT,
+ VK_FORMAT_R16_SINT,
VK_FORMAT_R16_SNORM,
+ VK_FORMAT_R16_SSCALED,
VK_FORMAT_R16_UINT,
+ VK_FORMAT_R16_UNORM,
+ VK_FORMAT_R16_USCALED,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R32G32B32A32_SINT,
+ VK_FORMAT_R32G32B32A32_UINT,
+ VK_FORMAT_R32G32B32_SFLOAT,
+ VK_FORMAT_R32G32B32_SINT,
+ VK_FORMAT_R32G32B32_UINT,
+ VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R32G32_SINT,
+ VK_FORMAT_R32G32_UINT,
+ VK_FORMAT_R32_SFLOAT,
+ VK_FORMAT_R32_SINT,
+ VK_FORMAT_R32_UINT,
+ VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+ VK_FORMAT_R4G4_UNORM_PACK8,
+ VK_FORMAT_R5G5B5A1_UNORM_PACK16,
+ VK_FORMAT_R5G6B5_UNORM_PACK16,
+ VK_FORMAT_R8G8B8A8_SINT,
+ VK_FORMAT_R8G8B8A8_SNORM,
VK_FORMAT_R8G8B8A8_SRGB,
- VK_FORMAT_R8G8_UNORM,
- VK_FORMAT_R8G8_SNORM,
+ VK_FORMAT_R8G8B8A8_SSCALED,
+ VK_FORMAT_R8G8B8A8_UINT,
+ VK_FORMAT_R8G8B8A8_UNORM,
+ VK_FORMAT_R8G8B8A8_USCALED,
+ VK_FORMAT_R8G8B8_SINT,
+ VK_FORMAT_R8G8B8_SNORM,
+ VK_FORMAT_R8G8B8_SSCALED,
+ VK_FORMAT_R8G8B8_UINT,
+ VK_FORMAT_R8G8B8_UNORM,
+ VK_FORMAT_R8G8B8_USCALED,
VK_FORMAT_R8G8_SINT,
+ VK_FORMAT_R8G8_SNORM,
+ VK_FORMAT_R8G8_SSCALED,
VK_FORMAT_R8G8_UINT,
- VK_FORMAT_R8_UNORM,
- VK_FORMAT_R8_SNORM,
+ VK_FORMAT_R8G8_UNORM,
+ VK_FORMAT_R8G8_USCALED,
VK_FORMAT_R8_SINT,
+ VK_FORMAT_R8_SNORM,
+ VK_FORMAT_R8_SSCALED,
VK_FORMAT_R8_UINT,
- VK_FORMAT_B10G11R11_UFLOAT_PACK32,
- VK_FORMAT_R32_SFLOAT,
- VK_FORMAT_R32_UINT,
- VK_FORMAT_R32_SINT,
- VK_FORMAT_R16_SFLOAT,
- VK_FORMAT_R16G16B16A16_SFLOAT,
- VK_FORMAT_B8G8R8A8_UNORM,
- VK_FORMAT_B8G8R8A8_SRGB,
- VK_FORMAT_R4G4B4A4_UNORM_PACK16,
- VK_FORMAT_D32_SFLOAT,
- VK_FORMAT_D16_UNORM,
+ VK_FORMAT_R8_UNORM,
+ VK_FORMAT_R8_USCALED,
VK_FORMAT_S8_UINT,
- VK_FORMAT_D16_UNORM_S8_UINT,
- VK_FORMAT_D24_UNORM_S8_UINT,
- VK_FORMAT_D32_SFLOAT_S8_UINT,
- VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
- VK_FORMAT_BC2_UNORM_BLOCK,
- VK_FORMAT_BC3_UNORM_BLOCK,
- VK_FORMAT_BC4_UNORM_BLOCK,
- VK_FORMAT_BC4_SNORM_BLOCK,
- VK_FORMAT_BC5_UNORM_BLOCK,
- VK_FORMAT_BC5_SNORM_BLOCK,
- VK_FORMAT_BC7_UNORM_BLOCK,
- VK_FORMAT_BC6H_UFLOAT_BLOCK,
- VK_FORMAT_BC6H_SFLOAT_BLOCK,
- VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
- VK_FORMAT_BC2_SRGB_BLOCK,
- VK_FORMAT_BC3_SRGB_BLOCK,
- VK_FORMAT_BC7_SRGB_BLOCK,
- VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
- VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
- VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
- VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
- VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
- VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
- VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
- VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
- VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
};
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
for (const auto format : formats) {
@@ -224,9 +297,14 @@ std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
return supported_extensions;
}
+bool IsExtensionSupported(std::span<const std::string> supported_extensions,
+ std::string_view extension) {
+ return std::ranges::find(supported_extensions, extension) != supported_extensions.end();
+}
+
NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
std::span<const std::string> exts) {
- if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) {
+ if (IsExtensionSupported(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) {
VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
shading_rate_props.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
@@ -239,7 +317,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
return NvidiaArchitecture::AmpereOrNewer;
}
}
- if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) {
+ if (IsExtensionSupported(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) {
return NvidiaArchitecture::Turing;
}
return NvidiaArchitecture::VoltaOrOlder;
@@ -542,7 +620,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
- if (khr_workgroup_memory_explicit_layout) {
+ if (khr_workgroup_memory_explicit_layout && is_shader_int16_supported) {
workgroup_layout = {
.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
@@ -553,6 +631,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
.workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
};
SetNext(next, workgroup_layout);
+ } else if (khr_workgroup_memory_explicit_layout) {
+ // TODO(lat9nq): Find a proper fix for this
+ LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_workgroup_memory_explicit_layout due to a "
+ "yuzu bug when host driver does not support 16-bit integers");
+ khr_workgroup_memory_explicit_layout = false;
}
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties;
@@ -585,6 +668,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
+ is_integrated = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+ is_virtual = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
+ is_non_gpu = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
+ properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
+
CollectPhysicalMemoryInfo();
CollectTelemetryParameters();
CollectToolingInfo();
@@ -603,8 +691,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
khr_push_descriptor = false;
break;
}
+ const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff;
+ if (nv_major_version >= 510) {
+ LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
+ cant_blit_msaa = true;
+ }
}
- if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
+ const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
+ if (ext_extended_dynamic_state && is_radv) {
// Mask driver version variant
const u32 version = (properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
@@ -613,6 +707,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
ext_extended_dynamic_state = false;
}
}
+ if (ext_vertex_input_dynamic_state && is_radv) {
+ // TODO(ameerj): Blacklist only offending driver versions
+ // TODO(ameerj): Confirm if RDNA1 is affected
+ const bool is_rdna2 =
+ IsExtensionSupported(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME);
+ if (is_rdna2) {
+ LOG_WARNING(Render_Vulkan,
+ "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware");
+ ext_vertex_input_dynamic_state = false;
+ }
+ }
sets_per_pool = 64;
const bool is_amd =
@@ -628,7 +733,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
has_broken_cube_compatibility = true;
}
}
- const bool is_amd_or_radv = is_amd || driver_id == VK_DRIVER_ID_MESA_RADV;
+ const bool is_amd_or_radv = is_amd || is_radv;
if (ext_sampler_filter_minmax && is_amd_or_radv) {
// Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken.
if (!is_float16_supported) {
@@ -639,6 +744,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
+ const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
if (ext_vertex_input_dynamic_state && is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
@@ -652,6 +758,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
cant_blit_msaa = true;
}
+ if (is_intel_anv) {
+ LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format");
+ must_emulate_bgr565 = true;
+ }
supports_d24_depth =
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
@@ -671,9 +781,10 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
// The wanted format is not supported by hardware, search for alternatives
const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
- UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host "
- "hardware does not support it",
- wanted_format, wanted_usage, format_type);
+ ASSERT_MSG(false,
+ "Format={} with usage={} and type={} has no defined alternatives and host "
+ "hardware does not support it",
+ wanted_format, wanted_usage, format_type);
return wanted_format;
}
@@ -682,21 +793,22 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue;
}
- LOG_WARNING(Render_Vulkan,
- "Emulating format={} with alternative format={} with usage={} and type={}",
- wanted_format, alternative, wanted_usage, format_type);
+ LOG_DEBUG(Render_Vulkan,
+ "Emulating format={} with alternative format={} with usage={} and type={}",
+ wanted_format, alternative, wanted_usage, format_type);
return alternative;
}
// No alternatives found, panic
- UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and "
- "doesn't support any of the alternatives",
- wanted_format, wanted_usage, format_type);
+ ASSERT_MSG(false,
+ "Format={} with usage={} and type={} is not supported by the host hardware and "
+ "doesn't support any of the alternatives",
+ wanted_format, wanted_usage, format_type);
return wanted_format;
}
void Device::ReportLoss() const {
- LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
+ LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
// Wait for the log to flush and for Nsight Aftermath to dump the results
std::this_thread::sleep_for(std::chrono::seconds{15});
@@ -957,6 +1069,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME,
false);
test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
+ test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true);
if (Settings::values.enable_nsight_aftermath) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true);
@@ -969,7 +1082,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
VkPhysicalDeviceFeatures2KHR features{};
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
- VkPhysicalDeviceProperties2KHR physical_properties;
+ VkPhysicalDeviceProperties2KHR physical_properties{};
physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
if (has_khr_shader_float16_int8) {
@@ -1239,15 +1352,50 @@ void Device::CollectTelemetryParameters() {
vendor_name = driver.driverName;
}
+u64 Device::GetDeviceMemoryUsage() const {
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
+ budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
+ budget.pNext = nullptr;
+ physical.GetMemoryProperties(&budget);
+ u64 result{};
+ for (const size_t heap : valid_heap_memory) {
+ result += budget.heapUsage[heap];
+ }
+ return result;
+}
+
void Device::CollectPhysicalMemoryInfo() {
- const auto mem_properties = physical.GetMemoryProperties();
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
+ budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
+ const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr);
+ const auto& mem_properties = mem_info.memoryProperties;
const size_t num_properties = mem_properties.memoryHeapCount;
device_access_memory = 0;
+ u64 device_initial_usage = 0;
+ u64 local_memory = 0;
for (size_t element = 0; element < num_properties; ++element) {
- if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
- device_access_memory += mem_properties.memoryHeaps[element].size;
+ const bool is_heap_local =
+ (mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0;
+ if (!is_integrated && !is_heap_local) {
+ continue;
+ }
+ valid_heap_memory.push_back(element);
+ if (is_heap_local) {
+ local_memory += mem_properties.memoryHeaps[element].size;
+ }
+ if (ext_memory_budget) {
+ device_initial_usage += budget.heapUsage[element];
+ device_access_memory += budget.heapBudget[element];
+ continue;
}
+ device_access_memory += mem_properties.memoryHeaps[element].size;
+ }
+ if (!is_integrated) {
+ return;
}
+ const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
+ device_access_memory = static_cast<u64>(std::max<s64>(
+ std::min<s64>(available_memory - 8_GiB, 4_GiB), static_cast<s64>(local_memory)));
}
void Device::CollectToolingInfo() {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 37d140ebd..d7cc6c593 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -1,12 +1,10 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <string>
-#include <string_view>
#include <unordered_map>
#include <vector>
@@ -342,6 +340,12 @@ public:
return device_access_memory;
}
+ bool CanReportMemoryUsage() const {
+ return ext_memory_budget;
+ }
+
+ u64 GetDeviceMemoryUsage() const;
+
u32 GetSetsPerPool() const {
return sets_per_pool;
}
@@ -354,6 +358,10 @@ public:
return cant_blit_msaa;
}
+ bool MustEmulateBGR565() const {
+ return must_emulate_bgr565;
+ }
+
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -418,6 +426,9 @@ private:
bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list
///< topologies.
bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch.
+ bool is_integrated{}; ///< Is GPU an iGPU.
+ bool is_virtual{}; ///< Is GPU a virtual GPU.
+ bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
@@ -442,16 +453,19 @@ private:
bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization.
bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex.
+ bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
+ bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions.
+ std::vector<size_t> valid_heap_memory; ///< Heaps used.
/// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index bfd6e6add..a082e3059 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -1,12 +1,9 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
-#include <algorithm>
#include <future>
#include <optional>
#include <span>
-#include <utility>
#include <vector>
#include "common/common_types.h"
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h
index e5e3a7144..40419d802 100644
--- a/src/video_core/vulkan_common/vulkan_instance.h
+++ b/src/video_core/vulkan_common/vulkan_instance.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp
index 22833fa56..4eb3913ee 100644
--- a/src/video_core/vulkan_common/vulkan_library.cpp
+++ b/src/video_core/vulkan_common/vulkan_library.cpp
@@ -1,17 +1,17 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
-#include <cstdlib>
#include <string>
#include "common/dynamic_library.h"
#include "common/fs/path_util.h"
+#include "common/logging/log.h"
#include "video_core/vulkan_common/vulkan_library.h"
namespace Vulkan {
Common::DynamicLibrary OpenLibrary() {
+ LOG_DEBUG(Render_Vulkan, "Looking for a Vulkan library");
Common::DynamicLibrary library;
#ifdef __APPLE__
// Check if a path to a specific Vulkan library has been specified.
@@ -24,9 +24,11 @@ Common::DynamicLibrary OpenLibrary() {
}
#else
std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
+ LOG_DEBUG(Render_Vulkan, "Trying Vulkan library: {}", filename);
if (!library.Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
+ LOG_DEBUG(Render_Vulkan, "Trying Vulkan library (second attempt): {}", filename);
void(library.Open(filename.c_str()));
}
#endif
diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h
index 8b28b0e17..364ca979b 100644
--- a/src/video_core/vulkan_common/vulkan_library.h
+++ b/src/video_core/vulkan_common/vulkan_library.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 300a61205..6442898bd 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -1,6 +1,5 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <bit>
@@ -50,7 +49,7 @@ struct Range {
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
}
- UNREACHABLE_MSG("Invalid memory usage={}", usage);
+ ASSERT_MSG(false, "Invalid memory usage={}", usage);
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
@@ -227,7 +226,7 @@ void MemoryCommit::Release() {
}
MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
- : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
+ : device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
export_allocations{export_allocations_},
buffer_image_granularity{
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
@@ -326,7 +325,7 @@ VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
// Remove device local, if it's not supported by the requested resource
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
- UNREACHABLE_MSG("No compatible memory types found");
+ ASSERT_MSG(false, "No compatible memory types found");
return 0;
}
@@ -350,7 +349,7 @@ bool IsHostVisible(MemoryUsage usage) noexcept {
case MemoryUsage::Download:
return true;
}
- UNREACHABLE_MSG("Invalid memory usage={}", usage);
+ ASSERT_MSG(false, "Invalid memory usage={}", usage);
return false;
}
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index 86e8ed119..a5bff03fe 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -1,12 +1,10 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <span>
-#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp
index 3c3238f96..69f9c494b 100644
--- a/src/video_core/vulkan_common/vulkan_surface.cpp
+++ b/src/video_core/vulkan_common/vulkan_surface.cpp
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h
index 05a169e32..5725143e6 100644
--- a/src/video_core/vulkan_common/vulkan_surface.h
+++ b/src/video_core/vulkan_common/vulkan_surface.h
@@ -1,6 +1,5 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index a9faa4807..2ad98dcfe 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -1,12 +1,9 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
-#include <exception>
#include <memory>
#include <optional>
-#include <string_view>
#include <utility>
#include <vector>
@@ -239,8 +236,8 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) &&
X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) &&
X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) &&
- X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) &&
- X(vkGetPhysicalDeviceQueueFamilyProperties);
+ X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceMemoryProperties2) &&
+ X(vkGetPhysicalDeviceProperties) && X(vkGetPhysicalDeviceQueueFamilyProperties);
#undef X
}
@@ -328,6 +325,8 @@ const char* ToString(VkResult result) noexcept {
return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
case VkResult::VK_RESULT_MAX_ENUM:
return "VK_RESULT_MAX_ENUM";
+ case VkResult::VK_ERROR_COMPRESSION_EXHAUSTED_EXT:
+ return "VK_ERROR_COMPRESSION_EXHAUSTED_EXT";
}
return "Unknown";
}
@@ -928,9 +927,12 @@ std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR(
return modes;
}
-VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept {
- VkPhysicalDeviceMemoryProperties properties;
- dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties);
+VkPhysicalDeviceMemoryProperties2 PhysicalDevice::GetMemoryProperties(
+ void* next_structures) const noexcept {
+ VkPhysicalDeviceMemoryProperties2 properties{};
+ properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
+ properties.pNext = next_structures;
+ dld->vkGetPhysicalDeviceMemoryProperties2(physical_device, &properties);
return properties;
}
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index b7ae01c6c..1b3f493bd 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -1,11 +1,9 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <exception>
-#include <iterator>
#include <limits>
#include <memory>
#include <optional>
@@ -173,6 +171,7 @@ struct InstanceDispatch {
PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{};
PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{};
PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{};
+ PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2{};
PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{};
PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{};
@@ -520,9 +519,7 @@ public:
dld{rhs.dld} {}
/// Assign an allocation transfering ownership from another allocation.
- /// Releases any previously held allocation.
PoolAllocations& operator=(PoolAllocations&& rhs) noexcept {
- Release();
allocations = std::move(rhs.allocations);
num = rhs.num;
device = rhs.device;
@@ -531,11 +528,6 @@ public:
return *this;
}
- /// Destroys any held allocation.
- ~PoolAllocations() {
- Release();
- }
-
/// Returns the number of allocations.
std::size_t size() const noexcept {
return num;
@@ -558,19 +550,6 @@ public:
}
private:
- /// Destroys the held allocations if they exist.
- void Release() noexcept {
- if (!allocations) {
- return;
- }
- const Span<AllocationType> span(allocations.get(), num);
- const VkResult result = Free(device, pool, span, *dld);
- // There's no way to report errors from a destructor.
- if (result != VK_SUCCESS) {
- std::terminate();
- }
- }
-
std::unique_ptr<AllocationType[]> allocations;
std::size_t num = 0;
VkDevice device = nullptr;
@@ -951,7 +930,8 @@ public:
std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const;
- VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept;
+ VkPhysicalDeviceMemoryProperties2 GetMemoryProperties(
+ void* next_structures = nullptr) const noexcept;
private:
VkPhysicalDevice physical_device = nullptr;